From fea6fc04aab5e0ba54476f5793bc7f3b51de3b63 Mon Sep 17 00:00:00 2001 From: Franck Bret Date: Thu, 17 Mar 2022 12:13:04 +0100 Subject: [PATCH] lister: Add new rust crates lister The Crates lister retrieves crate packages for the Rust language. It basically fetches https://github.com/rust-lang/crates.io-index.git to a temp directory and then walks through each file to get the crate's info. --- .pre-commit-config.yaml | 1 + setup.py | 1 + swh/lister/crates/__init__.py | 12 ++ swh/lister/crates/lister.py | 138 ++++++++++++++++++ swh/lister/crates/tasks.py | 19 +++ swh/lister/crates/tests/__init__.py | 29 ++++ .../tests/data/fake-crates-repository.tar.gz | Bin 0 -> 4467 bytes .../tests/data/fake_crates_repository_init.sh | 37 +++++ swh/lister/crates/tests/test_lister.py | 89 +++++++++++ swh/lister/crates/tests/test_tasks.py | 31 ++++ 10 files changed, 357 insertions(+) create mode 100644 swh/lister/crates/__init__.py create mode 100644 swh/lister/crates/lister.py create mode 100644 swh/lister/crates/tasks.py create mode 100644 swh/lister/crates/tests/__init__.py create mode 100644 swh/lister/crates/tests/data/fake-crates-repository.tar.gz create mode 100755 swh/lister/crates/tests/data/fake_crates_repository_init.sh create mode 100644 swh/lister/crates/tests/test_lister.py create mode 100644 swh/lister/crates/tests/test_tasks.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 325888d..f7e550d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,6 +17,7 @@ repos: - id: codespell name: Check source code spelling exclude: ^(swh/lister/.*/tests/data/.*)$ + args: [-L crate] stages: [commit] - id: codespell name: Check commit message spelling diff --git a/setup.py b/setup.py index 3460bc8..6a374fd 100755 --- a/setup.py +++ b/setup.py @@ -58,6 +58,7 @@ setup( lister.bitbucket=swh.lister.bitbucket:register lister.cgit=swh.lister.cgit:register lister.cran=swh.lister.cran:register + lister.crates=swh.lister.crates:register 
lister.debian=swh.lister.debian:register lister.gitea=swh.lister.gitea:register lister.github=swh.lister.github:register diff --git a/swh/lister/crates/__init__.py b/swh/lister/crates/__init__.py new file mode 100644 index 0000000..2b31785 --- /dev/null +++ b/swh/lister/crates/__init__.py @@ -0,0 +1,12 @@ +# Copyright (C) 2022 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def register(): + from .lister import CratesLister + + return { + "lister": CratesLister, + "task_modules": ["%s.tasks" % __name__], + } diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py new file mode 100644 index 0000000..5a95049 --- /dev/null +++ b/swh/lister/crates/lister.py @@ -0,0 +1,138 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import json +import logging +from pathlib import Path +import subprocess +from typing import Any, Dict, Iterator, List + +import iso8601 + +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +# Aliasing the page results returned by `get_pages` method from the lister. +CratesListerPage = List[Dict[str, Any]] + + +class CratesLister(StatelessLister[CratesListerPage]): + """List origins from the "crates.io" forge. + + It basically fetches https://github.com/rust-lang/crates.io-index.git to a + temp directory and then walks through each file to get the crate's info. 
+ """ + + # Part of the lister API, that identifies this lister + LISTER_NAME = "crates" + # (Optional) CVS type of the origins listed by this lister, if constant + VISIT_TYPE = "rust-crate" + + INSTANCE = "crates" + INDEX_REPOSITORY_URL = "https://github.com/rust-lang/crates.io-index.git" + DESTINATION_PATH = Path("/tmp/crates.io-index") + CRATE_FILE_URL_PATTERN = ( + "https://static.crates.io/crates/{crate}/{crate}-{version}.crate" + ) + + def __init__( + self, scheduler: SchedulerInterface, credentials: CredentialsType = None, + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + url=self.INDEX_REPOSITORY_URL, + instance=self.INSTANCE, + ) + + def get_index_repository(self) -> None: + """Get crates.io-index repository up to date running git command.""" + + subprocess.check_call( + ["git", "clone", self.INDEX_REPOSITORY_URL, self.DESTINATION_PATH,] + ) + + def get_crates_index(self) -> List[Path]: + """Build a sorted list of file paths excluding dotted directories and + dotted files. + + Each file path corresponds to a crate that lists all available + versions. + """ + + crates_index = sorted( + path + for path in self.DESTINATION_PATH.rglob("*") + if not any(part.startswith(".") for part in path.parts) + and path.is_file() + and path != self.DESTINATION_PATH / "config.json" + ) + + return crates_index + + def get_pages(self) -> Iterator[CratesListerPage]: + """Yield an iterator sorted by name in ascending order of pages. 
+ + Each page is a list of crate versions with: + - name: Name of the crate + - version: Version + - checksum: Checksum + - crate_file: Url of the crate file + - last_update: Date of the last commit of the corresponding index + file + """ + # Fetch crates.io index repository + self.get_index_repository() + # Get a list of all crates files from the index repository + crates_index = self.get_crates_index() + logger.debug("found %s crates in crates_index", len(crates_index)) + + for crate in crates_index: + page = [] + # %cI is for strict iso8601 date formatting + last_update_str = subprocess.check_output( + ["git", "log", "-1", "--pretty=format:%cI", str(crate)], + cwd=self.DESTINATION_PATH, + ) + last_update = iso8601.parse_date(last_update_str.decode().strip()) + + with crate.open("rb") as current_file: + for line in current_file: + data = json.loads(line) + # pick only the data we need + page.append( + dict( + name=data["name"], + version=data["vers"], + checksum=data["cksum"], + crate_file=self.CRATE_FILE_URL_PATTERN.format( + crate=data["name"], version=data["vers"] + ), + last_update=last_update, + ) + ) + yield page + + def get_origins_from_page(self, page: CratesListerPage) -> Iterator[ListedOrigin]: + """Iterate on all crate pages and yield ListedOrigin instances.""" + + assert self.lister_obj.id is not None + + for version in page: + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=self.VISIT_TYPE, + url=version["crate_file"], + last_update=version["last_update"], + extra_loader_arguments={ + "name": version["name"], + "version": version["version"], + "checksum": version["checksum"], + }, + ) diff --git a/swh/lister/crates/tasks.py b/swh/lister/crates/tasks.py new file mode 100644 index 0000000..33be977 --- /dev/null +++ b/swh/lister/crates/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, 
or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.crates.lister import CratesLister + + +@shared_task(name=__name__ + ".CratesListerTask") +def list_crates(**lister_args): + """Lister task for crates (rust) registry""" + return CratesLister.from_configfile(**lister_args).run().dict() + + +@shared_task(name=__name__ + ".ping") +def _ping(): + return "OK" diff --git a/swh/lister/crates/tests/__init__.py b/swh/lister/crates/tests/__init__.py new file mode 100644 index 0000000..8b98baa --- /dev/null +++ b/swh/lister/crates/tests/__init__.py @@ -0,0 +1,29 @@ +# Copyright (C) 2022 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +from pathlib import PosixPath +import subprocess +from typing import Optional, Union + + +def prepare_repository_from_archive( + archive_path: str, + filename: Optional[str] = None, + tmp_path: Union[PosixPath, str] = "/tmp", +) -> str: + """Given an existing archive_path, uncompress it. + Returns a file repo url which can be used as origin url. + + This does not deal with the case where the archive passed along does not exist. 
+ + """ + if not isinstance(tmp_path, str): + tmp_path = str(tmp_path) + # uncompress folder/repositories/dump for the loader to ingest + subprocess.check_output(["tar", "xf", archive_path, "-C", tmp_path]) + # build the origin url (or some derivative form) + _fname = filename if filename else os.path.basename(archive_path) + repo_url = f"file://{tmp_path}/{_fname}" + return repo_url diff --git a/swh/lister/crates/tests/data/fake-crates-repository.tar.gz b/swh/lister/crates/tests/data/fake-crates-repository.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8b384b4a12a9ba15ba133fd50a0ab08b6c1468bf GIT binary patch literal 4467 zcmZ`+cRbaN_c!CQHyM|Zosn$1Rw6q)gr}QAh(t!NRdlV)WL=a!u9=x@?~%;9xc1)b zF7D^&`TqYq|D6BMd7bfozt7>1r6&2Gk?-fwhD_;#`Wp+JJ(!=YF=^4QGfCA$M7|nm zck1N742RA6(%1?l8npVg+Fy{aGchsihQog3S?*?5u|N`Cyq6db}8ZuXWJKKlBtBhl-@c?W8XBuzS4ivEAn$+<`PRRNo-e zUcAG^MA-Gb&C#I~rV>TECMC6`{IptjE6BmfN?p2f55g6Dkh25tl`Mhbvw)#+Z?c^w zfRdu*DocwVk7^*X30K8J8MMlfH>WC)$My{!>&yKBxz3>kUWl6M9B`FY9g=?Wxn#`z z>EJh2y?6nWFE#yr5w^~I?!uFEe5t?X9vKG&-i`BqXTFit^^AR}VCYkJ4MLoxR-ZI` zs5D0GW0Sgzm9?luM41Fk_cwbUb4Pg$W+R{V&%O_f(DKBy>zK*=+q$tk6~l_0H(57U zBbEwj@#|t)R_5`EIgq9r`KFUvnRQt)iJ_)<4~tMc)x$qxo8P5OJWm|eSo8&V>)6B%C>$tq*7vdBxzS6l-y~#FPITSnn|BcbU!GJP_k-8Ws2~m=}^Lo zVr^Fx_#SC4#j!9y9}{Ef`pQ{KI0EFV%>5gA!(2PiSedH(L`luRfYOKI(e;A021Rl! 
zwM`@!d%%06oyUp6P5os>THcGUpJkU%ena&3oN%w&)?RdQ8B05vgfwYX;1_5TFP$j;gQP_cR zy#7x0D8w-5Y*RVmzrpT!H|!>mD3ZPX#eZZ8wJ33fCKvuyeo}u4uKV3d%|%$0|Ns7jgm9UwZ+|o14c!Ya&aYIBHhcrC_OoAwaK5~d{kZhqDMEl+!~7S z3h{xm`owyaRi~GKadG;{Gc4#Y|665LwCB%SywF>ZnUmRAG8T-i*%=Xwj=m`#41O&O z;w;5p=8p9ZPSiy5`ob{q8m&WbV5Ve(sIc-lX9nhzs=2m2P9!C<+J|GtfT9cN)wVZ= zq1r7j1)}O;-Po-nk#H+>+QZ74bM-BaQLB?xwExr9lU#F`rY|P2!jI>K?|Os)7LBlF z=)wwE6}aY|S#bQo6KD6%>D|Oz`@q*o@RdB|3-fD7dr)XU)O`BFX({l!X*4a`pNm{_ z9tv72e^}K`5@Ip083;rQvRG4C3#xjefs83Hz)M9zQ%jTY670NOyny3@-9(q2oUiWt8s`^)%+yN&Y)VFL`(LUi_i?BX86TkVL949W;Bl9> z4E_4nB30u2BF@P=I%j*R+9iL{C66vzFHYldsgVHE);bTqZK{0rHe^Jk?8( zrsEmsyK5fBPQjO);{k4FfoU||<+h9)#UmJ*LY_=v{$d9S@p*7-s*=Gz-nJ4Jq_so`Vo!mPX${PCSjp=9K}DXQCG68g*ro~*YDw1L@Ni*2!Pieu^@PTGis!6@$7K0&eZ7dqF|`=lderqHDeD& z1;Kmk;i0Szr?F$RgeJnJTjz`*7q%X1oOcYp4K*WRijgL=fd#nf&ep=;G*DC@p&{bmyUDTqZ*sM%_2 zEa$Z408Z#k+!gdCC~)i%l7$4FLf*bvaYO_68uz#4yAKxYUL6i2c-2~!+RpwUM2%GI zgq}Nhzc^e$+oL4r7q_-_vRTk4#JxS9qhpk3uZ$lL=H*XEcw?MpkZEwSnCrq!vKBPg zGQHj{bVWB%(|s_#@B)>gk_xSt;A971ZSNM)g z`LrJ&W%HOWa9@oK^N&bs->y8^6G5fl@$e6bx{Cgq0?eN;;Tsd3GPuEkCJCGG%@%C~ zsYN|wDLznT(K#Kh6UMOjD@6lxm5bfBR)>-`0>Q>{;HmjH?gOe-iJrH&-8f($C1e&ABW~dRIs`%ITtK~quXlN>>k&YZ~pKWOmUcwSPuT*HX1R@>SHy%i4x z>+4-2D;w(KFxh**&d%K^Yh=E+EsNVvLHnAf3XZe5A!C2r>zbtOy%dCJiHyOktP68q zZ(57!LdAb7dxXr*%!A`&(8%4=wl>8F1kqOpD8grZmF(Kx=1-n5lo#Al#^d-`uwmt& zM#j;$D^m|Rv)T{P?DOu5>XQvMr^w~u+|YNI;2tEs22XCNs<@=;rmoxA6sFO7p()fhxgbZ2L_k9l?Jj%9-=f8w9@?r}Dgv?)4+W#j%r4W_Ee7h$skyxFE9% z1&L^^zMe!mnH7_t)Yj}4U`JPuhd%p)hsqDoY`-=|^bn=s{s(n3;!6Wowy!(C`^ z8N*jUEV(uAY9PB$J)-$dwu0NDDMcT$Q4nC_;XTDyhOd5IA~iRGjO)n++NFoZgp2vk zQJ=AMz~+wDY5Ii(SdA@ZEp@ll7y%-3=oGAX$oOj$YP`$|ARNws{a|)XI;O3;i@xQs zd-Goo5cZjM07&Q~8>VMPw|YL_T4L}NT7_Zam#o{f z&UfN*yXm*9Xd7Pb@*fm`d5F_knIxX)^#Wo2Q$SWY_)5XilASs%jzlv|{(^4bRySti zMoESyC3$#j9}Y}n9S^8%IRcT_`ea%NaGp7>06p!7AFEW*2GUP*TRPsjZqI-s6LUb+x=Ls+*UM14a_)E5U9+nXy6x# zX-lI({vn`nU1@o;^fSRz)yEXfF9kUubPDta>d9f>ogu6cEBVl?|X>iCEw9igT$&%NhSNC840J27nnlQ?kEk}1@RIZl&b 
zzwAcRfC1S`o|vKFavN=uM6z%x*Q2=r&^7z5mGuH7zc-Oa`M-hFVM*?=EE->E+&tW# z*9v)*aL3s7%$`XJyIvhG`K;9yG|qh*`7QWokVA?{3eaBg&idrL<*Xw=^x*z6W?jhU zOO*!BHTK0*wYSgE#ovNG7iL;Z#t#a%Dp86uG&Ya%4~*x`Lj-QU-@y5(eOpP;xQ`o2 z8g+DDE=M4nWJA1LeWL#ztGuH;aZ1$$^?7WGns&75Q>c%sGNfB0M2j`7qKJ_kfKEam za9tW88LXCfsD6ouwr2tlGZOwMkb8SQ)yA(@KZ&E!KicdLvn>+5G(gB^tE@kaHO(1* z?WK9nW8_~qX}EcLQtTxjfE5cJEv_~UQcgHS2aN2`M{|UV?#PIR?2n~H?_x5X3g}d` zdlm0=uHXN4Hs3c`PGcGNoAzft zi_4wu7@O3wJJUAx<@zt&LQjYGd(XZPPqy9)v9nxMA6~Kws1$f{p`@`;0I|MExpT+G zq&dZLcg>Mxaz*Xu*q34(@9Be69h=njf*PpGV|8atDu2JO#@E>)U&C|uMh^p-gOfAu z-1#pKpr081;pt~tW1pm7)t^p>b|+8XNoqhWWnSuswAI)-chYhC7Ys}mm(tOt|+i0JgqH%v`))-=yq8p_`hOiA`F5N|#C+}L-Z(d;zDu3)^Fbp8A$z7mb~ib-#ax$fRLhZwML4Y$ukkEe%|I~7)ixFY}%n-z?r5yr5E%2X^U7y{rWk@lb zfC_c_PU03GxzX|T)<~K(TjyDYnAk?BsE@p3sC0vE@7(r=*#blFDy)GO^OLTlP2kqx z=dA4xvUK|MstwnB5B4kdA|Yv>7(dsSlMu276m*FrylnUitceExr3pP_91t|KK_E#e wf`D1JDt*Aifnui&1symEy8w*Be*kPWAGGt$#Qr~bLRf>kuf{Bs43Log9~Iv&DgXcg literal 0 HcmV?d00001 diff --git a/swh/lister/crates/tests/data/fake_crates_repository_init.sh b/swh/lister/crates/tests/data/fake_crates_repository_init.sh new file mode 100755 index 0000000..60680d6 --- /dev/null +++ b/swh/lister/crates/tests/data/fake_crates_repository_init.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# Script to generate fake-crates-repository.tar.gz +# Creates a git repository like https://github.com/rust-lang/crates.io-index +# for tests purposes + +set -euo pipefail + +# files and directories +mkdir -p tmp_dir/crates.io-index/ +cd tmp_dir/crates.io-index/ + +mkdir -p .dot-dir +touch .dot-dir/empty +mkdir -p ra/nd +mkdir -p re/ge + +touch .dot-file +touch config.json + +echo '{"name":"rand","vers":"0.1.1","deps":[],"cksum":"48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d","features":{},"yanked":false}' > ra/nd/rand +echo 
'{"name":"rand","vers":"0.1.2","deps":[{"name":"libc","req":"^0.1.1","features":[""],"optional":false,"default_features":true,"target":null,"kind":"normal"},{"name":"log","req":"^0.2.1","features":[""],"optional":false,"default_features":true,"target":null,"kind":"normal"}],"cksum":"6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7","features":{},"yanked":false}' >> ra/nd/rand + +echo '{"name":"regex","vers":"0.1.0","deps":[],"cksum":"f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5","features":{},"yanked":false}' > re/ge/regex +echo '{"name":"regex","vers":"0.1.1","deps":[{"name":"regex_macros","req":"^0.1.0","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"}],"cksum":"a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36","features":{},"yanked":false}' >> re/ge/regex +echo '{"name":"regex","vers":"0.1.2","deps":[{"name":"regex_macros","req":"^0.1.0","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"}],"cksum":"343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9","features":{},"yanked":false}' >> re/ge/regex +echo '{"name":"regex","vers":"0.1.3","deps":[{"name":"regex_macros","req":"^0.1.0","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"}],"cksum":"defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3","features":{},"yanked":false}' >> re/ge/regex + +echo '{"name":"regex-syntax","vers":"0.1.0","deps":[{"name":"rand","req":"^0.3","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"},{"name":"quickcheck","req":"^0.2","features":[""],"optional":false,"default_features":true,"target":null,"kind":"dev"}],"cksum":"398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944","features":{},"yanked":false}' > re/ge/regex-syntax + +# Init as a git repository +git init +git add . 
+git commit -m "Init fake crates.io-index repository for tests purpose" + +# Save some space +rm .git/hooks/*.sample diff --git a/swh/lister/crates/tests/test_lister.py b/swh/lister/crates/tests/test_lister.py new file mode 100644 index 0000000..b92ce56 --- /dev/null +++ b/swh/lister/crates/tests/test_lister.py @@ -0,0 +1,89 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from pathlib import Path + +from swh.lister.crates.lister import CratesLister +from swh.lister.crates.tests import prepare_repository_from_archive + +expected_origins = [ + { + "name": "rand", + "version": "0.1.1", + "checksum": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", + "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate", + }, + { + "name": "rand", + "version": "0.1.2", + "checksum": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", + "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate", + }, + { + "name": "regex", + "version": "0.1.0", + "checksum": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5", + "url": "https://static.crates.io/crates/regex/regex-0.1.0.crate", + }, + { + "name": "regex", + "version": "0.1.1", + "checksum": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36", + "url": "https://static.crates.io/crates/regex/regex-0.1.1.crate", + }, + { + "name": "regex", + "version": "0.1.2", + "checksum": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9", + "url": "https://static.crates.io/crates/regex/regex-0.1.2.crate", + }, + { + "name": "regex", + "version": "0.1.3", + "checksum": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3", + "url": "https://static.crates.io/crates/regex/regex-0.1.3.crate", + }, + { + "name": "regex-syntax", + "version": "0.1.0", 
+ "checksum": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", + "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate", + }, +] + + +def test_crates_lister(datadir, tmp_path, swh_scheduler): + archive_path = Path(datadir, "fake-crates-repository.tar.gz") + repo_url = prepare_repository_from_archive( + archive_path, "crates.io-index", tmp_path + ) + + lister = CratesLister(scheduler=swh_scheduler) + lister.INDEX_REPOSITORY_URL = repo_url + lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" + + res = lister.run() + + assert res.pages == 3 + assert res.origins == 7 + + expected_origins_sorted = sorted(expected_origins, key=lambda x: x.get("url")) + scheduler_origins_sorted = sorted( + swh_scheduler.get_listed_origins(lister.lister_obj.id).results, + key=lambda x: x.url, + ) + + for scheduled, expected in zip(scheduler_origins_sorted, expected_origins_sorted): + assert scheduled.visit_type == "rust-crate" + assert scheduled.url == expected.get("url") + assert scheduled.extra_loader_arguments.get("name") == expected.get("name") + assert scheduled.extra_loader_arguments.get("version") == expected.get( + "version" + ) + assert scheduled.extra_loader_arguments.get("checksum") == expected.get( + "checksum" + ) + + assert len(scheduler_origins_sorted) == len(expected_origins_sorted) diff --git a/swh/lister/crates/tests/test_tasks.py b/swh/lister/crates/tests/test_tasks.py new file mode 100644 index 0000000..09c8b97 --- /dev/null +++ b/swh/lister/crates/tests/test_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_crates_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = 
swh_scheduler_celery_app.send_task("swh.lister.crates.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_crates_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked CratesLister + lister = mocker.patch("swh.lister.crates.tasks.CratesLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task("swh.lister.crates.tasks.CratesListerTask") + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with()