crates: Create one origin per package instead of per version
Previously we had as many origins as there were versions of a crate package, and each origin URL was a link to a specific crate version package. Refactor to have one origin per package name and add an 'artifacts' entry to extra_loader_arguments that lists every version along with its package URL and checksum. The origin URL is now a link to the related HTTP API endpoint for a package name. Related to T4104
This commit is contained in:
parent
c251594a1f
commit
985b71e80c
2 changed files with 97 additions and 55 deletions
|
@ -8,6 +8,7 @@ import logging
|
|||
from pathlib import Path
|
||||
import subprocess
|
||||
from typing import Any, Dict, Iterator, List
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import iso8601
|
||||
|
||||
|
@ -32,7 +33,7 @@ class CratesLister(StatelessLister[CratesListerPage]):
|
|||
# Part of the lister API, that identifies this lister
|
||||
LISTER_NAME = "crates"
|
||||
# (Optional) VCS type of the origins listed by this lister, if constant
|
||||
VISIT_TYPE = "rust-crate"
|
||||
VISIT_TYPE = "crates"
|
||||
|
||||
INSTANCE = "crates"
|
||||
INDEX_REPOSITORY_URL = "https://github.com/rust-lang/crates.io-index.git"
|
||||
|
@ -40,6 +41,7 @@ class CratesLister(StatelessLister[CratesListerPage]):
|
|||
CRATE_FILE_URL_PATTERN = (
|
||||
"https://static.crates.io/crates/{crate}/{crate}-{version}.crate"
|
||||
)
|
||||
CRATE_API_URL_PATTERN = "https://crates.io/api/v1/crates/{crate}"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -131,15 +133,30 @@ class CratesLister(StatelessLister[CratesListerPage]):
|
|||
|
||||
assert self.lister_obj.id is not None
|
||||
|
||||
url = self.CRATE_API_URL_PATTERN.format(crate=page[0]["name"])
|
||||
last_update = page[0]["last_update"]
|
||||
artifacts = []
|
||||
|
||||
for version in page:
|
||||
yield ListedOrigin(
|
||||
lister_id=self.lister_obj.id,
|
||||
visit_type=self.VISIT_TYPE,
|
||||
url=version["crate_file"],
|
||||
last_update=version["last_update"],
|
||||
extra_loader_arguments={
|
||||
"name": version["name"],
|
||||
"version": version["version"],
|
||||
"checksum": version["checksum"],
|
||||
filename = urlparse(version["crate_file"]).path.split("/")[-1]
|
||||
# Build an artifact entry following original-artifacts-json specification
|
||||
# https://docs.softwareheritage.org/devel/swh-storage/extrinsic-metadata-specification.html#original-artifacts-json # noqa: B950
|
||||
artifact = {
|
||||
"filename": f"{filename}",
|
||||
"checksums": {
|
||||
"sha256": f"{version['checksum']}",
|
||||
},
|
||||
)
|
||||
"url": version["crate_file"],
|
||||
"version": version["version"],
|
||||
}
|
||||
artifacts.append(artifact)
|
||||
|
||||
yield ListedOrigin(
|
||||
lister_id=self.lister_obj.id,
|
||||
visit_type=self.VISIT_TYPE,
|
||||
url=url,
|
||||
last_update=last_update,
|
||||
extra_loader_arguments={
|
||||
"artifacts": artifacts,
|
||||
},
|
||||
)
|
||||
|
|
|
@ -10,46 +10,75 @@ from swh.lister.crates.tests import prepare_repository_from_archive
|
|||
|
||||
expected_origins = [
|
||||
{
|
||||
"name": "rand",
|
||||
"version": "0.1.1",
|
||||
"checksum": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d",
|
||||
"url": "https://static.crates.io/crates/rand/rand-0.1.1.crate",
|
||||
"url": "https://crates.io/api/v1/crates/rand",
|
||||
"artifacts": [
|
||||
{
|
||||
"checksums": {
|
||||
"sha256": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", # noqa: B950
|
||||
},
|
||||
"filename": "rand-0.1.1.crate",
|
||||
"url": "https://static.crates.io/crates/rand/rand-0.1.1.crate",
|
||||
"version": "0.1.1",
|
||||
},
|
||||
{
|
||||
"checksums": {
|
||||
"sha256": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", # noqa: B950
|
||||
},
|
||||
"filename": "rand-0.1.2.crate",
|
||||
"url": "https://static.crates.io/crates/rand/rand-0.1.2.crate",
|
||||
"version": "0.1.2",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "rand",
|
||||
"version": "0.1.2",
|
||||
"checksum": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7",
|
||||
"url": "https://static.crates.io/crates/rand/rand-0.1.2.crate",
|
||||
"url": "https://crates.io/api/v1/crates/regex",
|
||||
"artifacts": [
|
||||
{
|
||||
"checksums": {
|
||||
"sha256": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5", # noqa: B950
|
||||
},
|
||||
"filename": "regex-0.1.0.crate",
|
||||
"url": "https://static.crates.io/crates/regex/regex-0.1.0.crate",
|
||||
"version": "0.1.0",
|
||||
},
|
||||
{
|
||||
"checksums": {
|
||||
"sha256": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36", # noqa: B950
|
||||
},
|
||||
"filename": "regex-0.1.1.crate",
|
||||
"url": "https://static.crates.io/crates/regex/regex-0.1.1.crate",
|
||||
"version": "0.1.1",
|
||||
},
|
||||
{
|
||||
"checksums": {
|
||||
"sha256": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9", # noqa: B950
|
||||
},
|
||||
"filename": "regex-0.1.2.crate",
|
||||
"url": "https://static.crates.io/crates/regex/regex-0.1.2.crate",
|
||||
"version": "0.1.2",
|
||||
},
|
||||
{
|
||||
"checksums": {
|
||||
"sha256": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3", # noqa: B950
|
||||
},
|
||||
"filename": "regex-0.1.3.crate",
|
||||
"url": "https://static.crates.io/crates/regex/regex-0.1.3.crate",
|
||||
"version": "0.1.3",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "regex",
|
||||
"version": "0.1.0",
|
||||
"checksum": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5",
|
||||
"url": "https://static.crates.io/crates/regex/regex-0.1.0.crate",
|
||||
},
|
||||
{
|
||||
"name": "regex",
|
||||
"version": "0.1.1",
|
||||
"checksum": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36",
|
||||
"url": "https://static.crates.io/crates/regex/regex-0.1.1.crate",
|
||||
},
|
||||
{
|
||||
"name": "regex",
|
||||
"version": "0.1.2",
|
||||
"checksum": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9",
|
||||
"url": "https://static.crates.io/crates/regex/regex-0.1.2.crate",
|
||||
},
|
||||
{
|
||||
"name": "regex",
|
||||
"version": "0.1.3",
|
||||
"checksum": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3",
|
||||
"url": "https://static.crates.io/crates/regex/regex-0.1.3.crate",
|
||||
},
|
||||
{
|
||||
"name": "regex-syntax",
|
||||
"version": "0.1.0",
|
||||
"checksum": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944",
|
||||
"url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate",
|
||||
"url": "https://crates.io/api/v1/crates/regex-syntax",
|
||||
"artifacts": [
|
||||
{
|
||||
"checksums": {
|
||||
"sha256": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", # noqa: B950
|
||||
},
|
||||
"filename": "regex-syntax-0.1.0.crate",
|
||||
"url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate",
|
||||
"version": "0.1.0",
|
||||
},
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
|
@ -67,7 +96,7 @@ def test_crates_lister(datadir, tmp_path, swh_scheduler):
|
|||
res = lister.run()
|
||||
|
||||
assert res.pages == 3
|
||||
assert res.origins == 7
|
||||
assert res.origins == 3
|
||||
|
||||
expected_origins_sorted = sorted(expected_origins, key=lambda x: x.get("url"))
|
||||
scheduler_origins_sorted = sorted(
|
||||
|
@ -76,14 +105,10 @@ def test_crates_lister(datadir, tmp_path, swh_scheduler):
|
|||
)
|
||||
|
||||
for scheduled, expected in zip(scheduler_origins_sorted, expected_origins_sorted):
|
||||
assert scheduled.visit_type == "rust-crate"
|
||||
assert scheduled.visit_type == "crates"
|
||||
assert scheduled.url == expected.get("url")
|
||||
assert scheduled.extra_loader_arguments.get("name") == expected.get("name")
|
||||
assert scheduled.extra_loader_arguments.get("version") == expected.get(
|
||||
"version"
|
||||
)
|
||||
assert scheduled.extra_loader_arguments.get("checksum") == expected.get(
|
||||
"checksum"
|
||||
assert scheduled.extra_loader_arguments.get("artifacts") == expected.get(
|
||||
"artifacts"
|
||||
)
|
||||
|
||||
assert len(scheduler_origins_sorted) == len(expected_origins_sorted)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue