nixguix: Reject Git SSH URLs and pseudo-URLs

For consistency with the Maven and Packagist listers.
This commit is contained in:
Valentin Lorentz 2022-11-04 13:50:25 +01:00
parent 8ea4200909
commit e8699422d7
3 changed files with 21 additions and 4 deletions

View file

@ -402,7 +402,7 @@ class NixGuixLister(StatelessLister[PageResult]):
urls = []
for url in origin_urls:
urlparsed = urlparse(url)
if urlparsed.scheme == "":
if urlparsed.scheme == "" and not re.match(r"^\w+@[^/]+:", url):
logger.warning("Missing scheme for <%s>: fallback to http", url)
fixed_url = f"http://{url}"
else:

View file

@ -53,6 +53,16 @@
"urls": [ "unknown://example.org/wrong-scheme-so-skipped.txt" ],
"integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI="
},
{
"type": "url",
"urls": [ "ssh://git@example.org:wrong-scheme-so-skipped.txt" ],
"integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI="
},
{
"type": "url",
"urls": [ "git@example.org:git-pseudourl/so-skipped" ],
"integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI="
},
{
"type": "url",
"urls": [ "https://code.9front.org/hg/plan9front" ],

View file

@ -353,13 +353,20 @@ def test_lister_nixguix_mostly_noop(datadir, swh_scheduler, requests_mock):
)
listed_result = lister.run()
# only the origin upstream is listed, every other entry is unsupported or incomplete
assert listed_result == ListerStats(pages=1, origins=1)
expected_origins = ["https://github.com/NixOS/nixpkgs"]
scheduler_origins = lister.scheduler.get_listed_origins(
lister.lister_obj.id
).results
assert len(scheduler_origins) == 1
scheduler_origin_urls = [orig.url for orig in scheduler_origins]
assert scheduler_origin_urls == expected_origins
# only the origin upstream is listed, every other entry is unsupported or incomplete
assert listed_result == ListerStats(pages=1, origins=1), (
f"Expected origins: {' '.join(expected_origins)}, got: "
f"{' '.join(scheduler_origin_urls)}"
)
assert scheduler_origins[0].visit_type == "git"