nixguix: Reject Git SSH URLs and pseudo-URLs
For consistency with Maven and Packagist listers
This commit is contained in:
parent
8ea4200909
commit
e8699422d7
3 changed files with 21 additions and 4 deletions
|
@ -402,7 +402,7 @@ class NixGuixLister(StatelessLister[PageResult]):
|
|||
urls = []
|
||||
for url in origin_urls:
|
||||
urlparsed = urlparse(url)
|
||||
if urlparsed.scheme == "":
|
||||
if urlparsed.scheme == "" and not re.match(r"^\w+@[^/]+:", url):
|
||||
logger.warning("Missing scheme for <%s>: fallback to http", url)
|
||||
fixed_url = f"http://{url}"
|
||||
else:
|
||||
|
|
|
@ -53,6 +53,16 @@
|
|||
"urls": [ "unknown://example.org/wrong-scheme-so-skipped.txt" ],
|
||||
"integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI="
|
||||
},
|
||||
{
|
||||
"type": "url",
|
||||
"urls": [ "ssh://git@example.org:wrong-scheme-so-skipped.txt" ],
|
||||
"integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI="
|
||||
},
|
||||
{
|
||||
"type": "url",
|
||||
"urls": [ "git@example.org:git-pseudourl/so-skipped" ],
|
||||
"integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI="
|
||||
},
|
||||
{
|
||||
"type": "url",
|
||||
"urls": [ "https://code.9front.org/hg/plan9front" ],
|
||||
|
|
|
@ -353,13 +353,20 @@ def test_lister_nixguix_mostly_noop(datadir, swh_scheduler, requests_mock):
|
|||
)
|
||||
|
||||
listed_result = lister.run()
|
||||
# only the origin upstream is listed, every other entries are unsupported or incomplete
|
||||
assert listed_result == ListerStats(pages=1, origins=1)
|
||||
|
||||
expected_origins = ["https://github.com/NixOS/nixpkgs"]
|
||||
scheduler_origins = lister.scheduler.get_listed_origins(
|
||||
lister.lister_obj.id
|
||||
).results
|
||||
assert len(scheduler_origins) == 1
|
||||
scheduler_origin_urls = [orig.url for orig in scheduler_origins]
|
||||
|
||||
assert scheduler_origin_urls == expected_origins
|
||||
|
||||
# only the origin upstream is listed, every other entry is unsupported or incomplete
|
||||
assert listed_result == ListerStats(pages=1, origins=1), (
|
||||
f"Expected origins: {' '.join(expected_origins)}, got: "
|
||||
f"{' '.join(scheduler_origin_urls)}"
|
||||
)
|
||||
|
||||
assert scheduler_origins[0].visit_type == "git"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue