From 5daead68adcbde07762eb4824001637a15dda6b2 Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" Date: Tue, 4 Oct 2022 16:01:00 +0200 Subject: [PATCH] nixguix: Add support for pseudo url with missing schema Related to T3294 Related to T3781 --- swh/lister/nixguix/lister.py | 21 +++++++++++++------ .../tests/data/nixpkgs-swh_sources.json | 7 +++++++ swh/lister/nixguix/tests/test_lister.py | 2 +- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py index 3fb6f92..e27a99b 100644 --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -129,8 +129,7 @@ def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, st url = urls[index] try: - is_tar = _is_tarball(url) - return is_tar, urls[0] + return _is_tarball(url), urls[0] except IndexError: if request is None: raise ArtifactNatureUndetected( @@ -285,15 +284,25 @@ class NixGuixLister(StatelessLister[PageResult]): ) elif artifact_type == "url": # It's either a tarball or a file - urls = artifact.get("urls") - if not urls: + origin_urls = artifact.get("urls") + if not origin_urls: # Nothing to fetch logger.warning("Skipping url <%s>: empty artifact", artifact) continue - assert urls is not None + assert origin_urls is not None + + # Deal with urls with empty scheme (basic fallback to http) + urls = [] + for url in origin_urls: + urlparsed = urlparse(url) + if urlparsed.scheme == "": + logger.warning("Missing scheme for <%s>, fallback to http", url) + fixed_url = f"http://{url}" + else: + fixed_url = url + urls.append(fixed_url) - # FIXME: T3294: Fix missing scheme in urls origin, *fallback_urls = urls integrity = artifact.get("integrity") diff --git a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json index cde2185..57e32f5 100644 --- a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json +++ b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json @@ -29,6 +29,13 @@ ], "integrity": "sha256-bss09x9yOnuW+Q5BHHjf8nNcCNxCKMdl9/2/jKSFcrQ=" }, + { + "type": "url", + "urls": [ + "www.roudoudou.com/export/cpc/rasm/rasm_v0117_src.zip" + ], + "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" + }, { "type": "url", "urls": [ diff --git a/swh/lister/nixguix/tests/test_lister.py b/swh/lister/nixguix/tests/test_lister.py index 34ba62f..6ed4d1d 100644 --- a/swh/lister/nixguix/tests/test_lister.py +++ b/swh/lister/nixguix/tests/test_lister.py @@ -146,7 +146,7 @@ def test_is_tarball_complex_with_content_type_result( assert origin == url -def test_lister_nixguix(datadir, swh_scheduler, requests_mock): +def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock): """NixGuixLister should list all origins per visit type""" url = "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json" origin_upstream = "https://github.com/NixOS/nixpkgs"