nixguix: Improve is_tarball detection pattern

This now includes all query parameter values as paths to check, in addition to the
url path. When any of these paths has an extension, the extensions are matched against
the known tarball extensions. When no extension is detected, it behaves as before: it
falls back to a HEAD request on the url to get more information on the file.

Prior to this commit, this only looked at the values of a hard-coded list of query
parameter keys (file, f, name, url), which were detected through docker runs. The new
approach should reduce future misdetections (when new unknown "keys" show up in the wild).

Related to T3781
This commit is contained in:
Antoine R. Dumont (@ardumont) 2022-10-05 11:52:43 +02:00
parent 2ee103e2bc
commit f2377c283a
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
2 changed files with 7 additions and 17 deletions

View file

@ -157,22 +157,12 @@ def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, st
if urlparsed.scheme not in ("http", "https", "ftp"):
raise ArtifactNatureMistyped(f"Mistyped artifact '{url}'")
errors = []
query_params = dict(parse_qsl(urlparsed.query))
for path in [query_params.get(key) for key in ["f", "file", "url", "name"]] + [
urlparsed.path
]:
if not path:
continue
try:
file_ = Path(path).suffixes[-1]
break
except IndexError as e:
errors.append(ArtifactWithoutExtension(e))
if errors:
raise errors[-1]
return file_.lstrip(".") in TARBALL_EXTENSIONS
paths = [
Path(p) for (_, p) in [("_", urlparsed.path)] + parse_qsl(urlparsed.query)
]
if not any(path.suffix != "" for path in paths):
raise ArtifactWithoutExtension
return any(path.suffix.endswith(tuple(TARBALL_EXTENSIONS)) for path in paths)
index = random.randrange(len(urls))
url = urls[index]

View file

@ -47,7 +47,7 @@ def test_is_tarball_simple(tarballs):
@pytest.mark.parametrize(
"query_param",
["file", "f", "url", "name"],
["file", "f", "url", "name", "anykeyreally"],
)
def test_is_tarball_not_so_simple(query_param):
"""More involved check on tarball should discriminate between tarball and file"""