nixguix: Use the Content-Disposition header from the HTTP HEAD response if provided

The filename it carries is used as a last fallback after the Content-Type check, instead of raising immediately.

Related to T3781
This commit is contained in:
Antoine R. Dumont (@ardumont) 2022-10-25 17:46:30 +02:00
parent 026fea21da
commit 81688ca17e
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
3 changed files with 50 additions and 2 deletions

View file

@ -242,12 +242,33 @@ def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, st
url,
)
origin = urls[0]
content_type = response.headers.get("Content-Type")
if content_type:
logger.debug("Content-Type: %s", content_type)
if content_type == "application/json":
return False, urls[0]
return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), urls[0]
return False, origin
return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), origin
content_disposition = response.headers.get("Content-Disposition")
if content_disposition:
logger.debug("Content-Disposition: %s", content_disposition)
if "filename=" in content_disposition:
fields = content_disposition.split("; ")
for field in fields:
if "filename=" in field:
_, filename = field.split("filename=")
break
return (
url_endswith(
urlparse(filename),
TARBALL_EXTENSIONS,
raise_when_no_extension=False,
),
origin,
)
raise ArtifactNatureUndetected(
f"Cannot determine artifact type from url <{url}>"

View file

@ -272,6 +272,20 @@
"https://codeload.github.com/fifengine/fifechan/tar.gz/0.1.5"
],
"integrity": "sha256-Kb5f9LN54vxPiO99i8FyNCEw3T53owYfZMinXv5OunM="
},
{
"type": "url",
"urls": [
"https://codeload.github.com/unknown-horizons/unknown-horizons/tar.gz/2019.1"
],
"integrity": "sha256-pBf9PTQiEv0ZDk8hvoLvE8EOHtfCiPu+RuRiAM895Ng="
},
{
"type": "url",
"urls": [
"https://codeload.github.com/fifengine/fifengine/tar.gz/0.4.2"
],
"integrity": "sha256-6IK1W++jauLxqJraFq8PgUobePfL5gIexbFgVgTPj/g="
}
],
"version": "1",

View file

@ -240,6 +240,19 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock):
"Content-Type": "application/x-gzip",
},
)
requests_mock.head(
"https://codeload.github.com/unknown-horizons/unknown-horizons/tar.gz/2019.1",
headers={
"Content-Disposition": "attachment; filename=unknown-horizons-2019.1.tar.gz",
},
)
requests_mock.head(
"https://codeload.github.com/fifengine/fifengine/tar.gz/0.4.2",
headers={
"Content-Disposition": "attachment; name=fieldName; "
"filename=fifengine-0.4.2.tar.gz; other=stuff",
},
)
expected_visit_types = defaultdict(int)
# origin upstream is added as origin