nixguix: Further improve tarball detection
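The previous content-type detection was slightly off, mostly for responses whose Content-Type header includes a charset parameter (e.g. "application/gzip; charset=ISO-8859-1"): the exact-match lookup against the known tarball mimetypes failed for such values. This commit fixes it by checking whether the Content-Type starts with one of the known tarball mimetypes instead. Related to T3781.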
This commit is contained in:
parent
ff80a91f0a
commit
2ee103e2bc
3 changed files with 13 additions and 2 deletions
|
@ -121,7 +121,7 @@ PageResult = Tuple[ArtifactType, Union[Artifact, VCS]]
|
|||
VCS_SUPPORTED = ("git", "svn", "hg")
|
||||
|
||||
# Rough approximation of what we can find of mimetypes for tarballs "out there"
|
||||
POSSIBLE_TARBALL_MIMETYPES = set(MIMETYPE_TO_ARCHIVE_FORMAT.keys())
|
||||
POSSIBLE_TARBALL_MIMETYPES = tuple(MIMETYPE_TO_ARCHIVE_FORMAT.keys())
|
||||
|
||||
|
||||
def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, str]:
|
||||
|
@ -218,7 +218,7 @@ def is_tarball(urls: List[str], request: Optional[Any] = None) -> Tuple[bool, st
|
|||
logger.debug("Content-Type: %s", content_type)
|
||||
if content_type == "application/json":
|
||||
return False, urls[0]
|
||||
return content_type in POSSIBLE_TARBALL_MIMETYPES, urls[0]
|
||||
return content_type.startswith(POSSIBLE_TARBALL_MIMETYPES), urls[0]
|
||||
|
||||
raise ArtifactNatureUndetected(
|
||||
f"Cannot determine artifact type from url <{url}>"
|
||||
|
|
|
@ -79,6 +79,13 @@
|
|||
"svn_url": "https://code.call-cc.org/svn/chicken-eggs/release/5/iset/tags/2.2",
|
||||
"svn_revision": 39057
|
||||
},
|
||||
{
|
||||
"type": "url",
|
||||
"urls": [
|
||||
"http://git.marmaro.de/?p=mmh;a=snapshot;h=431604647f89d5aac7b199a7883e98e56e4ccf9e;sf=tgz"
|
||||
],
|
||||
"integrity": "sha256-G/7oY5qdCSJ59VlwHtIbvMdT6+mriXhMqQIHNx65J+E="
|
||||
},
|
||||
{
|
||||
"type": "url",
|
||||
"urls": ["svn://svn.code.sf.net/p/acme-crossass/code-0/trunk"],
|
||||
|
|
|
@ -173,6 +173,10 @@ def test_lister_nixguix_ok(datadir, swh_scheduler, requests_mock):
|
|||
"https://api.github.com/repos/trie/trie",
|
||||
[{"json": {"html_url": "https://github.com/trie/trie.git"}}],
|
||||
)
|
||||
requests_mock.head(
|
||||
"http://git.marmaro.de/?p=mmh;a=snapshot;h=431604647f89d5aac7b199a7883e98e56e4ccf9e;sf=tgz",
|
||||
headers={"Content-Type": "application/gzip; charset=ISO-8859-1"},
|
||||
)
|
||||
|
||||
expected_visit_types = defaultdict(int)
|
||||
# origin upstream is added as origin
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue