Enable black

- blackify all the Python files,
- enable black in pre-commit,
- add a black tox environment.
David Douard 2020-04-08 16:31:22 +02:00
parent 1ae75166c7
commit 93a4d8b784
97 changed files with 1734 additions and 1642 deletions
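The pre-commit hook and the tox environment mentioned in the commit message live in .pre-commit-config.yaml and tox.ini, neither of which appears in the hunks excerpted below. As a rough sketch of what such entries typically look like (the black release pin and the target path are assumptions, not taken from this commit):

# .pre-commit-config.yaml (sketch)
repos:
  - repo: https://github.com/psf/black
    rev: 19.10b0        # assumed pin; use whatever black release the project tracks
    hooks:
      - id: black

# tox.ini (sketch)
[testenv:black]
skip_install = true
deps =
  black
commands =
  black --check swh    # assumed target directory

With entries along these lines, "pre-commit run black" checks the staged files and "tox -e black" runs the same check over the whole tree, locally or in CI.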


@@ -7,7 +7,8 @@ def register():
from .models import GNUModel
from .lister import GNULister
return {'models': [GNUModel],
'lister': GNULister,
'task_modules': ['%s.tasks' % __name__],
}
return {
"models": [GNUModel],
"lister": GNULister,
"task_modules": ["%s.tasks" % __name__],
}


@@ -18,12 +18,12 @@ logger = logging.getLogger(__name__)
class GNULister(SimpleLister):
MODEL = GNUModel
LISTER_NAME = 'gnu'
instance = 'gnu'
LISTER_NAME = "gnu"
instance = "gnu"
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.gnu_tree = GNUTree('https://ftp.gnu.org/tree.json.gz')
self.gnu_tree = GNUTree("https://ftp.gnu.org/tree.json.gz")
def task_dict(self, origin_type, origin_url, **kwargs):
"""Return task format dict
@@ -51,10 +51,10 @@ class GNULister(SimpleLister):
"""
artifacts = self.gnu_tree.artifacts[origin_url]
assert origin_type == 'tar'
assert origin_type == "tar"
return utils.create_task_dict(
'load-archive-files',
kwargs.get('policy', 'oneshot'),
"load-archive-files",
kwargs.get("policy", "oneshot"),
url=origin_url,
artifacts=artifacts,
retries_left=3,
@@ -103,11 +103,11 @@ class GNULister(SimpleLister):
"""
return {
'uid': repo['url'],
'name': repo['name'],
'full_name': repo['name'],
'html_url': repo['url'],
'origin_url': repo['url'],
'time_last_updated': repo['time_modified'],
'origin_type': 'tar',
"uid": repo["url"],
"name": repo["name"],
"full_name": repo["name"],
"html_url": repo["url"],
"origin_url": repo["url"],
"time_last_updated": repo["time_modified"],
"origin_type": "tar",
}


@@ -11,7 +11,8 @@ class GNUModel(ModelBase):
"""a GNU repository representation
"""
__tablename__ = 'gnu_repo'
__tablename__ = "gnu_repo"
uid = Column(String, primary_key=True)
time_last_updated = Column(DateTime)


@@ -7,12 +7,12 @@ from celery import shared_task
from .lister import GNULister
@shared_task(name=__name__ + '.GNUListerTask')
@shared_task(name=__name__ + ".GNUListerTask")
def list_gnu_full(**lister_args):
"""List lister for the GNU source code archive"""
return GNULister(**lister_args).run()
@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
return 'OK'
return "OK"


@@ -10,43 +10,41 @@ logger = logging.getLogger(__name__)
def test_gnu_lister(swh_listers, requests_mock_datadir):
lister = swh_listers['gnu']
lister = swh_listers["gnu"]
lister.run()
r = lister.scheduler.search_tasks(task_type='load-archive-files')
r = lister.scheduler.search_tasks(task_type="load-archive-files")
assert len(r) == 383
for row in r:
assert row['type'] == 'load-archive-files'
assert row["type"] == "load-archive-files"
# arguments check
args = row['arguments']['args']
args = row["arguments"]["args"]
assert len(args) == 0
# kwargs
kwargs = row['arguments']['kwargs']
assert set(kwargs.keys()) == {'url', 'artifacts'}
kwargs = row["arguments"]["kwargs"]
assert set(kwargs.keys()) == {"url", "artifacts"}
url = kwargs['url']
assert url.startswith('https://ftp.gnu.org')
url = kwargs["url"]
assert url.startswith("https://ftp.gnu.org")
url_suffix = url.split('https://ftp.gnu.org')[1]
assert 'gnu' in url_suffix or 'old-gnu' in url_suffix
url_suffix = url.split("https://ftp.gnu.org")[1]
assert "gnu" in url_suffix or "old-gnu" in url_suffix
artifacts = kwargs['artifacts']
artifacts = kwargs["artifacts"]
# check the artifact's structure
artifact = artifacts[0]
assert set(artifact.keys()) == {
'url', 'length', 'time', 'filename', 'version'
}
assert set(artifact.keys()) == {"url", "length", "time", "filename", "version"}
for artifact in artifacts:
logger.debug(artifact)
# 'time' is an isoformat string now
for key in ['url', 'time', 'filename', 'version']:
for key in ["url", "time", "filename", "version"]:
assert isinstance(artifact[key], str)
assert isinstance(artifact['length'], int)
assert isinstance(artifact["length"], int)
assert row['policy'] == 'oneshot'
assert row['priority'] is None
assert row['retries_left'] == 3
assert row["policy"] == "oneshot"
assert row["priority"] is None
assert row["retries_left"] == 3


@@ -2,22 +2,20 @@ from unittest.mock import patch
def test_ping(swh_app, celery_session_worker):
res = swh_app.send_task(
'swh.lister.gnu.tasks.ping')
res = swh_app.send_task("swh.lister.gnu.tasks.ping")
assert res
res.wait()
assert res.successful()
assert res.result == 'OK'
assert res.result == "OK"
@patch('swh.lister.gnu.tasks.GNULister')
@patch("swh.lister.gnu.tasks.GNULister")
def test_lister(lister, swh_app, celery_session_worker):
# setup the mocked GNULister
lister.return_value = lister
lister.run.return_value = None
res = swh_app.send_task(
'swh.lister.gnu.tasks.GNUListerTask')
res = swh_app.send_task("swh.lister.gnu.tasks.GNUListerTask")
assert res
res.wait()
assert res.successful()


@@ -9,26 +9,30 @@ import pytest
from os import path
from swh.lister.gnu.tree import (
GNUTree, find_artifacts, check_filename_is_archive, load_raw_data,
get_version, format_date
GNUTree,
find_artifacts,
check_filename_is_archive,
load_raw_data,
get_version,
format_date,
)
def test_load_raw_data_from_query(requests_mock_datadir):
actual_json = load_raw_data('https://ftp.gnu.org/tree.json.gz')
actual_json = load_raw_data("https://ftp.gnu.org/tree.json.gz")
assert actual_json is not None
assert isinstance(actual_json, list)
assert len(actual_json) == 2
def test_load_raw_data_from_query_failure(requests_mock_datadir):
inexistant_url = 'https://ftp2.gnu.org/tree.unknown.gz'
with pytest.raises(ValueError, match='Error during query'):
inexistant_url = "https://ftp2.gnu.org/tree.unknown.gz"
with pytest.raises(ValueError, match="Error during query"):
load_raw_data(inexistant_url)
def test_load_raw_data_from_file(datadir):
filepath = path.join(datadir, 'https_ftp.gnu.org', 'tree.json.gz')
filepath = path.join(datadir, "https_ftp.gnu.org", "tree.json.gz")
actual_json = load_raw_data(filepath)
assert actual_json is not None
assert isinstance(actual_json, list)
@@ -36,115 +40,115 @@ def test_load_raw_data_from_file(datadir):
def test_load_raw_data_from_file_failure(datadir):
unknown_path = path.join(datadir, 'ftp.gnu.org2', 'tree.json.gz')
unknown_path = path.join(datadir, "ftp.gnu.org2", "tree.json.gz")
with pytest.raises(FileNotFoundError):
load_raw_data(unknown_path)
def test_tree_json(requests_mock_datadir):
tree_json = GNUTree('https://ftp.gnu.org/tree.json.gz')
tree_json = GNUTree("https://ftp.gnu.org/tree.json.gz")
assert tree_json.projects['https://ftp.gnu.org/gnu/8sync/'] == {
'name': '8sync',
'time_modified': '2017-03-18T06:10:08+00:00',
'url': 'https://ftp.gnu.org/gnu/8sync/'
assert tree_json.projects["https://ftp.gnu.org/gnu/8sync/"] == {
"name": "8sync",
"time_modified": "2017-03-18T06:10:08+00:00",
"url": "https://ftp.gnu.org/gnu/8sync/",
}
assert tree_json.projects['https://ftp.gnu.org/gnu/3dldf/'] == {
'name': '3dldf',
'time_modified': '2013-12-13T19:00:36+00:00',
'url': 'https://ftp.gnu.org/gnu/3dldf/'
assert tree_json.projects["https://ftp.gnu.org/gnu/3dldf/"] == {
"name": "3dldf",
"time_modified": "2013-12-13T19:00:36+00:00",
"url": "https://ftp.gnu.org/gnu/3dldf/",
}
assert tree_json.projects['https://ftp.gnu.org/gnu/a2ps/'] == {
'name': 'a2ps',
'time_modified': '2007-12-29T03:55:05+00:00',
'url': 'https://ftp.gnu.org/gnu/a2ps/'
assert tree_json.projects["https://ftp.gnu.org/gnu/a2ps/"] == {
"name": "a2ps",
"time_modified": "2007-12-29T03:55:05+00:00",
"url": "https://ftp.gnu.org/gnu/a2ps/",
}
assert tree_json.projects['https://ftp.gnu.org/old-gnu/xshogi/'] == {
'name': 'xshogi',
'time_modified': '2003-08-02T11:15:22+00:00',
'url': 'https://ftp.gnu.org/old-gnu/xshogi/'
assert tree_json.projects["https://ftp.gnu.org/old-gnu/xshogi/"] == {
"name": "xshogi",
"time_modified": "2003-08-02T11:15:22+00:00",
"url": "https://ftp.gnu.org/old-gnu/xshogi/",
}
assert tree_json.artifacts['https://ftp.gnu.org/old-gnu/zlibc/'] == [
assert tree_json.artifacts["https://ftp.gnu.org/old-gnu/zlibc/"] == [
{
'url': 'https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9b.tar.gz', # noqa
'length': 90106,
'time': '1997-03-10T08:00:00+00:00',
'filename': 'zlibc-0.9b.tar.gz',
'version': '0.9b',
"url": "https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9b.tar.gz", # noqa
"length": 90106,
"time": "1997-03-10T08:00:00+00:00",
"filename": "zlibc-0.9b.tar.gz",
"version": "0.9b",
},
{
'url': 'https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9e.tar.gz', # noqa
'length': 89625,
'time': '1997-04-07T07:00:00+00:00',
'filename': 'zlibc-0.9e.tar.gz',
'version': '0.9e',
}
"url": "https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9e.tar.gz", # noqa
"length": 89625,
"time": "1997-04-07T07:00:00+00:00",
"filename": "zlibc-0.9e.tar.gz",
"version": "0.9e",
},
]
def test_tree_json_failures(requests_mock_datadir):
url = 'https://unknown/tree.json.gz'
url = "https://unknown/tree.json.gz"
tree_json = GNUTree(url)
with pytest.raises(ValueError, match='Error during query to %s' % url):
tree_json.artifacts['https://ftp.gnu.org/gnu/3dldf/']
with pytest.raises(ValueError, match="Error during query to %s" % url):
tree_json.artifacts["https://ftp.gnu.org/gnu/3dldf/"]
with pytest.raises(ValueError, match='Error during query to %s' % url):
tree_json.projects['https://ftp.gnu.org/old-gnu/xshogi/']
with pytest.raises(ValueError, match="Error during query to %s" % url):
tree_json.projects["https://ftp.gnu.org/old-gnu/xshogi/"]
def test_find_artifacts_small_sample(datadir):
expected_artifacts = [
{
'url': '/root/artanis/artanis-0.2.1.tar.bz2',
'time': '2017-05-19T14:59:39+00:00',
'length': 424081,
'version': '0.2.1',
'filename': 'artanis-0.2.1.tar.bz2',
"url": "/root/artanis/artanis-0.2.1.tar.bz2",
"time": "2017-05-19T14:59:39+00:00",
"length": 424081,
"version": "0.2.1",
"filename": "artanis-0.2.1.tar.bz2",
},
{
'url': '/root/xboard/winboard/winboard-4_0_0-src.zip', # noqa
'time': '1998-06-21T09:55:00+00:00',
'length': 1514448,
'version': '4_0_0-src',
'filename': 'winboard-4_0_0-src.zip',
"url": "/root/xboard/winboard/winboard-4_0_0-src.zip", # noqa
"time": "1998-06-21T09:55:00+00:00",
"length": 1514448,
"version": "4_0_0-src",
"filename": "winboard-4_0_0-src.zip",
},
{
'url': '/root/xboard/xboard-3.6.2.tar.gz', # noqa
'time': '1997-07-25T07:00:00+00:00',
'length': 450164,
'version': '3.6.2',
'filename': 'xboard-3.6.2.tar.gz',
"url": "/root/xboard/xboard-3.6.2.tar.gz", # noqa
"time": "1997-07-25T07:00:00+00:00",
"length": 450164,
"version": "3.6.2",
"filename": "xboard-3.6.2.tar.gz",
},
{
'url': '/root/xboard/xboard-4.0.0.tar.gz', # noqa
'time': '1998-06-21T09:55:00+00:00',
'length': 514951,
'version': '4.0.0',
'filename': 'xboard-4.0.0.tar.gz',
"url": "/root/xboard/xboard-4.0.0.tar.gz", # noqa
"time": "1998-06-21T09:55:00+00:00",
"length": 514951,
"version": "4.0.0",
"filename": "xboard-4.0.0.tar.gz",
},
]
file_structure = json.load(open(path.join(datadir, 'tree.min.json')))
actual_artifacts = find_artifacts(file_structure, '/root/')
file_structure = json.load(open(path.join(datadir, "tree.min.json")))
actual_artifacts = find_artifacts(file_structure, "/root/")
assert actual_artifacts == expected_artifacts
def test_find_artifacts(datadir):
file_structure = json.load(open(path.join(datadir, 'tree.json')))
actual_artifacts = find_artifacts(file_structure, '/root/')
file_structure = json.load(open(path.join(datadir, "tree.json")))
actual_artifacts = find_artifacts(file_structure, "/root/")
assert len(actual_artifacts) == 42 + 3 # tar + zip
def test_check_filename_is_archive():
for ext in ['abc.xy.zip', 'cvb.zip', 'abc.tar.bz2', 'something.tar']:
for ext in ["abc.xy.zip", "cvb.zip", "abc.tar.bz2", "something.tar"]:
assert check_filename_is_archive(ext) is True
for ext in ['abc.tar.gz.sig', 'abc', 'something.zip2', 'foo.tar.']:
for ext in ["abc.tar.gz.sig", "abc", "something.zip2", "foo.tar."]:
assert check_filename_is_archive(ext) is False
@@ -155,54 +159,62 @@ def test_get_version():
"""
for url, expected_branchname in [
('https://gnu.org/sthg/info-2.1.0.tar.gz', '2.1.0'),
('https://gnu.org/sthg/info-2.1.2.zip', '2.1.2'),
('https://sthg.org/gnu/sthg.tar.gz', 'sthg'),
('https://sthg.org/gnu/DLDF-1.1.4.tar.gz', '1.1.4'),
('https://sthg.org/gnu/anubis-latest.tar.bz2', 'latest'),
('https://ftp.org/gnu/aris-w32.zip', 'w32'),
('https://ftp.org/gnu/aris-w32-2.2.zip', 'w32-2.2'),
('https://ftp.org/gnu/autogen.info.tar.gz', 'autogen.info'),
('https://ftp.org/gnu/crypto-build-demo.tar.gz',
'crypto-build-demo'),
('https://ftp.org/gnu/clue+clio+xit.clisp.tar.gz',
'clue+clio+xit.clisp'),
('https://ftp.org/gnu/clue+clio.for-pcl.tar.gz',
'clue+clio.for-pcl'),
('https://ftp.org/gnu/clisp-hppa2.0-hp-hpux10.20.tar.gz',
'hppa2.0-hp-hpux10.20'),
('clisp-i386-solaris2.6.tar.gz', 'i386-solaris2.6'),
('clisp-mips-sgi-irix6.5.tar.gz', 'mips-sgi-irix6.5'),
('clisp-powerpc-apple-macos.tar.gz', 'powerpc-apple-macos'),
('clisp-powerpc-unknown-linuxlibc6.tar.gz',
'powerpc-unknown-linuxlibc6'),
('clisp-rs6000-ibm-aix3.2.5.tar.gz', 'rs6000-ibm-aix3.2.5'),
('clisp-sparc-redhat51-linux.tar.gz', 'sparc-redhat51-linux'),
('clisp-sparc-sun-solaris2.4.tar.gz', 'sparc-sun-solaris2.4'),
('clisp-sparc-sun-sunos4.1.3_U1.tar.gz',
'sparc-sun-sunos4.1.3_U1'),
('clisp-2.25.1-powerpc-apple-MacOSX.tar.gz',
'2.25.1-powerpc-apple-MacOSX'),
('clisp-2.27-PowerMacintosh-powerpc-Darwin-1.3.7.tar.gz',
'2.27-PowerMacintosh-powerpc-Darwin-1.3.7'),
('clisp-2.27-i686-unknown-Linux-2.2.19.tar.gz',
'2.27-i686-unknown-Linux-2.2.19'),
('clisp-2.28-i386-i386-freebsd-4.3-RELEASE.tar.gz',
'2.28-i386-i386-freebsd-4.3-RELEASE'),
('clisp-2.28-i686-unknown-cygwin_me-4.90-1.3.10.tar.gz',
'2.28-i686-unknown-cygwin_me-4.90-1.3.10'),
('clisp-2.29-i386-i386-freebsd-4.6-STABLE.tar.gz',
'2.29-i386-i386-freebsd-4.6-STABLE'),
('clisp-2.29-i686-unknown-cygwin_nt-5.0-1.3.12.tar.gz',
'2.29-i686-unknown-cygwin_nt-5.0-1.3.12'),
('gcl-2.5.3-ansi-japi-xdr.20030701_mingw32.zip',
'2.5.3-ansi-japi-xdr.20030701_mingw32'),
('gettext-runtime-0.13.1.bin.woe32.zip', '0.13.1.bin.woe32'),
('sather-logo_images.tar.gz', 'sather-logo_images'),
('sather-specification-000328.html.tar.gz', '000328.html'),
('something-10.1.0.7z', '10.1.0'),
("https://gnu.org/sthg/info-2.1.0.tar.gz", "2.1.0"),
("https://gnu.org/sthg/info-2.1.2.zip", "2.1.2"),
("https://sthg.org/gnu/sthg.tar.gz", "sthg"),
("https://sthg.org/gnu/DLDF-1.1.4.tar.gz", "1.1.4"),
("https://sthg.org/gnu/anubis-latest.tar.bz2", "latest"),
("https://ftp.org/gnu/aris-w32.zip", "w32"),
("https://ftp.org/gnu/aris-w32-2.2.zip", "w32-2.2"),
("https://ftp.org/gnu/autogen.info.tar.gz", "autogen.info"),
("https://ftp.org/gnu/crypto-build-demo.tar.gz", "crypto-build-demo"),
("https://ftp.org/gnu/clue+clio+xit.clisp.tar.gz", "clue+clio+xit.clisp"),
("https://ftp.org/gnu/clue+clio.for-pcl.tar.gz", "clue+clio.for-pcl"),
(
"https://ftp.org/gnu/clisp-hppa2.0-hp-hpux10.20.tar.gz",
"hppa2.0-hp-hpux10.20",
),
("clisp-i386-solaris2.6.tar.gz", "i386-solaris2.6"),
("clisp-mips-sgi-irix6.5.tar.gz", "mips-sgi-irix6.5"),
("clisp-powerpc-apple-macos.tar.gz", "powerpc-apple-macos"),
("clisp-powerpc-unknown-linuxlibc6.tar.gz", "powerpc-unknown-linuxlibc6"),
("clisp-rs6000-ibm-aix3.2.5.tar.gz", "rs6000-ibm-aix3.2.5"),
("clisp-sparc-redhat51-linux.tar.gz", "sparc-redhat51-linux"),
("clisp-sparc-sun-solaris2.4.tar.gz", "sparc-sun-solaris2.4"),
("clisp-sparc-sun-sunos4.1.3_U1.tar.gz", "sparc-sun-sunos4.1.3_U1"),
("clisp-2.25.1-powerpc-apple-MacOSX.tar.gz", "2.25.1-powerpc-apple-MacOSX"),
(
"clisp-2.27-PowerMacintosh-powerpc-Darwin-1.3.7.tar.gz",
"2.27-PowerMacintosh-powerpc-Darwin-1.3.7",
),
(
"clisp-2.27-i686-unknown-Linux-2.2.19.tar.gz",
"2.27-i686-unknown-Linux-2.2.19",
),
(
"clisp-2.28-i386-i386-freebsd-4.3-RELEASE.tar.gz",
"2.28-i386-i386-freebsd-4.3-RELEASE",
),
(
"clisp-2.28-i686-unknown-cygwin_me-4.90-1.3.10.tar.gz",
"2.28-i686-unknown-cygwin_me-4.90-1.3.10",
),
(
"clisp-2.29-i386-i386-freebsd-4.6-STABLE.tar.gz",
"2.29-i386-i386-freebsd-4.6-STABLE",
),
(
"clisp-2.29-i686-unknown-cygwin_nt-5.0-1.3.12.tar.gz",
"2.29-i686-unknown-cygwin_nt-5.0-1.3.12",
),
(
"gcl-2.5.3-ansi-japi-xdr.20030701_mingw32.zip",
"2.5.3-ansi-japi-xdr.20030701_mingw32",
),
("gettext-runtime-0.13.1.bin.woe32.zip", "0.13.1.bin.woe32"),
("sather-logo_images.tar.gz", "sather-logo_images"),
("sather-specification-000328.html.tar.gz", "000328.html"),
("something-10.1.0.7z", "10.1.0"),
]:
actual_branchname = get_version(url)
@@ -211,16 +223,16 @@ def test_get_version():
def test_format_date():
for timestamp, expected_isoformat_date in [
(1489817408, '2017-03-18T06:10:08+00:00'),
(1386961236, '2013-12-13T19:00:36+00:00'),
('1198900505', '2007-12-29T03:55:05+00:00'),
(1059822922, '2003-08-02T11:15:22+00:00'),
('1489817408', '2017-03-18T06:10:08+00:00'),
(1489817408, "2017-03-18T06:10:08+00:00"),
(1386961236, "2013-12-13T19:00:36+00:00"),
("1198900505", "2007-12-29T03:55:05+00:00"),
(1059822922, "2003-08-02T11:15:22+00:00"),
("1489817408", "2017-03-18T06:10:08+00:00"),
]:
actual_date = format_date(timestamp)
assert actual_date == expected_isoformat_date
with pytest.raises(ValueError):
format_date('')
format_date("")
with pytest.raises(TypeError):
format_date(None)


@@ -24,12 +24,13 @@ class GNUTree:
"""Gnu Tree's representation
"""
def __init__(self, url: str):
self.url = url # filepath or uri
u = urlparse(url)
self.base_url = '%s://%s' % (u.scheme, u.netloc)
self.base_url = "%s://%s" % (u.scheme, u.netloc)
# Interesting top level directories
self.top_level_directories = ['gnu', 'old-gnu']
self.top_level_directories = ["gnu", "old-gnu"]
# internal state
self._artifacts = {} # type: Mapping[str, Any]
self._projects = {} # type: Mapping[str, Any]
@@ -59,21 +60,23 @@ class GNUTree:
artifacts = {}
raw_data = load_raw_data(self.url)[0]
for directory in raw_data['contents']:
if directory['name'] not in self.top_level_directories:
for directory in raw_data["contents"]:
if directory["name"] not in self.top_level_directories:
continue
infos = directory['contents']
infos = directory["contents"]
for info in infos:
if info['type'] == 'directory':
package_url = '%s/%s/%s/' % (
self.base_url, directory['name'], info['name'])
package_artifacts = find_artifacts(
info['contents'], package_url)
if info["type"] == "directory":
package_url = "%s/%s/%s/" % (
self.base_url,
directory["name"],
info["name"],
)
package_artifacts = find_artifacts(info["contents"], package_url)
if package_artifacts != []:
repo_details = {
'name': info['name'],
'url': package_url,
'time_modified': format_date(info['time'])
"name": info["name"],
"url": package_url,
"time_modified": format_date(info["time"]),
}
artifacts[package_url] = package_artifacts
projects[package_url] = repo_details
@@ -81,8 +84,9 @@ class GNUTree:
return projects, artifacts
def find_artifacts(filesystem: List[Mapping[str, Any]],
url: str) -> List[Mapping[str, Any]]:
def find_artifacts(
filesystem: List[Mapping[str, Any]], url: str
) -> List[Mapping[str, Any]]:
"""Recursively list artifacts present in the folder and subfolders for a
particular package url.
@@ -127,23 +131,25 @@ def find_artifacts(filesystem: List[Mapping[str, Any]],
"""
artifacts = [] # type: List[Mapping[str, Any]]
for info_file in filesystem:
filetype = info_file['type']
filename = info_file['name']
if filetype == 'file':
filetype = info_file["type"]
filename = info_file["name"]
if filetype == "file":
if check_filename_is_archive(filename):
uri = url + filename
artifacts.append({
'url': uri,
'filename': filename,
'time': format_date(info_file['time']),
'length': int(info_file['size']),
'version': get_version(filename),
})
artifacts.append(
{
"url": uri,
"filename": filename,
"time": format_date(info_file["time"]),
"length": int(info_file["size"]),
"version": get_version(filename),
}
)
# It will recursively check for artifacts in all sub-folders
elif filetype == 'directory':
elif filetype == "directory":
tarballs_in_dir = find_artifacts(
info_file['contents'],
url + filename + '/')
info_file["contents"], url + filename + "/"
)
artifacts.extend(tarballs_in_dir)
return artifacts
@@ -176,40 +182,67 @@ def check_filename_is_archive(filename: str) -> bool:
"""
file_suffixes = Path(filename).suffixes
if len(file_suffixes) == 1 and file_suffixes[-1] in ('.zip', '.tar'):
if len(file_suffixes) == 1 and file_suffixes[-1] in (".zip", ".tar"):
return True
elif len(file_suffixes) > 1:
if file_suffixes[-1] == '.zip' or file_suffixes[-2] == '.tar':
if file_suffixes[-1] == ".zip" or file_suffixes[-2] == ".tar":
return True
return False
# to recognize existing naming pattern
EXTENSIONS = [
'zip',
'tar',
'gz', 'tgz',
'bz2', 'bzip2',
'lzma', 'lz',
'xz',
'Z', '7z',
"zip",
"tar",
"gz",
"tgz",
"bz2",
"bzip2",
"lzma",
"lz",
"xz",
"Z",
"7z",
]
VERSION_KEYWORDS = [
'cygwin_me',
'w32', 'win32', 'nt', 'cygwin', 'mingw',
'latest', 'alpha', 'beta',
'release', 'stable',
'hppa',
'solaris', 'sunos', 'sun4u', 'sparc', 'sun',
'aix', 'ibm', 'rs6000',
'i386', 'i686',
'linux', 'redhat', 'linuxlibc',
'mips',
'powerpc', 'macos', 'apple', 'darwin', 'macosx', 'powermacintosh',
'unknown',
'netbsd', 'freebsd',
'sgi', 'irix',
"cygwin_me",
"w32",
"win32",
"nt",
"cygwin",
"mingw",
"latest",
"alpha",
"beta",
"release",
"stable",
"hppa",
"solaris",
"sunos",
"sun4u",
"sparc",
"sun",
"aix",
"ibm",
"rs6000",
"i386",
"i686",
"linux",
"redhat",
"linuxlibc",
"mips",
"powerpc",
"macos",
"apple",
"darwin",
"macosx",
"powermacintosh",
"unknown",
"netbsd",
"freebsd",
"sgi",
"irix",
]
# Match a filename into components.
@@ -225,7 +258,7 @@ VERSION_KEYWORDS = [
# greedily with +, software_name and release_number are matched lazily
# with +? and *?).
PATTERN = r'''
PATTERN = r"""
^
(?:
# We have a software name and a release number, separated with a
@@ -239,9 +272,9 @@ PATTERN = r'''
)
(?P<extension>(?:\.(?:{extensions}))+)
$
'''.format(
extensions='|'.join(EXTENSIONS),
vkeywords='|'.join('%s[-]?' % k for k in VERSION_KEYWORDS),
""".format(
extensions="|".join(EXTENSIONS),
vkeywords="|".join("%s[-]?" % k for k in VERSION_KEYWORDS),
)
@@ -267,16 +300,15 @@ def get_version(uri: str) -> str:
"""
filename = path.split(uri)[-1]
m = re.match(PATTERN, filename,
flags=re.VERBOSE | re.IGNORECASE)
m = re.match(PATTERN, filename, flags=re.VERBOSE | re.IGNORECASE)
if m:
d = m.groupdict()
if d['software_name1'] and d['release_number']:
return d['release_number']
if d['software_name2']:
return d['software_name2']
if d["software_name1"] and d["release_number"]:
return d["release_number"]
if d["software_name2"]:
return d["software_name2"]
return ''
return ""
def load_raw_data(url: str) -> Sequence[Mapping]:
@@ -289,15 +321,15 @@ def load_raw_data(url: str) -> Sequence[Mapping]:
The raw json list
"""
if url.startswith('http://') or url.startswith('https://'):
if url.startswith("http://") or url.startswith("https://"):
response = requests.get(url, allow_redirects=True)
if not response.ok:
raise ValueError('Error during query to %s' % url)
raise ValueError("Error during query to %s" % url)
raw = gzip.decompress(response.content)
else:
with gzip.open(url, 'r') as f:
with gzip.open(url, "r") as f:
raw = f.read()
raw_data = json.loads(raw.decode('utf-8'))
raw_data = json.loads(raw.decode("utf-8"))
return raw_data