Enable black
- blackify all the python files
- enable black in pre-commit
- add a black tox environment
parent 1ae75166c7
commit 93a4d8b784
97 changed files with 1734 additions and 1642 deletions
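
The tox.ini hunk for the black environment mentioned in the commit message is not part of the excerpt below. A minimal sketch of what such an environment usually looks like (the exact stanza in this commit may differ, and the `swh` target path is an assumption):

    [testenv:black]
    skip_install = true
    deps =
      black==19.10b0
    commands =
      {envpython} -m black --check swh

The pin matches the `rev: 19.10b0` used for the pre-commit hook below.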

.pre-commit-config.yaml
@@ -23,6 +23,11 @@ repos:
       language: system
       types: [python]
 
+- repo: https://github.com/python/black
+  rev: 19.10b0
+  hooks:
+  - id: black
+
 # unfortunately, we are far from being able to enable this...
 # - repo: https://github.com/PyCQA/pydocstyle.git
 #   rev: 4.0.0
@@ -34,14 +39,3 @@ repos:
 #       language: python
 #       types: [python]
 
-# black requires py3.6+
-#- repo: https://github.com/python/black
-#  rev: 19.3b0
-#  hooks:
-#  - id: black
-#    language_version: python3
-#- repo: https://github.com/asottile/blacken-docs
-#  rev: v1.0.0-1
-#  hooks:
-#  - id: blacken-docs
-#    additional_dependencies: [black==19.3b0]
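
With the hook above in place, black runs automatically on staged Python files; it can also be exercised by hand with the standard pre-commit commands (not part of this diff):

    pre-commit install                # set up the git hook once per clone
    pre-commit run black --all-files  # check/reformat the whole tree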

setup.cfg (new file)
@@ -0,0 +1,6 @@
+[flake8]
+# E203: whitespaces before ':' <https://github.com/psf/black/issues/315>
+# E231: missing whitespace after ','
+# W503: line break before binary operator <https://github.com/psf/black/issues/52>
+ignore = E203,E231,W503
+max-line-length = 88
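
The E203 exemption is needed because black deliberately puts a space on both sides of ':' in slices whose bounds are expressions, which flake8 would otherwise reject. A hypothetical illustration of output black considers correct:

    ham[lower + offset : upper + offset]  # flake8's E203 would flag the space before ':'

max-line-length is raised to 88 to match black's default line length.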

setup.py
@@ -12,15 +12,15 @@ from io import open
 here = path.abspath(path.dirname(__file__))
 
 # Get the long description from the README file
-with open(path.join(here, 'README.md'), encoding='utf-8') as f:
+with open(path.join(here, "README.md"), encoding="utf-8") as f:
     long_description = f.read()
 
 
 def parse_requirements(name=None):
     if name:
-        reqf = 'requirements-%s.txt' % name
+        reqf = "requirements-%s.txt" % name
     else:
-        reqf = 'requirements.txt'
+        reqf = "requirements.txt"
 
     requirements = []
     if not path.exists(reqf):
@@ -29,28 +29,28 @@ def parse_requirements(name=None):
     with open(reqf) as f:
         for line in f.readlines():
             line = line.strip()
-            if not line or line.startswith('#'):
+            if not line or line.startswith("#"):
                 continue
             requirements.append(line)
     return requirements
 
 
 setup(
-    name='swh.lister',
-    description='Software Heritage lister',
+    name="swh.lister",
+    description="Software Heritage lister",
     long_description=long_description,
-    long_description_content_type='text/markdown',
-    author='Software Heritage developers',
-    author_email='swh-devel@inria.fr',
-    url='https://forge.softwareheritage.org/diffusion/DLSGH/',
+    long_description_content_type="text/markdown",
+    author="Software Heritage developers",
+    author_email="swh-devel@inria.fr",
+    url="https://forge.softwareheritage.org/diffusion/DLSGH/",
     packages=find_packages(),
-    install_requires=parse_requirements() + parse_requirements('swh'),
-    tests_require=parse_requirements('test'),
-    setup_requires=['vcversioner'],
-    extras_require={'testing': parse_requirements('test')},
+    install_requires=parse_requirements() + parse_requirements("swh"),
+    tests_require=parse_requirements("test"),
+    setup_requires=["vcversioner"],
+    extras_require={"testing": parse_requirements("test")},
     vcversioner={},
     include_package_data=True,
-    entry_points='''
+    entry_points="""
         [swh.cli.subcommands]
         lister=swh.lister.cli:lister
         [swh.workers]
@@ -65,7 +65,7 @@ setup(
         lister.packagist=swh.lister.packagist:register
         lister.phabricator=swh.lister.phabricator:register
         lister.pypi=swh.lister.pypi:register
-    ''',
+    """,
     classifiers=[
         "Programming Language :: Python :: 3",
         "Intended Audience :: Developers",
@@ -74,8 +74,8 @@ setup(
         "Development Status :: 5 - Production/Stable",
     ],
     project_urls={
-        'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
-        'Funding': 'https://www.softwareheritage.org/donate',
-        'Source': 'https://forge.softwareheritage.org/source/swh-lister',
+        "Bug Reports": "https://forge.softwareheritage.org/maniphest",
+        "Funding": "https://www.softwareheritage.org/donate",
+        "Source": "https://forge.softwareheritage.org/source/swh-lister",
     },
 )

swh/lister/__init__.py
@@ -11,17 +11,19 @@ logger = logging.getLogger(__name__)
 
 
 try:
-    __version__ = pkg_resources.get_distribution('swh.lister').version
+    __version__ = pkg_resources.get_distribution("swh.lister").version
 except pkg_resources.DistributionNotFound:
-    __version__ = 'devel'
+    __version__ = "devel"
 
-USER_AGENT_TEMPLATE = 'Software Heritage Lister (%s)'
+USER_AGENT_TEMPLATE = "Software Heritage Lister (%s)"
 USER_AGENT = USER_AGENT_TEMPLATE % __version__
 
 
-LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
-           for entry_point in pkg_resources.iter_entry_points('swh.workers')
-           if entry_point.name.split('.', 1)[0] == 'lister'}
+LISTERS = {
+    entry_point.name.split(".", 1)[1]: entry_point
+    for entry_point in pkg_resources.iter_entry_points("swh.workers")
+    if entry_point.name.split(".", 1)[0] == "lister"
+}
 
 
 SUPPORTED_LISTERS = list(LISTERS)
@@ -41,12 +43,13 @@ def get_lister(lister_name, db_url=None, **conf):
     """
     if lister_name not in LISTERS:
         raise ValueError(
-            'Invalid lister %s: only supported listers are %s' %
-            (lister_name, SUPPORTED_LISTERS))
+            "Invalid lister %s: only supported listers are %s"
+            % (lister_name, SUPPORTED_LISTERS)
+        )
     if db_url:
-        conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}
+        conf["lister"] = {"cls": "local", "args": {"db": db_url}}
 
     registry_entry = LISTERS[lister_name].load()()
-    lister_cls = registry_entry['lister']
+    lister_cls = registry_entry["lister"]
     lister = lister_cls(override_config=conf)
     return lister

swh/lister/bitbucket/__init__.py
@@ -7,7 +7,8 @@ def register():
     from .models import BitBucketModel
     from .lister import BitBucketLister
 
-    return {'models': [BitBucketModel],
-            'lister': BitBucketLister,
-            'task_modules': ['%s.tasks' % __name__],
-            }
+    return {
+        "models": [BitBucketModel],
+        "lister": BitBucketLister,
+        "task_modules": ["%s.tasks" % __name__],
+    }

swh/lister/bitbucket/lister.py
@@ -19,34 +19,33 @@ logger = logging.getLogger(__name__)
 
 
 class BitBucketLister(IndexingHttpLister):
-    PATH_TEMPLATE = '/repositories?after=%s'
+    PATH_TEMPLATE = "/repositories?after=%s"
     MODEL = BitBucketModel
-    LISTER_NAME = 'bitbucket'
-    DEFAULT_URL = 'https://api.bitbucket.org/2.0'
-    instance = 'bitbucket'
+    LISTER_NAME = "bitbucket"
+    DEFAULT_URL = "https://api.bitbucket.org/2.0"
+    instance = "bitbucket"
     default_min_bound = datetime.fromtimestamp(0, timezone.utc)  # type: Any
 
-    def __init__(self, url: str = None,
-                 override_config=None, per_page: int = 100) -> None:
+    def __init__(
+        self, url: str = None, override_config=None, per_page: int = 100
+    ) -> None:
         super().__init__(url=url, override_config=override_config)
-        per_page = self.config.get('per_page', per_page)
+        per_page = self.config.get("per_page", per_page)
 
-        self.PATH_TEMPLATE = '%s&pagelen=%s' % (
-            self.PATH_TEMPLATE, per_page)
+        self.PATH_TEMPLATE = "%s&pagelen=%s" % (self.PATH_TEMPLATE, per_page)
 
     def get_model_from_repo(self, repo: Dict) -> Dict[str, Any]:
         return {
-            'uid': repo['uuid'],
-            'indexable': iso8601.parse_date(repo['created_on']),
-            'name': repo['name'],
-            'full_name': repo['full_name'],
-            'html_url': repo['links']['html']['href'],
-            'origin_url': repo['links']['clone'][0]['href'],
-            'origin_type': repo['scm'],
+            "uid": repo["uuid"],
+            "indexable": iso8601.parse_date(repo["created_on"]),
+            "name": repo["name"],
+            "full_name": repo["full_name"],
+            "html_url": repo["links"]["html"]["href"],
+            "origin_url": repo["links"]["clone"][0]["href"],
+            "origin_type": repo["scm"],
         }
 
-    def get_next_target_from_response(self, response: Response
-                                      ) -> Optional[datetime]:
+    def get_next_target_from_response(self, response: Response) -> Optional[datetime]:
         """This will read the 'next' link from the api response if any
         and return it as a datetime.
 
@@ -58,23 +57,23 @@ class BitBucketLister(IndexingHttpLister):
 
         """
         body = response.json()
-        next_ = body.get('next')
+        next_ = body.get("next")
         if next_ is not None:
             next_ = parse.urlparse(next_)
-            return iso8601.parse_date(parse.parse_qs(next_.query)['after'][0])
+            return iso8601.parse_date(parse.parse_qs(next_.query)["after"][0])
         return None
 
-    def transport_response_simplified(self, response: Response
-                                      ) -> List[Dict[str, Any]]:
-        repos = response.json()['values']
+    def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
+        repos = response.json()["values"]
         return [self.get_model_from_repo(repo) for repo in repos]
 
     def request_uri(self, identifier: datetime) -> str:  # type: ignore
         identifier_str = parse.quote(identifier.isoformat())
-        return super().request_uri(identifier_str or '1970-01-01')
+        return super().request_uri(identifier_str or "1970-01-01")
 
-    def is_within_bounds(self, inner: int, lower: Optional[int] = None,
-                         upper: Optional[int] = None) -> bool:
+    def is_within_bounds(
+        self, inner: int, lower: Optional[int] = None, upper: Optional[int] = None
+    ) -> bool:
         # values are expected to be datetimes
         if lower is None and upper is None:
             ret = True

swh/lister/bitbucket/models.py
@@ -9,7 +9,8 @@ from swh.lister.core.models import IndexingModelBase
 
 class BitBucketModel(IndexingModelBase):
     """a BitBucket repository"""
-    __tablename__ = 'bitbucket_repo'
+
+    __tablename__ = "bitbucket_repo"
 
     uid = Column(String, primary_key=True)
     indexable = Column(DateTime(timezone=True), index=True)

swh/lister/bitbucket/tasks.py
@@ -10,20 +10,20 @@ from .lister import BitBucketLister
 GROUP_SPLIT = 10000
 
 
-@shared_task(name=__name__ + '.IncrementalBitBucketLister')
+@shared_task(name=__name__ + ".IncrementalBitBucketLister")
 def list_bitbucket_incremental(**lister_args):
-    '''Incremental update of the BitBucket forge'''
+    """Incremental update of the BitBucket forge"""
     lister = BitBucketLister(**lister_args)
     return lister.run(min_bound=lister.db_last_index(), max_bound=None)
 
 
-@shared_task(name=__name__ + '.RangeBitBucketLister')
+@shared_task(name=__name__ + ".RangeBitBucketLister")
 def _range_bitbucket_lister(start, end, **lister_args):
     lister = BitBucketLister(**lister_args)
     return lister.run(min_bound=start, max_bound=end)
 
 
-@shared_task(name=__name__ + '.FullBitBucketRelister', bind=True)
+@shared_task(name=__name__ + ".FullBitBucketRelister", bind=True)
 def list_bitbucket_full(self, split=None, **lister_args):
     """Full update of the BitBucket forge
 
@@ -33,21 +33,22 @@ def list_bitbucket_full(self, split=None, **lister_args):
     lister = BitBucketLister(**lister_args)
     ranges = lister.db_partition_indices(split or GROUP_SPLIT)
     if not ranges:
-        self.log.info('Nothing to list')
+        self.log.info("Nothing to list")
         return
 
     random.shuffle(ranges)
-    promise = group(_range_bitbucket_lister.s(minv, maxv, **lister_args)
-                    for minv, maxv in ranges)()
-    self.log.debug('%s OK (spawned %s subtasks)', (self.name, len(ranges)))
+    promise = group(
+        _range_bitbucket_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
+    )()
+    self.log.debug("%s OK (spawned %s subtasks)", (self.name, len(ranges)))
     try:
         promise.save()  # so that we can restore the GroupResult in tests
     except (NotImplementedError, AttributeError):
-        self.log.info('Unable to call save_group with current result backend.')
+        self.log.info("Unable to call save_group with current result backend.")
     # FIXME: what to do in terms of return here?
     return promise.id
 
 
-@shared_task(name=__name__ + '.ping')
+@shared_task(name=__name__ + ".ping")
 def _ping():
-    return 'OK'
+    return "OK"

swh/lister/bitbucket/tests/test_lister.py
@@ -26,12 +26,12 @@ def _convert_type(req_index):
 
 class BitBucketListerTester(HttpListerTester, unittest.TestCase):
     Lister = BitBucketLister
-    test_re = re.compile(r'/repositories\?after=([^?&]+)')
-    lister_subdir = 'bitbucket'
-    good_api_response_file = 'data/https_api.bitbucket.org/response.json'
-    bad_api_response_file = 'data/https_api.bitbucket.org/empty_response.json'
-    first_index = _convert_type('2008-07-12T07:44:01.476818+00:00')
-    last_index = _convert_type('2008-07-19T06:16:43.044743+00:00')
+    test_re = re.compile(r"/repositories\?after=([^?&]+)")
+    lister_subdir = "bitbucket"
+    good_api_response_file = "data/https_api.bitbucket.org/response.json"
+    bad_api_response_file = "data/https_api.bitbucket.org/empty_response.json"
+    first_index = _convert_type("2008-07-12T07:44:01.476818+00:00")
+    last_index = _convert_type("2008-07-19T06:16:43.044743+00:00")
     entries_per_page = 10
     convert_type = _convert_type
 
@@ -57,57 +57,64 @@ class BitBucketListerTester(HttpListerTester, unittest.TestCase):
         self.disable_db(fl)
 
         # stores no results
-        fl.run(min_bound=self.first_index - timedelta(days=3),
-               max_bound=self.first_index)
+        fl.run(
+            min_bound=self.first_index - timedelta(days=3), max_bound=self.first_index
+        )
 
     def test_is_within_bounds(self):
         fl = self.get_fl()
-        self.assertTrue(fl.is_within_bounds(
-            iso8601.parse_date('2008-07-15'),
-            self.first_index, self.last_index))
-        self.assertFalse(fl.is_within_bounds(
-            iso8601.parse_date('2008-07-20'),
-            self.first_index, self.last_index))
-        self.assertFalse(fl.is_within_bounds(
-            iso8601.parse_date('2008-07-11'),
-            self.first_index, self.last_index))
+        self.assertTrue(
+            fl.is_within_bounds(
+                iso8601.parse_date("2008-07-15"), self.first_index, self.last_index
+            )
+        )
+        self.assertFalse(
+            fl.is_within_bounds(
+                iso8601.parse_date("2008-07-20"), self.first_index, self.last_index
+            )
+        )
+        self.assertFalse(
+            fl.is_within_bounds(
+                iso8601.parse_date("2008-07-11"), self.first_index, self.last_index
+            )
+        )
 
 
 def test_lister_bitbucket(swh_listers, requests_mock_datadir):
     """Simple bitbucket listing should create scheduled tasks (git, hg)
 
     """
-    lister = swh_listers['bitbucket']
+    lister = swh_listers["bitbucket"]
 
     lister.run()
 
-    r = lister.scheduler.search_tasks(task_type='load-hg')
+    r = lister.scheduler.search_tasks(task_type="load-hg")
     assert len(r) == 9
 
     for row in r:
-        args = row['arguments']['args']
-        kwargs = row['arguments']['kwargs']
+        args = row["arguments"]["args"]
+        kwargs = row["arguments"]["kwargs"]
 
         assert len(args) == 0
         assert len(kwargs) == 1
-        url = kwargs['url']
+        url = kwargs["url"]
 
-        assert url.startswith('https://bitbucket.org')
+        assert url.startswith("https://bitbucket.org")
 
-        assert row['policy'] == 'recurring'
-        assert row['priority'] is None
+        assert row["policy"] == "recurring"
+        assert row["priority"] is None
 
-    r = lister.scheduler.search_tasks(task_type='load-git')
+    r = lister.scheduler.search_tasks(task_type="load-git")
     assert len(r) == 1
 
     for row in r:
-        args = row['arguments']['args']
-        kwargs = row['arguments']['kwargs']
+        args = row["arguments"]["args"]
+        kwargs = row["arguments"]["kwargs"]
         assert len(args) == 0
         assert len(kwargs) == 1
-        url = kwargs['url']
+        url = kwargs["url"]
 
-        assert url.startswith('https://bitbucket.org')
+        assert url.startswith("https://bitbucket.org")
 
-        assert row['policy'] == 'recurring'
-        assert row['priority'] is None
+        assert row["policy"] == "recurring"
+        assert row["priority"] is None

swh/lister/bitbucket/tests/test_tasks.py
@@ -5,23 +5,21 @@ from unittest.mock import patch
 
 
 def test_ping(swh_app, celery_session_worker):
-    res = swh_app.send_task(
-        'swh.lister.bitbucket.tasks.ping')
+    res = swh_app.send_task("swh.lister.bitbucket.tasks.ping")
     assert res
     res.wait()
     assert res.successful()
-    assert res.result == 'OK'
+    assert res.result == "OK"
 
 
-@patch('swh.lister.bitbucket.tasks.BitBucketLister')
+@patch("swh.lister.bitbucket.tasks.BitBucketLister")
 def test_incremental(lister, swh_app, celery_session_worker):
     # setup the mocked BitbucketLister
     lister.return_value = lister
     lister.db_last_index.return_value = 42
     lister.run.return_value = None
 
-    res = swh_app.send_task(
-        'swh.lister.bitbucket.tasks.IncrementalBitBucketLister')
+    res = swh_app.send_task("swh.lister.bitbucket.tasks.IncrementalBitBucketLister")
    assert res
    res.wait()
    assert res.successful()
@@ -31,15 +29,15 @@ def test_incremental(lister, swh_app, celery_session_worker):
     lister.run.assert_called_once_with(min_bound=42, max_bound=None)
 
 
-@patch('swh.lister.bitbucket.tasks.BitBucketLister')
+@patch("swh.lister.bitbucket.tasks.BitBucketLister")
 def test_range(lister, swh_app, celery_session_worker):
     # setup the mocked BitbucketLister
     lister.return_value = lister
     lister.run.return_value = None
 
     res = swh_app.send_task(
-        'swh.lister.bitbucket.tasks.RangeBitBucketLister',
-        kwargs=dict(start=12, end=42))
+        "swh.lister.bitbucket.tasks.RangeBitBucketLister", kwargs=dict(start=12, end=42)
+    )
     assert res
     res.wait()
     assert res.successful()
@@ -49,16 +47,14 @@ def test_range(lister, swh_app, celery_session_worker):
     lister.run.assert_called_once_with(min_bound=12, max_bound=42)
 
 
-@patch('swh.lister.bitbucket.tasks.BitBucketLister')
+@patch("swh.lister.bitbucket.tasks.BitBucketLister")
 def test_relister(lister, swh_app, celery_session_worker):
     # setup the mocked BitbucketLister
     lister.return_value = lister
     lister.run.return_value = None
-    lister.db_partition_indices.return_value = [
-        (i, i+9) for i in range(0, 50, 10)]
+    lister.db_partition_indices.return_value = [(i, i + 9) for i in range(0, 50, 10)]
 
-    res = swh_app.send_task(
-        'swh.lister.bitbucket.tasks.FullBitBucketRelister')
+    res = swh_app.send_task("swh.lister.bitbucket.tasks.FullBitBucketRelister")
     assert res
 
     res.wait()
@@ -85,5 +81,6 @@ def test_relister(lister, swh_app, celery_session_worker):
 
     # lister.run should have been called once per partition interval
     for i in range(5):
-        assert (dict(min_bound=10*i, max_bound=10*i + 9),) \
-            in lister.run.call_args_list
+        assert (
+            dict(min_bound=10 * i, max_bound=10 * i + 9),
+        ) in lister.run.call_args_list

swh/lister/cgit/__init__.py
@@ -7,7 +7,8 @@ def register():
     from .models import CGitModel
     from .lister import CGitLister
 
-    return {'models': [CGitModel],
-            'lister': CGitLister,
-            'task_modules': ['%s.tasks' % __name__],
-            }
+    return {
+        "models": [CGitModel],
+        "lister": CGitLister,
+        "task_modules": ["%s.tasks" % __name__],
+    }

swh/lister/cgit/lister.py
@@ -50,13 +50,13 @@ class CGitLister(ListerBase):
     Args:
         'https://git.savannah.gnu.org/git/elisp-es.git'
     """
+
     MODEL = CGitModel
-    DEFAULT_URL = 'https://git.savannah.gnu.org/cgit/'
-    LISTER_NAME = 'cgit'
+    DEFAULT_URL = "https://git.savannah.gnu.org/cgit/"
+    LISTER_NAME = "cgit"
     url_prefix_present = True
 
-    def __init__(self, url=None, instance=None,
-                 override_config=None):
+    def __init__(self, url=None, instance=None, override_config=None):
         """Lister class for CGit repositories.
 
         Args:
@@ -69,7 +69,7 @@ class CGitLister(ListerBase):
         super().__init__(override_config=override_config)
 
         if url is None:
-            url = self.config.get('url', self.DEFAULT_URL)
+            url = self.config.get("url", self.DEFAULT_URL)
         self.url = url
 
         if not instance:
@@ -78,23 +78,22 @@ class CGitLister(ListerBase):
         self.session = Session()
         self.session.mount(self.url, HTTPAdapter(max_retries=3))
         self.session.headers = {
-            'User-Agent': USER_AGENT,
+            "User-Agent": USER_AGENT,
         }
 
     def run(self) -> Dict[str, str]:
-        status = 'uneventful'
+        status = "uneventful"
         total = 0
         for repos in grouper(self.get_repos(), 10):
-            models = list(filter(None, (self.build_model(repo)
-                                        for repo in repos)))
+            models = list(filter(None, (self.build_model(repo) for repo in repos)))
             injected_repos = self.inject_repo_data_into_db(models)
             self.schedule_missing_tasks(models, injected_repos)
             self.db_session.commit()
             total += len(injected_repos)
-            logger.debug('Scheduled %s tasks for %s', total, self.url)
-            status = 'eventful'
+            logger.debug("Scheduled %s tasks for %s", total, self.url)
+            status = "eventful"
 
-        return {'status': status}
+        return {"status": status}
 
     def get_repos(self) -> Generator[str, None, None]:
         """Generate git 'project' URLs found on the current CGit server
@@ -103,16 +102,16 @@ class CGitLister(ListerBase):
         next_page = self.url
         while next_page:
             bs_idx = self.get_and_parse(next_page)
-            for tr in bs_idx.find(
-                    'div', {"class": "content"}).find_all(
-                        "tr", {"class": ""}):
-                yield urljoin(self.url, tr.find('a')['href'])
+            for tr in bs_idx.find("div", {"class": "content"}).find_all(
+                "tr", {"class": ""}
+            ):
+                yield urljoin(self.url, tr.find("a")["href"])
 
             try:
-                pager = bs_idx.find('ul', {'class': 'pager'})
-                current_page = pager.find('a', {'class': 'current'})
+                pager = bs_idx.find("ul", {"class": "pager"})
+                current_page = pager.find("a", {"class": "current"})
                 if current_page:
-                    next_page = current_page.parent.next_sibling.a['href']
+                    next_page = current_page.parent.next_sibling.a["href"]
                     next_page = urljoin(self.url, next_page)
             except (AttributeError, KeyError):
                 # no pager, or no next page
@@ -123,28 +122,28 @@ class CGitLister(ListerBase):
         return the repo description (dict) suitable for insertion in the db.
         """
         bs = self.get_and_parse(repo_url)
-        urls = [x['href'] for x in bs.find_all('a', {'rel': 'vcs-git'})]
+        urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
 
         if not urls:
             return None
 
         # look for the http/https url, if any, and use it as origin_url
         for url in urls:
-            if urlparse(url).scheme in ('http', 'https'):
+            if urlparse(url).scheme in ("http", "https"):
                 origin_url = url
                 break
         else:
            # otherwise, choose the first one
            origin_url = urls[0]
 
-        return {'uid': repo_url,
-                'name': bs.find('a', title=re.compile('.+'))['title'],
-                'origin_type': 'git',
-                'instance': self.instance,
-                'origin_url': origin_url,
-                }
+        return {
+            "uid": repo_url,
+            "name": bs.find("a", title=re.compile(".+"))["title"],
+            "origin_type": "git",
+            "instance": self.instance,
+            "origin_url": origin_url,
+        }
 
     def get_and_parse(self, url: str) -> BeautifulSoup:
         "Get the given url and parse the retrieved HTML using BeautifulSoup"
-        return BeautifulSoup(self.session.get(url).text,
-                             features='html.parser')
+        return BeautifulSoup(self.session.get(url).text, features="html.parser")

swh/lister/cgit/models.py
@@ -11,7 +11,8 @@ class CGitModel(ModelBase):
     """a CGit repository representation
 
     """
-    __tablename__ = 'cgit_repo'
+
+    __tablename__ = "cgit_repo"
 
     uid = Column(String, primary_key=True)
     instance = Column(String, index=True)

swh/lister/cgit/tasks.py
@@ -7,12 +7,12 @@ from celery import shared_task
 from .lister import CGitLister
 
 
-@shared_task(name=__name__ + '.CGitListerTask')
+@shared_task(name=__name__ + ".CGitListerTask")
 def list_cgit(**lister_args):
-    '''Lister task for CGit instances'''
+    """Lister task for CGit instances"""
     return CGitLister(**lister_args).run()
 
 
-@shared_task(name=__name__ + '.ping')
+@shared_task(name=__name__ + ".ping")
 def _ping():
-    return 'OK'
+    return "OK"

swh/lister/cgit/tests/test_lister.py
@@ -7,38 +7,38 @@ from swh.lister import __version__
 
 
 def test_lister_no_page(requests_mock_datadir, swh_listers):
-    lister = swh_listers['cgit']
+    lister = swh_listers["cgit"]
 
-    assert lister.url == 'https://git.savannah.gnu.org/cgit/'
+    assert lister.url == "https://git.savannah.gnu.org/cgit/"
 
     repos = list(lister.get_repos())
     assert len(repos) == 977
 
-    assert repos[0] == 'https://git.savannah.gnu.org/cgit/elisp-es.git/'
+    assert repos[0] == "https://git.savannah.gnu.org/cgit/elisp-es.git/"
     # note the url below is NOT a subpath of /cgit/
-    assert repos[-1] == 'https://git.savannah.gnu.org/path/to/yetris.git/'  # noqa
+    assert repos[-1] == "https://git.savannah.gnu.org/path/to/yetris.git/"  # noqa
     # note the url below is NOT on the same server
-    assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
+    assert repos[-2] == "http://example.org/cgit/xstarcastle.git/"
 
 
 def test_lister_model(requests_mock_datadir, swh_listers):
-    lister = swh_listers['cgit']
+    lister = swh_listers["cgit"]
 
     repo = next(lister.get_repos())
 
     model = lister.build_model(repo)
     assert model == {
-        'uid': 'https://git.savannah.gnu.org/cgit/elisp-es.git/',
-        'name': 'elisp-es.git',
-        'origin_type': 'git',
-        'instance': 'git.savannah.gnu.org',
-        'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
-    }
+        "uid": "https://git.savannah.gnu.org/cgit/elisp-es.git/",
+        "name": "elisp-es.git",
+        "origin_type": "git",
+        "instance": "git.savannah.gnu.org",
+        "origin_url": "https://git.savannah.gnu.org/git/elisp-es.git",
+    }
 
 
 def test_lister_with_pages(requests_mock_datadir, swh_listers):
-    lister = swh_listers['cgit']
-    lister.url = 'https://git.tizen/cgit/'
+    lister = swh_listers["cgit"]
+    lister.url = "https://git.tizen/cgit/"
 
     repos = list(lister.get_repos())
     # we should have 16 repos (listed on 3 pages)
@@ -46,37 +46,37 @@ def test_lister_with_pages(requests_mock_datadir, swh_listers):
 
 
 def test_lister_run(requests_mock_datadir, swh_listers):
-    lister = swh_listers['cgit']
-    lister.url = 'https://git.tizen/cgit/'
+    lister = swh_listers["cgit"]
+    lister.url = "https://git.tizen/cgit/"
     lister.run()
 
-    r = lister.scheduler.search_tasks(task_type='load-git')
+    r = lister.scheduler.search_tasks(task_type="load-git")
     assert len(r) == 16
 
     for row in r:
-        assert row['type'] == 'load-git'
+        assert row["type"] == "load-git"
         # arguments check
-        args = row['arguments']['args']
+        args = row["arguments"]["args"]
         assert len(args) == 0
 
         # kwargs
-        kwargs = row['arguments']['kwargs']
+        kwargs = row["arguments"]["kwargs"]
         assert len(kwargs) == 1
-        url = kwargs['url']
-        assert url.startswith('https://git.tizen')
+        url = kwargs["url"]
+        assert url.startswith("https://git.tizen")
 
-        assert row['policy'] == 'recurring'
-        assert row['priority'] is None
+        assert row["policy"] == "recurring"
+        assert row["priority"] is None
 
 
 def test_lister_requests(requests_mock_datadir, swh_listers):
-    lister = swh_listers['cgit']
-    lister.url = 'https://git.tizen/cgit/'
+    lister = swh_listers["cgit"]
+    lister.url = "https://git.tizen/cgit/"
     lister.run()
 
     assert len(requests_mock_datadir.request_history) != 0
     for request in requests_mock_datadir.request_history:
-        assert 'User-Agent' in request.headers
-        user_agent = request.headers['User-Agent']
-        assert 'Software Heritage Lister' in user_agent
+        assert "User-Agent" in request.headers
+        user_agent = request.headers["User-Agent"]
+        assert "Software Heritage Lister" in user_agent
         assert __version__ in user_agent

swh/lister/cgit/tests/test_tasks.py
@@ -2,29 +2,27 @@ from unittest.mock import patch
 
 
 def test_ping(swh_app, celery_session_worker):
-    res = swh_app.send_task(
-        'swh.lister.cgit.tasks.ping')
+    res = swh_app.send_task("swh.lister.cgit.tasks.ping")
     assert res
     res.wait()
     assert res.successful()
-    assert res.result == 'OK'
+    assert res.result == "OK"
 
 
-@patch('swh.lister.cgit.tasks.CGitLister')
+@patch("swh.lister.cgit.tasks.CGitLister")
 def test_lister(lister, swh_app, celery_session_worker):
     # setup the mocked CGitLister
     lister.return_value = lister
     lister.run.return_value = None
 
     res = swh_app.send_task(
-        'swh.lister.cgit.tasks.CGitListerTask',
-        kwargs=dict(url='https://git.kernel.org/', instance='kernel'))
+        "swh.lister.cgit.tasks.CGitListerTask",
+        kwargs=dict(url="https://git.kernel.org/", instance="kernel"),
+    )
     assert res
     res.wait()
     assert res.successful()
 
-    lister.assert_called_once_with(
-        url='https://git.kernel.org/',
-        instance='kernel')
+    lister.assert_called_once_with(url="https://git.kernel.org/", instance="kernel")
     lister.db_last_index.assert_not_called()
     lister.run.assert_called_once_with()

swh/lister/cli.py
@@ -23,104 +23,123 @@ logger = logging.getLogger(__name__)
 # value used when inserting a new task-type in the scheduler db will be the one
 # under the 'full' key below (because it matches xxx_full).
 DEFAULT_TASK_TYPE = {
-    'full': {  # for tasks like 'list_xxx_full()'
-        'default_interval': '90 days',
-        'min_interval': '90 days',
-        'max_interval': '90 days',
-        'backoff_factor': 1
-    },
-    '*': {  # value if not suffix matches
-        'default_interval': '1 day',
-        'min_interval': '1 day',
-        'max_interval': '1 day',
-        'backoff_factor': 1
-    },
-}
+    "full": {  # for tasks like 'list_xxx_full()'
+        "default_interval": "90 days",
+        "min_interval": "90 days",
+        "max_interval": "90 days",
+        "backoff_factor": 1,
+    },
+    "*": {  # value if not suffix matches
+        "default_interval": "1 day",
+        "min_interval": "1 day",
+        "max_interval": "1 day",
+        "backoff_factor": 1,
+    },
+}
 
 
-@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
-@click.option('--config-file', '-C', default=None,
-              type=click.Path(exists=True, dir_okay=False,),
-              help="Configuration file.")
-@click.option('--db-url', '-d', default=None,
-              help='SQLAlchemy DB URL; see '
-              '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>')  # noqa
+@click.group(name="lister", context_settings=CONTEXT_SETTINGS)
+@click.option(
+    "--config-file",
+    "-C",
+    default=None,
+    type=click.Path(exists=True, dir_okay=False,),
+    help="Configuration file.",
+)
+@click.option(
+    "--db-url",
+    "-d",
+    default=None,
+    help="SQLAlchemy DB URL; see "
+    "<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>",
+)  # noqa
 @click.pass_context
 def lister(ctx, config_file, db_url):
-    '''Software Heritage Lister tools.'''
+    """Software Heritage Lister tools."""
     from swh.core import config
 
     ctx.ensure_object(dict)
 
     if not config_file:
-        config_file = os.environ.get('SWH_CONFIG_FILENAME')
+        config_file = os.environ.get("SWH_CONFIG_FILENAME")
     conf = config.read(config_file)
     if db_url:
-        conf['lister'] = {
-            'cls': 'local',
-            'args': {'db': db_url}
-        }
-    ctx.obj['config'] = conf
+        conf["lister"] = {"cls": "local", "args": {"db": db_url}}
+    ctx.obj["config"] = conf
 
 
-@lister.command(name='db-init', context_settings=CONTEXT_SETTINGS)
-@click.option('--drop-tables', '-D', is_flag=True, default=False,
-              help='Drop tables before creating the database schema')
+@lister.command(name="db-init", context_settings=CONTEXT_SETTINGS)
+@click.option(
+    "--drop-tables",
+    "-D",
+    is_flag=True,
+    default=False,
+    help="Drop tables before creating the database schema",
+)
 @click.pass_context
 def db_init(ctx, drop_tables):
     """Initialize the database model for given listers.

     """
 
-    cfg = ctx.obj['config']
-    lister_cfg = cfg['lister']
-    if lister_cfg['cls'] != 'local':
-        click.echo('A local lister configuration is required')
+    cfg = ctx.obj["config"]
+    lister_cfg = cfg["lister"]
+    if lister_cfg["cls"] != "local":
+        click.echo("A local lister configuration is required")
         ctx.exit(1)
 
-    db_url = lister_cfg['args']['db']
+    db_url = lister_cfg["args"]["db"]
     db_engine = create_engine(db_url)
 
     registry = {}
     for lister, entrypoint in LISTERS.items():
-        logger.info('Loading lister %s', lister)
+        logger.info("Loading lister %s", lister)
         registry[lister] = entrypoint.load()()
 
-    logger.info('Initializing database')
+    logger.info("Initializing database")
     initialize(db_engine, drop_tables)
 
     for lister, entrypoint in LISTERS.items():
         registry_entry = registry[lister]
-        init_hook = registry_entry.get('init')
+        init_hook = registry_entry.get("init")
         if callable(init_hook):
-            logger.info('Calling init hook for %s', lister)
+            logger.info("Calling init hook for %s", lister)
             init_hook(db_engine)
 
 
-@lister.command(name='run', context_settings=CONTEXT_SETTINGS,
-                help='Trigger a full listing run for a particular forge '
-                     'instance. The output of this listing results in '
-                     '"oneshot" tasks in the scheduler db with a priority '
-                     'defined by the user')
-@click.option('--lister', '-l', help='Lister to run',
-              type=click.Choice(SUPPORTED_LISTERS))
-@click.option('--priority', '-p', default='high',
-              type=click.Choice(['high', 'medium', 'low']),
-              help='Task priority for the listed repositories to ingest')
-@click.argument('options', nargs=-1)
+@lister.command(
+    name="run",
+    context_settings=CONTEXT_SETTINGS,
+    help="Trigger a full listing run for a particular forge "
+    "instance. The output of this listing results in "
+    '"oneshot" tasks in the scheduler db with a priority '
+    "defined by the user",
+)
+@click.option(
+    "--lister", "-l", help="Lister to run", type=click.Choice(SUPPORTED_LISTERS)
+)
+@click.option(
+    "--priority",
+    "-p",
+    default="high",
+    type=click.Choice(["high", "medium", "low"]),
+    help="Task priority for the listed repositories to ingest",
+)
+@click.argument("options", nargs=-1)
 @click.pass_context
 def run(ctx, lister, priority, options):
     from swh.scheduler.cli.utils import parse_options
 
-    config = deepcopy(ctx.obj['config'])
+    config = deepcopy(ctx.obj["config"])
 
     if options:
         config.update(parse_options(options)[1])
 
-    config['priority'] = priority
-    config['policy'] = 'oneshot'
+    config["priority"] = priority
+    config["policy"] = "oneshot"
 
     get_lister(lister, **config).run()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     lister()
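
Since the `lister` group is registered under the `[swh.cli.subcommands]` entry point in setup.py above, these commands are normally reached through the umbrella `swh` CLI. A plausible invocation, using the flags defined above (the URLs and values are illustrative, not taken from this commit):

    swh lister db-init -d postgresql:///lister
    swh lister run --lister cgit --priority high url=https://git.kernel.org/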

swh/lister/core/abstractattribute.py
@@ -20,8 +20,9 @@ class AbstractAttribute:
         AbstractAttribute('docstring for foo')
 
     """
+
     __isabstractmethod__ = True
 
     def __init__(self, docstring=None):
         if docstring is not None:
-            self.__doc__ = 'AbstractAttribute: ' + docstring
+            self.__doc__ = "AbstractAttribute: " + docstring

swh/lister/core/indexing_lister.py
@@ -49,18 +49,19 @@ class IndexingLister(ListerBase):
         def get_next_target_from_response
 
     """
+
     flush_packet_db = 20
     """Number of iterations in-between write flushes of lister repositories to
        db (see fn:`run`).
     """
-    default_min_bound = ''
+    default_min_bound = ""
     """Default initialization value for the minimum boundary index to use when
        undefined (see fn:`run`).
     """
 
     @abc.abstractmethod
     def get_next_target_from_response(
-            self, response: Response
+        self, response: Response
     ) -> Union[Optional[datetime], Optional[str], Optional[int]]:
         """Find the next server endpoint identifier given the entire response.
 
@@ -78,14 +79,16 @@ class IndexingLister(ListerBase):
     # You probably don't need to override anything below this line.
 
     def filter_before_inject(
-            self, models_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        self, models_list: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
         """Overrides ListerBase.filter_before_inject
 
         Bounds query results by this Lister's set max_index.
         """
         models_list = [
-            m for m in models_list
-            if self.is_within_bounds(m['indexable'], None, self.max_index)
+            m
+            for m in models_list
+            if self.is_within_bounds(m["indexable"], None, self.max_index)
         ]
         return models_list
 
@@ -108,7 +111,7 @@ class IndexingLister(ListerBase):
         return retlist
 
     def db_partition_indices(
-            self, partition_size: int
+        self, partition_size: int
     ) -> List[Tuple[Optional[int], Optional[int]]]:
         """Describe an index-space compartmentalization of the db table
         in equal sized chunks. This is used to describe min&max bounds for
@@ -135,14 +138,19 @@ class IndexingLister(ListerBase):
             return []
 
         if isinstance(min_index, str):
+
             def format_bound(bound):
                 return bound.isoformat()
+
             min_index = dateutil.parser.parse(min_index)
             max_index = dateutil.parser.parse(max_index)
         elif isinstance(max_index - min_index, int):
+
             def format_bound(bound):
                 return int(bound)
+
         else:
+
             def format_bound(bound):
                 return bound
 
@@ -156,9 +164,7 @@ class IndexingLister(ListerBase):
 
         # Trim duplicate bounds
         bounds.append(None)
-        bounds = [cur
-                  for cur, next in zip(bounds[:-1], bounds[1:])
-                  if cur != next]
+        bounds = [cur for cur, next in zip(bounds[:-1], bounds[1:]) if cur != next]
 
         # Remove bounds for lowest and highest partition
         bounds[0] = bounds[-1] = None
@@ -204,8 +210,9 @@ class IndexingLister(ListerBase):
         deleted_repos = self.winnow_models(
             self.db_query_range(start, end), self.MODEL.uid, keep_these
         )
-        tasks_to_disable = [repo.task_id for repo in deleted_repos
-                            if repo.task_id is not None]
+        tasks_to_disable = [
+            repo.task_id for repo in deleted_repos if repo.task_id is not None
+        ]
         if tasks_to_disable:
             self.scheduler.disable_tasks(tasks_to_disable)
         for repo in deleted_repos:
@@ -224,7 +231,7 @@ class IndexingLister(ListerBase):
         Returns:
             nothing
         """
-        status = 'uneventful'
+        status = "uneventful"
        self.min_index = min_bound
        self.max_index = max_bound
 
@@ -233,7 +240,7 @@ class IndexingLister(ListerBase):
             for i in count(1):
                 response, injected_repos = self.ingest_data(index)
                 if not response and not injected_repos:
-                    logger.info('No response from api server, stopping')
+                    logger.info("No response from api server, stopping")
                     return
 
                 next_index = self.get_next_target_from_response(response)
@@ -243,23 +250,22 @@ class IndexingLister(ListerBase):
 
                 # termination condition
                 if next_index is None or next_index == index:
-                    logger.info('stopping after index %s, no next link found',
-                                index)
+                    logger.info("stopping after index %s, no next link found", index)
                     return
                 index = next_index
-                logger.debug('Index: %s', index)
+                logger.debug("Index: %s", index)
                 yield i
 
         for i in ingest_indexes():
             if (i % self.flush_packet_db) == 0:
-                logger.debug('Flushing updates at index %s', i)
+                logger.debug("Flushing updates at index %s", i)
                 self.db_session.commit()
                 self.db_session = self.mk_session()
-                status = 'eventful'
+                status = "eventful"
 
         self.db_session.commit()
         self.db_session = self.mk_session()
-        return {'status': status}
+        return {"status": status}
 
 
 class IndexingHttpLister(ListerHttpTransport, IndexingLister):

swh/lister/core/lister_base.py
@@ -68,11 +68,12 @@ class ListerBase(abc.ABC, config.SWHConfig):
     """
 
     MODEL = AbstractAttribute(
-        'Subclass type (not instance) of swh.lister.core.models.ModelBase '
-        'customized for a specific service.'
+        "Subclass type (not instance) of swh.lister.core.models.ModelBase "
+        "customized for a specific service."
     )  # type: Union[AbstractAttribute, Type[Any]]
     LISTER_NAME = AbstractAttribute(
-        "Lister's name")  # type: Union[AbstractAttribute, str]
+        "Lister's name"
+    )  # type: Union[AbstractAttribute, str]
 
     def transport_request(self, identifier):
         """Given a target endpoint identifier to query, try once to request it.
@@ -138,8 +139,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
         """
         pass
 
-    def filter_before_inject(
-            self, models_list: List[Dict]) -> List[Dict]:
+    def filter_before_inject(self, models_list: List[Dict]) -> List[Dict]:
         """Filter models_list entries prior to injection in the db.
         This is ran directly after `transport_response_simplified`.
 
@@ -154,8 +154,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
         """
         return models_list
 
-    def do_additional_checks(
-            self, models_list: List[Dict]) -> List[Dict]:
+    def do_additional_checks(self, models_list: List[Dict]) -> List[Dict]:
         """Execute some additional checks on the model list (after the
         filtering).
 
@@ -173,8 +172,8 @@ class ListerBase(abc.ABC, config.SWHConfig):
         return models_list
 
     def is_within_bounds(
-            self, inner: int,
-            lower: Optional[int] = None, upper: Optional[int] = None) -> bool:
+        self, inner: int, lower: Optional[int] = None, upper: Optional[int] = None
+    ) -> bool:
         """See if a sortable value is inside the range [lower,upper].
 
         MAY BE OVERRIDDEN, for example if the server indexable* key is
@@ -201,11 +200,15 @@ class ListerBase(abc.ABC, config.SWHConfig):
 
                 self.string_pattern_check(inner, lower, upper)
         except Exception as e:
-            logger.error(str(e) + ': %s, %s, %s' %
-                         (('inner=%s%s' % (type(inner), inner)),
-                          ('lower=%s%s' % (type(lower), lower)),
-                          ('upper=%s%s' % (type(upper), upper)))
-                         )
+            logger.error(
+                str(e)
+                + ": %s, %s, %s"
+                % (
+                    ("inner=%s%s" % (type(inner), inner)),
+                    ("lower=%s%s" % (type(lower), lower)),
+                    ("upper=%s%s" % (type(upper), upper)),
+                )
+            )
             raise
 
         return ret
@@ -213,30 +216,23 @@ class ListerBase(abc.ABC, config.SWHConfig):
     # You probably don't need to override anything below this line.
 
     DEFAULT_CONFIG = {
-        'scheduler': ('dict', {
-            'cls': 'remote',
-            'args': {
-                'url': 'http://localhost:5008/'
-            },
-        }),
-        'lister': ('dict', {
-            'cls': 'local',
-            'args': {
-                'db': 'postgresql:///lister',
-            },
-        }),
+        "scheduler": (
+            "dict",
+            {"cls": "remote", "args": {"url": "http://localhost:5008/"},},
+        ),
+        "lister": ("dict", {"cls": "local", "args": {"db": "postgresql:///lister",},}),
     }
 
     @property
     def CONFIG_BASE_FILENAME(self):  # noqa: N802
-        return 'lister_%s' % self.LISTER_NAME
+        return "lister_%s" % self.LISTER_NAME
 
     @property
     def ADDITIONAL_CONFIG(self):  # noqa: N802
         return {
-            'credentials': ('dict', {}),
-            'cache_responses': ('bool', False),
-            'cache_dir': ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME),
+            "credentials": ("dict", {}),
+            "cache_responses": ("bool", False),
+            "cache_dir": ("str", "~/.cache/swh/lister/%s" % self.LISTER_NAME),
         }
 
     INITIAL_BACKOFF = 10
@@ -245,21 +241,21 @@ class ListerBase(abc.ABC, config.SWHConfig):
 
     def __init__(self, override_config=None):
         self.backoff = self.INITIAL_BACKOFF
-        logger.debug('Loading config from %s' % self.CONFIG_BASE_FILENAME)
+        logger.debug("Loading config from %s" % self.CONFIG_BASE_FILENAME)
         self.config = self.parse_config_file(
             base_filename=self.CONFIG_BASE_FILENAME,
-            additional_configs=[self.ADDITIONAL_CONFIG]
+            additional_configs=[self.ADDITIONAL_CONFIG],
         )
-        self.config['cache_dir'] = os.path.expanduser(self.config['cache_dir'])
-        if self.config['cache_responses']:
-            config.prepare_folders(self.config, 'cache_dir')
+        self.config["cache_dir"] = os.path.expanduser(self.config["cache_dir"])
+        if self.config["cache_responses"]:
+            config.prepare_folders(self.config, "cache_dir")
 
         if override_config:
             self.config.update(override_config)
 
-        logger.debug('%s CONFIG=%s' % (self, self.config))
-        self.scheduler = get_scheduler(**self.config['scheduler'])
-        self.db_engine = create_engine(self.config['lister']['args']['db'])
+        logger.debug("%s CONFIG=%s" % (self, self.config))
+        self.scheduler = get_scheduler(**self.config["scheduler"])
+        self.db_engine = create_engine(self.config["lister"]["args"]["db"])
         self.mk_session = sessionmaker(bind=self.db_engine)
         self.db_session = self.mk_session()
 
@@ -285,7 +281,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
             server response
         """
         retries_left = self.MAX_RETRIES
-        do_cache = self.config['cache_responses']
+        do_cache = self.config["cache_responses"]
         r = None
         while retries_left > 0:
             try:
@@ -293,8 +289,9 @@ class ListerBase(abc.ABC, config.SWHConfig):
             except FetchError:
                 # network-level connection error, try again
                 logger.warning(
-                    'connection error on %s: sleep for %d seconds' %
-                    (identifier, self.CONN_SLEEP))
+                    "connection error on %s: sleep for %d seconds"
+                    % (identifier, self.CONN_SLEEP)
+                )
                 time.sleep(self.CONN_SLEEP)
                 retries_left -= 1
                 continue
@@ -306,8 +303,8 @@ class ListerBase(abc.ABC, config.SWHConfig):
             must_retry, delay = self.transport_quota_check(r)
             if must_retry:
                 logger.warning(
-                    'rate limited on %s: sleep for %f seconds' %
-                    (identifier, delay))
+                    "rate limited on %s: sleep for %f seconds" % (identifier, delay)
+                )
                 time.sleep(delay)
             else:  # request ok
                 break
@@ -315,8 +312,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
             retries_left -= 1
 
         if not retries_left:
-            logger.warning(
-                'giving up on %s: max retries exceeded' % identifier)
+            logger.warning("giving up on %s: max retries exceeded" % identifier)
 
         return r
 
@@ -332,8 +328,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
         """
         if isinstance(key, str):
             key = self.MODEL.__dict__[key]
-        return self.db_session.query(self.MODEL) \
-            .filter(key == value).first()
+        return self.db_session.query(self.MODEL).filter(key == value).first()
 
     def winnow_models(self, mlist, key, to_remove):
         """Given a list of models, remove any with <key> matching
@@ -358,8 +353,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
 
     def db_num_entries(self):
         """Return the known number of entries in the lister db"""
-        return self.db_session.query(func.count('*')).select_from(self.MODEL) \
-            .scalar()
+        return self.db_session.query(func.count("*")).select_from(self.MODEL).scalar()
 
     def db_inject_repo(self, model_dict):
         """Add/update a new repo to the db and mark it last_seen now.
@@ -372,7 +366,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
             object associated with the injection
 
         """
-        sql_repo = self.db_query_equal('uid', model_dict['uid'])
+        sql_repo = self.db_query_equal("uid", model_dict["uid"])
 
         if not sql_repo:
             sql_repo = self.MODEL(**model_dict)
@@ -384,8 +378,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
 
         return sql_repo
 
-    def task_dict(self, origin_type: str,
-                  origin_url: str, **kwargs) -> Dict[str, Any]:
+    def task_dict(self, origin_type: str, origin_url: str, **kwargs) -> Dict[str, Any]:
         """Return special dict format for the tasks list
 
         Args:
@@ -394,11 +387,11 @@ class ListerBase(abc.ABC, config.SWHConfig):
         Returns:
             the same information in a different form
         """
-        logger.debug('origin-url: %s, type: %s', origin_url, origin_type)
-        _type = 'load-%s' % origin_type
-        _policy = kwargs.get('policy', 'recurring')
-        priority = kwargs.get('priority')
-        kw = {'priority': priority} if priority else {}
+        logger.debug("origin-url: %s, type: %s", origin_url, origin_type)
+        _type = "load-%s" % origin_type
+        _policy = kwargs.get("policy", "recurring")
+        priority = kwargs.get("priority")
+        kw = {"priority": priority} if priority else {}
         return utils.create_task_dict(_type, _policy, url=origin_url, **kw)
 
     def string_pattern_check(self, a, b, c=None):
@@ -420,14 +413,15 @@ class ListerBase(abc.ABC, config.SWHConfig):
             pattern.
         """
         if isinstance(a, str):
-            a_pattern = re.sub('[a-zA-Z0-9]',
-                               '[a-zA-Z0-9]',
-                               re.escape(a))
-            if (isinstance(b, str) and (re.match(a_pattern, b) is None)
-                    or isinstance(c, str) and
-                    (re.match(a_pattern, c) is None)):
+            a_pattern = re.sub("[a-zA-Z0-9]", "[a-zA-Z0-9]", re.escape(a))
+            if (
+                isinstance(b, str)
+                and (re.match(a_pattern, b) is None)
+                or isinstance(c, str)
+                and (re.match(a_pattern, c) is None)
+            ):
                 logger.debug(a_pattern)
-                raise TypeError('incomparable string patterns detected')
+                raise TypeError("incomparable string patterns detected")
 
     def inject_repo_data_into_db(self, models_list: List[Dict]) -> Dict:
         """Inject data into the db.
@@ -441,11 +435,12 @@ class ListerBase(abc.ABC, config.SWHConfig):
         """
         injected_repos = {}
         for m in models_list:
-            injected_repos[m['uid']] = self.db_inject_repo(m)
+            injected_repos[m["uid"]] = self.db_inject_repo(m)
         return injected_repos
 
     def schedule_missing_tasks(
-            self, models_list: List[Dict], injected_repos: Dict) -> None:
+        self, models_list: List[Dict], injected_repos: Dict
+    ) -> None:
         """Schedule any newly created db entries that do not have been
         scheduled yet.
 
@@ -463,20 +458,17 @@ class ListerBase(abc.ABC, config.SWHConfig):
         tasks = {}
 
         def _task_key(m):
-            return '%s-%s' % (
-                m['type'],
-                json.dumps(m['arguments'], sort_keys=True)
-            )
+            return "%s-%s" % (m["type"], json.dumps(m["arguments"], sort_keys=True))
 
         for m in models_list:
-            ir = injected_repos[m['uid']]
+            ir = injected_repos[m["uid"]]
             if not ir.task_id:
                 # Patching the model instance to add the policy/priority task
                 # scheduling
-                if 'policy' in self.config:
-                    m['policy'] = self.config['policy']
-                if 'priority' in self.config:
-                    m['priority'] = self.config['priority']
+                if "policy" in self.config:
+                    m["policy"] = self.config["policy"]
+                if "priority" in self.config:
+                    m["priority"] = self.config["priority"]
                 task_dict = self.task_dict(**m)
                 tasks[_task_key(task_dict)] = (ir, m, task_dict)
 
@@ -485,7 +477,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
         new_tasks = self.scheduler.create_tasks(list(grouped_tasks))
         for task in new_tasks:
             ir, m, _ = tasks[_task_key(task)]
-            ir.task_id = task['id']
+            ir.task_id = task["id"]
 
     def ingest_data(self, identifier: int, checks: bool = False):
         """The core data fetch sequence. Request server endpoint. Simplify and
@@ -523,13 +515,7 @@ class ListerBase(abc.ABC, config.SWHConfig):
         """
         datepath = utcnow().isoformat()
 
-        fname = os.path.join(
-            self.config['cache_dir'],
-            datepath + '.gz',
-        )
+        fname = os.path.join(self.config["cache_dir"], datepath + ".gz",)
 
-        with gzip.open(fname, 'w') as f:
-            f.write(bytes(
-                self.transport_response_to_string(response),
-                'UTF-8'
-            ))
+        with gzip.open(fname, "w") as f:
+            f.write(bytes(self.transport_response_to_string(response), "UTF-8"))
|
@ -29,14 +29,14 @@ class ListerHttpTransport(abc.ABC):

    To be used in conjunction with ListerBase or a subclass of it.
    """

    DEFAULT_URL = None  # type: Optional[str]
    PATH_TEMPLATE = \
        AbstractAttribute(
            'string containing a python string format pattern that produces'
            ' the API endpoint path for listing stored repositories when given'
            ' an index, e.g., "/repositories?after=%s". To be implemented in'
            ' the API-specific class inheriting this.'
        )  # type: Union[AbstractAttribute, Optional[str]]
    PATH_TEMPLATE = AbstractAttribute(
        "string containing a python string format pattern that produces"
        " the API endpoint path for listing stored repositories when given"
        ' an index, e.g., "/repositories?after=%s". To be implemented in'
        " the API-specific class inheriting this."
    )  # type: Union[AbstractAttribute, Optional[str]]

    EXPECTED_STATUS_CODES = (200, 429, 403, 404)

@ -45,9 +45,7 @@ class ListerHttpTransport(abc.ABC):

        MAY BE OVERRIDDEN if request headers are needed.
        """
        return {
            'User-Agent': USER_AGENT_TEMPLATE % self.lister_version
        }
        return {"User-Agent": USER_AGENT_TEMPLATE % self.lister_version}

    def request_instance_credentials(self) -> List[Dict[str, Any]]:
        """Returns dictionary of any credentials configuration needed by the

@ -82,7 +80,7 @@ class ListerHttpTransport(abc.ABC):
        list of credential dicts for the current lister.

        """
        all_creds = self.config.get('credentials')  # type: ignore
        all_creds = self.config.get("credentials")  # type: ignore
        if not all_creds:
            return []
        lister_creds = all_creds.get(self.LISTER_NAME, {})  # type: ignore

@ -110,14 +108,16 @@ class ListerHttpTransport(abc.ABC):

        """
        params = {}
        params['headers'] = self.request_headers() or {}
        params["headers"] = self.request_headers() or {}
        creds = self.request_instance_credentials()
        if not creds:
            return params
        auth = random.choice(creds) if creds else None
        if auth:
            params['auth'] = (auth['username'],  # type: ignore
                              auth['password'])
            params["auth"] = (
                auth["username"],  # type: ignore
                auth["password"],
            )
        return params
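For context, the dict built by `request_params` is meant to be splatted into `requests` calls, and picking one credential at random spreads requests across the configured accounts. A rough self-contained sketch of the same pattern (the credential list and URL are invented for the example):

    import random
    import requests

    creds = [
        {"username": "bot1", "password": "token1"},
        {"username": "bot2", "password": "token2"},
    ]

    params = {"headers": {"User-Agent": "Software Heritage Lister"}}
    auth = random.choice(creds) if creds else None
    if auth:
        # requests accepts a (user, password) tuple for HTTP basic auth
        params["auth"] = (auth["username"], auth["password"])
    # requests.get("https://forge.example.org/api", **params) would then
    # authenticate as a randomly chosen account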
    def transport_quota_check(self, response):

@ -130,7 +130,7 @@ class ListerHttpTransport(abc.ABC):

        """
        if response.status_code == 429:  # HTTP too many requests
            retry_after = response.headers.get('Retry-After', self.back_off())
            retry_after = response.headers.get("Retry-After", self.back_off())
            try:
                # might be seconds
                return True, float(retry_after)
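Note that `Retry-After` may be either a number of seconds or an HTTP date; the `try` above covers the numeric form, and the rest of the method (outside this hunk) presumably falls back to the other one. A hedged sketch of parsing both forms:

    import email.utils
    import time

    def parse_retry_after(value, default=10.0):
        # numeric form, e.g. "120"
        try:
            return float(value)
        except ValueError:
            pass
        # HTTP-date form, e.g. "Wed, 21 Oct 2015 07:28:00 GMT"
        try:
            dt = email.utils.parsedate_to_datetime(value)
            return max(0.0, dt.timestamp() - time.time())
        except (TypeError, ValueError):
            return default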
@ -145,17 +145,16 @@ class ListerHttpTransport(abc.ABC):

    def __init__(self, url=None):
        if not url:
            url = self.config.get('url')
            url = self.config.get("url")
        if not url:
            url = self.DEFAULT_URL
        if not url:
            raise NameError('HTTP Lister Transport requires an url.')
            raise NameError("HTTP Lister Transport requires an url.")
        self.url = url  # eg. 'https://api.github.com'
        self.session = requests.Session()
        self.lister_version = __version__

    def _transport_action(
            self, identifier: str, method: str = 'get') -> Response:
    def _transport_action(self, identifier: str, method: str = "get") -> Response:
        """Permit to ask information to the api prior to actually executing
        query.

@ -163,16 +162,16 @@ class ListerHttpTransport(abc.ABC):
        path = self.request_uri(identifier)
        params = self.request_params(identifier)

        logger.debug('path: %s', path)
        logger.debug('params: %s', params)
        logger.debug('method: %s', method)
        logger.debug("path: %s", path)
        logger.debug("params: %s", params)
        logger.debug("method: %s", method)
        try:
            if method == 'head':
            if method == "head":
                response = self.session.head(path, **params)
            else:
                response = self.session.get(path, **params)
        except requests.exceptions.ConnectionError as e:
            logger.warning('Failed to fetch %s: %s', path, e)
            logger.warning("Failed to fetch %s: %s", path, e)
            raise FetchError(e)
        else:
            if response.status_code not in self.EXPECTED_STATUS_CODES:

@ -183,7 +182,7 @@ class ListerHttpTransport(abc.ABC):
        """Retrieve head information on api.

        """
        return self._transport_action(identifier, method='head')
        return self._transport_action(identifier, method="head")

    def transport_request(self, identifier: str) -> Response:
        """Implements ListerBase.transport_request for HTTP using Requests.

@ -198,10 +197,10 @@ class ListerHttpTransport(abc.ABC):
        Requests responses.
        """
        s = pformat(response.request.path_url)
        s += '\n#\n' + pformat(response.request.headers)
        s += '\n#\n' + pformat(response.status_code)
        s += '\n#\n' + pformat(response.headers)
        s += '\n#\n'
        s += "\n#\n" + pformat(response.request.headers)
        s += "\n#\n" + pformat(response.status_code)
        s += "\n#\n" + pformat(response.headers)
        s += "\n#\n"
        try:  # json?
            s += pformat(response.json())
        except Exception:  # not json

@ -219,9 +218,10 @@ class ListerOnePageApiTransport(ListerHttpTransport):
    To be used in conjunction with ListerBase or a subclass of it.

    """

    PAGE = AbstractAttribute(
        "URL of the API's unique page to retrieve and parse "
        "for information")  # type: Union[AbstractAttribute, str]
        "URL of the API's unique page to retrieve and parse " "for information"
    )  # type: Union[AbstractAttribute, str]
    PATH_TEMPLATE = None  # we do not use it

    def __init__(self, url=None):

@ -25,12 +25,12 @@ class ABCSQLMeta(abc.ABCMeta, DeclarativeMeta):

class ModelBase(SQLBase, metaclass=ABCSQLMeta):
    """a common repository"""

    __abstract__ = True
    __tablename__ = \
        AbstractAttribute  # type: Union[Type[AbstractAttribute], str]
    __tablename__ = AbstractAttribute  # type: Union[Type[AbstractAttribute], str]

    uid = AbstractAttribute(
        'Column(<uid_type>, primary_key=True)'
        "Column(<uid_type>, primary_key=True)"
    )  # type: Union[AbstractAttribute, Column]

    name = Column(String, index=True)

@ -44,19 +44,18 @@ class ModelBase(SQLBase, metaclass=ABCSQLMeta):
    task_id = Column(Integer)

    def __init__(self, **kw):
        kw['last_seen'] = datetime.now()
        kw["last_seen"] = datetime.now()
        super().__init__(**kw)


class IndexingModelBase(ModelBase, metaclass=ABCSQLMeta):
    __abstract__ = True
    __tablename__ = \
        AbstractAttribute  # type: Union[Type[AbstractAttribute], str]
    __tablename__ = AbstractAttribute  # type: Union[Type[AbstractAttribute], str]

    # The value used for sorting, segmenting, or api query paging,
    # because uids aren't always sequential.
    indexable = AbstractAttribute(
        'Column(<indexable_type>, index=True)'
        "Column(<indexable_type>, index=True)"
    )  # type: Union[AbstractAttribute, Column]


@ -72,8 +71,8 @@ def initialize(db_engine, drop_tables=False, **kwargs):
    (re)creating them.
    """
    if drop_tables:
        logger.info('Dropping tables')
        logger.info("Dropping tables")
        SQLBase.metadata.drop_all(db_engine, checkfirst=True)

    logger.info('Creating tables')
    logger.info("Creating tables")
    SQLBase.metadata.create_all(db_engine, checkfirst=True)

@ -37,6 +37,7 @@ class PageByPageLister(ListerBase):
        def get_next_target_from_response

    """

    @abc.abstractmethod
    def get_next_target_from_response(self, response):
        """Find the next server endpoint page given the entire response.

@ -87,7 +88,7 @@ class PageByPageLister(ListerBase):

        """
        for m in models_list:
            sql_repo = self.db_query_equal('uid', m['uid'])
            sql_repo = self.db_query_equal("uid", m["uid"])
            if sql_repo:
                return False
        return models_list

@ -110,7 +111,7 @@ class PageByPageLister(ListerBase):
            nothing

        """
        status = 'uneventful'
        status = "uneventful"
        page = min_bound or 0
        loop_count = 0

@ -118,32 +119,30 @@ class PageByPageLister(ListerBase):
            self.max_page = max_bound

        while self.is_within_bounds(page, self.min_page, self.max_page):
            logging.info('listing repos starting at %s' % page)
            logging.info("listing repos starting at %s" % page)

            response, injected_repos = self.ingest_data(page,
                                                        checks=check_existence)
            response, injected_repos = self.ingest_data(page, checks=check_existence)
            if not response and not injected_repos:
                logging.info('No response from api server, stopping')
                logging.info("No response from api server, stopping")
                break
            elif not injected_repos:
                logging.info('Repositories already seen, stopping')
                logging.info("Repositories already seen, stopping")
                break
            status = 'eventful'
            status = "eventful"

            next_page = self.get_next_target_from_response(response)

            # termination condition

            if (next_page is None) or (next_page == page):
                logging.info('stopping after page %s, no next link found' %
                             page)
                logging.info("stopping after page %s, no next link found" % page)
                break
            else:
                page = next_page

            loop_count += 1
            if loop_count == 20:
                logging.info('flushing updates')
                logging.info("flushing updates")
                loop_count = 0
                self.db_session.commit()
                self.db_session = self.mk_session()

@ -151,7 +150,7 @@ class PageByPageLister(ListerBase):
        self.db_session.commit()
        self.db_session = self.mk_session()

        return {'status': status}
        return {"status": status}


class PageByPageHttpLister(ListerHttpTransport, PageByPageLister):

@ -159,6 +158,7 @@ class PageByPageHttpLister(ListerHttpTransport, PageByPageLister):
    combining PageByPageLister and ListerHttpTransport.

    """

    def __init__(self, url=None, override_config=None):
        PageByPageLister.__init__(self, override_config=override_config)
        ListerHttpTransport.__init__(self, url=url)

@ -24,6 +24,7 @@ class SimpleLister(ListerBase):
    information and stores those in db

    """

    flush_packet_db = 2
    """Number of iterations in-between write flushes of lister repositories to
    db (see fn:`ingest_data`).

@ -57,14 +58,14 @@ class SimpleLister(ListerBase):
        all_injected = []
        for i, models in enumerate(utils.grouper(models_list, n=100), start=1):
            models = list(models)
            logging.debug('models: %s' % len(models))
            logging.debug("models: %s" % len(models))
            # inject into local db
            injected = self.inject_repo_data_into_db(models)
            # queue workers
            self.schedule_missing_tasks(models, injected)
            all_injected.append(injected)
            if (i % self.flush_packet_db) == 0:
                logger.debug('Flushing updates at index %s', i)
                logger.debug("Flushing updates at index %s", i)
                self.db_session.commit()
                self.db_session = self.mk_session()
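`utils.grouper` batches the potentially huge model list so that at most 100 rows are injected per iteration and commits happen at a bounded cadence. A generic batching helper along these lines (a sketch only; the project's own `utils.grouper` may differ in details):

    import itertools

    def grouper(iterable, n):
        # yield successive chunks of at most n items each
        it = iter(iterable)
        while True:
            chunk = list(itertools.islice(it, n))
            if not chunk:
                return
            yield chunk

    assert list(grouper(range(5), 2)) == [[0, 1], [2, 3], [4]]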
@ -88,9 +89,9 @@ class SimpleLister(ListerBase):
        dump_not_used_identifier = 0
        response, injected_repos = self.ingest_data(dump_not_used_identifier)
        if not response and not injected_repos:
            logging.info('No response from api server, stopping')
            status = 'uneventful'
            logging.info("No response from api server, stopping")
            status = "uneventful"
        else:
            status = 'eventful'
            status = "eventful"

        return {'status': status}
        return {"status": status}

@ -19,13 +19,14 @@ logger = logging.getLogger(__name__)

@pytest.fixture
def swh_listers(request, postgresql_proc, postgresql, swh_scheduler):
    db_url = 'postgresql://{user}@{host}:{port}/{dbname}'.format(
        host=postgresql_proc.host,
        port=postgresql_proc.port,
        user='postgres',
        dbname='tests')
    db_url = "postgresql://{user}@{host}:{port}/{dbname}".format(
        host=postgresql_proc.host,
        port=postgresql_proc.port,
        user="postgres",
        dbname="tests",
    )

    logger.debug('lister db_url: %s', db_url)
    logger.debug("lister db_url: %s", db_url)

    listers = {}

@ -37,11 +38,13 @@ def swh_listers(request, postgresql_proc, postgresql, swh_scheduler):
    initialize(create_engine(db_url), drop_tables=True)

    # Add the load-archive-files expected by some listers (gnu, cran, ...)
    swh_scheduler.create_task_type({
        'type': 'load-archive-files',
        'description': 'Load archive files.',
        'backend_name': 'swh.loader.package.tasks.LoadArchive',
        'default_interval': '1 day',
    })
    swh_scheduler.create_task_type(
        {
            "type": "load-archive-files",
            "description": "Load archive files.",
            "backend_name": "swh.loader.package.tasks.LoadArchive",
            "default_interval": "1 day",
        }
    )

    return listers

@ -13,8 +13,8 @@ from swh.lister.core.abstractattribute import AbstractAttribute
class BaseClass(abc.ABC):
    v1 = AbstractAttribute  # type: Any
    v2 = AbstractAttribute()  # type: Any
    v3 = AbstractAttribute('changed docstring')  # type: Any
    v4 = 'qux'
    v3 = AbstractAttribute("changed docstring")  # type: Any
    v4 = "qux"


class BadSubclass1(BaseClass):

@ -22,19 +22,19 @@ class BadSubclass1(BaseClass):


class BadSubclass2(BaseClass):
    v1 = 'foo'
    v2 = 'bar'
    v1 = "foo"
    v2 = "bar"


class BadSubclass3(BaseClass):
    v2 = 'bar'
    v3 = 'baz'
    v2 = "bar"
    v3 = "baz"


class GoodSubclass(BaseClass):
    v1 = 'foo'
    v2 = 'bar'
    v3 = 'baz'
    v1 = "foo"
    v2 = "bar"
    v3 = "baz"


class TestAbstractAttributes(unittest.TestCase):

@ -54,13 +54,12 @@ class TestAbstractAttributes(unittest.TestCase):
        self.assertIsInstance(GoodSubclass(), GoodSubclass)
        gsc = GoodSubclass()

        self.assertEqual(gsc.v1, 'foo')
        self.assertEqual(gsc.v2, 'bar')
        self.assertEqual(gsc.v3, 'baz')
        self.assertEqual(gsc.v4, 'qux')
        self.assertEqual(gsc.v1, "foo")
        self.assertEqual(gsc.v2, "bar")
        self.assertEqual(gsc.v3, "baz")
        self.assertEqual(gsc.v4, "qux")

    def test_aa_docstrings(self):
        self.assertEqual(BaseClass.v1.__doc__, AbstractAttribute.__doc__)
        self.assertEqual(BaseClass.v2.__doc__, AbstractAttribute.__doc__)
        self.assertEqual(BaseClass.v3.__doc__,
                         'AbstractAttribute: changed docstring')
        self.assertEqual(BaseClass.v3.__doc__, "AbstractAttribute: changed docstring")

@ -9,7 +9,7 @@ from swh.lister.core.indexing_lister import IndexingLister

class MockedIndexingListerDbPartitionIndices(IndexingLister):
    # Abstract Attribute boilerplate
    LISTER_NAME = 'DbPartitionIndices'
    LISTER_NAME = "DbPartitionIndices"
    MODEL = type(None)

    # ABC boilerplate

@ -33,9 +33,7 @@ class MockedIndexingListerDbPartitionIndices(IndexingLister):

def test_db_partition_indices():
    m = MockedIndexingListerDbPartitionIndices(
        num_entries=1000,
        first_index=1,
        last_index=10001,
        num_entries=1000, first_index=1, last_index=10001,
    )
    assert m
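This hunk shows black 19.10b0's handling of a call that fits once the arguments are joined: it collapses them onto one line and keeps the pre-existing trailing comma. (As a caveat, later black releases treat such a "magic" trailing comma as a request to keep one argument per line, so rerunning a newer black would re-expand these calls.) Schematically:

    # before: one keyword argument per line
    m = MockedIndexingListerDbPartitionIndices(
        num_entries=1000,
        first_index=1,
        last_index=10001,
    )

    # after black 19.10b0: arguments joined, trailing comma preserved
    m = MockedIndexingListerDbPartitionIndices(
        num_entries=1000, first_index=1, last_index=10001,
    )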
|
@ -49,9 +47,7 @@ def test_db_partition_indices():
|
|||
|
||||
def test_db_partition_indices_zero_first():
|
||||
m = MockedIndexingListerDbPartitionIndices(
|
||||
num_entries=1000,
|
||||
first_index=0,
|
||||
last_index=10000,
|
||||
num_entries=1000, first_index=0, last_index=10000,
|
||||
)
|
||||
assert m
|
||||
|
||||
|
@ -65,9 +61,7 @@ def test_db_partition_indices_zero_first():
|
|||
|
||||
def test_db_partition_indices_small_index_range():
|
||||
m = MockedIndexingListerDbPartitionIndices(
|
||||
num_entries=5000,
|
||||
first_index=0,
|
||||
last_index=5,
|
||||
num_entries=5000, first_index=0, last_index=5,
|
||||
)
|
||||
assert m
|
||||
|
||||
|
@ -78,8 +72,8 @@ def test_db_partition_indices_small_index_range():
|
|||
|
||||
def test_db_partition_indices_date_indices():
|
||||
# 24 hour delta
|
||||
first = datetime.datetime.fromisoformat('2019-11-01T00:00:00+00:00')
|
||||
last = datetime.datetime.fromisoformat('2019-11-02T00:00:00+00:00')
|
||||
first = datetime.datetime.fromisoformat("2019-11-01T00:00:00+00:00")
|
||||
last = datetime.datetime.fromisoformat("2019-11-02T00:00:00+00:00")
|
||||
|
||||
m = MockedIndexingListerDbPartitionIndices(
|
||||
# one entry per second
|
||||
|
@ -102,9 +96,7 @@ def test_db_partition_indices_date_indices():
|
|||
|
||||
def test_db_partition_indices_float_index_range():
|
||||
m = MockedIndexingListerDbPartitionIndices(
|
||||
num_entries=10000,
|
||||
first_index=0.0,
|
||||
last_index=1.0,
|
||||
num_entries=10000, first_index=0.0, last_index=1.0,
|
||||
)
|
||||
assert m
|
||||
|
||||
|
@ -120,9 +112,7 @@ def test_db_partition_indices_float_index_range():
|
|||
|
||||
def test_db_partition_indices_uneven_int_index_range():
|
||||
m = MockedIndexingListerDbPartitionIndices(
|
||||
num_entries=5641,
|
||||
first_index=0,
|
||||
last_index=10000,
|
||||
num_entries=5641, first_index=0, last_index=10000,
|
||||
)
|
||||
assert m
|
||||
|
||||
|
|
|
@ -22,8 +22,9 @@ def noop(*args, **kwargs):


def test_version_generation():
    assert swh.lister.__version__ != 'devel', \
        "Make sure swh.lister is installed (e.g. pip install -e .)"
    assert (
        swh.lister.__version__ != "devel"
    ), "Make sure swh.lister is installed (e.g. pip install -e .)"


class HttpListerTesterBase(abc.ABC):

@ -35,13 +36,17 @@ class HttpListerTesterBase(abc.ABC):
    to customize for a specific listing service.

    """

    Lister = AbstractAttribute(
        'Lister class to test')  # type: Union[AbstractAttribute, Type[Any]]
        "Lister class to test"
    )  # type: Union[AbstractAttribute, Type[Any]]
    lister_subdir = AbstractAttribute(
        'bitbucket, github, etc.')  # type: Union[AbstractAttribute, str]
        "bitbucket, github, etc."
    )  # type: Union[AbstractAttribute, str]
    good_api_response_file = AbstractAttribute(
        'Example good response body')  # type: Union[AbstractAttribute, str]
    LISTER_NAME = 'fake-lister'
        "Example good response body"
    )  # type: Union[AbstractAttribute, str]
    LISTER_NAME = "fake-lister"

    # May need to override this if the headers are used for something
    def response_headers(self, request):

@ -53,7 +58,7 @@ class HttpListerTesterBase(abc.ABC):
    def mock_rate_quota(self, n, request, context):
        self.rate_limit += 1
        context.status_code = 429
        context.headers['Retry-After'] = '1'
        context.headers["Retry-After"] = "1"
        return '{"error":"dummy"}'

    def __init__(self, *args, **kwargs):

@ -89,8 +94,9 @@ class HttpListerTesterBase(abc.ABC):

        """
        if override_config or self.fl is None:
            self.fl = self.Lister(url='https://fakeurl',
                                  override_config=override_config)
            self.fl = self.Lister(
                url="https://fakeurl", override_config=override_config
            )
            self.fl.INITIAL_BACKOFF = 1

        self.fl.reset_backoff()

@ -105,23 +111,25 @@ class HttpListerTesterBase(abc.ABC):
            task_id = 0
            current_nb_tasks = len(self.scheduler_tasks)
            if current_nb_tasks > 0:
                task_id = self.scheduler_tasks[-1]['id'] + 1
                task_id = self.scheduler_tasks[-1]["id"] + 1
            for task in tasks:
                scheduler_task = dict(task)
                scheduler_task.update({
                    'status': 'next_run_not_scheduled',
                    'retries_left': 0,
                    'priority': None,
                    'id': task_id,
                    'current_interval': datetime.timedelta(days=64)
                })
                scheduler_task.update(
                    {
                        "status": "next_run_not_scheduled",
                        "retries_left": 0,
                        "priority": None,
                        "id": task_id,
                        "current_interval": datetime.timedelta(days=64),
                    }
                )
                self.scheduler_tasks.append(scheduler_task)
                task_id = task_id + 1
            return self.scheduler_tasks[current_nb_tasks:]

        def _disable_tasks(task_ids):
            for task_id in task_ids:
                self.scheduler_tasks[task_id]['status'] = 'disabled'
                self.scheduler_tasks[task_id]["status"] = "disabled"

        fl.scheduler.create_tasks = Mock(wraps=_create_tasks)
        fl.scheduler.disable_tasks = Mock(wraps=_disable_tasks)

@ -167,26 +175,29 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
    to customize for a specific listing service.

    """

    last_index = AbstractAttribute(
        'Last index '
        'in good_api_response')  # type: Union[AbstractAttribute, int]
        "Last index " "in good_api_response"
    )  # type: Union[AbstractAttribute, int]
    first_index = AbstractAttribute(
        'First index in '
        ' good_api_response')  # type: Union[AbstractAttribute, Optional[int]]
        "First index in " " good_api_response"
    )  # type: Union[AbstractAttribute, Optional[int]]
    bad_api_response_file = AbstractAttribute(
        'Example bad response body')  # type: Union[AbstractAttribute, str]
        "Example bad response body"
    )  # type: Union[AbstractAttribute, str]
    entries_per_page = AbstractAttribute(
        'Number of results in '
        'good response')  # type: Union[AbstractAttribute, int]
        "Number of results in " "good response"
    )  # type: Union[AbstractAttribute, int]
    test_re = AbstractAttribute(
        'Compiled regex matching the server url. Must capture the '
        'index value.')  # type: Union[AbstractAttribute, Pattern]
        "Compiled regex matching the server url. Must capture the " "index value."
    )  # type: Union[AbstractAttribute, Pattern]
    convert_type = str  # type: Callable[..., Any]
    """static method used to convert the "request_index" to its right type (for
    indexing listers for example, this is in accordance with the model's
    "indexable" column).

    """

    def mock_response(self, request, context):
        self.fl.reset_backoff()
        self.rate_limit = 1
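A side effect visible throughout this hunk: black normalizes quotes and re-wraps lines, but it never merges adjacent string literals, which is why fragments like "Last index " "in good_api_response" stay split even when they now fit on one line. The concatenation is done by the Python parser itself:

    # adjacent string literals are concatenated at compile time
    msg = "Last index " "in good_api_response"
    assert msg == "Last index in good_api_response"

Merging them would be a textual rewrite black deliberately leaves to humans.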
@ -200,9 +211,11 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
        else:
            response_file = self.bad_api_response_file

        with open('swh/lister/%s/tests/%s' % (self.lister_subdir,
                                              response_file),
                  'r', encoding='utf-8') as r:
        with open(
            "swh/lister/%s/tests/%s" % (self.lister_subdir, response_file),
            "r",
            encoding="utf-8",
        ) as r:
            return r.read()

    def request_index(self, request):

@ -214,12 +227,9 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
        http_mocker.get(self.test_re, text=self.mock_response)
        db = init_db()

        fl = self.get_fl(override_config={
            'lister': {
                'cls': 'local',
                'args': {'db': db.url()}
            }
        })
        fl = self.get_fl(
            override_config={"lister": {"cls": "local", "args": {"db": db.url()}}}
        )
        fl.db = db
        self.init_db(db, fl.MODEL)

@ -233,8 +243,7 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
        fl.run()

        self.assertEqual(fl.db_last_index(), self.last_index)
        ingested_repos = list(fl.db_query_range(self.first_index,
                                                self.last_index))
        ingested_repos = list(fl.db_query_range(self.first_index, self.last_index))
        self.assertEqual(len(ingested_repos), self.entries_per_page)

    @requests_mock.Mocker()

@ -307,13 +316,12 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
        """
        http_mocker.get(self.test_re, text=self.mock_response)
        fl = self.get_fl()
        li = fl.transport_response_simplified(
            self.get_api_response(self.first_index))
        li = fl.transport_response_simplified(self.get_api_response(self.first_index))
        di = li[0]
        self.assertIsInstance(di, dict)
        pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith('_')]
        pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith("_")]
        for k in pubs:
            if k not in ['last_seen', 'task_id', 'id']:
            if k not in ["last_seen", "task_id", "id"]:
                self.assertIn(k, di)

    @requests_mock.Mocker()

@ -322,7 +330,7 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):

        """
        http_mocker.get(self.test_re, text=self.mock_limit_twice_response)
        with patch.object(time, 'sleep', wraps=time.sleep) as sleepmock:
        with patch.object(time, "sleep", wraps=time.sleep) as sleepmock:
            self.get_api_response(self.first_index)
        self.assertEqual(sleepmock.call_count, 2)

@ -332,13 +340,14 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
        fl.run()
        self.assertNotEqual(len(http_mocker.request_history), 0)
        for request in http_mocker.request_history:
            assert 'User-Agent' in request.headers
            user_agent = request.headers['User-Agent']
            assert 'Software Heritage Lister' in user_agent
            assert "User-Agent" in request.headers
            user_agent = request.headers["User-Agent"]
            assert "Software Heritage Lister" in user_agent
            assert swh.lister.__version__ in user_agent

    def scheduled_tasks_test(self, next_api_response_file, next_last_index,
                             http_mocker):
    def scheduled_tasks_test(
        self, next_api_response_file, next_last_index, http_mocker
    ):
        """Check that no loading tasks get disabled when processing a new
        page of repositories returned by a forge API
        """

@ -361,7 +370,7 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):

        # check tasks are not disabled
        for task in self.scheduler_tasks:
            self.assertTrue(task['status'] != 'disabled')
            self.assertTrue(task["status"] != "disabled")


class HttpSimpleListerTester(HttpListerTesterBase, abc.ABC):

@ -372,20 +381,20 @@ class HttpSimpleListerTester(HttpListerTesterBase, abc.ABC):
    to customize for a specific listing service.

    """

    entries = AbstractAttribute(
        'Number of results '
        'in good response')  # type: Union[AbstractAttribute, int]
        "Number of results " "in good response"
    )  # type: Union[AbstractAttribute, int]
    PAGE = AbstractAttribute(
        "URL of the server api's unique page to retrieve and "
        "parse for information")  # type: Union[AbstractAttribute, str]
        "URL of the server api's unique page to retrieve and " "parse for information"
    )  # type: Union[AbstractAttribute, str]

    def get_fl(self, override_config=None):
        """Retrieve an instance of fake lister (fl).

        """
        if override_config or self.fl is None:
            self.fl = self.Lister(
                override_config=override_config)
            self.fl = self.Lister(override_config=override_config)
            self.fl.INITIAL_BACKOFF = 1

        self.fl.reset_backoff()

@ -399,9 +408,11 @@ class HttpSimpleListerTester(HttpListerTesterBase, abc.ABC):
        context.headers.update(custom_headers)
        response_file = self.good_api_response_file

        with open('swh/lister/%s/tests/%s' % (self.lister_subdir,
                                              response_file),
                  'r', encoding='utf-8') as r:
        with open(
            "swh/lister/%s/tests/%s" % (self.lister_subdir, response_file),
            "r",
            encoding="utf-8",
        ) as r:
            return r.read()

    @requests_mock.Mocker()

@ -410,7 +421,7 @@ class HttpSimpleListerTester(HttpListerTesterBase, abc.ABC):

        """
        http_mocker.get(self.PAGE, text=self.mock_limit_twice_response)
        with patch.object(time, 'sleep', wraps=time.sleep) as sleepmock:
        with patch.object(time, "sleep", wraps=time.sleep) as sleepmock:
            self.get_api_response(0)
        self.assertEqual(sleepmock.call_count, 2)

@ -426,9 +437,9 @@ class HttpSimpleListerTester(HttpListerTesterBase, abc.ABC):
        li = fl.transport_response_simplified(li)
        di = li[0]
        self.assertIsInstance(di, dict)
        pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith('_')]
        pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith("_")]
        for k in pubs:
            if k not in ['last_seen', 'task_id', 'id']:
            if k not in ["last_seen", "task_id", "id"]:
                self.assertIn(k, di)

    @requests_mock.Mocker()

@ -437,8 +448,6 @@ class HttpSimpleListerTester(HttpListerTesterBase, abc.ABC):

        """
        http_mocker.get(self.PAGE, text=self.mock_response)
        li = self.get_fl().list_packages(
            self.get_api_response(0)
        )
        li = self.get_fl().list_packages(self.get_api_response(0))
        self.assertIsInstance(li, list)
        self.assertEqual(len(li), self.entries)

@ -16,7 +16,7 @@ class BadSubclass1(ModelBase):

class BadSubclass2(ModelBase):
    __abstract__ = True
    __tablename__ = 'foo'
    __tablename__ = "foo"


class BadSubclass3(BadSubclass2):

@ -36,7 +36,7 @@ class IndexingBadSubclass(IndexingModelBase):

class IndexingBadSubclass2(IndexingModelBase):
    __abstract__ = True
    __tablename__ = 'foo'
    __tablename__ = "foo"


class IndexingBadSubclass3(IndexingBadSubclass2):

@ -47,7 +47,7 @@ class IndexingBadSubclass3(IndexingBadSubclass2):
class IndexingGoodSubclass(IndexingModelBase):
    uid = Column(Integer, primary_key=True)
    indexable = Column(Integer, index=True)
    __tablename__ = 'bar'
    __tablename__ = "bar"


class TestModel(unittest.TestCase):

@ -65,10 +65,10 @@ class TestModel(unittest.TestCase):
            BadSubclass3()

        self.assertIsInstance(GoodSubclass(), GoodSubclass)
        gsc = GoodSubclass(uid='uid')
        gsc = GoodSubclass(uid="uid")

        self.assertEqual(gsc.__tablename__, 'foo')
        self.assertEqual(gsc.uid, 'uid')
        self.assertEqual(gsc.__tablename__, "foo")
        self.assertEqual(gsc.uid, "uid")

    def test_indexing_model_instancing(self):
        with self.assertRaises(TypeError):

@ -84,8 +84,8 @@ class TestModel(unittest.TestCase):
            IndexingBadSubclass3()

        self.assertIsInstance(IndexingGoodSubclass(), IndexingGoodSubclass)
        gsc = IndexingGoodSubclass(uid='uid', indexable='indexable')
        gsc = IndexingGoodSubclass(uid="uid", indexable="indexable")

        self.assertEqual(gsc.__tablename__, 'bar')
        self.assertEqual(gsc.uid, 'uid')
        self.assertEqual(gsc.indexable, 'indexable')
        self.assertEqual(gsc.__tablename__, "bar")
        self.assertEqual(gsc.uid, "uid")
        self.assertEqual(gsc.indexable, "indexable")

@ -7,7 +7,8 @@ def register():
    from .models import CRANModel
    from .lister import CRANLister

    return {'models': [CRANModel],
            'lister': CRANLister,
            'task_modules': ['%s.tasks' % __name__],
            }
    return {
        "models": [CRANModel],
        "lister": CRANLister,
        "task_modules": ["%s.tasks" % __name__],
    }

@ -19,16 +19,23 @@ from swh.scheduler.utils import create_task_dict
logger = logging.getLogger(__name__)


CRAN_MIRROR = 'https://cran.r-project.org'
CRAN_MIRROR = "https://cran.r-project.org"


class CRANLister(SimpleLister):
    MODEL = CRANModel
    LISTER_NAME = 'cran'
    instance = 'cran'
    LISTER_NAME = "cran"
    instance = "cran"

    def task_dict(self, origin_type, origin_url, version=None, html_url=None,
                  policy=None, **kwargs):
    def task_dict(
        self,
        origin_type,
        origin_url,
        version=None,
        html_url=None,
        policy=None,
        **kwargs,
    ):
        """Return task format dict. This creates tasks with args and kwargs
        set, for example::

@ -43,15 +50,15 @@ class CRANLister(SimpleLister):

        """
        if not policy:
            policy = 'oneshot'
            policy = "oneshot"
        artifact_url = html_url
        assert origin_type == 'tar'
        assert origin_type == "tar"
        return create_task_dict(
            'load-cran', policy,
            url=origin_url, artifacts=[{
                'url': artifact_url,
                'version': version
            }], retries_left=3
            "load-cran",
            policy,
            url=origin_url,
            artifacts=[{"url": artifact_url, "version": version}],
            retries_left=3,
        )
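Judging from the test assertions later in this commit, the dict returned by `create_task_dict` carries the task type, policy, retries and an `arguments` envelope, roughly this shape (field values here are illustrative):

    task = {
        "type": "load-cran",
        "policy": "oneshot",
        "retries_left": 3,
        "arguments": {
            "args": [],
            "kwargs": {
                "url": "https://cran.r-project.org/package=something",
                "artifacts": [{"url": "<artifact url>", "version": "0.0.1"}],
            },
        },
    }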
    def safely_issue_request(self, identifier):

@ -91,23 +98,22 @@ class CRANLister(SimpleLister):
        """
        return read_cran_data()

    def get_model_from_repo(
            self, repo: Mapping[str, str]) -> Mapping[str, str]:
    def get_model_from_repo(self, repo: Mapping[str, str]) -> Mapping[str, str]:
        """Transform from repository representation to model

        """
        logger.debug('repo: %s', repo)
        logger.debug("repo: %s", repo)
        origin_url, artifact_url = compute_origin_urls(repo)
        package = repo['Package']
        version = repo['Version']
        package = repo["Package"]
        version = repo["Version"]
        return {
            'uid': f'{package}-{version}',
            'name': package,
            'full_name': repo['Title'],
            'version': version,
            'html_url': artifact_url,
            'origin_url': origin_url,
            'origin_type': 'tar',
            "uid": f"{package}-{version}",
            "name": package,
            "full_name": repo["Title"],
            "version": version,
            "html_url": artifact_url,
            "origin_url": origin_url,
            "origin_type": "tar",
        }


@ -115,11 +121,10 @@ def read_cran_data() -> List[Mapping[str, str]]:
    """Execute r script to read cran listing.

    """
    filepath = pkg_resources.resource_filename('swh.lister.cran',
                                               'list_all_packages.R')
    logger.debug('script list-all-packages.R path: %s', filepath)
    filepath = pkg_resources.resource_filename("swh.lister.cran", "list_all_packages.R")
    logger.debug("script list-all-packages.R path: %s", filepath)
    response = subprocess.run(filepath, stdout=subprocess.PIPE, shell=False)
    return json.loads(response.stdout.decode('utf-8'))
    return json.loads(response.stdout.decode("utf-8"))


def compute_origin_urls(repo: Mapping[str, str]) -> Tuple[str, str]:

@ -132,8 +137,8 @@ def compute_origin_urls(repo: Mapping[str, str]) -> Tuple[str, str]:
        the tuple project url, artifact url

    """
    package = repo['Package']
    version = repo['Version']
    origin_url = f'{CRAN_MIRROR}/package={package}'
    artifact_url = f'{CRAN_MIRROR}/src/contrib/{package}_{version}.tar.gz'
    package = repo["Package"]
    version = repo["Version"]
    origin_url = f"{CRAN_MIRROR}/package={package}"
    artifact_url = f"{CRAN_MIRROR}/src/contrib/{package}_{version}.tar.gz"
    return origin_url, artifact_url

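Usage of `compute_origin_urls` is straightforward; with the same sample values the tests in this commit use, it yields the CRAN project page and the source tarball URL:

    from swh.lister.cran.lister import compute_origin_urls

    origin_url, artifact_url = compute_origin_urls(
        {"Package": "something", "Version": "0.0.1"}
    )
    assert origin_url == "https://cran.r-project.org/package=something"
    assert artifact_url == (
        "https://cran.r-project.org/src/contrib/something_0.0.1.tar.gz"
    )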
@ -11,7 +11,8 @@ class CRANModel(ModelBase):
    """a CRAN repository representation

    """
    __tablename__ = 'cran_repo'

    __tablename__ = "cran_repo"

    uid = Column(String, primary_key=True)
    version = Column(String)

@ -7,12 +7,12 @@ from celery import shared_task
from swh.lister.cran.lister import CRANLister


@shared_task(name=__name__ + '.CRANListerTask')
@shared_task(name=__name__ + ".CRANListerTask")
def list_cran(**lister_args):
    '''Lister task for the CRAN registry'''
    """Lister task for the CRAN registry"""
    return CRANLister(**lister_args).run()


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"

@ -10,14 +10,16 @@ from swh.lister.core.tests.conftest import *  # noqa

@pytest.fixture
def lister_cran(swh_listers):
    lister = swh_listers['cran']
    lister = swh_listers["cran"]

    # Add the load-deb-package in the scheduler backend
    lister.scheduler.create_task_type({
        'type': 'load-cran',
        'description': 'Load a CRAN package',
        'backend_name': 'swh.loader.package.cran.tasks.LoaderCRAN',
        'default_interval': '1 day',
    })
    lister.scheduler.create_task_type(
        {
            "type": "load-cran",
            "description": "Load a CRAN package",
            "backend_name": "swh.loader.package.cran.tasks.LoaderCRAN",
            "default_interval": "1 day",
        }
    )

    return lister

@ -13,28 +13,25 @@ from swh.lister.cran.lister import compute_origin_urls, CRAN_MIRROR


def test_cran_compute_origin_urls():
    pack = 'something'
    vers = '0.0.1'
    origin_url, artifact_url = compute_origin_urls({
        'Package': pack,
        'Version': vers,
    })
    pack = "something"
    vers = "0.0.1"
    origin_url, artifact_url = compute_origin_urls({"Package": pack, "Version": vers,})

    assert origin_url == f'{CRAN_MIRROR}/package={pack}'
    assert artifact_url == f'{CRAN_MIRROR}/src/contrib/{pack}_{vers}.tar.gz'
    assert origin_url == f"{CRAN_MIRROR}/package={pack}"
    assert artifact_url == f"{CRAN_MIRROR}/src/contrib/{pack}_{vers}.tar.gz"


def test_cran_compute_origin_urls_failure():
    for incomplete_repo in [{'Version': '0.0.1'}, {'Package': 'package'}, {}]:
    for incomplete_repo in [{"Version": "0.0.1"}, {"Package": "package"}, {}]:
        with pytest.raises(KeyError):
            compute_origin_urls(incomplete_repo)


@patch('swh.lister.cran.lister.read_cran_data')
@patch("swh.lister.cran.lister.read_cran_data")
def test_cran_lister_cran(mock_cran, datadir, lister_cran):
    lister = lister_cran

    with open(path.join(datadir, 'list-r-packages.json')) as f:
    with open(path.join(datadir, "list-r-packages.json")) as f:
        data = json.loads(f.read())

    mock_cran.return_value = data

@ -42,31 +39,33 @@ def test_cran_lister_cran(mock_cran, datadir, lister_cran):

    lister.run()

    r = lister.scheduler.search_tasks(task_type='load-cran')
    r = lister.scheduler.search_tasks(task_type="load-cran")
    assert len(r) == 6

    for row in r:
        assert row['type'] == 'load-cran'
        assert row["type"] == "load-cran"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 0

        # kwargs
        kwargs = row['arguments']['kwargs']
        kwargs = row["arguments"]["kwargs"]
        assert len(kwargs) == 2
        assert set(kwargs.keys()) == {'url', 'artifacts'}
        assert set(kwargs.keys()) == {"url", "artifacts"}

        artifacts = kwargs['artifacts']
        artifacts = kwargs["artifacts"]
        assert len(artifacts) == 1

        assert set(artifacts[0].keys()) == {'url', 'version'}
        assert set(artifacts[0].keys()) == {"url", "version"}

        assert row['policy'] == 'oneshot'
        assert row['retries_left'] == 3
        assert row["policy"] == "oneshot"
        assert row["retries_left"] == 3

        origin_url = kwargs['url']
        record = lister.db_session \
            .query(lister.MODEL) \
            .filter(origin_url == origin_url).first()
        origin_url = kwargs["url"]
        record = (
            lister.db_session.query(lister.MODEL)
            .filter(origin_url == origin_url)
            .first()
        )
        assert record
        assert record.uid == f'{record.name}-{record.version}'
        assert record.uid == f"{record.name}-{record.version}"

@ -2,22 +2,20 @@ from unittest.mock import patch


def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.cran.tasks.ping')
    res = swh_app.send_task("swh.lister.cran.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"


@patch('swh.lister.cran.tasks.CRANLister')
@patch("swh.lister.cran.tasks.CRANLister")
def test_lister(lister, swh_app, celery_session_worker):
    # setup the mocked CRANLister
    lister.return_value = lister
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.cran.tasks.CRANListerTask')
    res = swh_app.send_task("swh.lister.cran.tasks.CRANListerTask")
    assert res
    res.wait()
    assert res.successful()

@ -11,11 +11,13 @@ from typing import Any, List, Mapping
logger = logging.getLogger(__name__)


def debian_init(db_engine,
                override_conf: Mapping[str, Any] = {},
                distribution_name: str = 'Debian',
                suites: List[str] = ['stretch', 'buster', 'bullseye'],
                components: List[str] = ['main', 'contrib', 'non-free']):
def debian_init(
    db_engine,
    override_conf: Mapping[str, Any] = {},
    distribution_name: str = "Debian",
    suites: List[str] = ["stretch", "buster", "bullseye"],
    components: List[str] = ["main", "contrib", "non-free"],
):
    """Initialize the debian data model.

    Args:

@ -28,30 +30,32 @@ def debian_init(db_engine,
    """
    from swh.lister.debian.models import Distribution, Area
    from sqlalchemy.orm import sessionmaker

    db_session = sessionmaker(bind=db_engine)()
    distrib = db_session.query(Distribution) \
        .filter(Distribution.name == distribution_name) \
    distrib = (
        db_session.query(Distribution)
        .filter(Distribution.name == distribution_name)
        .one_or_none()
    )

    if distrib is None:
        distrib = Distribution(
            name=distribution_name, type='deb',
            mirror_uri='http://deb.debian.org/debian/'
            name=distribution_name,
            type="deb",
            mirror_uri="http://deb.debian.org/debian/",
        )
        db_session.add(distrib)

    # Check the existing
    existing_area = db_session.query(Area) \
        .filter(Area.distribution == distrib) \
        .all()
    existing_area = db_session.query(Area).filter(Area.distribution == distrib).all()
    existing_area = set([a.name for a in existing_area])

    logger.debug('Area already known: %s', ', '.join(existing_area))
    logger.debug("Area already known: %s", ", ".join(existing_area))

    # Create only the new ones
    for suite in suites:
        for component in components:
            area_name = f'{suite}/{component}'
            area_name = f"{suite}/{component}"
            if area_name in existing_area:
                logger.debug("Area '%s' already set, skipping", area_name)
                continue

@ -64,7 +68,10 @@ def debian_init(db_engine,

def register() -> Mapping[str, Any]:
    from .lister import DebianLister
    return {'models': [DebianLister.MODEL],
            'lister': DebianLister,
            'task_modules': ['%s.tasks' % __name__],
            'init': debian_init}

    return {
        "models": [DebianLister.MODEL],
        "lister": DebianLister,
        "task_modules": ["%s.tasks" % __name__],
        "init": debian_init,
    }

@ -17,7 +17,10 @@ from typing import Mapping, Optional, Dict, Any
from requests import Response

from swh.lister.debian.models import (
    AreaSnapshot, Distribution, DistributionSnapshot, Package,
    AreaSnapshot,
    Distribution,
    DistributionSnapshot,
    Package,
    TempPackage,
)

@ -25,9 +28,9 @@ from swh.lister.core.lister_base import ListerBase, FetchError
from swh.lister.core.lister_transports import ListerHttpTransport

decompressors = {
    'gz': lambda f: gzip.GzipFile(fileobj=f),
    'bz2': bz2.BZ2File,
    'xz': lzma.LZMAFile,
    "gz": lambda f: gzip.GzipFile(fileobj=f),
    "bz2": bz2.BZ2File,
    "xz": lzma.LZMAFile,
}
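The `decompressors` mapping pairs each index suffix with a wrapper over a binary file object; `gzip.GzipFile` needs the `fileobj=` keyword, while the `bz2`/`lzma` classes accept a file object positionally, hence the lambda. Roughly how such a table gets exercised (a self-contained sketch, not the lister's exact call site):

    import bz2
    import gzip
    import io
    import lzma

    decompressors = {
        "gz": lambda f: gzip.GzipFile(fileobj=f),
        "bz2": bz2.BZ2File,
        "xz": lzma.LZMAFile,
    }

    raw = io.BytesIO(gzip.compress(b"Package: foo\n"))
    with decompressors["gz"](raw) as stream:
        assert stream.read() == b"Package: foo\n"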
@ -37,12 +40,15 @@ logger = logging.getLogger(__name__)
class DebianLister(ListerHttpTransport, ListerBase):
    MODEL = Package
    PATH_TEMPLATE = None
    LISTER_NAME = 'debian'
    instance = 'debian'
    LISTER_NAME = "debian"
    instance = "debian"

    def __init__(self, distribution: str = 'Debian',
                 date: Optional[datetime.datetime] = None,
                 override_config: Mapping = {}):
    def __init__(
        self,
        distribution: str = "Debian",
        date: Optional[datetime.datetime] = None,
        override_config: Mapping = {},
    ):
        """Initialize the debian lister for a given distribution at a given
        date.

@ -55,9 +61,10 @@ class DebianLister(ListerHttpTransport, ListerBase):
        """
        ListerHttpTransport.__init__(self, url="notused")
        ListerBase.__init__(self, override_config=override_config)
        self.distribution = override_config.get('distribution', distribution)
        self.date = override_config.get('date', date) or datetime.datetime.now(
            tz=datetime.timezone.utc)
        self.distribution = override_config.get("distribution", distribution)
        self.date = override_config.get("date", date) or datetime.datetime.now(
            tz=datetime.timezone.utc
        )

    def transport_request(self, identifier) -> Response:
        """Subvert ListerHttpTransport.transport_request, to try several

@ -83,9 +90,7 @@ class DebianLister(ListerHttpTransport, ListerBase):
            if response.status_code == 200:
                break
        else:
            raise FetchError(
                "Could not retrieve index for %s" % self.area
            )
            raise FetchError("Could not retrieve index for %s" % self.area)
        self.decompressor = decompressors.get(compression)
        return response

@ -99,7 +104,7 @@ class DebianLister(ListerHttpTransport, ListerBase):
        # Enable streaming to allow wrapping the response in the decompressor
        # in transport_response_simplified.
        params = super().request_params(identifier)
        params['stream'] = True
        params["stream"] = True
        return params

    def transport_response_simplified(self, response):

@ -118,22 +123,22 @@ class DebianLister(ListerHttpTransport, ListerBase):
            files = defaultdict(dict)

            for field in src_pkg._multivalued_fields:
                if field.startswith('checksums-'):
                    sum_name = field[len('checksums-'):]
                if field.startswith("checksums-"):
                    sum_name = field[len("checksums-") :]
                else:
                    sum_name = 'md5sum'
                    sum_name = "md5sum"
                if field in src_pkg:
                    for entry in src_pkg[field]:
                        name = entry['name']
                        files[name]['name'] = entry['name']
                        files[name]['size'] = int(entry['size'], 10)
                        name = entry["name"]
                        files[name]["name"] = entry["name"]
                        files[name]["size"] = int(entry["size"], 10)
                        files[name][sum_name] = entry[sum_name]

            yield {
                'name': src_pkg['Package'],
                'version': src_pkg['Version'],
                'directory': src_pkg['Directory'],
                'files': files,
                "name": src_pkg["Package"],
                "version": src_pkg["Version"],
                "directory": src_pkg["Directory"],
                "files": files,
            }
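The space in `field[len("checksums-") :]` is black's doing, not a typo: when a slice bound is an expression rather than a simple name or literal, black puts a space before the colon, which is exactly the style flake8's default E203 check rejects, and is why that check is typically disabled alongside black. For instance:

    prefix = "checksums-"
    field = "checksums-sha256"
    # black formats the slice with a space before ':' because the lower
    # bound is a function call, not a simple literal
    sum_name = field[len(prefix) :]
    assert sum_name == "sha256"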
    def inject_repo_data_into_db(self, models_list):

@ -149,13 +154,11 @@ class DebianLister(ListerHttpTransport, ListerBase):
        area_id = self.area.id

        for model in models_list:
            name = model['name']
            version = model['version']
            temp_packages.append({
                'area_id': area_id,
                'name': name,
                'version': version,
            })
            name = model["name"]
            version = model["version"]
            temp_packages.append(
                {"area_id": area_id, "name": name, "version": version,}
            )
            by_name_version[name, version] = model

        # Add all the listed packages to a temporary table

@ -172,15 +175,18 @@ class DebianLister(ListerHttpTransport, ListerBase):
        )

        # Filter out the packages that already exist in the main Package table
        new_packages = self.db_session\
            .query(TempPackage)\
            .options(load_only('name', 'version'))\
            .filter(~exists_tmp_pkg(self.db_session, Package))\
            .all()
        new_packages = (
            self.db_session.query(TempPackage)
            .options(load_only("name", "version"))
            .filter(~exists_tmp_pkg(self.db_session, Package))
            .all()
        )

        self.old_area_packages = self.db_session.query(Package).filter(
            exists_tmp_pkg(self.db_session, TempPackage)
        ).all()
        self.old_area_packages = (
            self.db_session.query(Package)
            .filter(exists_tmp_pkg(self.db_session, TempPackage))
            .all()
        )

        self.db_session.execute(DropTable(TempPackage.__table__))

@ -188,8 +194,7 @@ class DebianLister(ListerHttpTransport, ListerBase):
        for package in new_packages:
            model = by_name_version[package.name, package.version]

            added_packages.append(Package(area=self.area,
                                          **model))
            added_packages.append(Package(area=self.area, **model))

        self.db_session.add_all(added_packages)
        return added_packages

@ -210,26 +215,26 @@ class DebianLister(ListerHttpTransport, ListerBase):
        """Run the lister for a given (distribution, area) tuple.

        """
        distribution = self.db_session\
            .query(Distribution)\
            .options(joinedload(Distribution.areas))\
            .filter(Distribution.name == self.distribution)\
            .one_or_none()
        distribution = (
            self.db_session.query(Distribution)
            .options(joinedload(Distribution.areas))
            .filter(Distribution.name == self.distribution)
            .one_or_none()
        )

        if not distribution:
            logger.error("Distribution %s is not registered" %
                         self.distribution)
            return {'status': 'failed'}
            logger.error("Distribution %s is not registered" % self.distribution)
            return {"status": "failed"}

        if not distribution.type == 'deb':
            logger.error("Distribution %s is not a Debian derivative" %
                         distribution)
            return {'status': 'failed'}
        if not distribution.type == "deb":
            logger.error("Distribution %s is not a Debian derivative" % distribution)
            return {"status": "failed"}

        date = self.date

        logger.debug('Creating snapshot for distribution %s on date %s' %
                     (distribution, date))
        logger.debug(
            "Creating snapshot for distribution %s on date %s" % (distribution, date)
        )

        snapshot = DistributionSnapshot(date=date, distribution=distribution)

@ -241,7 +246,7 @@ class DebianLister(ListerHttpTransport, ListerBase):

            self.area = area

            logger.debug('Processing area %s' % area)
            logger.debug("Processing area %s" % area)

            _, new_area_packages = self.ingest_data(None)
            area_snapshot = AreaSnapshot(snapshot=snapshot, area=area)

@ -253,4 +258,4 @@ class DebianLister(ListerHttpTransport, ListerBase):

        self.db_session.commit()

        return {'status': 'eventful'}
        return {"status": "eventful"}

@ -34,78 +34,66 @@ from swh.lister.core.models import SQLBase

class Distribution(SQLBase):
    """A distribution (e.g. Debian, Ubuntu, Fedora, ...)"""
    __tablename__ = 'distribution'

    __tablename__ = "distribution"

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True, nullable=False)
    type = Column(Enum('deb', 'rpm', name='distribution_types'),
                  nullable=False)
    type = Column(Enum("deb", "rpm", name="distribution_types"), nullable=False)
    mirror_uri = Column(String, nullable=False)

    areas = relationship('Area', back_populates='distribution')
    areas = relationship("Area", back_populates="distribution")

    def origin_for_package(self, package_name: str) -> str:
        """Return the origin url for the given package

        """
        return '%s://%s/packages/%s' % (self.type, self.name, package_name)
        return "%s://%s/packages/%s" % (self.type, self.name, package_name)

    def __repr__(self):
        return 'Distribution(%s (%s) on %s)' % (
            self.name,
            self.type,
            self.mirror_uri,
        )
        return "Distribution(%s (%s) on %s)" % (self.name, self.type, self.mirror_uri,)


class Area(SQLBase):
    __tablename__ = 'area'
    __table_args__ = (
        UniqueConstraint('distribution_id', 'name'),
    )
    __tablename__ = "area"
    __table_args__ = (UniqueConstraint("distribution_id", "name"),)

    id = Column(Integer, primary_key=True)
    distribution_id = Column(Integer, ForeignKey('distribution.id'),
                             nullable=False)
    distribution_id = Column(Integer, ForeignKey("distribution.id"), nullable=False)
    name = Column(String, nullable=False)
    active = Column(Boolean, nullable=False, default=True)

    distribution = relationship('Distribution', back_populates='areas')
    distribution = relationship("Distribution", back_populates="areas")

    def index_uris(self):
        """Get possible URIs for this component's package index"""
        if self.distribution.type == 'deb':
            compression_exts = ('xz', 'bz2', 'gz', None)
            base_uri = '%s/dists/%s/source/Sources' % (
        if self.distribution.type == "deb":
            compression_exts = ("xz", "bz2", "gz", None)
            base_uri = "%s/dists/%s/source/Sources" % (
                self.distribution.mirror_uri,
                self.name,
            )
            for ext in compression_exts:
                if ext:
                    yield (base_uri + '.' + ext, ext)
                    yield (base_uri + "." + ext, ext)
                else:
                    yield (base_uri, None)
        else:
            raise NotImplementedError(
                'Do not know how to build index URI for Distribution type %s' %
                self.distribution.type
                "Do not know how to build index URI for Distribution type %s"
                % self.distribution.type
            )

    def __repr__(self):
        return 'Area(%s of %s)' % (
            self.name,
            self.distribution.name,
        )
        return "Area(%s of %s)" % (self.name, self.distribution.name,)


class Package(SQLBase):
    __tablename__ = 'package'
    __table_args__ = (
        UniqueConstraint('area_id', 'name', 'version'),
    )
    __tablename__ = "package"
    __table_args__ = (UniqueConstraint("area_id", "name", "version"),)

    id = Column(Integer, primary_key=True)
    area_id = Column(Integer, ForeignKey('area.id'), nullable=False)
    area_id = Column(Integer, ForeignKey("area.id"), nullable=False)
    name = Column(String, nullable=False)
    version = Column(String, nullable=False)
    directory = Column(String, nullable=False)

@ -116,7 +104,7 @@ class Package(SQLBase):

    revision_id = Column(LargeBinary(20))

    area = relationship('Area')
    area = relationship("Area")

    @property
    def distribution(self):

@ -125,42 +113,38 @@ class Package(SQLBase):
    def fetch_uri(self, filename):
        """Get the URI to fetch the `filename` file associated with the
        package"""
        if self.distribution.type == 'deb':
            return '%s/%s/%s' % (
        if self.distribution.type == "deb":
            return "%s/%s/%s" % (
                self.distribution.mirror_uri,
                self.directory,
                filename,
            )
        else:
            raise NotImplementedError(
                'Do not know how to build fetch URI for Distribution type %s' %
                self.distribution.type
                "Do not know how to build fetch URI for Distribution type %s"
                % self.distribution.type
            )

    def loader_dict(self):
        ret = {
            'id': self.id,
            'name': self.name,
            'version': self.version,
            "id": self.id,
            "name": self.name,
            "version": self.version,
        }
        if self.revision_id:
            ret['revision_id'] = binascii.hexlify(self.revision_id).decode()
            ret["revision_id"] = binascii.hexlify(self.revision_id).decode()
        else:
            files = {
                name: checksums.copy()
                for name, checksums in self.files.items()
            }
            files = {name: checksums.copy() for name, checksums in self.files.items()}
            for name in files:
                files[name]['uri'] = self.fetch_uri(name)
                files[name]["uri"] = self.fetch_uri(name)

            ret.update({
                'revision_id': None,
                'files': files,
            })
            ret.update(
                {"revision_id": None, "files": files,}
            )
        return ret

    def __repr__(self):
        return 'Package(%s_%s of %s %s)' % (
        return "Package(%s_%s of %s %s)" % (
            self.name,
            self.version,
            self.distribution.name,

@ -169,37 +153,36 @@ class Package(SQLBase):


class DistributionSnapshot(SQLBase):
    __tablename__ = 'distribution_snapshot'
    __tablename__ = "distribution_snapshot"

    id = Column(Integer, primary_key=True)
    date = Column(DateTime, nullable=False, index=True)
    distribution_id = Column(Integer,
                             ForeignKey('distribution.id'),
                             nullable=False)
    distribution_id = Column(Integer, ForeignKey("distribution.id"), nullable=False)

    distribution = relationship('Distribution')
    areas = relationship('AreaSnapshot', back_populates='snapshot')
    distribution = relationship("Distribution")
    areas = relationship("AreaSnapshot", back_populates="snapshot")

    def task_for_package(self, package_name: str,
                         package_versions: Mapping) -> Mapping[str, Any]:
    def task_for_package(
        self, package_name: str, package_versions: Mapping
    ) -> Mapping[str, Any]:
        """Return the task dictionary for the given list of package versions

        """
        origin_url = self.distribution.origin_for_package(package_name)

        return {
            'policy': 'oneshot',
            'type': 'load-%s-package' % self.distribution.type,
            'next_run': datetime.datetime.now(tz=datetime.timezone.utc),
            'arguments': {
                'args': [],
                'kwargs': {
                    'url': origin_url,
                    'date': self.date.isoformat(),
                    'packages': package_versions,
            "policy": "oneshot",
            "type": "load-%s-package" % self.distribution.type,
            "next_run": datetime.datetime.now(tz=datetime.timezone.utc),
            "arguments": {
                "args": [],
                "kwargs": {
                    "url": origin_url,
                    "date": self.date.isoformat(),
                    "packages": package_versions,
                },
            },
            'retries_left': 3,
            "retries_left": 3,
        }

    def get_packages(self):

@ -207,41 +190,38 @@ class DistributionSnapshot(SQLBase):
        for area_snapshot in self.areas:
            area_name = area_snapshot.area.name
            for package in area_snapshot.packages:
                ref_name = '%s/%s' % (area_name, package.version)
                ref_name = "%s/%s" % (area_name, package.version)
                packages[package.name][ref_name] = package.loader_dict()

        return packages


area_snapshot_package_assoc = Table(
    'area_snapshot_package', SQLBase.metadata,
    Column('area_snapshot_id', Integer, ForeignKey('area_snapshot.id'),
           nullable=False),
    Column('package_id', Integer, ForeignKey('package.id'),
           nullable=False),
    "area_snapshot_package",
    SQLBase.metadata,
    Column("area_snapshot_id", Integer, ForeignKey("area_snapshot.id"), nullable=False),
    Column("package_id", Integer, ForeignKey("package.id"), nullable=False),
)


class AreaSnapshot(SQLBase):
    __tablename__ = 'area_snapshot'
    __tablename__ = "area_snapshot"

    id = Column(Integer, primary_key=True)
    snapshot_id = Column(Integer,
                         ForeignKey('distribution_snapshot.id'),
                         nullable=False)
    area_id = Column(Integer,
                     ForeignKey('area.id'),
|
||||
nullable=False)
|
||||
snapshot_id = Column(
|
||||
Integer, ForeignKey("distribution_snapshot.id"), nullable=False
|
||||
)
|
||||
area_id = Column(Integer, ForeignKey("area.id"), nullable=False)
|
||||
|
||||
snapshot = relationship('DistributionSnapshot', back_populates='areas')
|
||||
area = relationship('Area')
|
||||
packages = relationship('Package', secondary=area_snapshot_package_assoc)
|
||||
snapshot = relationship("DistributionSnapshot", back_populates="areas")
|
||||
area = relationship("Area")
|
||||
packages = relationship("Package", secondary=area_snapshot_package_assoc)
|
||||
|
||||
|
||||
class TempPackage(SQLBase):
|
||||
__tablename__ = 'temp_package'
|
||||
__tablename__ = "temp_package"
|
||||
__table_args__ = {
|
||||
'prefixes': ['TEMPORARY'],
|
||||
"prefixes": ["TEMPORARY"],
|
||||
}
|
||||
|
||||
id = Column(Integer, primary_key=True)
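
A note for reviewers: the black release used here can keep a pre-existing trailing comma when it collapses a call onto one line, which is where one-liners such as `{"revision_id": None, "files": files,}` above come from (later black releases instead treat that trailing comma as a signal to keep the call exploded). As a sanity check that the reformatting is behavior-preserving, here is a minimal, dependency-free restatement of the loader_dict() logic; the sample checksums and the fetch_uri lambda are illustrative only, not taken from the repository:

import binascii

def loader_dict(package_id, name, version, revision_id, files, fetch_uri):
    # mirrors Package.loader_dict() from the hunk above
    ret = {"id": package_id, "name": name, "version": version}
    if revision_id:
        ret["revision_id"] = binascii.hexlify(revision_id).decode()
    else:
        files = {n: checksums.copy() for n, checksums in files.items()}
        for n in files:
            files[n]["uri"] = fetch_uri(n)
        ret.update({"revision_id": None, "files": files})
    return ret

print(loader_dict(1, "hello", "2.10-2", None,
                  {"hello_2.10-2.dsc": {"sha256": "0000"}},
                  lambda n: "https://deb.debian.org/debian/pool/main/h/hello/" + n))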

@@ -7,12 +7,12 @@ from celery import shared_task
from .lister import DebianLister


@shared_task(name=__name__ + '.DebianListerTask')
@shared_task(name=__name__ + ".DebianListerTask")
def list_debian_distribution(distribution, **lister_args):
    '''List a Debian distribution'''
    """List a Debian distribution"""
    return DebianLister(distribution=distribution, **lister_args).run()


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"

@@ -16,20 +16,20 @@ from swh.lister.debian import debian_init

@pytest.fixture
def lister_debian(swh_listers):
    lister = swh_listers['debian']
    lister = swh_listers["debian"]

    # Initialize the debian data model
    debian_init(
        lister.db_engine, suites=['stretch'], components=['main', 'contrib']
    )
    debian_init(lister.db_engine, suites=["stretch"], components=["main", "contrib"])

    # Add the load-deb-package in the scheduler backend
    lister.scheduler.create_task_type({
        'type': 'load-deb-package',
        'description': 'Load a Debian package',
        'backend_name': 'swh.loader.debian.tasks.LoaderDebianPackage',
        'default_interval': '1 day',
    })
    lister.scheduler.create_task_type(
        {
            "type": "load-deb-package",
            "description": "Load a Debian package",
            "backend_name": "swh.loader.debian.tasks.LoaderDebianPackage",
            "default_interval": "1 day",
        }
    )

    return lister


@@ -40,12 +40,10 @@ def sqlalchemy_engine(postgresql_proc):
    pg_port = postgresql_proc.port
    pg_user = postgresql_proc.user

    pg_db = 'sqlalchemy-tests'
    pg_db = "sqlalchemy-tests"

    url = f'postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_db}'
    with DatabaseJanitor(
        pg_user, pg_host, pg_port, pg_db, postgresql_proc.version
    ):
    url = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_db}"
    with DatabaseJanitor(pg_user, pg_host, pg_port, pg_db, postgresql_proc.version):
        engine = create_engine(url)
        yield engine
        engine.dispose()

@@ -17,29 +17,37 @@ def engine(session):


def test_debian_init_step(engine, session):
    distribution_name = 'KaliLinux'
    distribution_name = "KaliLinux"

    distrib = session.query(Distribution) \
        .filter(Distribution.name == distribution_name) \
    distrib = (
        session.query(Distribution)
        .filter(Distribution.name == distribution_name)
        .one_or_none()
    )
    assert distrib is None

    all_area = session.query(Area).all()
    assert all_area == []

    suites = ['wheezy', 'jessie']
    components = ['main', 'contrib']
    suites = ["wheezy", "jessie"]
    components = ["main", "contrib"]

    debian_init(engine, distribution_name=distribution_name,
                suites=suites, components=components)
    distrib = session.query(Distribution) \
        .filter(Distribution.name == distribution_name) \
    debian_init(
        engine,
        distribution_name=distribution_name,
        suites=suites,
        components=components,
    )
    distrib = (
        session.query(Distribution)
        .filter(Distribution.name == distribution_name)
        .one_or_none()
    )

    assert distrib is not None
    assert distrib.name == distribution_name
    assert distrib.type == 'deb'
    assert distrib.mirror_uri == 'http://deb.debian.org/debian/'
    assert distrib.type == "deb"
    assert distrib.mirror_uri == "http://deb.debian.org/debian/"

    all_area = session.query(Area).all()
    assert len(all_area) == 2 * 2, "2 suites * 2 components per suite"

@@ -47,7 +55,7 @@ def test_debian_init_step(engine, session):
    expected_area_names = []
    for suite in suites:
        for component in components:
            expected_area_names.append(f'{suite}/{component}')
            expected_area_names.append(f"{suite}/{component}")

    for area in all_area:
        area.id = None

@@ -56,12 +64,16 @@ def test_debian_init_step(engine, session):

    # check idempotency (on exact same call)

    debian_init(engine, distribution_name=distribution_name,
                suites=suites, components=components)
    debian_init(
        engine,
        distribution_name=distribution_name,
        suites=suites,
        components=components,
    )

    distribs = session.query(Distribution) \
        .filter(Distribution.name == distribution_name) \
        .all()
    distribs = (
        session.query(Distribution).filter(Distribution.name == distribution_name).all()
    )

    assert len(distribs) == 1
    distrib = distribs[0]

@@ -70,8 +82,12 @@ def test_debian_init_step(engine, session):
    assert len(all_area) == 2 * 2, "2 suites * 2 components per suite"

    # Add a new suite
    debian_init(engine, distribution_name=distribution_name,
                suites=['lenny'], components=components)
    debian_init(
        engine,
        distribution_name=distribution_name,
        suites=["lenny"],
        components=components,
    )

    all_area = [a.name for a in session.query(Area).all()]
    assert len(all_area) == (2 + 1) * 2, "3 suites * 2 components per suite"

@@ -16,21 +16,21 @@ def test_lister_debian(lister_debian, datadir, requests_mock_datadir):
    # Run the lister
    lister_debian.run()

    r = lister_debian.scheduler.search_tasks(task_type='load-deb-package')
    r = lister_debian.scheduler.search_tasks(task_type="load-deb-package")
    assert len(r) == 151

    for row in r:
        assert row['type'] == 'load-deb-package'
        assert row["type"] == "load-deb-package"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 0

        # kwargs
        kwargs = row['arguments']['kwargs']
        assert set(kwargs.keys()) == {'url', 'date', 'packages'}
        kwargs = row["arguments"]["kwargs"]
        assert set(kwargs.keys()) == {"url", "date", "packages"}

        logger.debug('kwargs: %s', kwargs)
        assert isinstance(kwargs['url'], str)
        logger.debug("kwargs: %s", kwargs)
        assert isinstance(kwargs["url"], str)

        assert row['policy'] == 'oneshot'
        assert row['priority'] is None
        assert row["policy"] == "oneshot"
        assert row["priority"] is None

@@ -10,13 +10,9 @@ from swh.lister.debian.models import Distribution, Area

def test_area_index_uris_deb(session):
    d = Distribution(
        name='Debian', type='deb', mirror_uri='http://deb.debian.org/debian'
    )
    a = Area(
        distribution=d,
        name='unstable/main',
        active=True,
        name="Debian", type="deb", mirror_uri="http://deb.debian.org/debian"
    )
    a = Area(distribution=d, name="unstable/main", active=True,)
    session.add_all([d, a])
    session.commit()

@@ -26,14 +22,9 @@ def test_area_index_uris_deb(session):

def test_area_index_uris_rpm(session):
    d = Distribution(
        name='CentOS', type='rpm',
        mirror_uri='http://centos.mirrors.proxad.net/'
    )
    a = Area(
        distribution=d,
        name='8',
        active=True,
        name="CentOS", type="rpm", mirror_uri="http://centos.mirrors.proxad.net/"
    )
    a = Area(distribution=d, name="8", active=True,)
    session.add_all([d, a])
    session.commit()

@@ -7,25 +7,23 @@ from unittest.mock import patch


def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.debian.tasks.ping')
    res = swh_app.send_task("swh.lister.debian.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"


@patch('swh.lister.debian.tasks.DebianLister')
@patch("swh.lister.debian.tasks.DebianLister")
def test_lister(lister, swh_app, celery_session_worker):
    # setup the mocked DebianLister
    lister.return_value = lister
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.debian.tasks.DebianListerTask', ('stretch',))
    res = swh_app.send_task("swh.lister.debian.tasks.DebianListerTask", ("stretch",))
    assert res
    res.wait()
    assert res.successful()

    lister.assert_called_once_with(distribution='stretch')
    lister.assert_called_once_with(distribution="stretch")
    lister.run.assert_called_once_with()

@@ -11,19 +11,18 @@ from swh.lister.debian.lister import DebianLister


@click.group()
@click.option('--verbose/--no-verbose', default=False)
@click.option("--verbose/--no-verbose", default=False)
@click.pass_context
def cli(ctx, verbose):
    ctx.obj['lister'] = DebianLister()
    ctx.obj["lister"] = DebianLister()
    if verbose:
        loglevel = logging.DEBUG
        logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
        logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)
    else:
        loglevel = logging.INFO

    logging.basicConfig(
        format='%(asctime)s %(process)d %(levelname)s %(message)s',
        level=loglevel,
        format="%(asctime)s %(process)d %(levelname)s %(message)s", level=loglevel,
    )


@@ -31,23 +30,24 @@ def cli(ctx, verbose):
@click.pass_context
def create_schema(ctx):
    """Create the schema from the models"""
    SQLBase.metadata.create_all(ctx.obj['lister'].db_engine)
    SQLBase.metadata.create_all(ctx.obj["lister"].db_engine)


@cli.command()
@click.option('--name', help='The name of the distribution')
@click.option('--type', help='The type of distribution')
@click.option('--mirror-uri', help='The URL to the mirror of the distribution')
@click.option('--area', help='The areas for the distribution',
              multiple=True)
@click.option("--name", help="The name of the distribution")
@click.option("--type", help="The type of distribution")
@click.option("--mirror-uri", help="The URL to the mirror of the distribution")
@click.option("--area", help="The areas for the distribution", multiple=True)
@click.pass_context
def create_distribution(ctx, name, type, mirror_uri, area):
    to_add = []
    db_session = ctx.obj['lister'].db_session
    d = db_session.query(Distribution)\
        .filter(Distribution.name == name)\
        .filter(Distribution.type == type)\
        .one_or_none()
    db_session = ctx.obj["lister"].db_session
    d = (
        db_session.query(Distribution)
        .filter(Distribution.name == name)
        .filter(Distribution.type == type)
        .one_or_none()
    )

    if not d:
        d = Distribution(name=name, type=type, mirror_uri=mirror_uri)

@@ -56,10 +56,12 @@ def create_distribution(ctx, name, type, mirror_uri, area):
    for area_name in area:
        a = None
        if d.id:
            a = db_session.query(Area)\
                .filter(Area.distribution == d)\
                .filter(Area.name == area_name)\
                .one_or_none()
            a = (
                db_session.query(Area)
                .filter(Area.distribution == d)
                .filter(Area.name == area_name)
                .one_or_none()
            )

        if not a:
            a = Area(name=area_name, distribution=d)

@@ -70,12 +72,12 @@ def create_distribution(ctx, name, type, mirror_uri, area):


@cli.command()
@click.option('--name', help='The name of the distribution')
@click.option("--name", help="The name of the distribution")
@click.pass_context
def list_distribution(ctx, name):
    """List the distribution"""
    ctx.obj['lister'].run(name)
    ctx.obj["lister"].run(name)


if __name__ == '__main__':
if __name__ == "__main__":
    cli(obj={})

@@ -7,7 +7,8 @@ def register():
    from .models import GitHubModel
    from .lister import GitHubLister

    return {'models': [GitHubModel],
            'lister': GitHubLister,
            'task_modules': ['%s.tasks' % __name__],
            }
    return {
        "models": [GitHubModel],
        "lister": GitHubLister,
        "task_modules": ["%s.tasks" % __name__],
    }

@@ -14,60 +14,57 @@ from requests import Response


class GitHubLister(IndexingHttpLister):
    PATH_TEMPLATE = '/repositories?since=%d'
    PATH_TEMPLATE = "/repositories?since=%d"
    MODEL = GitHubModel
    DEFAULT_URL = 'https://api.github.com'
    API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)')
    LISTER_NAME = 'github'
    instance = 'github'  # There is only 1 instance of such lister
    DEFAULT_URL = "https://api.github.com"
    API_URL_INDEX_RE = re.compile(r"^.*/repositories\?since=(\d+)")
    LISTER_NAME = "github"
    instance = "github"  # There is only 1 instance of such lister
    default_min_bound = 0  # type: Any

    def get_model_from_repo(self, repo: Dict[str, Any]) -> Dict[str, Any]:
        return {
            'uid': repo['id'],
            'indexable': repo['id'],
            'name': repo['name'],
            'full_name': repo['full_name'],
            'html_url': repo['html_url'],
            'origin_url': repo['html_url'],
            'origin_type': 'git',
            'fork': repo['fork'],
            "uid": repo["id"],
            "indexable": repo["id"],
            "name": repo["name"],
            "full_name": repo["full_name"],
            "html_url": repo["html_url"],
            "origin_url": repo["html_url"],
            "origin_type": "git",
            "fork": repo["fork"],
        }

    def transport_quota_check(self, response: Response) -> Tuple[bool, int]:
        x_rate_limit_remaining = response.headers.get('X-RateLimit-Remaining')
        x_rate_limit_remaining = response.headers.get("X-RateLimit-Remaining")
        if not x_rate_limit_remaining:
            return False, 0
        reqs_remaining = int(x_rate_limit_remaining)
        if response.status_code == 403 and reqs_remaining == 0:
            delay = int(response.headers['Retry-After'])
            delay = int(response.headers["Retry-After"])
            return True, delay
        return False, 0

    def get_next_target_from_response(self,
                                      response: Response) -> Optional[int]:
        if 'next' in response.links:
            next_url = response.links['next']['url']
            return int(
                self.API_URL_INDEX_RE.match(next_url).group(1))  # type: ignore
    def get_next_target_from_response(self, response: Response) -> Optional[int]:
        if "next" in response.links:
            next_url = response.links["next"]["url"]
            return int(self.API_URL_INDEX_RE.match(next_url).group(1))  # type: ignore
        return None

    def transport_response_simplified(self, response: Response
                                      ) -> List[Dict[str, Any]]:
    def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
        repos = response.json()
        return [self.get_model_from_repo(repo)
                for repo in repos if repo and 'id' in repo]
        return [
            self.get_model_from_repo(repo) for repo in repos if repo and "id" in repo
        ]

    def request_headers(self) -> Dict[str, Any]:
        """(Override) Set requests headers to send when querying the GitHub API

        """
        headers = super().request_headers()
        headers['Accept'] = 'application/vnd.github.v3+json'
        headers["Accept"] = "application/vnd.github.v3+json"
        return headers

    def disable_deleted_repo_tasks(self, index: int,
                                   next_index: int, keep_these: int):
    def disable_deleted_repo_tasks(self, index: int, next_index: int, keep_these: int):
        """ (Overrides) Fix provided index value to avoid erroneously disabling
        some scheduler tasks
        """

@@ -75,5 +72,4 @@ class GitHubLister(IndexingHttpLister):
        # parameter, so increment the index to avoid disabling the latest
        # created task when processing a new repositories page returned by
        # the Github API
        return super().disable_deleted_repo_tasks(index + 1, next_index,
                                                  keep_these)
        return super().disable_deleted_repo_tasks(index + 1, next_index, keep_these)
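
The quota check above is the piece that makes the GitHub lister survive rate limiting. A minimal, framework-free sketch of the same decision; the response object only needs .status_code and .headers, so a stand-in is used here, and the header names are the ones GitHub actually sends:

from types import SimpleNamespace

def quota_check(response):
    # mirrors transport_quota_check() above: (must_wait, seconds_to_wait)
    remaining = response.headers.get("X-RateLimit-Remaining")
    if not remaining:
        return False, 0
    if response.status_code == 403 and int(remaining) == 0:
        return True, int(response.headers["Retry-After"])
    return False, 0

resp = SimpleNamespace(
    status_code=403, headers={"X-RateLimit-Remaining": "0", "Retry-After": "60"}
)
print(quota_check(resp))  # (True, 60)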

@@ -9,7 +9,8 @@ from swh.lister.core.models import IndexingModelBase

class GitHubModel(IndexingModelBase):
    """a GitHub repository"""
    __tablename__ = 'github_repo'

    __tablename__ = "github_repo"

    uid = Column(Integer, primary_key=True)
    indexable = Column(Integer, index=True)

@@ -11,20 +11,20 @@ from swh.lister.github.lister import GitHubLister
GROUP_SPLIT = 10000


@shared_task(name=__name__ + '.IncrementalGitHubLister')
@shared_task(name=__name__ + ".IncrementalGitHubLister")
def list_github_incremental(**lister_args):
    'Incremental update of GitHub'
    "Incremental update of GitHub"
    lister = GitHubLister(**lister_args)
    return lister.run(min_bound=lister.db_last_index(), max_bound=None)


@shared_task(name=__name__ + '.RangeGitHubLister')
@shared_task(name=__name__ + ".RangeGitHubLister")
def _range_github_lister(start, end, **lister_args):
    lister = GitHubLister(**lister_args)
    return lister.run(min_bound=start, max_bound=end)


@shared_task(name=__name__ + '.FullGitHubRelister', bind=True)
@shared_task(name=__name__ + ".FullGitHubRelister", bind=True)
def list_github_full(self, split=None, **lister_args):
    """Full update of GitHub

@@ -34,20 +34,21 @@ def list_github_full(self, split=None, **lister_args):
    lister = GitHubLister(**lister_args)
    ranges = lister.db_partition_indices(split or GROUP_SPLIT)
    if not ranges:
        self.log.info('Nothing to list')
        self.log.info("Nothing to list")
        return
    random.shuffle(ranges)
    promise = group(_range_github_lister.s(minv, maxv, **lister_args)
                    for minv, maxv in ranges)()
    self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
    promise = group(
        _range_github_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
    )()
    self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges)))
    try:
        promise.save()  # so that we can restore the GroupResult in tests
    except (NotImplementedError, AttributeError):
        self.log.info('Unable to call save_group with current result backend.')
        self.log.info("Unable to call save_group with current result backend.")
    # FIXME: what to do in terms of return here?
    return promise.id


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"

@@ -14,65 +14,70 @@ from swh.lister.github.lister import GitHubLister

class GitHubListerTester(HttpListerTester, unittest.TestCase):
    Lister = GitHubLister
    test_re = re.compile(r'/repositories\?since=([^?&]+)')
    lister_subdir = 'github'
    good_api_response_file = 'data/https_api.github.com/first_response.json'
    bad_api_response_file = 'data/https_api.github.com/empty_response.json'
    test_re = re.compile(r"/repositories\?since=([^?&]+)")
    lister_subdir = "github"
    good_api_response_file = "data/https_api.github.com/first_response.json"
    bad_api_response_file = "data/https_api.github.com/empty_response.json"
    first_index = 0
    last_index = 369
    entries_per_page = 100
    convert_type = int

    def response_headers(self, request):
        headers = {'X-RateLimit-Remaining': '1'}
        headers = {"X-RateLimit-Remaining": "1"}
        if self.request_index(request) == self.first_index:
            headers.update({
                'Link': '<https://api.github.com/repositories?since=%s>;'
                        ' rel="next",'
                        '<https://api.github.com/repositories{?since}>;'
                        ' rel="first"' % self.last_index
            })
            headers.update(
                {
                    "Link": "<https://api.github.com/repositories?since=%s>;"
                    ' rel="next",'
                    "<https://api.github.com/repositories{?since}>;"
                    ' rel="first"' % self.last_index
                }
            )
        else:
            headers.update({
                'Link': '<https://api.github.com/repositories{?since}>;'
                        ' rel="first"'
            })
            headers.update(
                {
                    "Link": "<https://api.github.com/repositories{?since}>;"
                    ' rel="first"'
                }
            )
        return headers

    def mock_rate_quota(self, n, request, context):
        self.rate_limit += 1
        context.status_code = 403
        context.headers['X-RateLimit-Remaining'] = '0'
        context.headers['Retry-After'] = '1'  # 1 second
        context.headers["X-RateLimit-Remaining"] = "0"
        context.headers["Retry-After"] = "1"  # 1 second
        return '{"error":"dummy"}'

    @requests_mock.Mocker()
    def test_scheduled_tasks(self, http_mocker):
        self.scheduled_tasks_test(
            'data/https_api.github.com/next_response.json', 876, http_mocker)
            "data/https_api.github.com/next_response.json", 876, http_mocker
        )


def test_lister_github(swh_listers, requests_mock_datadir):
    """Simple github listing should create scheduled tasks

    """
    lister = swh_listers['github']
    lister = swh_listers["github"]

    lister.run()

    r = lister.scheduler.search_tasks(task_type='load-git')
    r = lister.scheduler.search_tasks(task_type="load-git")
    assert len(r) == 100

    for row in r:
        assert row['type'] == 'load-git'
        assert row["type"] == "load-git"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 0

        # kwargs
        kwargs = row['arguments']['kwargs']
        url = kwargs['url']
        assert url.startswith('https://github.com')
        kwargs = row["arguments"]["kwargs"]
        url = kwargs["url"]
        assert url.startswith("https://github.com")

        assert row['policy'] == 'recurring'
        assert row['priority'] is None
        assert row["policy"] == "recurring"
        assert row["priority"] is None

@@ -5,23 +5,21 @@ from unittest.mock import patch


def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.github.tasks.ping')
    res = swh_app.send_task("swh.lister.github.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"


@patch('swh.lister.github.tasks.GitHubLister')
@patch("swh.lister.github.tasks.GitHubLister")
def test_incremental(lister, swh_app, celery_session_worker):
    # setup the mocked GitHubLister
    lister.return_value = lister
    lister.db_last_index.return_value = 42
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.github.tasks.IncrementalGitHubLister')
    res = swh_app.send_task("swh.lister.github.tasks.IncrementalGitHubLister")
    assert res
    res.wait()
    assert res.successful()

@@ -31,15 +29,15 @@ def test_incremental(lister, swh_app, celery_session_worker):
    lister.run.assert_called_once_with(min_bound=42, max_bound=None)


@patch('swh.lister.github.tasks.GitHubLister')
@patch("swh.lister.github.tasks.GitHubLister")
def test_range(lister, swh_app, celery_session_worker):
    # setup the mocked GitHubLister
    lister.return_value = lister
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.github.tasks.RangeGitHubLister',
        kwargs=dict(start=12, end=42))
        "swh.lister.github.tasks.RangeGitHubLister", kwargs=dict(start=12, end=42)
    )
    assert res
    res.wait()
    assert res.successful()

@@ -49,16 +47,14 @@ def test_range(lister, swh_app, celery_session_worker):
    lister.run.assert_called_once_with(min_bound=12, max_bound=42)


@patch('swh.lister.github.tasks.GitHubLister')
@patch("swh.lister.github.tasks.GitHubLister")
def test_relister(lister, swh_app, celery_session_worker):
    # setup the mocked GitHubLister
    lister.return_value = lister
    lister.run.return_value = None
    lister.db_partition_indices.return_value = [
        (i, i+9) for i in range(0, 50, 10)]
    lister.db_partition_indices.return_value = [(i, i + 9) for i in range(0, 50, 10)]

    res = swh_app.send_task(
        'swh.lister.github.tasks.FullGitHubRelister')
    res = swh_app.send_task("swh.lister.github.tasks.FullGitHubRelister")
    assert res

    res.wait()

@@ -86,5 +82,6 @@ def test_relister(lister, swh_app, celery_session_worker):
    # lister.run should have been called once per partition interval
    for i in range(5):
        # XXX inconsistent behavior: max_bound is INCLUDED here
        assert (dict(min_bound=10*i, max_bound=10*i + 9),) \
            in lister.run.call_args_list
        assert (
            dict(min_bound=10 * i, max_bound=10 * i + 9),
        ) in lister.run.call_args_list

@@ -7,7 +7,8 @@ def register():
    from .models import GitLabModel
    from .lister import GitLabLister

    return {'models': [GitLabModel],
            'lister': GitLabLister,
            'task_modules': ['%s.tasks' % __name__],
            }
    return {
        "models": [GitLabModel],
        "lister": GitLabLister,
        "task_modules": ["%s.tasks" % __name__],
    }

@@ -15,77 +15,83 @@ from requests import Response

class GitLabLister(PageByPageHttpLister):
    # Template path expecting an integer that represents the page id
    PATH_TEMPLATE = '/projects?page=%d&order_by=id'
    DEFAULT_URL = 'https://gitlab.com/api/v4/'
    PATH_TEMPLATE = "/projects?page=%d&order_by=id"
    DEFAULT_URL = "https://gitlab.com/api/v4/"
    MODEL = GitLabModel
    LISTER_NAME = 'gitlab'
    LISTER_NAME = "gitlab"

    def __init__(self, url=None, instance=None,
                 override_config=None, sort='asc', per_page=20):
    def __init__(
        self, url=None, instance=None, override_config=None, sort="asc", per_page=20
    ):
        super().__init__(url=url, override_config=override_config)
        if instance is None:
            instance = parse_url(self.url).host
        self.instance = instance
        self.PATH_TEMPLATE = '%s&sort=%s&per_page=%s' % (
            self.PATH_TEMPLATE, sort, per_page)
        self.PATH_TEMPLATE = "%s&sort=%s&per_page=%s" % (
            self.PATH_TEMPLATE,
            sort,
            per_page,
        )

    def uid(self, repo: Dict[str, Any]) -> str:
        return '%s/%s' % (self.instance, repo['path_with_namespace'])
        return "%s/%s" % (self.instance, repo["path_with_namespace"])

    def get_model_from_repo(self, repo: Dict[str, Any]) -> Dict[str, Any]:
        return {
            'instance': self.instance,
            'uid': self.uid(repo),
            'name': repo['name'],
            'full_name': repo['path_with_namespace'],
            'html_url': repo['web_url'],
            'origin_url': repo['http_url_to_repo'],
            'origin_type': 'git',
            "instance": self.instance,
            "uid": self.uid(repo),
            "name": repo["name"],
            "full_name": repo["path_with_namespace"],
            "html_url": repo["web_url"],
            "origin_url": repo["http_url_to_repo"],
            "origin_type": "git",
        }

    def transport_quota_check(self, response: Response
                              ) -> Tuple[bool, Union[int, float]]:
    def transport_quota_check(
        self, response: Response
    ) -> Tuple[bool, Union[int, float]]:
        """Deal with rate limit if any.

        """
        # not all gitlab instance have rate limit
        if 'RateLimit-Remaining' in response.headers:
            reqs_remaining = int(response.headers['RateLimit-Remaining'])
        if "RateLimit-Remaining" in response.headers:
            reqs_remaining = int(response.headers["RateLimit-Remaining"])
            if response.status_code == 403 and reqs_remaining == 0:
                reset_at = int(response.headers['RateLimit-Reset'])
                reset_at = int(response.headers["RateLimit-Reset"])
                delay = min(reset_at - time.time(), 3600)
                return True, delay
        return False, 0

    def _get_int(self, headers: MutableMapping[str, Any],
                 key: str) -> Optional[int]:
    def _get_int(self, headers: MutableMapping[str, Any], key: str) -> Optional[int]:
        _val = headers.get(key)
        if _val:
            return int(_val)
        return None

    def get_next_target_from_response(
            self, response: Response) -> Optional[int]:
    def get_next_target_from_response(self, response: Response) -> Optional[int]:
        """Determine the next page identifier.

        """
        return self._get_int(response.headers, 'x-next-page')
        return self._get_int(response.headers, "x-next-page")

    def get_pages_information(self) -> Tuple[Optional[int],
                                             Optional[int], Optional[int]]:
    def get_pages_information(
        self,
    ) -> Tuple[Optional[int], Optional[int], Optional[int]]:
        """Determine pages information.

        """
        response = self.transport_head(identifier=1)  # type: ignore
        if not response.ok:
            raise ValueError(
                'Problem during information fetch: %s' % response.status_code)
                "Problem during information fetch: %s" % response.status_code
            )
        h = response.headers
        return (self._get_int(h, 'x-total'),
                self._get_int(h, 'x-total-pages'),
                self._get_int(h, 'x-per-page'))
        return (
            self._get_int(h, "x-total"),
            self._get_int(h, "x-total-pages"),
            self._get_int(h, "x-per-page"),
        )

    def transport_response_simplified(self, response: Response
                                      ) -> List[Dict[str, Any]]:
    def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
        repos = response.json()
        return [self.get_model_from_repo(repo) for repo in repos]
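
Unlike GitHub's Link header, the GitLab lister pages through plain integer headers: x-next-page, x-total, x-total-pages and x-per-page. A tiny standalone sketch of the same helper logic, exercised on fabricated header dicts:

def get_int(headers, key):
    # mirrors _get_int() above: missing or empty header means None
    val = headers.get(key)
    return int(val) if val else None

print(get_int({"x-next-page": "3"}, "x-next-page"))  # 3
print(get_int({"x-next-page": ""}, "x-next-page"))   # None (last page)
print(get_int({}, "x-next-page"))                    # None (header absent)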

@@ -11,7 +11,8 @@ class GitLabModel(ModelBase):
    """a Gitlab repository from a gitlab instance

    """
    __tablename__ = 'gitlab_repo'

    __tablename__ = "gitlab_repo"

    uid = Column(String, primary_key=True)
    instance = Column(String, index=True)

@@ -13,40 +13,41 @@ from .lister import GitLabLister
NBPAGES = 10


@shared_task(name=__name__ + '.IncrementalGitLabLister')
@shared_task(name=__name__ + ".IncrementalGitLabLister")
def list_gitlab_incremental(**lister_args):
    """Incremental update of a GitLab instance"""
    lister_args['sort'] = 'desc'
    lister_args["sort"] = "desc"
    lister = GitLabLister(**lister_args)
    total_pages = lister.get_pages_information()[1]
    # stopping as soon as existing origins for that instance are detected
    return lister.run(min_bound=1, max_bound=total_pages, check_existence=True)


@shared_task(name=__name__ + '.RangeGitLabLister')
@shared_task(name=__name__ + ".RangeGitLabLister")
def _range_gitlab_lister(start, end, **lister_args):
    lister = GitLabLister(**lister_args)
    return lister.run(min_bound=start, max_bound=end)


@shared_task(name=__name__ + '.FullGitLabRelister', bind=True)
@shared_task(name=__name__ + ".FullGitLabRelister", bind=True)
def list_gitlab_full(self, **lister_args):
    """Full update of a GitLab instance"""
    lister = GitLabLister(**lister_args)
    _, total_pages, _ = lister.get_pages_information()
    ranges = list(utils.split_range(total_pages, NBPAGES))
    random.shuffle(ranges)
    promise = group(_range_gitlab_lister.s(minv, maxv, **lister_args)
                    for minv, maxv in ranges)()
    self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
    promise = group(
        _range_gitlab_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
    )()
    self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges)))
    try:
        promise.save()
    except (NotImplementedError, AttributeError):
        self.log.info('Unable to call save_group with current result backend.')
        self.log.info("Unable to call save_group with current result backend.")
    # FIXME: what to do in terms of return here?
    return promise.id


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"

@@ -17,50 +17,50 @@ logger = logging.getLogger(__name__)

class GitLabListerTester(HttpListerTesterBase, unittest.TestCase):
    Lister = GitLabLister
    test_re = re.compile(r'^.*/projects.*page=(\d+).*')
    lister_subdir = 'gitlab'
    good_api_response_file = 'data/gitlab.com/api_response.json'
    bad_api_response_file = 'data/gitlab.com/api_empty_response.json'
    test_re = re.compile(r"^.*/projects.*page=(\d+).*")
    lister_subdir = "gitlab"
    good_api_response_file = "data/gitlab.com/api_response.json"
    bad_api_response_file = "data/gitlab.com/api_empty_response.json"
    first_index = 1
    entries_per_page = 10
    convert_type = int

    def response_headers(self, request):
        headers = {'RateLimit-Remaining': '1'}
        headers = {"RateLimit-Remaining": "1"}
        if self.request_index(request) == self.first_index:
            headers.update({
                'x-next-page': '3',
            })
            headers.update(
                {"x-next-page": "3",}
            )

        return headers

    def mock_rate_quota(self, n, request, context):
        self.rate_limit += 1
        context.status_code = 403
        context.headers['RateLimit-Remaining'] = '0'
        context.headers["RateLimit-Remaining"] = "0"
        one_second = int((datetime.now() + timedelta(seconds=1.5)).timestamp())
        context.headers['RateLimit-Reset'] = str(one_second)
        context.headers["RateLimit-Reset"] = str(one_second)
        return '{"error":"dummy"}'


def test_lister_gitlab(swh_listers, requests_mock_datadir):
    lister = swh_listers['gitlab']
    lister = swh_listers["gitlab"]

    lister.run()

    r = lister.scheduler.search_tasks(task_type='load-git')
    r = lister.scheduler.search_tasks(task_type="load-git")
    assert len(r) == 10

    for row in r:
        assert row['type'] == 'load-git'
        assert row["type"] == "load-git"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 0

        # kwargs
        kwargs = row['arguments']['kwargs']
        url = kwargs['url']
        assert url.startswith('https://gitlab.com')
        kwargs = row["arguments"]["kwargs"]
        url = kwargs["url"]
        assert url.startswith("https://gitlab.com")

        assert row['policy'] == 'recurring'
        assert row['priority'] is None
        assert row["policy"] == "recurring"
        assert row["priority"] is None

@@ -5,43 +5,40 @@ from unittest.mock import patch


def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.gitlab.tasks.ping')
    res = swh_app.send_task("swh.lister.gitlab.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"


@patch('swh.lister.gitlab.tasks.GitLabLister')
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_incremental(lister, swh_app, celery_session_worker):
    # setup the mocked GitlabLister
    lister.return_value = lister
    lister.run.return_value = None
    lister.get_pages_information.return_value = (None, 10, None)

    res = swh_app.send_task(
        'swh.lister.gitlab.tasks.IncrementalGitLabLister')
    res = swh_app.send_task("swh.lister.gitlab.tasks.IncrementalGitLabLister")
    assert res
    res.wait()
    assert res.successful()

    lister.assert_called_once_with(sort='desc')
    lister.assert_called_once_with(sort="desc")
    lister.db_last_index.assert_not_called()
    lister.get_pages_information.assert_called_once_with()
    lister.run.assert_called_once_with(
        min_bound=1, max_bound=10, check_existence=True)
    lister.run.assert_called_once_with(min_bound=1, max_bound=10, check_existence=True)


@patch('swh.lister.gitlab.tasks.GitLabLister')
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_range(lister, swh_app, celery_session_worker):
    # setup the mocked GitlabLister
    lister.return_value = lister
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.gitlab.tasks.RangeGitLabLister',
        kwargs=dict(start=12, end=42))
        "swh.lister.gitlab.tasks.RangeGitLabLister", kwargs=dict(start=12, end=42)
    )
    assert res
    res.wait()
    assert res.successful()

@@ -51,17 +48,17 @@ def test_range(lister, swh_app, celery_session_worker):
    lister.run.assert_called_once_with(min_bound=12, max_bound=42)


@patch('swh.lister.gitlab.tasks.GitLabLister')
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_relister(lister, swh_app, celery_session_worker):
    # setup the mocked GitlabLister
    lister.return_value = lister
    lister.run.return_value = None
    lister.get_pages_information.return_value = (None, 85, None)
    lister.db_partition_indices.return_value = [
        (i, i+9) for i in range(0, 80, 10)] + [(80, 85)]
        (i, i + 9) for i in range(0, 80, 10)
    ] + [(80, 85)]

    res = swh_app.send_task(
        'swh.lister.gitlab.tasks.FullGitLabRelister')
    res = swh_app.send_task("swh.lister.gitlab.tasks.FullGitLabRelister")
    assert res

    res.wait()

@@ -90,24 +87,26 @@ def test_relister(lister, swh_app, celery_session_worker):
    # lister.run should have been called once per partition interval
    for i in range(8):
        # XXX inconsistent behavior: max_bound is EXCLUDED here
        assert (dict(min_bound=10*i, max_bound=10*i + 10),) \
            in lister.run.call_args_list
    assert (dict(min_bound=80, max_bound=85),) \
        in lister.run.call_args_list
        assert (
            dict(min_bound=10 * i, max_bound=10 * i + 10),
        ) in lister.run.call_args_list
    assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list


@patch('swh.lister.gitlab.tasks.GitLabLister')
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_relister_instance(lister, swh_app, celery_session_worker):
    # setup the mocked GitlabLister
    lister.return_value = lister
    lister.run.return_value = None
    lister.get_pages_information.return_value = (None, 85, None)
    lister.db_partition_indices.return_value = [
        (i, i+9) for i in range(0, 80, 10)] + [(80, 85)]
        (i, i + 9) for i in range(0, 80, 10)
    ] + [(80, 85)]

    res = swh_app.send_task(
        'swh.lister.gitlab.tasks.FullGitLabRelister',
        kwargs=dict(url='https://0xacab.org/api/v4'))
        "swh.lister.gitlab.tasks.FullGitLabRelister",
        kwargs=dict(url="https://0xacab.org/api/v4"),
    )
    assert res

    res.wait()

@@ -123,7 +122,7 @@ def test_relister_instance(lister, swh_app, celery_session_worker):
            break
        sleep(1)

    lister.assert_called_with(url='https://0xacab.org/api/v4')
    lister.assert_called_with(url="https://0xacab.org/api/v4")

    # one by the FullGitlabRelister task
    # + 9 for the RangeGitlabLister subtasks

@@ -136,7 +135,7 @@ def test_relister_instance(lister, swh_app, celery_session_worker):
    # lister.run should have been called once per partition interval
    for i in range(8):
        # XXX inconsistent behavior: max_bound is EXCLUDED here
        assert (dict(min_bound=10*i, max_bound=10*i + 10),) \
            in lister.run.call_args_list
    assert (dict(min_bound=80, max_bound=85),) \
        in lister.run.call_args_list
        assert (
            dict(min_bound=10 * i, max_bound=10 * i + 10),
        ) in lister.run.call_args_list
    assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list

@@ -7,7 +7,8 @@ def register():
    from .models import GNUModel
    from .lister import GNULister

    return {'models': [GNUModel],
            'lister': GNULister,
            'task_modules': ['%s.tasks' % __name__],
            }
    return {
        "models": [GNUModel],
        "lister": GNULister,
        "task_modules": ["%s.tasks" % __name__],
    }

@@ -18,12 +18,12 @@ logger = logging.getLogger(__name__)

class GNULister(SimpleLister):
    MODEL = GNUModel
    LISTER_NAME = 'gnu'
    instance = 'gnu'
    LISTER_NAME = "gnu"
    instance = "gnu"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.gnu_tree = GNUTree('https://ftp.gnu.org/tree.json.gz')
        self.gnu_tree = GNUTree("https://ftp.gnu.org/tree.json.gz")

    def task_dict(self, origin_type, origin_url, **kwargs):
        """Return task format dict

@@ -51,10 +51,10 @@ class GNULister(SimpleLister):

        """
        artifacts = self.gnu_tree.artifacts[origin_url]
        assert origin_type == 'tar'
        assert origin_type == "tar"
        return utils.create_task_dict(
            'load-archive-files',
            kwargs.get('policy', 'oneshot'),
            "load-archive-files",
            kwargs.get("policy", "oneshot"),
            url=origin_url,
            artifacts=artifacts,
            retries_left=3,

@@ -103,11 +103,11 @@ class GNULister(SimpleLister):

        """
        return {
            'uid': repo['url'],
            'name': repo['name'],
            'full_name': repo['name'],
            'html_url': repo['url'],
            'origin_url': repo['url'],
            'time_last_updated': repo['time_modified'],
            'origin_type': 'tar',
            "uid": repo["url"],
            "name": repo["name"],
            "full_name": repo["name"],
            "html_url": repo["url"],
            "origin_url": repo["url"],
            "time_last_updated": repo["time_modified"],
            "origin_type": "tar",
        }

@@ -11,7 +11,8 @@ class GNUModel(ModelBase):
    """a GNU repository representation

    """
    __tablename__ = 'gnu_repo'

    __tablename__ = "gnu_repo"

    uid = Column(String, primary_key=True)
    time_last_updated = Column(DateTime)

@@ -7,12 +7,12 @@ from celery import shared_task
from .lister import GNULister


@shared_task(name=__name__ + '.GNUListerTask')
@shared_task(name=__name__ + ".GNUListerTask")
def list_gnu_full(**lister_args):
    """List lister for the GNU source code archive"""
    return GNULister(**lister_args).run()


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"

@@ -10,43 +10,41 @@ logger = logging.getLogger(__name__)


def test_gnu_lister(swh_listers, requests_mock_datadir):
    lister = swh_listers['gnu']
    lister = swh_listers["gnu"]

    lister.run()

    r = lister.scheduler.search_tasks(task_type='load-archive-files')
    r = lister.scheduler.search_tasks(task_type="load-archive-files")
    assert len(r) == 383

    for row in r:
        assert row['type'] == 'load-archive-files'
        assert row["type"] == "load-archive-files"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 0

        # kwargs
        kwargs = row['arguments']['kwargs']
        assert set(kwargs.keys()) == {'url', 'artifacts'}
        kwargs = row["arguments"]["kwargs"]
        assert set(kwargs.keys()) == {"url", "artifacts"}

        url = kwargs['url']
        assert url.startswith('https://ftp.gnu.org')
        url = kwargs["url"]
        assert url.startswith("https://ftp.gnu.org")

        url_suffix = url.split('https://ftp.gnu.org')[1]
        assert 'gnu' in url_suffix or 'old-gnu' in url_suffix
        url_suffix = url.split("https://ftp.gnu.org")[1]
        assert "gnu" in url_suffix or "old-gnu" in url_suffix

        artifacts = kwargs['artifacts']
        artifacts = kwargs["artifacts"]
        # check the artifact's structure
        artifact = artifacts[0]
        assert set(artifact.keys()) == {
            'url', 'length', 'time', 'filename', 'version'
        }
        assert set(artifact.keys()) == {"url", "length", "time", "filename", "version"}

        for artifact in artifacts:
            logger.debug(artifact)
            # 'time' is an isoformat string now
            for key in ['url', 'time', 'filename', 'version']:
            for key in ["url", "time", "filename", "version"]:
                assert isinstance(artifact[key], str)
            assert isinstance(artifact['length'], int)
            assert isinstance(artifact["length"], int)

        assert row['policy'] == 'oneshot'
        assert row['priority'] is None
        assert row['retries_left'] == 3
        assert row["policy"] == "oneshot"
        assert row["priority"] is None
        assert row["retries_left"] == 3

@@ -2,22 +2,20 @@ from unittest.mock import patch


def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.gnu.tasks.ping')
    res = swh_app.send_task("swh.lister.gnu.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"


@patch('swh.lister.gnu.tasks.GNULister')
@patch("swh.lister.gnu.tasks.GNULister")
def test_lister(lister, swh_app, celery_session_worker):
    # setup the mocked GNULister
    lister.return_value = lister
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.gnu.tasks.GNUListerTask')
    res = swh_app.send_task("swh.lister.gnu.tasks.GNUListerTask")
    assert res
    res.wait()
    assert res.successful()

@@ -9,26 +9,30 @@ import pytest

from os import path
from swh.lister.gnu.tree import (
    GNUTree, find_artifacts, check_filename_is_archive, load_raw_data,
    get_version, format_date
    GNUTree,
    find_artifacts,
    check_filename_is_archive,
    load_raw_data,
    get_version,
    format_date,
)


def test_load_raw_data_from_query(requests_mock_datadir):
    actual_json = load_raw_data('https://ftp.gnu.org/tree.json.gz')
    actual_json = load_raw_data("https://ftp.gnu.org/tree.json.gz")
    assert actual_json is not None
    assert isinstance(actual_json, list)
    assert len(actual_json) == 2


def test_load_raw_data_from_query_failure(requests_mock_datadir):
    inexistant_url = 'https://ftp2.gnu.org/tree.unknown.gz'
    with pytest.raises(ValueError, match='Error during query'):
    inexistant_url = "https://ftp2.gnu.org/tree.unknown.gz"
    with pytest.raises(ValueError, match="Error during query"):
        load_raw_data(inexistant_url)


def test_load_raw_data_from_file(datadir):
    filepath = path.join(datadir, 'https_ftp.gnu.org', 'tree.json.gz')
    filepath = path.join(datadir, "https_ftp.gnu.org", "tree.json.gz")
    actual_json = load_raw_data(filepath)
    assert actual_json is not None
    assert isinstance(actual_json, list)

@@ -36,115 +40,115 @@ def test_load_raw_data_from_file(datadir):


def test_load_raw_data_from_file_failure(datadir):
    unknown_path = path.join(datadir, 'ftp.gnu.org2', 'tree.json.gz')
    unknown_path = path.join(datadir, "ftp.gnu.org2", "tree.json.gz")
    with pytest.raises(FileNotFoundError):
        load_raw_data(unknown_path)


def test_tree_json(requests_mock_datadir):
    tree_json = GNUTree('https://ftp.gnu.org/tree.json.gz')
    tree_json = GNUTree("https://ftp.gnu.org/tree.json.gz")

    assert tree_json.projects['https://ftp.gnu.org/gnu/8sync/'] == {
        'name': '8sync',
        'time_modified': '2017-03-18T06:10:08+00:00',
        'url': 'https://ftp.gnu.org/gnu/8sync/'
    assert tree_json.projects["https://ftp.gnu.org/gnu/8sync/"] == {
        "name": "8sync",
        "time_modified": "2017-03-18T06:10:08+00:00",
        "url": "https://ftp.gnu.org/gnu/8sync/",
    }

    assert tree_json.projects['https://ftp.gnu.org/gnu/3dldf/'] == {
        'name': '3dldf',
        'time_modified': '2013-12-13T19:00:36+00:00',
        'url': 'https://ftp.gnu.org/gnu/3dldf/'
    assert tree_json.projects["https://ftp.gnu.org/gnu/3dldf/"] == {
        "name": "3dldf",
        "time_modified": "2013-12-13T19:00:36+00:00",
        "url": "https://ftp.gnu.org/gnu/3dldf/",
    }

    assert tree_json.projects['https://ftp.gnu.org/gnu/a2ps/'] == {
        'name': 'a2ps',
        'time_modified': '2007-12-29T03:55:05+00:00',
        'url': 'https://ftp.gnu.org/gnu/a2ps/'
    assert tree_json.projects["https://ftp.gnu.org/gnu/a2ps/"] == {
        "name": "a2ps",
        "time_modified": "2007-12-29T03:55:05+00:00",
        "url": "https://ftp.gnu.org/gnu/a2ps/",
    }

    assert tree_json.projects['https://ftp.gnu.org/old-gnu/xshogi/'] == {
        'name': 'xshogi',
        'time_modified': '2003-08-02T11:15:22+00:00',
        'url': 'https://ftp.gnu.org/old-gnu/xshogi/'
    assert tree_json.projects["https://ftp.gnu.org/old-gnu/xshogi/"] == {
        "name": "xshogi",
        "time_modified": "2003-08-02T11:15:22+00:00",
        "url": "https://ftp.gnu.org/old-gnu/xshogi/",
    }

    assert tree_json.artifacts['https://ftp.gnu.org/old-gnu/zlibc/'] == [
    assert tree_json.artifacts["https://ftp.gnu.org/old-gnu/zlibc/"] == [
        {
            'url': 'https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9b.tar.gz',  # noqa
            'length': 90106,
            'time': '1997-03-10T08:00:00+00:00',
            'filename': 'zlibc-0.9b.tar.gz',
            'version': '0.9b',
            "url": "https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9b.tar.gz",  # noqa
            "length": 90106,
            "time": "1997-03-10T08:00:00+00:00",
            "filename": "zlibc-0.9b.tar.gz",
            "version": "0.9b",
        },
        {
            'url': 'https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9e.tar.gz',  # noqa
            'length': 89625,
            'time': '1997-04-07T07:00:00+00:00',
            'filename': 'zlibc-0.9e.tar.gz',
            'version': '0.9e',
        }
            "url": "https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9e.tar.gz",  # noqa
            "length": 89625,
            "time": "1997-04-07T07:00:00+00:00",
            "filename": "zlibc-0.9e.tar.gz",
            "version": "0.9e",
        },
    ]


def test_tree_json_failures(requests_mock_datadir):
    url = 'https://unknown/tree.json.gz'
    url = "https://unknown/tree.json.gz"
    tree_json = GNUTree(url)

    with pytest.raises(ValueError, match='Error during query to %s' % url):
        tree_json.artifacts['https://ftp.gnu.org/gnu/3dldf/']
    with pytest.raises(ValueError, match="Error during query to %s" % url):
        tree_json.artifacts["https://ftp.gnu.org/gnu/3dldf/"]

    with pytest.raises(ValueError, match='Error during query to %s' % url):
        tree_json.projects['https://ftp.gnu.org/old-gnu/xshogi/']
    with pytest.raises(ValueError, match="Error during query to %s" % url):
        tree_json.projects["https://ftp.gnu.org/old-gnu/xshogi/"]


def test_find_artifacts_small_sample(datadir):
    expected_artifacts = [
        {
            'url': '/root/artanis/artanis-0.2.1.tar.bz2',
            'time': '2017-05-19T14:59:39+00:00',
            'length': 424081,
            'version': '0.2.1',
            'filename': 'artanis-0.2.1.tar.bz2',
            "url": "/root/artanis/artanis-0.2.1.tar.bz2",
            "time": "2017-05-19T14:59:39+00:00",
            "length": 424081,
            "version": "0.2.1",
            "filename": "artanis-0.2.1.tar.bz2",
        },
        {
            'url': '/root/xboard/winboard/winboard-4_0_0-src.zip',  # noqa
            'time': '1998-06-21T09:55:00+00:00',
            'length': 1514448,
            'version': '4_0_0-src',
            'filename': 'winboard-4_0_0-src.zip',
            "url": "/root/xboard/winboard/winboard-4_0_0-src.zip",  # noqa
            "time": "1998-06-21T09:55:00+00:00",
            "length": 1514448,
            "version": "4_0_0-src",
            "filename": "winboard-4_0_0-src.zip",
        },
        {
            'url': '/root/xboard/xboard-3.6.2.tar.gz',  # noqa
            'time': '1997-07-25T07:00:00+00:00',
            'length': 450164,
            'version': '3.6.2',
            'filename': 'xboard-3.6.2.tar.gz',
            "url": "/root/xboard/xboard-3.6.2.tar.gz",  # noqa
            "time": "1997-07-25T07:00:00+00:00",
            "length": 450164,
            "version": "3.6.2",
            "filename": "xboard-3.6.2.tar.gz",
        },
        {
            'url': '/root/xboard/xboard-4.0.0.tar.gz',  # noqa
            'time': '1998-06-21T09:55:00+00:00',
            'length': 514951,
            'version': '4.0.0',
            'filename': 'xboard-4.0.0.tar.gz',
            "url": "/root/xboard/xboard-4.0.0.tar.gz",  # noqa
            "time": "1998-06-21T09:55:00+00:00",
            "length": 514951,
            "version": "4.0.0",
            "filename": "xboard-4.0.0.tar.gz",
        },
    ]

    file_structure = json.load(open(path.join(datadir, 'tree.min.json')))
    actual_artifacts = find_artifacts(file_structure, '/root/')
    file_structure = json.load(open(path.join(datadir, "tree.min.json")))
    actual_artifacts = find_artifacts(file_structure, "/root/")
    assert actual_artifacts == expected_artifacts


def test_find_artifacts(datadir):
    file_structure = json.load(open(path.join(datadir, 'tree.json')))
    actual_artifacts = find_artifacts(file_structure, '/root/')
    file_structure = json.load(open(path.join(datadir, "tree.json")))
    actual_artifacts = find_artifacts(file_structure, "/root/")
    assert len(actual_artifacts) == 42 + 3  # tar + zip


def test_check_filename_is_archive():
    for ext in ['abc.xy.zip', 'cvb.zip', 'abc.tar.bz2', 'something.tar']:
    for ext in ["abc.xy.zip", "cvb.zip", "abc.tar.bz2", "something.tar"]:
        assert check_filename_is_archive(ext) is True

    for ext in ['abc.tar.gz.sig', 'abc', 'something.zip2', 'foo.tar.']:
    for ext in ["abc.tar.gz.sig", "abc", "something.zip2", "foo.tar."]:
        assert check_filename_is_archive(ext) is False
|
||||
|
||||
|
||||
|
@ -155,54 +159,62 @@ def test_get_version():
|
|||
|
||||
"""
|
||||
for url, expected_branchname in [
|
||||
('https://gnu.org/sthg/info-2.1.0.tar.gz', '2.1.0'),
|
||||
('https://gnu.org/sthg/info-2.1.2.zip', '2.1.2'),
|
||||
('https://sthg.org/gnu/sthg.tar.gz', 'sthg'),
|
||||
('https://sthg.org/gnu/DLDF-1.1.4.tar.gz', '1.1.4'),
|
||||
('https://sthg.org/gnu/anubis-latest.tar.bz2', 'latest'),
|
||||
('https://ftp.org/gnu/aris-w32.zip', 'w32'),
|
||||
('https://ftp.org/gnu/aris-w32-2.2.zip', 'w32-2.2'),
|
||||
('https://ftp.org/gnu/autogen.info.tar.gz', 'autogen.info'),
|
||||
('https://ftp.org/gnu/crypto-build-demo.tar.gz',
|
||||
'crypto-build-demo'),
|
||||
('https://ftp.org/gnu/clue+clio+xit.clisp.tar.gz',
|
||||
'clue+clio+xit.clisp'),
|
||||
('https://ftp.org/gnu/clue+clio.for-pcl.tar.gz',
|
||||
'clue+clio.for-pcl'),
|
||||
('https://ftp.org/gnu/clisp-hppa2.0-hp-hpux10.20.tar.gz',
|
||||
'hppa2.0-hp-hpux10.20'),
|
||||
('clisp-i386-solaris2.6.tar.gz', 'i386-solaris2.6'),
|
||||
('clisp-mips-sgi-irix6.5.tar.gz', 'mips-sgi-irix6.5'),
|
||||
('clisp-powerpc-apple-macos.tar.gz', 'powerpc-apple-macos'),
|
||||
('clisp-powerpc-unknown-linuxlibc6.tar.gz',
|
||||
'powerpc-unknown-linuxlibc6'),
|
||||
|
||||
('clisp-rs6000-ibm-aix3.2.5.tar.gz', 'rs6000-ibm-aix3.2.5'),
|
||||
('clisp-sparc-redhat51-linux.tar.gz', 'sparc-redhat51-linux'),
|
||||
('clisp-sparc-sun-solaris2.4.tar.gz', 'sparc-sun-solaris2.4'),
|
||||
('clisp-sparc-sun-sunos4.1.3_U1.tar.gz',
|
||||
'sparc-sun-sunos4.1.3_U1'),
|
||||
('clisp-2.25.1-powerpc-apple-MacOSX.tar.gz',
|
||||
'2.25.1-powerpc-apple-MacOSX'),
|
||||
('clisp-2.27-PowerMacintosh-powerpc-Darwin-1.3.7.tar.gz',
|
||||
'2.27-PowerMacintosh-powerpc-Darwin-1.3.7'),
|
||||
('clisp-2.27-i686-unknown-Linux-2.2.19.tar.gz',
|
||||
'2.27-i686-unknown-Linux-2.2.19'),
|
||||
('clisp-2.28-i386-i386-freebsd-4.3-RELEASE.tar.gz',
|
||||
'2.28-i386-i386-freebsd-4.3-RELEASE'),
|
||||
('clisp-2.28-i686-unknown-cygwin_me-4.90-1.3.10.tar.gz',
|
||||
'2.28-i686-unknown-cygwin_me-4.90-1.3.10'),
|
||||
('clisp-2.29-i386-i386-freebsd-4.6-STABLE.tar.gz',
|
||||
'2.29-i386-i386-freebsd-4.6-STABLE'),
|
||||
('clisp-2.29-i686-unknown-cygwin_nt-5.0-1.3.12.tar.gz',
|
||||
'2.29-i686-unknown-cygwin_nt-5.0-1.3.12'),
|
||||
('gcl-2.5.3-ansi-japi-xdr.20030701_mingw32.zip',
|
||||
'2.5.3-ansi-japi-xdr.20030701_mingw32'),
|
||||
('gettext-runtime-0.13.1.bin.woe32.zip', '0.13.1.bin.woe32'),
|
||||
('sather-logo_images.tar.gz', 'sather-logo_images'),
|
||||
('sather-specification-000328.html.tar.gz', '000328.html'),
|
||||
('something-10.1.0.7z', '10.1.0'),
|
||||
|
||||
("https://gnu.org/sthg/info-2.1.0.tar.gz", "2.1.0"),
|
||||
("https://gnu.org/sthg/info-2.1.2.zip", "2.1.2"),
|
||||
("https://sthg.org/gnu/sthg.tar.gz", "sthg"),
|
||||
("https://sthg.org/gnu/DLDF-1.1.4.tar.gz", "1.1.4"),
|
||||
("https://sthg.org/gnu/anubis-latest.tar.bz2", "latest"),
|
||||
("https://ftp.org/gnu/aris-w32.zip", "w32"),
|
||||
("https://ftp.org/gnu/aris-w32-2.2.zip", "w32-2.2"),
|
||||
("https://ftp.org/gnu/autogen.info.tar.gz", "autogen.info"),
|
||||
("https://ftp.org/gnu/crypto-build-demo.tar.gz", "crypto-build-demo"),
|
||||
("https://ftp.org/gnu/clue+clio+xit.clisp.tar.gz", "clue+clio+xit.clisp"),
|
||||
("https://ftp.org/gnu/clue+clio.for-pcl.tar.gz", "clue+clio.for-pcl"),
|
||||
(
|
||||
"https://ftp.org/gnu/clisp-hppa2.0-hp-hpux10.20.tar.gz",
|
||||
"hppa2.0-hp-hpux10.20",
|
||||
),
|
||||
("clisp-i386-solaris2.6.tar.gz", "i386-solaris2.6"),
|
||||
("clisp-mips-sgi-irix6.5.tar.gz", "mips-sgi-irix6.5"),
|
||||
("clisp-powerpc-apple-macos.tar.gz", "powerpc-apple-macos"),
|
||||
("clisp-powerpc-unknown-linuxlibc6.tar.gz", "powerpc-unknown-linuxlibc6"),
|
||||
("clisp-rs6000-ibm-aix3.2.5.tar.gz", "rs6000-ibm-aix3.2.5"),
|
||||
("clisp-sparc-redhat51-linux.tar.gz", "sparc-redhat51-linux"),
|
||||
("clisp-sparc-sun-solaris2.4.tar.gz", "sparc-sun-solaris2.4"),
|
||||
("clisp-sparc-sun-sunos4.1.3_U1.tar.gz", "sparc-sun-sunos4.1.3_U1"),
|
||||
("clisp-2.25.1-powerpc-apple-MacOSX.tar.gz", "2.25.1-powerpc-apple-MacOSX"),
|
||||
(
|
||||
"clisp-2.27-PowerMacintosh-powerpc-Darwin-1.3.7.tar.gz",
|
||||
"2.27-PowerMacintosh-powerpc-Darwin-1.3.7",
|
||||
),
|
||||
(
|
||||
"clisp-2.27-i686-unknown-Linux-2.2.19.tar.gz",
|
||||
"2.27-i686-unknown-Linux-2.2.19",
|
||||
),
|
||||
(
|
||||
"clisp-2.28-i386-i386-freebsd-4.3-RELEASE.tar.gz",
|
||||
"2.28-i386-i386-freebsd-4.3-RELEASE",
|
||||
),
|
||||
(
|
||||
"clisp-2.28-i686-unknown-cygwin_me-4.90-1.3.10.tar.gz",
|
||||
"2.28-i686-unknown-cygwin_me-4.90-1.3.10",
|
||||
),
|
||||
(
|
||||
"clisp-2.29-i386-i386-freebsd-4.6-STABLE.tar.gz",
|
||||
"2.29-i386-i386-freebsd-4.6-STABLE",
|
||||
),
|
||||
(
|
||||
"clisp-2.29-i686-unknown-cygwin_nt-5.0-1.3.12.tar.gz",
|
||||
"2.29-i686-unknown-cygwin_nt-5.0-1.3.12",
|
||||
),
|
||||
(
|
||||
"gcl-2.5.3-ansi-japi-xdr.20030701_mingw32.zip",
|
||||
"2.5.3-ansi-japi-xdr.20030701_mingw32",
|
||||
),
|
||||
("gettext-runtime-0.13.1.bin.woe32.zip", "0.13.1.bin.woe32"),
|
||||
("sather-logo_images.tar.gz", "sather-logo_images"),
|
||||
("sather-specification-000328.html.tar.gz", "000328.html"),
|
||||
("something-10.1.0.7z", "10.1.0"),
|
||||
]:
|
||||
actual_branchname = get_version(url)
|
||||
|
||||
|
@ -211,16 +223,16 @@ def test_get_version():
|
|||
|
||||
def test_format_date():
|
||||
for timestamp, expected_isoformat_date in [
|
||||
(1489817408, '2017-03-18T06:10:08+00:00'),
|
||||
(1386961236, '2013-12-13T19:00:36+00:00'),
|
||||
('1198900505', '2007-12-29T03:55:05+00:00'),
|
||||
(1059822922, '2003-08-02T11:15:22+00:00'),
|
||||
('1489817408', '2017-03-18T06:10:08+00:00'),
|
||||
(1489817408, "2017-03-18T06:10:08+00:00"),
|
||||
(1386961236, "2013-12-13T19:00:36+00:00"),
|
||||
("1198900505", "2007-12-29T03:55:05+00:00"),
|
||||
(1059822922, "2003-08-02T11:15:22+00:00"),
|
||||
("1489817408", "2017-03-18T06:10:08+00:00"),
|
||||
]:
|
||||
actual_date = format_date(timestamp)
|
||||
assert actual_date == expected_isoformat_date
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
format_date('')
|
||||
format_date("")
|
||||
with pytest.raises(TypeError):
|
||||
format_date(None)
|
||||
|
|
|
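The expectations above pin down format_date's contract: a Unix timestamp (int or numeric string) maps to an ISO-8601 date in UTC, the empty string raises ValueError, and None raises TypeError. A minimal sketch consistent with those tests (this body is an assumption for illustration, not the committed implementation):

    from datetime import datetime, timezone

    def format_date(timestamp):
        # int() raises ValueError on "" and TypeError on None,
        # matching the two pytest.raises cases above.
        ts = int(timestamp)
        return datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()

    assert format_date(1489817408) == "2017-03-18T06:10:08+00:00"
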
@@ -24,12 +24,13 @@ class GNUTree:
    """Gnu Tree's representation

    """

    def __init__(self, url: str):
        self.url = url  # filepath or uri
        u = urlparse(url)
        self.base_url = '%s://%s' % (u.scheme, u.netloc)
        self.base_url = "%s://%s" % (u.scheme, u.netloc)
        # Interesting top level directories
        self.top_level_directories = ['gnu', 'old-gnu']
        self.top_level_directories = ["gnu", "old-gnu"]
        # internal state
        self._artifacts = {}  # type: Mapping[str, Any]
        self._projects = {}  # type: Mapping[str, Any]

@@ -59,21 +60,23 @@ class GNUTree:
        artifacts = {}

        raw_data = load_raw_data(self.url)[0]
        for directory in raw_data['contents']:
            if directory['name'] not in self.top_level_directories:
        for directory in raw_data["contents"]:
            if directory["name"] not in self.top_level_directories:
                continue
            infos = directory['contents']
            infos = directory["contents"]
            for info in infos:
                if info['type'] == 'directory':
                    package_url = '%s/%s/%s/' % (
                        self.base_url, directory['name'], info['name'])
                    package_artifacts = find_artifacts(
                        info['contents'], package_url)
                if info["type"] == "directory":
                    package_url = "%s/%s/%s/" % (
                        self.base_url,
                        directory["name"],
                        info["name"],
                    )
                    package_artifacts = find_artifacts(info["contents"], package_url)
                    if package_artifacts != []:
                        repo_details = {
                            'name': info['name'],
                            'url': package_url,
                            'time_modified': format_date(info['time'])
                            "name": info["name"],
                            "url": package_url,
                            "time_modified": format_date(info["time"]),
                        }
                        artifacts[package_url] = package_artifacts
                        projects[package_url] = repo_details

@@ -81,8 +84,9 @@ class GNUTree:
        return projects, artifacts


def find_artifacts(filesystem: List[Mapping[str, Any]],
                   url: str) -> List[Mapping[str, Any]]:
def find_artifacts(
    filesystem: List[Mapping[str, Any]], url: str
) -> List[Mapping[str, Any]]:
    """Recursively list artifacts present in the folder and subfolders for a
    particular package url.


@@ -127,23 +131,25 @@ def find_artifacts(filesystem: List[Mapping[str, Any]],
    """
    artifacts = []  # type: List[Mapping[str, Any]]
    for info_file in filesystem:
        filetype = info_file['type']
        filename = info_file['name']
        if filetype == 'file':
        filetype = info_file["type"]
        filename = info_file["name"]
        if filetype == "file":
            if check_filename_is_archive(filename):
                uri = url + filename
                artifacts.append({
                    'url': uri,
                    'filename': filename,
                    'time': format_date(info_file['time']),
                    'length': int(info_file['size']),
                    'version': get_version(filename),
                })
                artifacts.append(
                    {
                        "url": uri,
                        "filename": filename,
                        "time": format_date(info_file["time"]),
                        "length": int(info_file["size"]),
                        "version": get_version(filename),
                    }
                )
        # It will recursively check for artifacts in all sub-folders
        elif filetype == 'directory':
        elif filetype == "directory":
            tarballs_in_dir = find_artifacts(
                info_file['contents'],
                url + filename + '/')
                info_file["contents"], url + filename + "/"
            )
            artifacts.extend(tarballs_in_dir)

    return artifacts

@@ -176,40 +182,67 @@ def check_filename_is_archive(filename: str) -> bool:

    """
    file_suffixes = Path(filename).suffixes
    if len(file_suffixes) == 1 and file_suffixes[-1] in ('.zip', '.tar'):
    if len(file_suffixes) == 1 and file_suffixes[-1] in (".zip", ".tar"):
        return True
    elif len(file_suffixes) > 1:
        if file_suffixes[-1] == '.zip' or file_suffixes[-2] == '.tar':
        if file_suffixes[-1] == ".zip" or file_suffixes[-2] == ".tar":
            return True
    return False


# to recognize existing naming pattern
EXTENSIONS = [
    'zip',
    'tar',
    'gz', 'tgz',
    'bz2', 'bzip2',
    'lzma', 'lz',
    'xz',
    'Z', '7z',
    "zip",
    "tar",
    "gz",
    "tgz",
    "bz2",
    "bzip2",
    "lzma",
    "lz",
    "xz",
    "Z",
    "7z",
]

VERSION_KEYWORDS = [
    'cygwin_me',
    'w32', 'win32', 'nt', 'cygwin', 'mingw',
    'latest', 'alpha', 'beta',
    'release', 'stable',
    'hppa',
    'solaris', 'sunos', 'sun4u', 'sparc', 'sun',
    'aix', 'ibm', 'rs6000',
    'i386', 'i686',
    'linux', 'redhat', 'linuxlibc',
    'mips',
    'powerpc', 'macos', 'apple', 'darwin', 'macosx', 'powermacintosh',
    'unknown',
    'netbsd', 'freebsd',
    'sgi', 'irix',
    "cygwin_me",
    "w32",
    "win32",
    "nt",
    "cygwin",
    "mingw",
    "latest",
    "alpha",
    "beta",
    "release",
    "stable",
    "hppa",
    "solaris",
    "sunos",
    "sun4u",
    "sparc",
    "sun",
    "aix",
    "ibm",
    "rs6000",
    "i386",
    "i686",
    "linux",
    "redhat",
    "linuxlibc",
    "mips",
    "powerpc",
    "macos",
    "apple",
    "darwin",
    "macosx",
    "powermacintosh",
    "unknown",
    "netbsd",
    "freebsd",
    "sgi",
    "irix",
]

# Match a filename into components.

@@ -225,7 +258,7 @@ VERSION_KEYWORDS = [
# greedily with +, software_name and release_number are matched lazily
# with +? and *?).

PATTERN = r'''
PATTERN = r"""
^
(?:
    # We have a software name and a release number, separated with a

@@ -239,9 +272,9 @@ PATTERN = r'''
)
(?P<extension>(?:\.(?:{extensions}))+)
$
'''.format(
    extensions='|'.join(EXTENSIONS),
    vkeywords='|'.join('%s[-]?' % k for k in VERSION_KEYWORDS),
""".format(
    extensions="|".join(EXTENSIONS),
    vkeywords="|".join("%s[-]?" % k for k in VERSION_KEYWORDS),
)

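Once formatted, PATTERN is matched against bare filenames with re.VERBOSE | re.IGNORECASE (see the get_version hunk below). A quick illustration of what it is expected to extract, taken directly from the test table earlier in this diff (a hypothetical standalone snippet, assuming the module is importable as swh.lister.gnu.tree):

    from swh.lister.gnu.tree import get_version

    # release number wins when both a name and a number are present
    assert get_version("https://gnu.org/sthg/info-2.1.0.tar.gz") == "2.1.0"
    # otherwise the trailing keyword-based component is returned
    assert get_version("clisp-hppa2.0-hp-hpux10.20.tar.gz") == "hppa2.0-hp-hpux10.20"
    # with neither, the software name itself is the "version"
    assert get_version("https://sthg.org/gnu/sthg.tar.gz") == "sthg"
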
@@ -267,16 +300,15 @@ def get_version(uri: str) -> str:

    """
    filename = path.split(uri)[-1]
    m = re.match(PATTERN, filename,
                 flags=re.VERBOSE | re.IGNORECASE)
    m = re.match(PATTERN, filename, flags=re.VERBOSE | re.IGNORECASE)
    if m:
        d = m.groupdict()
        if d['software_name1'] and d['release_number']:
            return d['release_number']
        if d['software_name2']:
            return d['software_name2']
        if d["software_name1"] and d["release_number"]:
            return d["release_number"]
        if d["software_name2"]:
            return d["software_name2"]

    return ''
    return ""


def load_raw_data(url: str) -> Sequence[Mapping]:

@@ -289,15 +321,15 @@ def load_raw_data(url: str) -> Sequence[Mapping]:
        The raw json list

    """
    if url.startswith('http://') or url.startswith('https://'):
    if url.startswith("http://") or url.startswith("https://"):
        response = requests.get(url, allow_redirects=True)
        if not response.ok:
            raise ValueError('Error during query to %s' % url)
            raise ValueError("Error during query to %s" % url)
        raw = gzip.decompress(response.content)
    else:
        with gzip.open(url, 'r') as f:
        with gzip.open(url, "r") as f:
            raw = f.read()
    raw_data = json.loads(raw.decode('utf-8'))
    raw_data = json.loads(raw.decode("utf-8"))
    return raw_data

@@ -7,14 +7,15 @@ def register():
    from .models import NpmVisitModel, NpmModel
    from .lister import NpmLister

    return {'models': [NpmVisitModel, NpmModel],
            'lister': NpmLister,
            'task_modules': ['%s.tasks' % __name__],
            'task_types': {
                'list-npm-full': {
                    'default_interval': '7 days',
                    'min_interval': '7 days',
                    'max_interval': '7 days',
                },
            },
            }
    return {
        "models": [NpmVisitModel, NpmModel],
        "lister": NpmLister,
        "task_modules": ["%s.tasks" % __name__],
        "task_types": {
            "list-npm-full": {
                "default_interval": "7 days",
                "min_interval": "7 days",
                "max_interval": "7 days",
            },
        },
    }

@@ -14,15 +14,17 @@ class NpmListerBase(IndexingHttpLister):
    """List packages available in the npm registry in a paginated way

    """
    MODEL = NpmModel
    LISTER_NAME = 'npm'
    instance = 'npm'

    def __init__(self, url='https://replicate.npmjs.com',
                 per_page=1000, override_config=None):
    MODEL = NpmModel
    LISTER_NAME = "npm"
    instance = "npm"

    def __init__(
        self, url="https://replicate.npmjs.com", per_page=1000, override_config=None
    ):
        super().__init__(url=url, override_config=override_config)
        self.per_page = per_page + 1
        self.PATH_TEMPLATE += '&limit=%s' % self.per_page
        self.PATH_TEMPLATE += "&limit=%s" % self.per_page

    @property
    def ADDITIONAL_CONFIG(self) -> Dict[str, Any]:

@@ -30,22 +32,22 @@ class NpmListerBase(IndexingHttpLister):

        """
        default_config = super().ADDITIONAL_CONFIG
        default_config['loading_task_policy'] = ('str', 'recurring')
        default_config["loading_task_policy"] = ("str", "recurring")
        return default_config

    def get_model_from_repo(self, repo_name: str) -> Dict[str, str]:
        """(Override) Transform from npm package name to model

        """
        package_url = 'https://www.npmjs.com/package/%s' % repo_name
        package_url = "https://www.npmjs.com/package/%s" % repo_name
        return {
            'uid': repo_name,
            'indexable': repo_name,
            'name': repo_name,
            'full_name': repo_name,
            'html_url': package_url,
            'origin_url': package_url,
            'origin_type': 'npm',
            "uid": repo_name,
            "indexable": repo_name,
            "name": repo_name,
            "full_name": repo_name,
            "html_url": package_url,
            "origin_url": package_url,
            "origin_type": "npm",
        }

    def task_dict(self, origin_type: str, origin_url: str, **kwargs):

@@ -56,10 +58,9 @@ class NpmListerBase(IndexingHttpLister):
        needed for the ingestion task creation.

        """
        task_type = 'load-%s' % origin_type
        task_policy = self.config['loading_task_policy']
        return create_task_dict(task_type, task_policy,
                                url=origin_url)
        task_type = "load-%s" % origin_type
        task_policy = self.config["loading_task_policy"]
        return create_task_dict(task_type, task_policy, url=origin_url)

    def request_headers(self) -> Dict[str, Any]:
        """(Override) Set requests headers to send when querying the npm

@@ -67,7 +68,7 @@ class NpmListerBase(IndexingHttpLister):

        """
        headers = super().request_headers()
        headers['Accept'] = 'application/json'
        headers["Accept"] = "application/json"
        return headers

    def string_pattern_check(self, inner: int, lower: int, upper: int = None):

@@ -83,25 +84,24 @@ class NpmLister(NpmListerBase):
    """List all packages available in the npm registry in a paginated way

    """

    PATH_TEMPLATE = '/_all_docs?startkey="%s"'

    def get_next_target_from_response(
            self, response: Response) -> Optional[str]:
    def get_next_target_from_response(self, response: Response) -> Optional[str]:
        """(Override) Get next npm package name to continue the listing

        """
        repos = response.json()['rows']
        return repos[-1]['id'] if len(repos) == self.per_page else None
        repos = response.json()["rows"]
        return repos[-1]["id"] if len(repos) == self.per_page else None

    def transport_response_simplified(
            self, response: Response) -> List[Dict[str, str]]:
    def transport_response_simplified(self, response: Response) -> List[Dict[str, str]]:
        """(Override) Transform npm registry response to list for model manipulation

        """
        repos = response.json()['rows']
        repos = response.json()["rows"]
        if len(repos) == self.per_page:
            repos = repos[:-1]
        return [self.get_model_from_repo(repo['id']) for repo in repos]
        return [self.get_model_from_repo(repo["id"]) for repo in repos]


class NpmIncrementalLister(NpmListerBase):

@@ -109,30 +109,29 @@ class NpmIncrementalLister(NpmListerBase):
    update_seq value of the underlying CouchDB database, in a paginated way.

    """
    PATH_TEMPLATE = '/_changes?since=%s'

    PATH_TEMPLATE = "/_changes?since=%s"

    @property
    def CONFIG_BASE_FILENAME(self):  # noqa: N802
        return 'lister_npm_incremental'
        return "lister_npm_incremental"

    def get_next_target_from_response(
            self, response: Response) -> Optional[str]:
    def get_next_target_from_response(self, response: Response) -> Optional[str]:
        """(Override) Get next npm package name to continue the listing.

        """
        repos = response.json()['results']
        return repos[-1]['seq'] if len(repos) == self.per_page else None
        repos = response.json()["results"]
        return repos[-1]["seq"] if len(repos) == self.per_page else None

    def transport_response_simplified(
            self, response: Response) -> List[Dict[str, str]]:
    def transport_response_simplified(self, response: Response) -> List[Dict[str, str]]:
        """(Override) Transform npm registry response to list for model
        manipulation.

        """
        repos = response.json()['results']
        repos = response.json()["results"]
        if len(repos) == self.per_page:
            repos = repos[:-1]
        return [self.get_model_from_repo(repo['id']) for repo in repos]
        return [self.get_model_from_repo(repo["id"]) for repo in repos]

    def filter_before_inject(self, models_list: List[Dict[str, Any]]):
        """(Override) Filter out documents in the CouchDB database

@@ -141,9 +140,9 @@ class NpmIncrementalLister(NpmListerBase):
        """
        models_filtered = []
        for model in models_list:
            package_name = model['name']
            package_name = model["name"]
            # document related to CouchDB internals
            if package_name.startswith('_design/'):
            if package_name.startswith("_design/"):
                continue
            models_filtered.append(model)
        return models_filtered

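The `self.per_page = per_page + 1` line above drives the pagination scheme used by both npm listers: each CouchDB query asks for one extra row, the extra row's id (or seq) seeds the next request, and transport_response_simplified drops it so no origin is recorded twice. A rough sketch of the same overlap technique in isolation (generic illustration, fetch_page is an assumed callable, not the lister API):

    def paginate(fetch_page, per_page=1000):
        # fetch_page(start, limit) returns up to `limit` rows as dicts with an "id"
        start = ""
        while True:
            rows = fetch_page(start, limit=per_page + 1)
            full_page = len(rows) == per_page + 1
            # drop the overlap row on full pages, exactly as the lister does
            yield from rows[:-1] if full_page else rows
            if not full_page:
                break
            start = rows[-1]["id"]  # the extra row becomes the next startkey
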
@@ -11,9 +11,10 @@ class NpmVisitModel(SQLBase, metaclass=ABCSQLMeta):
    """Table to store the npm registry state at the time of a
    content listing by Software Heritage
    """
    __tablename__ = 'npm_visit'

    uid = Column(Integer, Sequence('npm_visit_id_seq'), primary_key=True)
    __tablename__ = "npm_visit"

    uid = Column(Integer, Sequence("npm_visit_id_seq"), primary_key=True)
    visit_date = Column(DateTime, nullable=False)
    doc_count = Column(BigInteger)
    doc_del_count = Column(BigInteger)

@@ -29,7 +30,8 @@ class NpmModel(IndexingModelBase):
    """A npm package representation

    """
    __tablename__ = 'npm_repo'

    __tablename__ = "npm_repo"

    uid = Column(String, primary_key=True)
    indexable = Column(String, index=True)

@@ -13,15 +13,22 @@ from swh.lister.npm.models import NpmVisitModel

@contextmanager
def save_registry_state(lister):
    params = {'headers': lister.request_headers()}
    params = {"headers": lister.request_headers()}
    registry_state = lister.session.get(lister.url, **params)
    registry_state = registry_state.json()
    keys = ('doc_count', 'doc_del_count', 'update_seq', 'purge_seq',
            'disk_size', 'data_size', 'committed_update_seq',
            'compacted_seq')
    keys = (
        "doc_count",
        "doc_del_count",
        "update_seq",
        "purge_seq",
        "disk_size",
        "data_size",
        "committed_update_seq",
        "compacted_seq",
    )

    state = {key: registry_state[key] for key in keys}
    state['visit_date'] = datetime.now()
    state["visit_date"] = datetime.now()
    yield
    npm_visit = NpmVisitModel(**state)
    lister.db_session.add(npm_visit)

@@ -34,29 +41,31 @@ def get_last_update_seq(lister):
    query = lister.db_session.query(NpmVisitModel.update_seq)
    row = query.order_by(NpmVisitModel.uid.desc()).first()
    if not row:
        raise ValueError('No npm registry listing previously performed ! '
                         'This is required prior to the execution of an '
                         'incremental listing.')
        raise ValueError(
            "No npm registry listing previously performed ! "
            "This is required prior to the execution of an "
            "incremental listing."
        )
    return row[0]


@shared_task(name=__name__ + '.NpmListerTask')
@shared_task(name=__name__ + ".NpmListerTask")
def list_npm_full(**lister_args):
    'Full lister for the npm (javascript) registry'
    "Full lister for the npm (javascript) registry"
    lister = NpmLister(**lister_args)
    with save_registry_state(lister):
        return lister.run()


@shared_task(name=__name__ + '.NpmIncrementalListerTask')
@shared_task(name=__name__ + ".NpmIncrementalListerTask")
def list_npm_incremental(**lister_args):
    'Incremental lister for the npm (javascript) registry'
    "Incremental lister for the npm (javascript) registry"
    lister = NpmIncrementalLister(**lister_args)
    update_seq_start = get_last_update_seq(lister)
    with save_registry_state(lister):
        return lister.run(min_bound=update_seq_start)


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"

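save_registry_state above is a textbook @contextmanager split: everything before `yield` runs on entry (the registry state is snapshotted), the lister runs inside the with-block, and the lines after `yield` persist the snapshot once the listing completes. A minimal sketch of that shape (hypothetical names; note that, like the real function, nothing after yield runs if the body raises, since there is no try/finally):

    from contextlib import contextmanager

    @contextmanager
    def capture_then_store(read_state, store_state):
        state = read_state()   # runs before the with-block body
        yield                  # the with-block body executes here
        store_state(state)     # runs only if the body finished without error
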
@@ -10,14 +10,16 @@ from swh.lister.core.tests.conftest import *  # noqa

@pytest.fixture
def lister_npm(swh_listers):
    lister = swh_listers['npm']
    lister = swh_listers["npm"]

    # Add the load-deb-package in the scheduler backend
    lister.scheduler.create_task_type({
        'type': 'load-npm',
        'description': 'Load npm package',
        'backend_name': 'swh.loader.package.tasks.LoadNpm',
        'default_interval': '1 day',
    })
    lister.scheduler.create_task_type(
        {
            "type": "load-npm",
            "description": "Load npm package",
            "backend_name": "swh.loader.package.tasks.LoadNpm",
            "default_interval": "1 day",
        }
    )

    return lister

@@ -21,10 +21,10 @@ logger = logging.getLogger(__name__)
class NpmListerTester(HttpListerTesterBase, unittest.TestCase):
    Lister = NpmLister
    test_re = re.compile(r'^.*/_all_docs\?startkey="(.+)".*')
    lister_subdir = 'npm'
    good_api_response_file = 'data/replicate.npmjs.com/api_response.json'
    bad_api_response_file = 'data/api_empty_response.json'
    first_index = 'jquery'
    lister_subdir = "npm"
    good_api_response_file = "data/replicate.npmjs.com/api_response.json"
    bad_api_response_file = "data/api_empty_response.json"
    first_index = "jquery"
    entries_per_page = 100

    @requests_mock.Mocker()

@@ -37,11 +37,11 @@ class NpmListerTester(HttpListerTesterBase, unittest.TestCase):

class NpmIncrementalListerTester(HttpListerTesterBase, unittest.TestCase):
    Lister = NpmIncrementalLister
    test_re = re.compile(r'^.*/_changes\?since=([0-9]+).*')
    lister_subdir = 'npm'
    good_api_response_file = 'data/api_inc_response.json'
    bad_api_response_file = 'data/api_inc_empty_response.json'
    first_index = '6920642'
    test_re = re.compile(r"^.*/_changes\?since=([0-9]+).*")
    lister_subdir = "npm"
    good_api_response_file = "data/api_inc_response.json"
    bad_api_response_file = "data/api_inc_empty_response.json"
    first_index = "6920642"
    entries_per_page = 100

    @requests_mock.Mocker()

@@ -58,27 +58,27 @@ def check_tasks(tasks: List[Any]):

    """
    for row in tasks:
        logger.debug('row: %s', row)
        assert row['type'] == 'load-npm'
        logger.debug("row: %s", row)
        assert row["type"] == "load-npm"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 0

        # kwargs
        kwargs = row['arguments']['kwargs']
        kwargs = row["arguments"]["kwargs"]
        assert len(kwargs) == 1
        package_url = kwargs['url']
        package_name = package_url.split('/')[-1]
        assert package_url == f'https://www.npmjs.com/package/{package_name}'
        package_url = kwargs["url"]
        package_name = package_url.split("/")[-1]
        assert package_url == f"https://www.npmjs.com/package/{package_name}"

        assert row['policy'] == 'recurring'
        assert row['priority'] is None
        assert row["policy"] == "recurring"
        assert row["priority"] is None


def test_lister_npm_basic_listing(lister_npm, requests_mock_datadir):
    lister_npm.run()

    tasks = lister_npm.scheduler.search_tasks(task_type='load-npm')
    tasks = lister_npm.scheduler.search_tasks(task_type="load-npm")
    assert len(tasks) == 100

    check_tasks(tasks)

@@ -89,10 +89,11 @@ def test_lister_npm_listing_pagination(lister_npm, requests_mock_datadir):
    # Patch per page pagination
    lister.per_page = 10 + 1
    lister.PATH_TEMPLATE = lister.PATH_TEMPLATE.replace(
        '&limit=1001', '&limit=%s' % lister.per_page)
        "&limit=1001", "&limit=%s" % lister.per_page
    )
    lister.run()

    tasks = lister.scheduler.search_tasks(task_type='load-npm')
    tasks = lister.scheduler.search_tasks(task_type="load-npm")
    assert len(tasks) == 2 * 10  # only 2 files with 10 results each

    check_tasks(tasks)

@@ -8,23 +8,22 @@ def mock_save(lister):


def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.npm.tasks.ping')
    res = swh_app.send_task("swh.lister.npm.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"


@patch('swh.lister.npm.tasks.save_registry_state')
@patch('swh.lister.npm.tasks.NpmLister')
@patch("swh.lister.npm.tasks.save_registry_state")
@patch("swh.lister.npm.tasks.NpmLister")
def test_lister(lister, save, swh_app, celery_session_worker):
    # setup the mocked NpmLister
    lister.return_value = lister
    lister.run.return_value = None
    save.side_effect = mock_save

    res = swh_app.send_task('swh.lister.npm.tasks.NpmListerTask')
    res = swh_app.send_task("swh.lister.npm.tasks.NpmListerTask")
    assert res
    res.wait()
    assert res.successful()

@@ -33,9 +32,9 @@ def test_lister(lister, save, swh_app, celery_session_worker):
    lister.run.assert_called_once_with()


@patch('swh.lister.npm.tasks.save_registry_state')
@patch('swh.lister.npm.tasks.get_last_update_seq')
@patch('swh.lister.npm.tasks.NpmIncrementalLister')
@patch("swh.lister.npm.tasks.save_registry_state")
@patch("swh.lister.npm.tasks.get_last_update_seq")
@patch("swh.lister.npm.tasks.NpmIncrementalLister")
def test_incremental(lister, seq, save, swh_app, celery_session_worker):
    # setup the mocked NpmLister
    lister.return_value = lister

@@ -43,8 +42,7 @@ def test_incremental(lister, seq, save, swh_app, celery_session_worker):
    seq.return_value = 42
    save.side_effect = mock_save

    res = swh_app.send_task(
        'swh.lister.npm.tasks.NpmIncrementalListerTask')
    res = swh_app.send_task("swh.lister.npm.tasks.NpmIncrementalListerTask")
    assert res
    res.wait()
    assert res.successful()

@@ -7,7 +7,8 @@ def register():
    from .models import PackagistModel
    from .lister import PackagistLister

    return {'models': [PackagistModel],
            'lister': PackagistLister,
            'task_modules': ['%s.tasks' % __name__],
            }
    return {
        "models": [PackagistModel],
        "lister": PackagistLister,
        "task_modules": ["%s.tasks" % __name__],
    }

@@ -23,7 +23,7 @@ def compute_package_url(repo_name: str) -> str:
    """Compute packgist package url from repo name.

    """
    return 'https://repo.packagist.org/p/%s.json' % repo_name
    return "https://repo.packagist.org/p/%s.json" % repo_name


class PackagistLister(ListerOnePageApiTransport, SimpleLister):

@@ -52,17 +52,19 @@ class PackagistLister(ListerOnePageApiTransport, SimpleLister):
    'https://repo.packagist.org/p/hypejunction/hypegamemechanics.json'

    """

    MODEL = PackagistModel
    LISTER_NAME = 'packagist'
    PAGE = 'https://packagist.org/packages/list.json'
    instance = 'packagist'
    LISTER_NAME = "packagist"
    PAGE = "https://packagist.org/packages/list.json"
    instance = "packagist"

    def __init__(self, override_config=None):
        ListerOnePageApiTransport .__init__(self)
        ListerOnePageApiTransport.__init__(self)
        SimpleLister.__init__(self, override_config=override_config)

    def task_dict(self, origin_type: str, origin_url: str,
                  **kwargs: Mapping[str, str]) -> Dict[str, Any]:
    def task_dict(
        self, origin_type: str, origin_url: str, **kwargs: Mapping[str, str]
    ) -> Dict[str, Any]:
        """Return task format dict

        This is overridden from the lister_base as more information is

@@ -70,18 +72,20 @@ class PackagistLister(ListerOnePageApiTransport, SimpleLister):

        """
        return utils.create_task_dict(
            'load-%s' % origin_type,
            kwargs.get('policy', 'recurring'),
            kwargs.get('name'), origin_url,
            retries_left=3)
            "load-%s" % origin_type,
            kwargs.get("policy", "recurring"),
            kwargs.get("name"),
            origin_url,
            retries_left=3,
        )

    def list_packages(self, response: Any) -> List[str]:
        """List the actual packagist origins from the response.

        """
        response = json.loads(response.text)
        packages = [name for name in response['packageNames']]
        logger.debug('Number of packages: %s', len(packages))
        packages = [name for name in response["packageNames"]]
        logger.debug("Number of packages: %s", len(packages))
        random.shuffle(packages)
        return packages

@@ -91,10 +95,10 @@ class PackagistLister(ListerOnePageApiTransport, SimpleLister):
        """
        url = compute_package_url(repo_name)
        return {
            'uid': repo_name,
            'name': repo_name,
            'full_name': repo_name,
            'html_url': url,
            'origin_url': url,
            'origin_type': 'packagist',
            "uid": repo_name,
            "name": repo_name,
            "full_name": repo_name,
            "html_url": url,
            "origin_url": url,
            "origin_type": "packagist",
        }

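For reference, the create_task_dict call reformatted above has one positional argument per line after black: task type, policy, name, origin url, then keyword arguments. The packagist test later in this diff pins the exact call shape; an annotated illustration of that same call (argument values taken from the test, the comments are editorial):

    task = utils.create_task_dict(
        "load-packagist",  # task type derived from "load-%s" % origin_type
        "recurring",       # default policy when none is passed in kwargs
        "test_pack",       # forwarded from kwargs.get("name")
        "https://abc",     # the origin url
        retries_left=3,
    )
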
@@ -11,6 +11,7 @@ class PackagistModel(ModelBase):
    """a Packagist repository representation

    """
    __tablename__ = 'packagist_repo'

    __tablename__ = "packagist_repo"

    uid = Column(String, primary_key=True)

@@ -7,12 +7,12 @@ from celery import shared_task
from .lister import PackagistLister


@shared_task(name=__name__ + '.PackagistListerTask')
@shared_task(name=__name__ + ".PackagistListerTask")
def list_packagist(**lister_args):
    'List the packagist (php) registry'
    "List the packagist (php) registry"
    PackagistLister(**lister_args).run()


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"

@@ -10,14 +10,16 @@ from swh.lister.core.tests.conftest import *  # noqa

@pytest.fixture
def lister_packagist(swh_listers):
    lister = swh_listers['packagist']
    lister = swh_listers["packagist"]

    # Amend the scheduler with an unknown yet load-packagist task type
    lister.scheduler.create_task_type({
        'type': 'load-packagist',
        'description': 'Load packagist origin',
        'backend_name': 'swh.loader.package.tasks.LoaderPackagist',
        'default_interval': '1 day',
    })
    lister.scheduler.create_task_type(
        {
            "type": "load-packagist",
            "description": "Load packagist origin",
            "backend_name": "swh.loader.package.tasks.LoaderPackagist",
            "default_interval": "1 day",
        }
    )

    return lister

@@ -12,27 +12,29 @@ from swh.lister.packagist.lister import PackagistLister, compute_package_url
from swh.lister.core.tests.test_lister import HttpSimpleListerTester


expected_packages = ['0.0.0/composer-include-files', '0.0.0/laravel-env-shim',
                     '0.0.1/try-make-package', '0099ff/dialogflowphp',
                     '00f100/array_dot']
expected_packages = [
    "0.0.0/composer-include-files",
    "0.0.0/laravel-env-shim",
    "0.0.1/try-make-package",
    "0099ff/dialogflowphp",
    "00f100/array_dot",
]

expected_model = {
    'uid': '0099ff/dialogflowphp',
    'name': '0099ff/dialogflowphp',
    'full_name': '0099ff/dialogflowphp',
    'html_url':
        'https://repo.packagist.org/p/0099ff/dialogflowphp.json',
    'origin_url':
        'https://repo.packagist.org/p/0099ff/dialogflowphp.json',
    'origin_type': 'packagist',
}
    "uid": "0099ff/dialogflowphp",
    "name": "0099ff/dialogflowphp",
    "full_name": "0099ff/dialogflowphp",
    "html_url": "https://repo.packagist.org/p/0099ff/dialogflowphp.json",
    "origin_url": "https://repo.packagist.org/p/0099ff/dialogflowphp.json",
    "origin_type": "packagist",
}


class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase):
    Lister = PackagistLister
    PAGE = 'https://packagist.org/packages/list.json'
    lister_subdir = 'packagist'
    good_api_response_file = 'data/https_packagist.org/packages_list.json'
    PAGE = "https://packagist.org/packages/list.json"
    lister_subdir = "packagist"
    good_api_response_file = "data/https_packagist.org/packages_list.json"
    entries = 5

    @requests_mock.Mocker()

@@ -52,40 +54,41 @@ class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase):

        """
        fl = self.get_fl()
        model = fl.transport_response_simplified(['0099ff/dialogflowphp'])
        model = fl.transport_response_simplified(["0099ff/dialogflowphp"])
        assert len(model) == 1
        for key, values in model[0].items():
            assert values == expected_model[key]

    @patch('swh.lister.packagist.lister.utils.create_task_dict')
    @patch("swh.lister.packagist.lister.utils.create_task_dict")
    def test_task_dict(self, mock_create_tasks):
        """Test the task creation of lister

        """
        fl = self.get_fl()
        fl.task_dict(origin_type='packagist', origin_url='https://abc',
                     name='test_pack')
        fl.task_dict(
            origin_type="packagist", origin_url="https://abc", name="test_pack"
        )
        mock_create_tasks.assert_called_once_with(
            'load-packagist', 'recurring', 'test_pack', 'https://abc',
            retries_left=3)
            "load-packagist", "recurring", "test_pack", "https://abc", retries_left=3
        )


def test_compute_package_url():
    expected_url = 'https://repo.packagist.org/p/hello.json'
    actual_url = compute_package_url('hello')
    expected_url = "https://repo.packagist.org/p/hello.json"
    actual_url = compute_package_url("hello")
    assert actual_url == expected_url


def test_packagist_lister(lister_packagist, requests_mock_datadir):
    lister_packagist.run()

    r = lister_packagist.scheduler.search_tasks(task_type='load-packagist')
    r = lister_packagist.scheduler.search_tasks(task_type="load-packagist")
    assert len(r) == 5

    for row in r:
        assert row['type'] == 'load-packagist'
        assert row["type"] == "load-packagist"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 2

        package = args[0]

@@ -95,8 +98,8 @@ def test_packagist_lister(lister_packagist, requests_mock_datadir):
        assert url == expected_url

        # kwargs
        kwargs = row['arguments']['kwargs']
        kwargs = row["arguments"]["kwargs"]
        assert kwargs == {}

        assert row['policy'] == 'recurring'
        assert row['priority'] is None
        assert row["policy"] == "recurring"
        assert row["priority"] is None

@@ -6,22 +6,20 @@ from unittest.mock import patch


def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.packagist.tasks.ping')
    res = swh_app.send_task("swh.lister.packagist.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"


@patch('swh.lister.packagist.tasks.PackagistLister')
@patch("swh.lister.packagist.tasks.PackagistLister")
def test_lister(lister, swh_app, celery_session_worker):
    # setup the mocked PackagistLister
    lister.return_value = lister
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.packagist.tasks.PackagistListerTask')
    res = swh_app.send_task("swh.lister.packagist.tasks.PackagistListerTask")
    assert res
    res.wait()
    assert res.successful()

@@ -7,7 +7,8 @@ def register():
    from .models import PhabricatorModel
    from .lister import PhabricatorLister

    return {'models': [PhabricatorModel],
            'lister': PhabricatorLister,
            'task_modules': ['%s.tasks' % __name__],
            }
    return {
        "models": [PhabricatorModel],
        "lister": PhabricatorLister,
        "task_modules": ["%s.tasks" % __name__],
    }

@@ -21,11 +21,10 @@ logger = logging.getLogger(__name__)


class PhabricatorLister(IndexingHttpLister):
    PATH_TEMPLATE = '?order=oldest&attachments[uris]=1&after=%s'
    DEFAULT_URL = \
        'https://forge.softwareheritage.org/api/diffusion.repository.search'
    PATH_TEMPLATE = "?order=oldest&attachments[uris]=1&after=%s"
    DEFAULT_URL = "https://forge.softwareheritage.org/api/diffusion.repository.search"
    MODEL = PhabricatorModel
    LISTER_NAME = 'phabricator'
    LISTER_NAME = "phabricator"

    def __init__(self, url=None, instance=None, override_config=None):
        super().__init__(url=url, override_config=override_config)

@@ -48,11 +47,14 @@ class PhabricatorLister(IndexingHttpLister):
        creds = self.request_instance_credentials()
        if not creds:
            raise ValueError(
                'Phabricator forge needs authentication credential to list.')
        api_token = random.choice(creds)['password']
                "Phabricator forge needs authentication credential to list."
            )
        api_token = random.choice(creds)["password"]

        return {'headers': self.request_headers() or {},
                'params': {'api.token': api_token}}
        return {
            "headers": self.request_headers() or {},
            "params": {"api.token": api_token},
        }

    def request_headers(self):
        """

@@ -60,39 +62,39 @@ class PhabricatorLister(IndexingHttpLister):
        Phabricator API
        """
        headers = super().request_headers()
        headers['Accept'] = 'application/json'
        headers["Accept"] = "application/json"
        return headers

    def get_model_from_repo(
            self, repo: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        url = get_repo_url(repo['attachments']['uris']['uris'])
    def get_model_from_repo(self, repo: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        url = get_repo_url(repo["attachments"]["uris"]["uris"])
        if url is None:
            return None
        return {
            'uid': url,
            'indexable': repo['id'],
            'name': repo['fields']['shortName'],
            'full_name': repo['fields']['name'],
            'html_url': url,
            'origin_url': url,
            'origin_type': repo['fields']['vcs'],
            'instance': self.instance,
            "uid": url,
            "indexable": repo["id"],
            "name": repo["fields"]["shortName"],
            "full_name": repo["fields"]["name"],
            "html_url": url,
            "origin_url": url,
            "origin_type": repo["fields"]["vcs"],
            "instance": self.instance,
        }

    def get_next_target_from_response(
            self, response: Response) -> Optional[int]:
        body = response.json()['result']['cursor']
        if body['after'] and body['after'] != 'null':
            return int(body['after'])
    def get_next_target_from_response(self, response: Response) -> Optional[int]:
        body = response.json()["result"]["cursor"]
        if body["after"] and body["after"] != "null":
            return int(body["after"])
        return None

    def transport_response_simplified(
            self, response: Response) -> List[Optional[Dict[str, Any]]]:
        self, response: Response
    ) -> List[Optional[Dict[str, Any]]]:
        repos = response.json()
        if repos['result'] is None:
        if repos["result"] is None:
            raise ValueError(
                'Problem during information fetch: %s' % repos['error_code'])
        repos = repos['result']['data']
                "Problem during information fetch: %s" % repos["error_code"]
            )
        repos = repos["result"]["data"]
        return [self.get_model_from_repo(repo) for repo in repos]

    def filter_before_inject(self, models_list):

@@ -103,8 +105,7 @@ class PhabricatorLister(IndexingHttpLister):
        models_list = [m for m in models_list if m is not None]
        return super().filter_before_inject(models_list)

    def disable_deleted_repo_tasks(
            self, index: int, next_index: int, keep_these: str):
    def disable_deleted_repo_tasks(self, index: int, next_index: int, keep_these: str):
        """
        (Overrides) Fix provided index value to avoid:

@@ -113,7 +114,7 @@ class PhabricatorLister(IndexingHttpLister):
        """
        # First call to the Phabricator API uses an empty 'after' parameter,
        # so set the index to 0 to avoid database query error
        if index == '':
        if index == "":
            index = 0
        # Next listed repository ids are strictly greater than the 'after'
        # parameter, so increment the index to avoid disabling the latest

@@ -121,8 +122,7 @@ class PhabricatorLister(IndexingHttpLister):
        # the Phabricator API
        else:
            index = index + 1
        return super().disable_deleted_repo_tasks(index, next_index,
                                                  keep_these)
        return super().disable_deleted_repo_tasks(index, next_index, keep_these)

    def db_first_index(self) -> Optional[int]:
        """

@@ -172,19 +172,18 @@ def get_repo_url(attachments: List[Dict[str, Any]]) -> Optional[int]:
    """
    processed_urls = defaultdict(dict)  # type: Dict[str, Any]
    for uri in attachments:
        protocol = uri['fields']['builtin']['protocol']
        url = uri['fields']['uri']['effective']
        identifier = uri['fields']['builtin']['identifier']
        if protocol in ('http', 'https'):
        protocol = uri["fields"]["builtin"]["protocol"]
        url = uri["fields"]["uri"]["effective"]
        identifier = uri["fields"]["builtin"]["identifier"]
        if protocol in ("http", "https"):
            processed_urls[protocol][identifier] = url
        elif protocol is None:
            for protocol in ('https', 'http'):
            for protocol in ("https", "http"):
                if url.startswith(protocol):
                    processed_urls[protocol]['undefined'] = url
                    processed_urls[protocol]["undefined"] = url
                    break
    for protocol in ['https', 'http']:
        for identifier in ['shortname', 'callsign', 'id', 'undefined']:
            if (protocol in processed_urls and
                    identifier in processed_urls[protocol]):
    for protocol in ["https", "http"]:
        for identifier in ["shortname", "callsign", "id", "undefined"]:
            if protocol in processed_urls and identifier in processed_urls[protocol]:
                return processed_urls[protocol][identifier]
    return None

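As the final loops above show, get_repo_url prefers https over http, and within a protocol it prefers the builtin identifier in the order shortname, callsign, id, undefined. A worked example under that reading (the attachment URIs here are made up for illustration):

    attachments = [
        {"fields": {"builtin": {"protocol": "http", "identifier": "callsign"},
                    "uri": {"effective": "http://example.org/FOO"}}},
        {"fields": {"builtin": {"protocol": "https", "identifier": "id"},
                    "uri": {"effective": "https://example.org/diffusion/42"}}},
    ]
    # https wins over http, even though callsign outranks id within a protocol
    assert get_repo_url(attachments) == "https://example.org/diffusion/42"
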
@@ -9,7 +9,8 @@ from swh.lister.core.models import IndexingModelBase

class PhabricatorModel(IndexingModelBase):
    """a Phabricator repository"""
    __tablename__ = 'phabricator_repo'

    __tablename__ = "phabricator_repo"

    uid = Column(String, primary_key=True)
    indexable = Column(Integer, index=True)

@@ -7,12 +7,12 @@ from celery import shared_task
from swh.lister.phabricator.lister import PhabricatorLister


@shared_task(name=__name__ + '.FullPhabricatorLister')
@shared_task(name=__name__ + ".FullPhabricatorLister")
def list_phabricator_full(**lister_args):
    """Full update of a Phabricator instance"""
    return PhabricatorLister(**lister_args).run()


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"

@@ -10,17 +10,12 @@ from swh.lister.core.tests.conftest import *  # noqa

@pytest.fixture
def lister_phabricator(swh_listers):
    lister = swh_listers['phabricator']
    lister = swh_listers["phabricator"]

    # Amend the credentials
    lister.config = {
        'cache_responses': False,
        'credentials': {
            'phabricator': {
                lister.instance: [{
                    'password': 'foo'
                }]
            }}
        "cache_responses": False,
        "credentials": {"phabricator": {lister.instance: [{"password": "foo"}]}},
    }

    return lister

@ -21,12 +21,11 @@ logger = logging.getLogger(__name__)
|
|||
class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
|
||||
Lister = PhabricatorLister
|
||||
# first request will have the after parameter empty
|
||||
test_re = re.compile(r'\&after=([^?&]*)')
|
||||
lister_subdir = 'phabricator'
|
||||
good_api_response_file = 'data/api_first_response.json'
|
||||
good_api_response_undefined_protocol = \
|
||||
'data/api_response_undefined_protocol.json'
|
||||
bad_api_response_file = 'data/api_empty_response.json'
|
||||
test_re = re.compile(r"\&after=([^?&]*)")
|
||||
lister_subdir = "phabricator"
|
||||
good_api_response_file = "data/api_first_response.json"
|
||||
good_api_response_undefined_protocol = "data/api_response_undefined_protocol.json"
|
||||
bad_api_response_file = "data/api_empty_response.json"
|
||||
# first_index must be retrieved through a bootstrap process for Phabricator
|
||||
first_index = None
|
||||
last_index = 12
|
||||
|
@ -40,7 +39,7 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
|
|||
"""
|
||||
m = self.test_re.search(request.path_url)
|
||||
idx = m.group(1)
|
||||
if idx not in ('', 'None'):
|
||||
if idx not in ("", "None"):
|
||||
return int(idx)
|
||||
|
||||
def get_fl(self, override_config=None):
|
||||
|
@ -48,41 +47,42 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
|
|||
|
||||
"""
|
||||
if override_config or self.fl is None:
|
||||
credentials = {'phabricator': {'fake': [
|
||||
{'password': 'toto'}
|
||||
]}}
|
||||
override_config = dict(credentials=credentials,
|
||||
**(override_config or {}))
|
||||
self.fl = self.Lister(url='https://fakeurl', instance='fake',
|
||||
override_config=override_config)
|
||||
credentials = {"phabricator": {"fake": [{"password": "toto"}]}}
|
||||
override_config = dict(credentials=credentials, **(override_config or {}))
|
||||
self.fl = self.Lister(
|
||||
url="https://fakeurl", instance="fake", override_config=override_config
|
||||
)
|
||||
self.fl.INITIAL_BACKOFF = 1
|
||||
|
||||
self.fl.reset_backoff()
|
||||
return self.fl
|
||||
|
||||
def test_get_repo_url(self):
|
||||
f = open('swh/lister/%s/tests/%s' % (self.lister_subdir,
|
||||
self.good_api_response_file))
|
||||
f = open(
|
||||
"swh/lister/%s/tests/%s" % (self.lister_subdir, self.good_api_response_file)
|
||||
)
|
||||
api_response = json.load(f)
|
||||
repos = api_response['result']['data']
|
||||
repos = api_response["result"]["data"]
|
||||
for repo in repos:
|
||||
self.assertEqual(
|
||||
'https://forge.softwareheritage.org/source/%s.git' %
|
||||
(repo['fields']['shortName']),
|
||||
get_repo_url(repo['attachments']['uris']['uris']))
|
||||
"https://forge.softwareheritage.org/source/%s.git"
|
||||
% (repo["fields"]["shortName"]),
|
||||
get_repo_url(repo["attachments"]["uris"]["uris"]),
|
||||
)
|
||||
|
||||
f = open('swh/lister/%s/tests/%s' %
|
||||
(self.lister_subdir,
|
||||
self.good_api_response_undefined_protocol))
|
||||
f = open(
|
||||
"swh/lister/%s/tests/%s"
|
||||
% (self.lister_subdir, self.good_api_response_undefined_protocol)
|
||||
)
|
||||
repo = json.load(f)
|
||||
self.assertEqual(
|
||||
'https://svn.blender.org/svnroot/bf-blender/',
|
||||
get_repo_url(repo['attachments']['uris']['uris']))
|
||||
"https://svn.blender.org/svnroot/bf-blender/",
|
||||
get_repo_url(repo["attachments"]["uris"]["uris"]),
|
||||
)
|
||||
|
||||
@requests_mock.Mocker()
|
||||
def test_scheduled_tasks(self, http_mocker):
|
||||
self.scheduled_tasks_test('data/api_next_response.json', 23,
|
||||
http_mocker)
|
||||
self.scheduled_tasks_test("data/api_next_response.json", 23, http_mocker)
|
||||
|
||||
@requests_mock.Mocker()
|
||||
def test_scheduled_tasks_multiple_instances(self, http_mocker):
|
||||
|
@ -92,19 +92,14 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
        # list first Phabricator instance
        fl.run()

        fl.instance = 'other_fake'
        fl.config['credentials'] = {
            'phabricator': {
                'other_fake': [{
                    'password': 'foo'
                }]
            }
        fl.instance = "other_fake"
        fl.config["credentials"] = {
            "phabricator": {"other_fake": [{"password": "foo"}]}
        }

        # list second Phabricator instance hosting repositories having
        # same ids as those listed from the first instance
        self.good_api_response_file = \
            'data/api_first_response_other_instance.json'
        self.good_api_response_file = "data/api_first_response_other_instance.json"
        self.last_index = 13
        fl.run()
@ -113,28 +108,28 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
        # check tasks are not disabled
        for task in self.scheduler_tasks:
            self.assertTrue(task['status'] != 'disabled')
            self.assertTrue(task["status"] != "disabled")


def test_phabricator_lister(lister_phabricator, requests_mock_datadir):
    lister = lister_phabricator
    assert lister.url == lister.DEFAULT_URL
    assert lister.instance == 'forge.softwareheritage.org'
    assert lister.instance == "forge.softwareheritage.org"
    lister.run()

    r = lister.scheduler.search_tasks(task_type='load-git')
    r = lister.scheduler.search_tasks(task_type="load-git")
    assert len(r) == 10

    for row in r:
        assert row['type'] == 'load-git'
        assert row["type"] == "load-git"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 0

        # kwargs
        kwargs = row['arguments']['kwargs']
        url = kwargs['url']
        kwargs = row["arguments"]["kwargs"]
        url = kwargs["url"]
        assert lister.instance in url

        assert row['policy'] == 'recurring'
        assert row['priority'] is None
        assert row["policy"] == "recurring"
        assert row["priority"] is None
@ -4,9 +4,8 @@
def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.phabricator.tasks.ping')
    res = swh_app.send_task("swh.lister.phabricator.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"
@ -7,7 +7,8 @@ def register():
    from .models import PyPIModel
    from .lister import PyPILister

    return {'models': [PyPIModel],
            'lister': PyPILister,
            'task_modules': ['%s.tasks' % __name__],
            }
    return {
        "models": [PyPIModel],
        "lister": PyPILister,
        "task_modules": ["%s.tasks" % __name__],
    }
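For context, the dict returned by `register()` is the lister's plugin entry point; a hedged sketch of reading it (the import path follows this hunk, but how the core machinery actually iterates these entries is not shown in this diff):

    from swh.lister.pypi import register

    entry = register()
    lister_cls = entry["lister"]          # PyPILister
    task_modules = entry["task_modules"]  # ["swh.lister.pypi.tasks"]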
@ -18,12 +18,12 @@ from requests import Response
class PyPILister(ListerOnePageApiTransport, SimpleLister):
    MODEL = PyPIModel
    LISTER_NAME = 'pypi'
    PAGE = 'https://pypi.org/simple/'
    instance = 'pypi'  # As of today only the main pypi.org is used
    LISTER_NAME = "pypi"
    PAGE = "https://pypi.org/simple/"
    instance = "pypi"  # As of today only the main pypi.org is used

    def __init__(self, override_config=None):
        ListerOnePageApiTransport .__init__(self)
        ListerOnePageApiTransport.__init__(self)
        SimpleLister.__init__(self, override_config=override_config)

    def task_dict(self, origin_type: str, origin_url: str, **kwargs):
@ -33,17 +33,16 @@ class PyPILister(ListerOnePageApiTransport, SimpleLister):
        needed for the ingestion task creation.

        """
        _type = 'load-%s' % origin_type
        _policy = kwargs.get('policy', 'recurring')
        return utils.create_task_dict(
            _type, _policy, url=origin_url)
        _type = "load-%s" % origin_type
        _policy = kwargs.get("policy", "recurring")
        return utils.create_task_dict(_type, _policy, url=origin_url)

    def list_packages(self, response: Response) -> list:
        """(Override) List the actual pypi origins from the response.

        """
        result = xmltodict.parse(response.content)
        _packages = [p['#text'] for p in result['html']['body']['a']]
        _packages = [p["#text"] for p in result["html"]["body"]["a"]]
        random.shuffle(_packages)
        return _packages
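`list_packages` relies on the PyPI simple index (https://pypi.org/simple/) being one flat list of anchors. A self-contained illustration of the same parsing, with made-up sample HTML; note that xmltodict only produces a list for repeated elements, so an index with a single `<a>` would yield a dict and break the comprehension:

    import xmltodict

    html = b"""<html><body>
    <a href="/simple/swh-lister/">swh-lister</a>
    <a href="/simple/requests/">requests</a>
    </body></html>"""

    result = xmltodict.parse(html)
    packages = [a["#text"] for a in result["html"]["body"]["a"]]
    print(packages)  # ['swh-lister', 'requests']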
@ -51,7 +50,7 @@ class PyPILister(ListerOnePageApiTransport, SimpleLister):
        """Returns origin_url

        """
        return 'https://pypi.org/project/%s/' % repo_name
        return "https://pypi.org/project/%s/" % repo_name

    def get_model_from_repo(self, repo_name: str) -> Dict[str, Any]:
        """(Override) Transform from repository representation to model
@ -59,10 +58,10 @@ class PyPILister(ListerOnePageApiTransport, SimpleLister):
        """
        origin_url = self.origin_url(repo_name)
        return {
            'uid': origin_url,
            'name': repo_name,
            'full_name': repo_name,
            'html_url': origin_url,
            'origin_url': origin_url,
            'origin_type': 'pypi',
            "uid": origin_url,
            "name": repo_name,
            "full_name": repo_name,
            "html_url": origin_url,
            "origin_url": origin_url,
            "origin_type": "pypi",
        }
@ -11,6 +11,7 @@ class PyPIModel(ModelBase):
    """a PyPI repository representation

    """
    __tablename__ = 'pypi_repo'
    __tablename__ = "pypi_repo"

    uid = Column(String, primary_key=True)
@ -7,12 +7,12 @@ from celery import shared_task
from .lister import PyPILister


@shared_task(name=__name__ + '.PyPIListerTask')
@shared_task(name=__name__ + ".PyPIListerTask")
def list_pypi(**lister_args):
    'Full update of the PyPI (python) registry'
    "Full update of the PyPI (python) registry"
    return PyPILister(**lister_args).run()


@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
    return 'OK'
    return "OK"
@ -10,14 +10,16 @@ from swh.lister.core.tests.conftest import * # noqa
@pytest.fixture
def lister_pypi(swh_listers):
    lister = swh_listers['pypi']
    lister = swh_listers["pypi"]

    # Add the load-pypi task type in the scheduler backend
    lister.scheduler.create_task_type({
        'type': 'load-pypi',
        'description': 'Load PyPI package',
        'backend_name': 'swh.loader.package.tasks.LoadPyPI',
        'default_interval': '1 day',
    })
    lister.scheduler.create_task_type(
        {
            "type": "load-pypi",
            "description": "Load PyPI package",
            "backend_name": "swh.loader.package.tasks.LoadPyPI",
            "default_interval": "1 day",
        }
    )

    return lister
@ -7,21 +7,21 @@
def test_pypi_lister(lister_pypi, requests_mock_datadir):
    lister_pypi.run()

    r = lister_pypi.scheduler.search_tasks(task_type='load-pypi')
    r = lister_pypi.scheduler.search_tasks(task_type="load-pypi")
    assert len(r) == 4

    for row in r:
        assert row['type'] == 'load-pypi'
        assert row["type"] == "load-pypi"
        # arguments check
        args = row['arguments']['args']
        args = row["arguments"]["args"]
        assert len(args) == 0

        # kwargs
        kwargs = row['arguments']['kwargs']
        kwargs = row["arguments"]["kwargs"]
        assert len(kwargs) == 1

        origin_url = kwargs['url']
        assert 'https://pypi.org/project' in origin_url
        origin_url = kwargs["url"]
        assert "https://pypi.org/project" in origin_url

        assert row['policy'] == 'recurring'
        assert row['priority'] is None
        assert row["policy"] == "recurring"
        assert row["priority"] is None
@ -2,22 +2,20 @@ from unittest.mock import patch
def test_ping(swh_app, celery_session_worker):
    res = swh_app.send_task(
        'swh.lister.pypi.tasks.ping')
    res = swh_app.send_task("swh.lister.pypi.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
    assert res.result == "OK"


@patch('swh.lister.pypi.tasks.PyPILister')
@patch("swh.lister.pypi.tasks.PyPILister")
def test_lister(lister, swh_app, celery_session_worker):
    # setup the mocked PyPILister
    lister.return_value = lister
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.pypi.tasks.PyPIListerTask')
    res = swh_app.send_task("swh.lister.pypi.tasks.PyPIListerTask")
    assert res
    res.wait()
    assert res.successful()
@ -15,7 +15,7 @@ from .test_utils import init_db
def test_get_lister_wrong_input():
    """Unsupported lister should raise"""
    with pytest.raises(ValueError) as e:
        get_lister('unknown', 'db-url')
        get_lister("unknown", "db-url")

    assert "Invalid lister" in str(e.value)
@ -37,23 +37,22 @@ def test_get_lister_override():
    db_url = init_db().url()

    listers = {
        'gitlab': 'https://other.gitlab.uni/api/v4/',
        'phabricator': 'https://somewhere.org/api/diffusion.repository.search',
        'cgit': 'https://some.where/cgit',
        "gitlab": "https://other.gitlab.uni/api/v4/",
        "phabricator": "https://somewhere.org/api/diffusion.repository.search",
        "cgit": "https://some.where/cgit",
    }

    # check the override ends up defined in the lister
    for lister_name, url in listers.items():
        lst = get_lister(
            lister_name, db_url, **{
                'url': url,
                'priority': 'high',
                'policy': 'oneshot',
            })
            lister_name,
            db_url,
            **{"url": url, "priority": "high", "policy": "oneshot",}
        )

        assert lst.url == url
        assert lst.config['priority'] == 'high'
        assert lst.config['policy'] == 'oneshot'
        assert lst.config["priority"] == "high"
        assert lst.config["policy"] == "oneshot"

    # check the default urls are used and not the override (since it's not
    # passed)
@ -61,7 +60,7 @@ def test_get_lister_override():
        lst = get_lister(lister_name, db_url)

        # no override so this does not end up in lister's configuration
        assert 'url' not in lst.config
        assert 'priority' not in lst.config
        assert 'oneshot' not in lst.config
        assert "url" not in lst.config
        assert "priority" not in lst.config
        assert "oneshot" not in lst.config
        assert lst.url == lst.DEFAULT_URL
@ -10,7 +10,6 @@ from swh.lister import utils
class UtilsTest(unittest.TestCase):

    def test_split_range(self):
        actual_ranges = list(utils.split_range(14, 5))
        self.assertEqual(actual_ranges, [(0, 5), (5, 10), (10, 14)])
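Judging only from the expected value, `split_range(14, 5)` steps by 5 and clamps the final bound at the total. A sketch consistent with that output (an assumption: the real swh.lister.utils implementation may differ in details):

    def split_range(total, step):
        # yields (0, 5), (5, 10), (10, 14) for split_range(14, 5)
        for start in range(0, total, step):
            yield start, min(start + step, total)

    assert list(split_range(14, 5)) == [(0, 5), (5, 10), (10, 14)]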
@ -33,6 +32,6 @@ def init_db():
    db object to ease db manipulation

    """
    initdb_args = Postgresql.DEFAULT_SETTINGS['initdb_args']
    initdb_args = ' '.join([initdb_args, '-E UTF-8'])
    initdb_args = Postgresql.DEFAULT_SETTINGS["initdb_args"]
    initdb_args = " ".join([initdb_args, "-E UTF-8"])
    return Postgresql(initdb_args=initdb_args)
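`init_db` wraps testing.postgresql, so each call provisions a throwaway UTF-8 cluster. Typical usage, as in test_get_lister_override above (`url()` and `stop()` are standard testing.postgresql methods):

    db = init_db()     # temporary PostgreSQL instance, initdb run with -E UTF-8
    db_url = db.url()  # connection string handed to get_lister(...)
    # ... exercise the lister against db_url ...
    db.stop()          # tear the temporary cluster down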
9 tox.ini
@ -1,5 +1,5 @@
[tox]
envlist=flake8,mypy,py3
envlist=black,flake8,mypy,py3

[testenv]
extras =
@ -13,6 +13,13 @@ commands =
    !dev: --cov={envsitepackagesdir}/swh/lister/ --cov-branch \
    {envsitepackagesdir}/swh/lister/ {posargs}

[testenv:black]
skip_install = true
deps =
  black
commands =
  {envpython} -m black --check swh
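Most of this commit is exactly what `black --check` verifies: string quote normalization and re-wrapping at the default 88-column limit. A quick illustration through black's Python API (a sketch; `format_str` and `FileMode` exist in black 19.10b0, though the CLI used by the tox environment above is the supported entry point):

    import black

    src = "x = {'a': 1,'b': 2}\n"
    print(black.format_str(src, mode=black.FileMode()))
    # x = {"a": 1, "b": 2}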
[testenv:flake8]
skip_install = true
deps =