core/lister: Make the tasks take an explicit lister_args argument

Avoid eating *all* arbitrary arguments and passing them along to the
new_lister method.
This commit is contained in:
Antoine R. Dumont (@ardumont) 2018-07-17 13:56:36 +02:00
parent d08ab241f5
commit d88f1b60c9
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
6 changed files with 41 additions and 37 deletions

View file

@ -114,11 +114,9 @@ Local deployment
[GCC 8.1.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
instance='salsa.debian.org', api_baseurl='https://salsa.debian.org/api/v4')
>>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
instance='gitlab.freedesktop.org', api_baseurl='https://gitlab.freedesktop.org/api/v4')
>>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
instance='gitlab.gnome.org', api_baseurl='https://gitlab.gnome.org/api/v4')
>>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
instance='gitlab.inria.fr', api_baseurl='https://gitlab.inria.fr/api/v4')
>>>
{'instance': 'debian', 'api_baseurl': 'https://salsa.debian.org/api/v4', 'sort': 'asc'})
>>> from swh.lister.gitlab.tasks import FullGitLabRelister; FullGitLabRelister().run_task(
{'instance':'0xacab', 'api_baseurl':'https://0xacab.org/api/v4', 'sort': 'asc'})
>>> from swh.lister.gitlab.tasks import IncrementalGitLabLister; IncrementalGitLabLister().run_task(
{'instance': 'freedesktop.org', 'api_baseurl': 'https://gitlab.freedesktop.org/api/v4',
'sort': 'asc'})

View file

@ -10,8 +10,8 @@ from .lister import BitBucketLister
class BitBucketListerTask(ListerTaskBase):
def new_lister(self):
return BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0')
def new_lister(self, *, api_baseurl='https://api.bitbucket.org/2.0'):
return BitBucketLister(api_baseurl=api_baseurl)
class IncrementalBitBucketLister(BitBucketListerTask,

View file

@ -39,13 +39,13 @@ class ListerTaskBase(Task, metaclass=AbstractTaskMeta):
task_queue = AbstractAttribute('Celery Task queue name')
@abc.abstractmethod
def new_lister(self, *args, **kwargs):
def new_lister(self, **lister_args):
"""Return a new lister of the appropriate type.
"""
pass
@abc.abstractmethod
def run_task(self, *args, **kwargs):
def run_task(self, *, lister_args=None):
pass
@ -57,8 +57,10 @@ class RangeListerTask(ListerTaskBase):
"""Range lister task.
"""
def run_task(self, start, end, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
def run_task(self, start, end, lister_args=None):
if lister_args is None:
lister_args = {}
lister = self.new_lister(**lister_args)
return lister.run(min_bound=start, max_bound=end)
@ -69,8 +71,10 @@ class IndexingDiscoveryListerTask(ListerTaskBase):
"""Incremental indexing lister task.
"""
def run_task(self, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
def run_task(self, *, lister_args=None):
if lister_args is None:
lister_args = {}
lister = self.new_lister(**lister_args)
return lister.run(min_bound=lister.db_last_index(), max_bound=None)
@ -80,10 +84,12 @@ class IndexingRefreshListerTask(ListerTaskBase):
"""
GROUP_SPLIT = 10000
def run_task(self, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
def run_task(self, *, lister_args=None):
if lister_args is None:
lister_args = {}
lister = self.new_lister(**lister_args)
ranges = lister.db_partition_indices(self.GROUP_SPLIT)
random.shuffle(ranges)
range_task = RangeListerTask()
group(range_task.s(minv, maxv, *args, **kwargs)
group(range_task.s(minv, maxv, lister_args)
for minv, maxv in ranges)()

View file

@ -1,4 +1,4 @@
# Copyright (C) 2017 the Software Heritage developers
# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

View file

@ -10,8 +10,8 @@ from .lister import GitHubLister
class GitHubListerTask(ListerTaskBase):
def new_lister(self):
return GitHubLister(api_baseurl='https://api.github.com')
def new_lister(self, *, api_baseurl='https://api.github.com'):
return GitHubLister(api_baseurl=api_baseurl)
class IncrementalGitHubLister(GitHubListerTask, IndexingDiscoveryListerTask):

View file

@ -12,9 +12,10 @@ from .lister import GitLabLister
class GitLabListerTask(ListerTaskBase):
def new_lister(self, api_baseurl='https://gitlab.com/api/v4',
instance='gitlab.com'):
return GitLabLister(api_baseurl=api_baseurl, instance=instance)
def new_lister(self, *, api_baseurl='https://gitlab.com/api/v4',
instance='gitlab.com', sort='asc'):
return GitLabLister(
api_baseurl=api_baseurl, instance=instance, sort=sort)
class RangeGitLabLister(GitLabListerTask, RangeListerTask):
@ -33,30 +34,29 @@ class FullGitLabRelister(GitLabListerTask):
# nb pages
nb_pages = 10
def run_task(self, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
def run_task(self, lister_args=None):
if lister_args is None:
lister_args = {}
lister = self.new_lister(**lister_args)
_, total_pages, _ = lister.get_pages_information()
ranges = list(utils.split_range(total_pages, self.nb_pages))
random.shuffle(ranges)
range_task = RangeGitLabLister()
group(range_task.s(minv, maxv, *args, **kwargs)
group(range_task.s(minv, maxv, lister_args=lister_args)
for minv, maxv in ranges)()
class IncrementalGitLabLister(ListerTaskBase):
class IncrementalGitLabLister(GitLabListerTask):
"""Incremental GitLab lister (list only new available origins).
"""
task_queue = 'swh_lister_gitlab_discover'
def new_lister(self, api_baseurl='https://gitlab.com/api/v4',
instance='gitlab.com'):
# assuming going forward in desc order, page 1 through <x-total-pages>
return GitLabLister(instance=instance, api_baseurl=api_baseurl,
sort='desc')
def run_task(self, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
def run_task(self, lister_args=None):
if lister_args is None:
lister_args = {}
lister_args['sort'] = 'desc'
lister = self.new_lister(**lister_args)
_, total_pages, _ = lister.get_pages_information()
# stopping as soon as existing origins for that instance are detected
return lister.run(min_bound=1, max_bound=total_pages,