core/lister: Make the tasks take an explicit lister_args argument
Avoid eating *all* arbitrary arguments and passing them along to the new_lister method.
This commit is contained in:
parent
d08ab241f5
commit
d88f1b60c9
6 changed files with 41 additions and 37 deletions
14
README.md
14
README.md
|
@ -114,11 +114,9 @@ Local deployment
|
|||
[GCC 8.1.0] on linux
|
||||
Type "help", "copyright", "credits" or "license" for more information.
|
||||
>>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
|
||||
instance='salsa.debian.org', api_baseurl='https://salsa.debian.org/api/v4')
|
||||
>>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
|
||||
instance='gitlab.freedesktop.org', api_baseurl='https://gitlab.freedesktop.org/api/v4')
|
||||
>>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
|
||||
instance='gitlab.gnome.org', api_baseurl='https://gitlab.gnome.org/api/v4')
|
||||
>>> from swh.lister.gitlab.tasks import RangeGitLabLister; RangeGitLabLister().run_task(1, 2,
|
||||
instance='gitlab.inria.fr', api_baseurl='https://gitlab.inria.fr/api/v4')
|
||||
>>>
|
||||
{'instance': 'debian', 'api_baseurl': 'https://salsa.debian.org/api/v4', 'sort': 'asc'})
|
||||
>>> from swh.lister.gitlab.tasks import FullGitLabRelister; FullGitLabRelister().run_task(
|
||||
{'instance':'0xacab', 'api_baseurl':'https://0xacab.org/api/v4', 'sort': 'asc'})
|
||||
>>> from swh.lister.gitlab.tasks import IncrementalGitLabLister; IncrementalGitLabLister().run_task(
|
||||
{'instance': 'freedesktop.org', 'api_baseurl': 'https://gitlab.freedesktop.org/api/v4',
|
||||
'sort': 'asc'})
|
||||
|
|
|
@ -10,8 +10,8 @@ from .lister import BitBucketLister
|
|||
|
||||
|
||||
class BitBucketListerTask(ListerTaskBase):
|
||||
def new_lister(self):
|
||||
return BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0')
|
||||
def new_lister(self, *, api_baseurl='https://api.bitbucket.org/2.0'):
|
||||
return BitBucketLister(api_baseurl=api_baseurl)
|
||||
|
||||
|
||||
class IncrementalBitBucketLister(BitBucketListerTask,
|
||||
|
|
|
@ -39,13 +39,13 @@ class ListerTaskBase(Task, metaclass=AbstractTaskMeta):
|
|||
task_queue = AbstractAttribute('Celery Task queue name')
|
||||
|
||||
@abc.abstractmethod
|
||||
def new_lister(self, *args, **kwargs):
|
||||
def new_lister(self, **lister_args):
|
||||
"""Return a new lister of the appropriate type.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def run_task(self, *args, **kwargs):
|
||||
def run_task(self, *, lister_args=None):
|
||||
pass
|
||||
|
||||
|
||||
|
@ -57,8 +57,10 @@ class RangeListerTask(ListerTaskBase):
|
|||
"""Range lister task.
|
||||
|
||||
"""
|
||||
def run_task(self, start, end, *args, **kwargs):
|
||||
lister = self.new_lister(*args, **kwargs)
|
||||
def run_task(self, start, end, lister_args=None):
|
||||
if lister_args is None:
|
||||
lister_args = {}
|
||||
lister = self.new_lister(**lister_args)
|
||||
return lister.run(min_bound=start, max_bound=end)
|
||||
|
||||
|
||||
|
@ -69,8 +71,10 @@ class IndexingDiscoveryListerTask(ListerTaskBase):
|
|||
"""Incremental indexing lister task.
|
||||
|
||||
"""
|
||||
def run_task(self, *args, **kwargs):
|
||||
lister = self.new_lister(*args, **kwargs)
|
||||
def run_task(self, *, lister_args=None):
|
||||
if lister_args is None:
|
||||
lister_args = {}
|
||||
lister = self.new_lister(**lister_args)
|
||||
return lister.run(min_bound=lister.db_last_index(), max_bound=None)
|
||||
|
||||
|
||||
|
@ -80,10 +84,12 @@ class IndexingRefreshListerTask(ListerTaskBase):
|
|||
"""
|
||||
GROUP_SPLIT = 10000
|
||||
|
||||
def run_task(self, *args, **kwargs):
|
||||
lister = self.new_lister(*args, **kwargs)
|
||||
def run_task(self, *, lister_args=None):
|
||||
if lister_args is None:
|
||||
lister_args = {}
|
||||
lister = self.new_lister(**lister_args)
|
||||
ranges = lister.db_partition_indices(self.GROUP_SPLIT)
|
||||
random.shuffle(ranges)
|
||||
range_task = RangeListerTask()
|
||||
group(range_task.s(minv, maxv, *args, **kwargs)
|
||||
group(range_task.s(minv, maxv, lister_args)
|
||||
for minv, maxv in ranges)()
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2017 the Software Heritage developers
|
||||
# Copyright (C) 2017-2018 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
|
|
@ -10,8 +10,8 @@ from .lister import GitHubLister
|
|||
|
||||
|
||||
class GitHubListerTask(ListerTaskBase):
|
||||
def new_lister(self):
|
||||
return GitHubLister(api_baseurl='https://api.github.com')
|
||||
def new_lister(self, *, api_baseurl='https://api.github.com'):
|
||||
return GitHubLister(api_baseurl=api_baseurl)
|
||||
|
||||
|
||||
class IncrementalGitHubLister(GitHubListerTask, IndexingDiscoveryListerTask):
|
||||
|
|
|
@ -12,9 +12,10 @@ from .lister import GitLabLister
|
|||
|
||||
|
||||
class GitLabListerTask(ListerTaskBase):
|
||||
def new_lister(self, api_baseurl='https://gitlab.com/api/v4',
|
||||
instance='gitlab.com'):
|
||||
return GitLabLister(api_baseurl=api_baseurl, instance=instance)
|
||||
def new_lister(self, *, api_baseurl='https://gitlab.com/api/v4',
|
||||
instance='gitlab.com', sort='asc'):
|
||||
return GitLabLister(
|
||||
api_baseurl=api_baseurl, instance=instance, sort=sort)
|
||||
|
||||
|
||||
class RangeGitLabLister(GitLabListerTask, RangeListerTask):
|
||||
|
@ -33,30 +34,29 @@ class FullGitLabRelister(GitLabListerTask):
|
|||
# nb pages
|
||||
nb_pages = 10
|
||||
|
||||
def run_task(self, *args, **kwargs):
|
||||
lister = self.new_lister(*args, **kwargs)
|
||||
def run_task(self, lister_args=None):
|
||||
if lister_args is None:
|
||||
lister_args = {}
|
||||
lister = self.new_lister(**lister_args)
|
||||
_, total_pages, _ = lister.get_pages_information()
|
||||
ranges = list(utils.split_range(total_pages, self.nb_pages))
|
||||
random.shuffle(ranges)
|
||||
range_task = RangeGitLabLister()
|
||||
group(range_task.s(minv, maxv, *args, **kwargs)
|
||||
group(range_task.s(minv, maxv, lister_args=lister_args)
|
||||
for minv, maxv in ranges)()
|
||||
|
||||
|
||||
class IncrementalGitLabLister(ListerTaskBase):
|
||||
class IncrementalGitLabLister(GitLabListerTask):
|
||||
"""Incremental GitLab lister (list only new available origins).
|
||||
|
||||
"""
|
||||
task_queue = 'swh_lister_gitlab_discover'
|
||||
|
||||
def new_lister(self, api_baseurl='https://gitlab.com/api/v4',
|
||||
instance='gitlab.com'):
|
||||
# assuming going forward in desc order, page 1 through <x-total-pages>
|
||||
return GitLabLister(instance=instance, api_baseurl=api_baseurl,
|
||||
sort='desc')
|
||||
|
||||
def run_task(self, *args, **kwargs):
|
||||
lister = self.new_lister(*args, **kwargs)
|
||||
def run_task(self, lister_args=None):
|
||||
if lister_args is None:
|
||||
lister_args = {}
|
||||
lister_args['sort'] = 'desc'
|
||||
lister = self.new_lister(**lister_args)
|
||||
_, total_pages, _ = lister.get_pages_information()
|
||||
# stopping as soon as existing origins for that instance are detected
|
||||
return lister.run(min_bound=1, max_bound=total_pages,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue