swh.lister: Document swh.lister.tasks's intent

And remove unneeded indexing name from the RangeListerTask
This commit is contained in:
Antoine R. Dumont (@ardumont) 2018-07-04 16:29:21 +02:00
parent ba146376d6
commit 7954e03627
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
4 changed files with 33 additions and 22 deletions

View file

@ -1,9 +1,9 @@
# Copyright (C) 2017 the Software Heritage developers
# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
IndexingRangeListerTask,
RangeListerTask,
IndexingRefreshListerTask, ListerTaskBase)
from .lister import BitBucketLister
@ -20,7 +20,7 @@ class IncrementalBitBucketLister(BitBucketListerTask,
task_queue = 'swh_lister_bitbucket_discover'
class RangeBitBucketLister(BitBucketListerTask, IndexingRangeListerTask):
class RangeBitBucketLister(BitBucketListerTask, RangeListerTask):
task_queue = 'swh_lister_bitbucket_refresh'

View file

@ -49,25 +49,41 @@ class ListerTaskBase(Task, metaclass=AbstractTaskMeta):
pass
class IndexingDiscoveryListerTask(ListerTaskBase):
def run_task(self, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
return lister.run(min_index=lister.db_last_index(), max_index=None)
# Paging/Indexing lister tasks derivatives
# (cf. {github/bitbucket/gitlab}/tasks)
class IndexingRangeListerTask(ListerTaskBase):
class RangeListerTask(ListerTaskBase):
"""Range lister task.
"""
def run_task(self, start, end, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
return lister.run(min_index=start, max_index=end)
# Indexing Lister tasks derivatives (cf. {github/bitbucket}/tasks)
class IndexingDiscoveryListerTask(ListerTaskBase):
"""Incremental indexing lister task.
"""
def run_task(self, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
return lister.run(min_index=lister.db_last_index(), max_index=None)
class IndexingRefreshListerTask(ListerTaskBase):
"""Full indexing lister task.
"""
GROUP_SPLIT = 10000
def run_task(self, *args, **kwargs):
lister = self.new_lister(*args, **kwargs)
ranges = lister.db_partition_indices(self.GROUP_SPLIT)
random.shuffle(ranges)
range_task = IndexingRangeListerTask()
range_task = RangeListerTask()
group(range_task.s(minv, maxv, *args, **kwargs)
for minv, maxv in ranges)()

View file

@ -1,9 +1,9 @@
# Copyright (C) 2017 the Software Heritage developers
# Copyright (C) 2017-2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
IndexingRangeListerTask,
RangeListerTask,
IndexingRefreshListerTask, ListerTaskBase)
from .lister import GitHubLister
@ -19,7 +19,7 @@ class IncrementalGitHubLister(GitHubListerTask, IndexingDiscoveryListerTask):
task_queue = 'swh_lister_github_discover'
class RangeGitHubLister(GitHubListerTask, IndexingRangeListerTask):
class RangeGitHubLister(GitHubListerTask, RangeListerTask):
task_queue = 'swh_lister_github_refresh'

View file

@ -2,9 +2,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
IndexingRangeListerTask,
IndexingRefreshListerTask, ListerTaskBase)
from swh.lister.core.tasks import ListerTaskBase, RangeListerTask
from .lister import GitLabLister
@ -16,14 +15,10 @@ class GitLabDotComListerTask(ListerTaskBase):
lister_name=lister_name, api_baseurl=api_baseurl)
class IncrementalGitLabDotComLister(GitLabDotComListerTask,
IndexingDiscoveryListerTask):
task_queue = 'swh_lister_gitlab_discover'
class RangeGitLabLister(GitLabDotComListerTask, RangeListerTask):
"""GitLab lister working on specified range (start, end) arguments.
class RangeGitLabLister(GitLabDotComListerTask, IndexingRangeListerTask):
"""
task_queue = 'swh_lister_gitlab_refresh'
class FullGitLabRelister(GitLabDotComListerTask, IndexingRefreshListerTask):
task_queue = 'swh_lister_gitlab_refresh'