Heavy refactor of the task system
Get rid of the class based task definition in favor of decorator-based task declarations. Doing so, we can get rid of core/tasks.py Task names are explicitely set to keep compatibility with task definitions in schedulers' database. This also add debug statements at the beginning and end of each lister task.
This commit is contained in:
parent
94c1eaf402
commit
2d1f0643ff
7 changed files with 183 additions and 245 deletions
|
@ -2,25 +2,52 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
|
||||
RangeListerTask,
|
||||
IndexingRefreshListerTask, ListerTaskBase)
|
||||
import random
|
||||
|
||||
from .lister import GitHubLister
|
||||
from celery import group
|
||||
|
||||
from swh.scheduler.celery_backend.config import app
|
||||
from swh.scheduler.task import SWHTask
|
||||
|
||||
from swh.lister.github.lister import GitHubLister
|
||||
|
||||
GROUP_SPLIT = 10000
|
||||
|
||||
|
||||
class GitHubListerTask(ListerTaskBase):
|
||||
def new_lister(self, *, api_baseurl='https://api.github.com'):
|
||||
return GitHubLister(api_baseurl=api_baseurl)
|
||||
def new_lister(api_baseurl='https://api.github.com', **kw):
|
||||
return GitHubLister(api_baseurl=api_baseurl, **kw)
|
||||
|
||||
|
||||
class IncrementalGitHubLister(GitHubListerTask, IndexingDiscoveryListerTask):
|
||||
task_queue = 'swh_lister_github_discover'
|
||||
@app.task(name='swh.lister.github.tasks.IncrementalGitHubLister',
|
||||
base=SWHTask, bind=True)
|
||||
def incremental_github_lister(self, **lister_args):
|
||||
self.log.debug('%s, lister_args=%s' % (
|
||||
self.name, lister_args))
|
||||
lister = new_lister(**lister_args)
|
||||
lister.run(min_bound=lister.db_last_index(), max_bound=None)
|
||||
self.log.debug('%s OK' % (self.name))
|
||||
|
||||
|
||||
class RangeGitHubLister(GitHubListerTask, RangeListerTask):
|
||||
task_queue = 'swh_lister_github_refresh'
|
||||
@app.task(name='swh.lister.github.tasks.RangeGitHubLister',
|
||||
base=SWHTask, bind=True)
|
||||
def range_github_lister(self, start, end, **lister_args):
|
||||
self.log.debug('%s(start=%s, end=%d), lister_args=%s' % (
|
||||
self.name, start, end, lister_args))
|
||||
lister = new_lister(**lister_args)
|
||||
lister.run(min_bound=start, max_bound=end)
|
||||
self.log.debug('%s OK' % (self.name))
|
||||
|
||||
|
||||
@app.task(name='swh.lister.github.tasks.FullGitHubRelister',
|
||||
base=SWHTask, bind=True)
|
||||
def full_github_relister(self, split=None, **lister_args):
|
||||
self.log.debug('%s, lister_args=%s' % (
|
||||
self.name, lister_args))
|
||||
lister = new_lister(**lister_args)
|
||||
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
|
||||
random.shuffle(ranges)
|
||||
group(range_github_lister.s(minv, maxv, **lister_args)
|
||||
for minv, maxv in ranges)()
|
||||
self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
|
||||
|
||||
|
||||
class FullGitHubRelister(GitHubListerTask, IndexingRefreshListerTask):
|
||||
task_queue = 'swh_lister_github_refresh'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue