Heavy refactor of the task system

Get rid of the class-based task definitions in favor of decorator-based
task declarations.

Doing so, we can get rid of core/tasks.py

Task names are explicitly set to keep compatibility with task
definitions in schedulers' database.

This also adds debug statements at the beginning and end of each lister
task.
This commit is contained in:
David Douard 2018-12-20 16:07:28 +01:00
parent 94c1eaf402
commit 2d1f0643ff
7 changed files with 183 additions and 245 deletions

View file

@ -2,26 +2,51 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
RangeListerTask,
IndexingRefreshListerTask, ListerTaskBase)
import random
from celery import group
from swh.scheduler.celery_backend.config import app
from swh.scheduler.task import SWHTask
from .lister import BitBucketLister
class BitBucketListerTask(ListerTaskBase):
    """Base class-based task for BitBucket: knows how to build the lister."""

    def new_lister(self, *, api_baseurl='https://api.bitbucket.org/2.0'):
        """Return a BitBucketLister bound to ``api_baseurl``.

        ``api_baseurl`` is keyword-only and defaults to the public
        BitBucket 2.0 API endpoint.
        """
        lister = BitBucketLister(api_baseurl=api_baseurl)
        return lister
# Fallback value handed to lister.db_partition_indices() by
# full_bitbucket_relister when the caller gives no (or a falsy) `split`.
# NOTE(review): presumably the size of each index partition -- confirm
# against db_partition_indices().
GROUP_SPLIT = 10000
class IncrementalBitBucketLister(
        BitBucketListerTask, IndexingDiscoveryListerTask):
    """Class-based task mixing BitBucketListerTask into
    IndexingDiscoveryListerTask; declares the 'discover' queue name."""

    task_queue = 'swh_lister_bitbucket_discover'
def new_lister(api_baseurl='https://api.bitbucket.org/2.0'):
    """Build a BitBucketLister for the given API base URL.

    Args:
        api_baseurl: base URL forwarded to ``BitBucketLister``; defaults to
            the public BitBucket 2.0 API endpoint.

    Returns:
        The newly constructed ``BitBucketLister`` instance.
    """
    lister = BitBucketLister(api_baseurl=api_baseurl)
    return lister
class RangeBitBucketLister(BitBucketListerTask, RangeListerTask):
    """Class-based task mixing BitBucketListerTask into RangeListerTask;
    declares the 'refresh' queue name."""

    task_queue = 'swh_lister_bitbucket_refresh'
@app.task(name='swh.lister.bitbucket.tasks.IncrementalBitBucketLister',
          base=SWHTask, bind=True)
def incremental_bitbucket_lister(self, **lister_args):
    """Run the BitBucket lister from the last index recorded in the db.

    Args:
        **lister_args: keyword arguments forwarded to ``new_lister``.

    The run starts at ``lister.db_last_index()`` and has no upper bound
    (``max_bound=None``). A debug line is logged before and after the run.
    """
    start_message = '%s, lister_args=%s' % (self.name, lister_args)
    self.log.debug(start_message)

    lister = new_lister(**lister_args)
    last_index = lister.db_last_index()
    lister.run(min_bound=last_index, max_bound=None)

    done_message = '%s OK' % self.name
    self.log.debug(done_message)
@app.task(name='swh.lister.bitbucket.tasks.RangeBitBucketLister',
          base=SWHTask, bind=True)
def range_bitbucket_lister(self, start, end, **lister_args):
    """Run the BitBucket lister between two explicit bounds.

    Args:
        start: lower bound, forwarded to ``lister.run`` as ``min_bound``.
        end: upper bound, forwarded to ``lister.run`` as ``max_bound``.
        **lister_args: keyword arguments forwarded to ``new_lister``.

    A debug line is logged before and after the run.
    """
    # Both bounds are rendered with %s. The bounds are opaque to this task,
    # and %d raises TypeError for any non-numeric value (None, a string
    # index, ...), which would abort the task inside a debug statement
    # before the lister even starts.
    self.log.debug('%s(start=%s, end=%s), lister_args=%s' % (
        self.name, start, end, lister_args))
    lister = new_lister(**lister_args)
    lister.run(min_bound=start, max_bound=end)
    self.log.debug('%s OK' % (self.name))
@app.task(name='swh.lister.bitbucket.tasks.FullBitBucketRelister',
          base=SWHTask, bind=True)
def full_bitbucket_relister(self, split=None, **lister_args):
    """Fan a full relisting out into one range subtask per index partition.

    Args:
        split: value passed to ``lister.db_partition_indices``; when falsy,
            ``GROUP_SPLIT`` is used instead.
        **lister_args: keyword arguments forwarded to ``new_lister`` and to
            every spawned ``range_bitbucket_lister`` subtask.

    The partition ranges are shuffled in place before being dispatched as
    a single celery ``group``. A debug line is logged before and after.
    """
    start_message = '%s, lister_args=%s' % (self.name, lister_args)
    self.log.debug(start_message)

    lister = new_lister(**lister_args)
    partition_arg = split or GROUP_SPLIT
    ranges = lister.db_partition_indices(partition_arg)
    # Randomize the order in which the ranges are handed to the group.
    random.shuffle(ranges)

    subtasks = (
        range_bitbucket_lister.s(lower, upper, **lister_args)
        for lower, upper in ranges
    )
    # Calling the group object dispatches its subtasks.
    group(subtasks)()

    spawned = len(ranges)
    self.log.debug('%s OK (spawned %s subtasks)' % (self.name, spawned))
class FullBitBucketRelister(
        BitBucketListerTask, IndexingRefreshListerTask):
    """Class-based task mixing BitBucketListerTask into
    IndexingRefreshListerTask; declares the 'refresh' queue name."""

    task_queue = 'swh_lister_bitbucket_refresh'