tasks: add tasks for incremental and full updates
This commit is contained in:
parent
6fd0184229
commit
d47905b0a1
1 changed files with 47 additions and 0 deletions
47
swh/lister/github/tasks.py
Normal file
47
swh/lister/github/tasks.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
# Copyright (C) 2016 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import random
|
||||
|
||||
from celery import group
|
||||
|
||||
from swh.scheduler.task import Task
|
||||
|
||||
from .lister import GitHubLister
|
||||
|
||||
GROUP_SPLIT = 10000
|
||||
|
||||
|
||||
class IncrementalGitHubLister(Task):
|
||||
task_queue = 'swh_lister_github_incremental'
|
||||
|
||||
def run(self):
|
||||
lister = GitHubLister()
|
||||
last_id = lister.last_repo_id()
|
||||
lister.fetch(min_id=last_id + 1, max_id=None)
|
||||
|
||||
|
||||
class RangeGitHubLister(Task):
|
||||
task_queue = 'swh_lister_github_full'
|
||||
|
||||
def run(self, start, end):
|
||||
lister = GitHubLister()
|
||||
lister.fetch(min_id=start, max_id=end)
|
||||
|
||||
|
||||
class FullGitHubLister(Task):
|
||||
task_queue = 'swh_lister_github_full'
|
||||
|
||||
def run(self):
|
||||
lister = GitHubLister()
|
||||
last_id = lister.last_repo_id()
|
||||
ranges = [
|
||||
(i, min(last_id, i + GROUP_SPLIT - 1))
|
||||
for i in range(1, last_id, GROUP_SPLIT)
|
||||
]
|
||||
|
||||
random.shuffle(ranges)
|
||||
|
||||
range_task = RangeGitHubLister()
|
||||
group(range_task.s(min, max) for min, max in ranges)()
|
Loading…
Add table
Add a link
Reference in a new issue