swh.lister.gitlab: Add full gitlab lister

Related T989
This commit is contained in:
Antoine R. Dumont (@ardumont) 2018-07-04 18:47:02 +02:00
parent 7954e03627
commit a51c36194e
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
4 changed files with 84 additions and 7 deletions

View file

@ -100,7 +100,7 @@ class GitLabLister(SWHPagingHttpLister):
return False, 0
def get_next_target_from_response(self, response):
"""Deal with pagination
"""Determine the next page identifier.
"""
if 'next' in response.links:
@ -108,6 +108,23 @@ class GitLabLister(SWHPagingHttpLister):
return int(self.API_URL_INDEX_RE.match(next_url).group(1))
return None
def get_pages_information(self):
    """Read the pagination counters advertised by the API.

    The first page is queried through ``self.transport_head(identifier=1)``
    and the counters are taken from the response headers.

    Returns:
        A ``(total, total_pages, per_page)`` tuple built from the
        ``X-Total``, ``X-Total-Pages`` and ``X-Per-Page`` headers
        respectively. Each entry is an ``int``, or ``None`` when the
        matching header is absent.

    """
    headers = self.transport_head(identifier=1).headers

    def read_counter(lowercase_name, canonical_name):
        # Try the lowercase spelling first, then the canonical one, so
        # both header casings are accepted.
        raw_value = headers.get(lowercase_name, headers.get(canonical_name))
        return None if raw_value is None else int(raw_value)

    return (
        read_counter('x-total', 'X-Total'),
        read_counter('x-total-pages', 'X-Total-Pages'),
        read_counter('x-per-page', 'X-Per-Page'),
    )
def transport_response_simplified(self, response):
    """Convert a raw API response into a list of models.

    The JSON payload of ``response`` is decoded and every entry is passed
    through ``self.get_model_from_repo``; the results are returned in the
    same order as the payload.

    """
    return list(map(self.get_model_from_repo, response.json()))

View file

@ -2,23 +2,43 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import ListerTaskBase, RangeListerTask
import random
from celery import group
from ..core.tasks import ListerTaskBase, RangeListerTask
from .lister import GitLabLister
class GitLabDotComListerTask(ListerTaskBase):
class GitLabListerTask(ListerTaskBase):
    """Base task that knows how to build a ``GitLabLister``."""

    def new_lister(self, lister_name='gitlab.com',
                   api_baseurl='https://gitlab.com/api/v4'):
        """Instantiate the lister used by this task.

        Args:
            lister_name: name forwarded to ``GitLabLister``
                (defaults to ``'gitlab.com'``).
            api_baseurl: API base url forwarded to ``GitLabLister``
                (defaults to the gitlab.com v4 API).

        Returns:
            A new ``GitLabLister`` instance.

        """
        lister_options = {
            'lister_name': lister_name,
            'api_baseurl': api_baseurl,
        }
        return GitLabLister(**lister_options)
class RangeGitLabLister(GitLabDotComListerTask, RangeListerTask):
class RangeGitLabLister(GitLabListerTask, RangeListerTask):
    """Range-bound GitLab listing task.

    Works on the range delimited by the (start, end) arguments it is
    given.

    """
    task_queue = 'swh_lister_gitlab_refresh'
class FullGitLabRelister(GitLabListerTask):
    """Full relisting task.

    Splits the whole listing into consecutive ranges and dispatches one
    ``RangeGitLabLister`` subtask per range, in random order.

    """
    task_queue = 'swh_lister_gitlab_refresh'

    def run_task(self, *args, **kwargs):
        """Schedule the range subtasks covering the complete listing.

        Args:
            *args, **kwargs: forwarded both to ``new_lister`` and to every
                ``RangeGitLabLister`` subtask signature.

        Raises:
            ValueError: when the API did not report the total count or
                the page size, so the ranges cannot be computed.

        """
        lister = self.new_lister(*args, **kwargs)
        total, _, per_page = lister.get_pages_information()
        if total is None or per_page is None:
            # Fail with an explicit message instead of the opaque
            # TypeError that range(0, None, ...) would raise.
            raise ValueError(
                'Missing pagination information (total=%r, per_page=%r)'
                % (total, per_page))

        # Boundaries 0, per_page, 2 * per_page, ... plus the closing
        # `total`, so the trailing (possibly partial) range is scheduled
        # as well. range() never yields `total` itself, hence no
        # duplicate boundary is introduced.
        # NOTE(review): these boundaries are derived from total/per_page
        # as in the original code; confirm RangeGitLabLister expects this
        # unit rather than page numbers.
        bounds = list(range(0, total, per_page))
        bounds.append(total)
        ranges = list(zip(bounds, bounds[1:]))

        # Shuffle so the subtasks are not dispatched in sequential order.
        random.shuffle(ranges)
        range_task = RangeGitLabLister()
        group(range_task.s(minv, maxv, *args, **kwargs)
              for minv, maxv in ranges)()