swh.lister.gitlab: Add full gitlab lister

Related T989
This commit is contained in:
Antoine R. Dumont (@ardumont) 2018-07-04 18:47:02 +02:00
parent 7954e03627
commit a51c36194e
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
4 changed files with 84 additions and 7 deletions

View file

@ -100,14 +100,19 @@ class SWHListerHttpTransport(abc.ABC):
self.session = requests.Session()
self.lister_version = __version__
def transport_request(self, identifier):
"""Implements SWHListerBase.transport_request for HTTP using Requests.
def _transport_action(self, identifier, method='get'):
"""Permit to ask information to the api prior to actually executing
query.
"""
path = self.request_uri(identifier)
params = self.request_params(identifier)
try:
response = self.session.get(path, **params)
if method == 'head':
response = self.session.head(path, **params)
else:
response = self.session.get(path, **params)
except requests.exceptions.ConnectionError as e:
raise FetchError(e)
else:
@ -115,6 +120,20 @@ class SWHListerHttpTransport(abc.ABC):
raise FetchError(response)
return response
def transport_head(self, identifier):
    """Send a HEAD query to the api for the given identifier.

    Delegates to _transport_action with method='head' and hands back
    whatever that call returns.

    """
    head_response = self._transport_action(identifier, method='head')
    return head_response
def transport_request(self, identifier):
    """Implements SWHListerBase.transport_request for HTTP using Requests.

    Delegates to _transport_action with its default method and hands back
    whatever that call returns.

    """
    get_response = self._transport_action(identifier)
    return get_response
def transport_response_to_string(self, response):
"""Implements SWHListerBase.transport_response_to_string for HTTP given
Requests responses.

View file

@ -57,6 +57,26 @@ class SWHPagingLister(SWHListerBase):
"""
pass
@abc.abstractmethod
def get_pages_information(self):
    """Report paging totals for the listing.

    How this is computed depends on the server API spec and on the
    shape of the network response object returned by the
    transport_request method.

    For example, some api can expose dedicated headers:
        - x-total-pages: the total number of pages
        - x-total: the total number of repositories
        - x-per-page: the number of elements per page

    Returns:
        tuple (total number of repositories, total number of
        pages, per_page)

    """
# You probably don't need to override anything below this line.
def run(self, min_index=None, max_index=None):
@ -76,6 +96,7 @@ class SWHPagingLister(SWHListerBase):
"""
index = min_index or ''
loop_count = 0
self.min_index = min_index
self.max_index = max_index

View file

@ -100,7 +100,7 @@ class GitLabLister(SWHPagingHttpLister):
return False, 0
def get_next_target_from_response(self, response):
"""Deal with pagination
"""Determine the next page identifier.
"""
if 'next' in response.links:
@ -108,6 +108,23 @@ class GitLabLister(SWHPagingHttpLister):
return int(self.API_URL_INDEX_RE.match(next_url).group(1))
return None
def get_pages_information(self):
    """Read the paging totals from a HEAD request on identifier 1.

    Each value is looked up under its lowercase header name first, then
    under its capitalized name, and converted to int when present.

    Returns:
        tuple (total, total_pages, per_page); an entry is None when the
        corresponding header is absent from the response.

    """
    headers = self.transport_head(identifier=1).headers

    def header_as_int(lower_name, capitalized_name):
        # Same lookup order as before: lowercase key, capitalized fallback.
        raw = headers.get(lower_name, headers.get(capitalized_name))
        return None if raw is None else int(raw)

    return (
        header_as_int('x-total', 'X-Total'),
        header_as_int('x-total-pages', 'X-Total-Pages'),
        header_as_int('x-per-page', 'X-Per-Page'),
    )
def transport_response_simplified(self, response):
    """Turn the decoded JSON body of a response into a list of models.

    Every entry of response.json() is passed through get_model_from_repo,
    preserving order.

    """
    models = []
    for repo_entry in response.json():
        models.append(self.get_model_from_repo(repo_entry))
    return models

View file

@ -2,23 +2,43 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import ListerTaskBase, RangeListerTask
import random
from celery import group
from ..core.tasks import ListerTaskBase, RangeListerTask
from .lister import GitLabLister
class GitLabDotComListerTask(ListerTaskBase):
class GitLabListerTask(ListerTaskBase):
    """Lister task building GitLabLister instances."""

    def new_lister(self, lister_name='gitlab.com',
                   api_baseurl='https://gitlab.com/api/v4'):
        """Build a GitLabLister for the given instance.

        Args:
            lister_name: name forwarded to GitLabLister
            api_baseurl: api base url forwarded to GitLabLister

        Returns:
            the new GitLabLister

        """
        lister_options = {
            'lister_name': lister_name,
            'api_baseurl': api_baseurl,
        }
        return GitLabLister(**lister_options)
class RangeGitLabLister(GitLabDotComListerTask, RangeListerTask):
class RangeGitLabLister(GitLabListerTask, RangeListerTask):
    """GitLab lister task restricted to a given (start, end) range.

    """

    task_queue = 'swh_lister_gitlab_refresh'
class FullGitLabRelister(GitLabListerTask):
    """Full relisting task for a GitLab instance.

    Splits the whole listing into consecutive (start, end) ranges and
    schedules one RangeGitLabLister task per range, in random order.

    """
    task_queue = 'swh_lister_gitlab_refresh'

    def run_task(self, *args, **kwargs):
        """Schedule range listing tasks covering the whole instance.

        Args:
            *args, **kwargs: forwarded both to new_lister and to every
                scheduled RangeGitLabLister task.

        Raises:
            ValueError: if the api did not report the total number of
                elements or the number of elements per page.

        """
        lister = self.new_lister(*args, **kwargs)
        total, _, per_page = lister.get_pages_information()
        if total is None or per_page is None:
            # get_pages_information returns None for missing headers;
            # fail with a clear message instead of a TypeError in range().
            raise ValueError(
                'Cannot compute listing ranges: total=%r, per_page=%r'
                % (total, per_page))

        # NOTE(review): ranges are expressed in the same unit as before
        # (steps of per_page up to total). The lister seems to index by
        # page number -- confirm whether total_pages should drive this.
        # The last range is clamped to total so the trailing chunk is no
        # longer dropped.
        ranges = [(start, min(start + per_page, total))
                  for start in range(0, total, per_page)]

        # Shuffle so the ranges are not scheduled in sequential order.
        random.shuffle(ranges)
        range_task = RangeGitLabLister()
        group(range_task.s(minv, maxv, *args, **kwargs)
              for minv, maxv in ranges)()