swh.lister.gitlab: Add full gitlab lister
Related T989
This commit is contained in:
parent
7954e03627
commit
a51c36194e
4 changed files with 84 additions and 7 deletions
|
@ -100,14 +100,19 @@ class SWHListerHttpTransport(abc.ABC):
|
|||
self.session = requests.Session()
|
||||
self.lister_version = __version__
|
||||
|
||||
def transport_request(self, identifier):
|
||||
"""Implements SWHListerBase.transport_request for HTTP using Requests.
|
||||
def _transport_action(self, identifier, method='get'):
|
||||
"""Permit to ask information to the api prior to actually executing
|
||||
query.
|
||||
|
||||
"""
|
||||
path = self.request_uri(identifier)
|
||||
params = self.request_params(identifier)
|
||||
|
||||
try:
|
||||
response = self.session.get(path, **params)
|
||||
if method == 'head':
|
||||
response = self.session.head(path, **params)
|
||||
else:
|
||||
response = self.session.get(path, **params)
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
raise FetchError(e)
|
||||
else:
|
||||
|
@ -115,6 +120,20 @@ class SWHListerHttpTransport(abc.ABC):
|
|||
raise FetchError(response)
|
||||
return response
|
||||
|
||||
def transport_head(self, identifier):
    """Send an HTTP HEAD request to the api for the given identifier.

    Delegates to _transport_action with method='head'.

    Args:
        identifier: value forwarded unchanged to _transport_action

    Returns:
        whatever _transport_action returns for that request

    """
    head_response = self._transport_action(identifier, method='head')
    return head_response
|
||||
|
||||
def transport_request(self, identifier):
    """Implements SWHListerBase.transport_request for HTTP using Requests.

    Sends the request for the given identifier by delegating to
    _transport_action, relying on its default method.

    Args:
        identifier: value forwarded unchanged to _transport_action

    Returns:
        whatever _transport_action returns for that request

    """
    get_response = self._transport_action(identifier)
    return get_response
|
||||
|
||||
def transport_response_to_string(self, response):
|
||||
"""Implements SWHListerBase.transport_response_to_string for HTTP given
|
||||
Requests responses.
|
||||
|
|
|
@ -57,6 +57,26 @@ class SWHPagingLister(SWHListerBase):
|
|||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
def get_pages_information(self):
    """Find the total number of pages.

    Implementation of this method depends on the server API spec
    and the shape of the network response object returned by the
    transport layer.

    For example, some APIs expose dedicated headers:
    - x-total-pages to provide the total number of pages
    - x-total to provide the total number of repositories
    - x-per-page to provide the number of elements per page

    Returns:
        tuple (total number of repositories, total number of
        pages, number of elements per page)

    """
    pass
|
||||
|
||||
# You probably don't need to override anything below this line.
|
||||
|
||||
def run(self, min_index=None, max_index=None):
|
||||
|
@ -76,6 +96,7 @@ class SWHPagingLister(SWHListerBase):
|
|||
"""
|
||||
index = min_index or ''
|
||||
loop_count = 0
|
||||
|
||||
self.min_index = min_index
|
||||
self.max_index = max_index
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ class GitLabLister(SWHPagingHttpLister):
|
|||
return False, 0
|
||||
|
||||
def get_next_target_from_response(self, response):
|
||||
"""Deal with pagination
|
||||
"""Determine the next page identifier.
|
||||
|
||||
"""
|
||||
if 'next' in response.links:
|
||||
|
@ -108,6 +108,23 @@ class GitLabLister(SWHPagingHttpLister):
|
|||
return int(self.API_URL_INDEX_RE.match(next_url).group(1))
|
||||
return None
|
||||
|
||||
def get_pages_information(self):
    """Determine pagination information from the api response headers.

    Issues a HEAD request on the first page (identifier=1) and reads
    the x-total, x-total-pages and x-per-page headers of the response.

    Returns:
        tuple (total, total_pages, per_page); each element is an int,
        or None when the corresponding header is missing.

    Raises:
        ValueError: when a header is present but is not an integer.

    """
    headers = self.transport_head(identifier=1).headers

    def header_as_int(name):
        # Try the lowercase spelling first, then the capitalized one
        # ('x-total' -> 'X-Total'), in case the headers mapping is
        # case-sensitive.
        value = headers.get(name, headers.get(name.title()))
        return None if value is None else int(value)

    return (header_as_int('x-total'),
            header_as_int('x-total-pages'),
            header_as_int('x-per-page'))
|
||||
|
||||
def transport_response_simplified(self, response):
    """Turn the JSON payload of a response into a list of models.

    Args:
        response: object whose json() method returns an iterable of
            repository entries

    Returns:
        list holding the result of get_model_from_repo for each entry,
        in the order of the payload

    """
    models = []
    for entry in response.json():
        models.append(self.get_model_from_repo(entry))
    return models
|
||||
|
|
|
@ -2,23 +2,43 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from swh.lister.core.tasks import ListerTaskBase, RangeListerTask
|
||||
import random
|
||||
|
||||
from celery import group
|
||||
|
||||
from ..core.tasks import ListerTaskBase, RangeListerTask
|
||||
from .lister import GitLabLister
|
||||
|
||||
|
||||
class GitLabDotComListerTask(ListerTaskBase):
|
||||
class GitLabListerTask(ListerTaskBase):
    """Base task in charge of building the GitLabLister used by the
    GitLab tasks.

    """
    def new_lister(self, lister_name='gitlab.com',
                   api_baseurl='https://gitlab.com/api/v4'):
        """Instantiate a GitLabLister.

        Args:
            lister_name: name given to the lister (defaults to
                'gitlab.com')
            api_baseurl: base url of the api to list (defaults to
                'https://gitlab.com/api/v4')

        Returns:
            a GitLabLister built with those two arguments

        """
        return GitLabLister(
            lister_name=lister_name, api_baseurl=api_baseurl)
|
||||
|
||||
|
||||
class RangeGitLabLister(GitLabDotComListerTask, RangeListerTask):
|
||||
class RangeGitLabLister(GitLabListerTask, RangeListerTask):
    """GitLab lister working on specified range (start, end) arguments.

    Combines GitLabListerTask, which builds the GitLabLister, with
    RangeListerTask.

    """
    # NOTE(review): presumably the name of the queue this task is routed
    # to -- confirm against the task scheduling configuration.
    task_queue = 'swh_lister_gitlab_refresh'
|
||||
|
||||
|
||||
class FullGitLabRelister(GitLabListerTask):
    """Task scheduling the listing of a whole GitLab instance.

    The task asks the api how many pages exist, splits them into
    consecutive page ranges and delegates each range to a
    RangeGitLabLister task.

    """
    task_queue = 'swh_lister_gitlab_refresh'

    # Number of pages delegated to each RangeGitLabLister task.
    nb_pages = 10

    @staticmethod
    def _split_page_ranges(total_pages, nb_pages):
        """Split [0, total_pages] into consecutive (start, end) ranges.

        Consecutive ranges share their boundary and the last range
        always ends at total_pages, so no trailing page is left out.

        Returns:
            list of (start, end) tuples; empty when total_pages is 0.

        """
        bounds = list(range(0, total_pages, nb_pages))
        if bounds:  # no page at all: nothing to schedule
            bounds.append(total_pages)
        return list(zip(bounds, bounds[1:]))

    def run_task(self, *args, **kwargs):
        """Schedule one RangeGitLabLister task per range of pages.

        Args:
            *args, **kwargs: forwarded both to new_lister and to every
                scheduled RangeGitLabLister task.

        Raises:
            ValueError: when the api does not report its number of
                pages.

        """
        lister = self.new_lister(*args, **kwargs)
        # The lister identifiers are page numbers, so the ranges must be
        # expressed in pages and not in number of repositories.
        _, total_pages, _ = lister.get_pages_information()
        if total_pages is None:
            raise ValueError(
                'Unable to determine the number of pages to list')

        ranges = self._split_page_ranges(total_pages, self.nb_pages)
        # Shuffle so that ranges are not scheduled in sequential order.
        random.shuffle(ranges)
        range_task = RangeGitLabLister()
        group(range_task.s(minv, maxv, *args, **kwargs)
              for minv, maxv in ranges)()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue