swh.lister.gitlab: Bootstrap gitlab lister

Related T989
This commit is contained in:
Antoine R. Dumont (@ardumont) 2018-06-27 14:43:31 +02:00
parent c350434127
commit 0f9ba7bd9b
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
4 changed files with 103 additions and 0 deletions

View file

View file

@ -0,0 +1,55 @@
# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import time
from ..core.indexing_lister import SWHIndexingHttpLister
from .models import GitlabModel
class GitlabLister(SWHIndexingHttpLister):
    """Indexing HTTP lister for the public projects of a GitLab instance.

    Projects are requested page by page through ``PATH_TEMPLATE``; each
    project returned by the API is converted into the dict expected by
    ``MODEL`` (see ``get_model_from_repo``).
    """
    # Request path of one page; the %d placeholder receives the last
    # project id already seen, so the page starts after it.
    PATH_TEMPLATE = '/projects?since=%d&visibility=public'
    # base orm model
    MODEL = GitlabModel
    # Extracts the `since` index back out of a url built from PATH_TEMPLATE.
    API_URL_INDEX_RE = re.compile(
        r'^.*/projects\?since=(\d+)&visibility=public')

    def get_model_from_repo(self, repo):
        """Map one project dict from the API to the model's fields.

        Args:
            repo (dict): one decoded project entry; must provide the keys
                'id', 'name', 'path_with_namespace', 'web_url' and
                'description'.

        Returns:
            dict: values keyed by model field name.

        Raises:
            KeyError: if one of the expected keys is missing from `repo`.
        """
        return {
            'uid': repo['id'],
            'indexable': repo['id'],
            'name': repo['name'],
            'full_name': repo['path_with_namespace'],
            'html_url': repo['web_url'],
            'origin_url': repo['web_url'],
            'origin_type': 'git',
            'description': repo['description'],
            # FIXME: How to determine the fork nature?
            'fork': False,
        }

    def transport_quota_check(self, response):
        """Deal with ratelimit

        Args:
            response: HTTP response exposing `status_code` and `headers`.

        Returns:
            tuple: `(must_retry, delay)`. `must_retry` is True only for a
            403 response whose 'RateLimit-Remaining' header is 0; `delay`
            is then the number of seconds until 'RateLimit-Reset', kept
            within [0, 3600]. Otherwise `(False, 0)`.
        """
        headers = response.headers
        # Not every response carries rate-limit headers; without them
        # there is no quota information to act upon.
        if 'RateLimit-Remaining' not in headers:
            return False, 0

        reqs_remaining = int(headers['RateLimit-Remaining'])
        if response.status_code == 403 and reqs_remaining == 0:
            reset_at = int(headers['RateLimit-Reset'])
            # A reset time already in the past would give a negative
            # delay, hence the lower bound; the upper bound caps the wait
            # to one hour.
            delay = min(max(reset_at - time.time(), 0), 3600)
            return True, delay
        return False, 0

    def get_next_target_from_response(self, response):
        """Deal with pagination

        Args:
            response: HTTP response exposing the parsed `links` mapping.

        Returns:
            int or None: the index captured from the 'next' link url by
            `API_URL_INDEX_RE`, or None when the response has no 'next'
            link.
        """
        if 'next' in response.links:
            next_url = response.links['next']['url']
            # NOTE(review): assumes the 'next' url always matches
            # API_URL_INDEX_RE; a non-matching url raises AttributeError.
            return int(self.API_URL_INDEX_RE.match(next_url).group(1))
        return None

    def transport_response_simplified(self, response):
        """Decode the JSON body and convert every project it lists.

        Args:
            response: HTTP response whose JSON body is a list of projects.

        Returns:
            list: one model dict per project, in response order.
        """
        repos = response.json()
        return [self.get_model_from_repo(repo) for repo in repos]

View file

@ -0,0 +1,20 @@
# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, Boolean, Integer
from ..core.models import ModelBase
class GitlabModel(ModelBase):
    """ORM model of one GitLab repository ('main_gitlab_repos' table)."""
    __tablename__ = 'main_gitlab_repos'

    # Columns declared here, on top of whatever ModelBase provides.
    uid = Column(Integer, primary_key=True)
    indexable = Column(Integer, index=True)
    fork = Column(Boolean)

    def __init__(self, *args, **kwargs):
        """Set `fork` (default False) and hand the rest to the parent.

        The 'fork' keyword is removed from `kwargs` before the parent
        constructor is called.
        """
        is_fork = kwargs.pop('fork', False)
        self.fork = is_fork
        super().__init__(*args, **kwargs)

View file

@ -0,0 +1,28 @@
# Copyright (C) 2018 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.tasks import (IndexingDiscoveryListerTask,
IndexingRangeListerTask,
IndexingRefreshListerTask, ListerTaskBase)
from .lister import GitlabLister
class GitlabDotComListerTask(ListerTaskBase):
    """Lister task base whose lister targets gitlab.com."""

    def new_lister(self):
        """Build a GitlabLister bound to the gitlab.com v4 API."""
        lister_kwargs = {
            'lister_name': 'gitlab.com',
            'api_baseurl': 'https://gitlab.com/api/v4',
        }
        return GitlabLister(**lister_kwargs)
class IncrementalGitlabDotComLister(
        GitlabDotComListerTask, IndexingDiscoveryListerTask):
    """Discovery lister task for gitlab.com.

    Combines the gitlab.com lister factory with
    IndexingDiscoveryListerTask and runs on the
    'swh_lister_gitlab_discover' queue.
    """
    task_queue = 'swh_lister_gitlab_discover'
class RangeGitlabLister(
        GitlabDotComListerTask, IndexingRangeListerTask):
    """Range lister task for gitlab.com.

    Combines the gitlab.com lister factory with IndexingRangeListerTask
    and runs on the 'swh_lister_gitlab_refresh' queue.
    """
    task_queue = 'swh_lister_gitlab_refresh'
class FullGitlabRelister(
        GitlabDotComListerTask, IndexingRefreshListerTask):
    """Refresh lister task for gitlab.com.

    Combines the gitlab.com lister factory with IndexingRefreshListerTask
    and runs on the 'swh_lister_gitlab_refresh' queue.
    """
    task_queue = 'swh_lister_gitlab_refresh'