Enable black
- blackify all the python files,
- enable black in pre-commit,
- add a black tox environment.
This commit is contained in:
parent
1ae75166c7
commit
93a4d8b784
97 changed files with 1734 additions and 1642 deletions
|
@ -7,7 +7,8 @@ def register():
|
|||
from .models import GitLabModel
|
||||
from .lister import GitLabLister
|
||||
|
||||
return {'models': [GitLabModel],
|
||||
'lister': GitLabLister,
|
||||
'task_modules': ['%s.tasks' % __name__],
|
||||
}
|
||||
return {
|
||||
"models": [GitLabModel],
|
||||
"lister": GitLabLister,
|
||||
"task_modules": ["%s.tasks" % __name__],
|
||||
}
|
||||
|
|
|
@ -15,77 +15,83 @@ from requests import Response
|
|||
|
||||
class GitLabLister(PageByPageHttpLister):
|
||||
# Template path expecting an integer that represents the page id
|
||||
PATH_TEMPLATE = '/projects?page=%d&order_by=id'
|
||||
DEFAULT_URL = 'https://gitlab.com/api/v4/'
|
||||
PATH_TEMPLATE = "/projects?page=%d&order_by=id"
|
||||
DEFAULT_URL = "https://gitlab.com/api/v4/"
|
||||
MODEL = GitLabModel
|
||||
LISTER_NAME = 'gitlab'
|
||||
LISTER_NAME = "gitlab"
|
||||
|
||||
def __init__(self, url=None, instance=None,
|
||||
override_config=None, sort='asc', per_page=20):
|
||||
def __init__(
|
||||
self, url=None, instance=None, override_config=None, sort="asc", per_page=20
|
||||
):
|
||||
super().__init__(url=url, override_config=override_config)
|
||||
if instance is None:
|
||||
instance = parse_url(self.url).host
|
||||
self.instance = instance
|
||||
self.PATH_TEMPLATE = '%s&sort=%s&per_page=%s' % (
|
||||
self.PATH_TEMPLATE, sort, per_page)
|
||||
self.PATH_TEMPLATE = "%s&sort=%s&per_page=%s" % (
|
||||
self.PATH_TEMPLATE,
|
||||
sort,
|
||||
per_page,
|
||||
)
|
||||
|
||||
def uid(self, repo: Dict[str, Any]) -> str:
|
||||
return '%s/%s' % (self.instance, repo['path_with_namespace'])
|
||||
return "%s/%s" % (self.instance, repo["path_with_namespace"])
|
||||
|
||||
def get_model_from_repo(self, repo: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {
|
||||
'instance': self.instance,
|
||||
'uid': self.uid(repo),
|
||||
'name': repo['name'],
|
||||
'full_name': repo['path_with_namespace'],
|
||||
'html_url': repo['web_url'],
|
||||
'origin_url': repo['http_url_to_repo'],
|
||||
'origin_type': 'git',
|
||||
"instance": self.instance,
|
||||
"uid": self.uid(repo),
|
||||
"name": repo["name"],
|
||||
"full_name": repo["path_with_namespace"],
|
||||
"html_url": repo["web_url"],
|
||||
"origin_url": repo["http_url_to_repo"],
|
||||
"origin_type": "git",
|
||||
}
|
||||
|
||||
def transport_quota_check(self, response: Response
|
||||
) -> Tuple[bool, Union[int, float]]:
|
||||
def transport_quota_check(
|
||||
self, response: Response
|
||||
) -> Tuple[bool, Union[int, float]]:
|
||||
"""Deal with rate limit if any.
|
||||
|
||||
"""
|
||||
# not all GitLab instances have a rate limit
|
||||
if 'RateLimit-Remaining' in response.headers:
|
||||
reqs_remaining = int(response.headers['RateLimit-Remaining'])
|
||||
if "RateLimit-Remaining" in response.headers:
|
||||
reqs_remaining = int(response.headers["RateLimit-Remaining"])
|
||||
if response.status_code == 403 and reqs_remaining == 0:
|
||||
reset_at = int(response.headers['RateLimit-Reset'])
|
||||
reset_at = int(response.headers["RateLimit-Reset"])
|
||||
delay = min(reset_at - time.time(), 3600)
|
||||
return True, delay
|
||||
return False, 0
|
||||
|
||||
def _get_int(self, headers: MutableMapping[str, Any],
|
||||
key: str) -> Optional[int]:
|
||||
def _get_int(self, headers: MutableMapping[str, Any], key: str) -> Optional[int]:
|
||||
_val = headers.get(key)
|
||||
if _val:
|
||||
return int(_val)
|
||||
return None
|
||||
|
||||
def get_next_target_from_response(
|
||||
self, response: Response) -> Optional[int]:
|
||||
def get_next_target_from_response(self, response: Response) -> Optional[int]:
|
||||
"""Determine the next page identifier.
|
||||
|
||||
"""
|
||||
return self._get_int(response.headers, 'x-next-page')
|
||||
return self._get_int(response.headers, "x-next-page")
|
||||
|
||||
def get_pages_information(self) -> Tuple[Optional[int],
|
||||
Optional[int], Optional[int]]:
|
||||
def get_pages_information(
|
||||
self,
|
||||
) -> Tuple[Optional[int], Optional[int], Optional[int]]:
|
||||
"""Determine pages information.
|
||||
|
||||
"""
|
||||
response = self.transport_head(identifier=1) # type: ignore
|
||||
if not response.ok:
|
||||
raise ValueError(
|
||||
'Problem during information fetch: %s' % response.status_code)
|
||||
"Problem during information fetch: %s" % response.status_code
|
||||
)
|
||||
h = response.headers
|
||||
return (self._get_int(h, 'x-total'),
|
||||
self._get_int(h, 'x-total-pages'),
|
||||
self._get_int(h, 'x-per-page'))
|
||||
return (
|
||||
self._get_int(h, "x-total"),
|
||||
self._get_int(h, "x-total-pages"),
|
||||
self._get_int(h, "x-per-page"),
|
||||
)
|
||||
|
||||
def transport_response_simplified(self, response: Response
|
||||
) -> List[Dict[str, Any]]:
|
||||
def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
|
||||
repos = response.json()
|
||||
return [self.get_model_from_repo(repo) for repo in repos]
|
||||
|
|
|
@ -11,7 +11,8 @@ class GitLabModel(ModelBase):
|
|||
"""a Gitlab repository from a gitlab instance
|
||||
|
||||
"""
|
||||
__tablename__ = 'gitlab_repo'
|
||||
|
||||
__tablename__ = "gitlab_repo"
|
||||
|
||||
uid = Column(String, primary_key=True)
|
||||
instance = Column(String, index=True)
|
||||
|
|
|
@ -13,40 +13,41 @@ from .lister import GitLabLister
|
|||
NBPAGES = 10
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.IncrementalGitLabLister')
|
||||
@shared_task(name=__name__ + ".IncrementalGitLabLister")
|
||||
def list_gitlab_incremental(**lister_args):
|
||||
"""Incremental update of a GitLab instance"""
|
||||
lister_args['sort'] = 'desc'
|
||||
lister_args["sort"] = "desc"
|
||||
lister = GitLabLister(**lister_args)
|
||||
total_pages = lister.get_pages_information()[1]
|
||||
# stopping as soon as existing origins for that instance are detected
|
||||
return lister.run(min_bound=1, max_bound=total_pages, check_existence=True)
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.RangeGitLabLister')
|
||||
@shared_task(name=__name__ + ".RangeGitLabLister")
|
||||
def _range_gitlab_lister(start, end, **lister_args):
|
||||
lister = GitLabLister(**lister_args)
|
||||
return lister.run(min_bound=start, max_bound=end)
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.FullGitLabRelister', bind=True)
|
||||
@shared_task(name=__name__ + ".FullGitLabRelister", bind=True)
|
||||
def list_gitlab_full(self, **lister_args):
|
||||
"""Full update of a GitLab instance"""
|
||||
lister = GitLabLister(**lister_args)
|
||||
_, total_pages, _ = lister.get_pages_information()
|
||||
ranges = list(utils.split_range(total_pages, NBPAGES))
|
||||
random.shuffle(ranges)
|
||||
promise = group(_range_gitlab_lister.s(minv, maxv, **lister_args)
|
||||
for minv, maxv in ranges)()
|
||||
self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
|
||||
promise = group(
|
||||
_range_gitlab_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
|
||||
)()
|
||||
self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges)))
|
||||
try:
|
||||
promise.save()
|
||||
except (NotImplementedError, AttributeError):
|
||||
self.log.info('Unable to call save_group with current result backend.')
|
||||
self.log.info("Unable to call save_group with current result backend.")
|
||||
# FIXME: what to do in terms of return here?
|
||||
return promise.id
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.ping')
|
||||
@shared_task(name=__name__ + ".ping")
|
||||
def _ping():
|
||||
return 'OK'
|
||||
return "OK"
|
||||
|
|
|
@ -17,50 +17,50 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
class GitLabListerTester(HttpListerTesterBase, unittest.TestCase):
|
||||
Lister = GitLabLister
|
||||
test_re = re.compile(r'^.*/projects.*page=(\d+).*')
|
||||
lister_subdir = 'gitlab'
|
||||
good_api_response_file = 'data/gitlab.com/api_response.json'
|
||||
bad_api_response_file = 'data/gitlab.com/api_empty_response.json'
|
||||
test_re = re.compile(r"^.*/projects.*page=(\d+).*")
|
||||
lister_subdir = "gitlab"
|
||||
good_api_response_file = "data/gitlab.com/api_response.json"
|
||||
bad_api_response_file = "data/gitlab.com/api_empty_response.json"
|
||||
first_index = 1
|
||||
entries_per_page = 10
|
||||
convert_type = int
|
||||
|
||||
def response_headers(self, request):
|
||||
headers = {'RateLimit-Remaining': '1'}
|
||||
headers = {"RateLimit-Remaining": "1"}
|
||||
if self.request_index(request) == self.first_index:
|
||||
headers.update({
|
||||
'x-next-page': '3',
|
||||
})
|
||||
headers.update(
|
||||
{"x-next-page": "3",}
|
||||
)
|
||||
|
||||
return headers
|
||||
|
||||
def mock_rate_quota(self, n, request, context):
|
||||
self.rate_limit += 1
|
||||
context.status_code = 403
|
||||
context.headers['RateLimit-Remaining'] = '0'
|
||||
context.headers["RateLimit-Remaining"] = "0"
|
||||
one_second = int((datetime.now() + timedelta(seconds=1.5)).timestamp())
|
||||
context.headers['RateLimit-Reset'] = str(one_second)
|
||||
context.headers["RateLimit-Reset"] = str(one_second)
|
||||
return '{"error":"dummy"}'
|
||||
|
||||
|
||||
def test_lister_gitlab(swh_listers, requests_mock_datadir):
|
||||
lister = swh_listers['gitlab']
|
||||
lister = swh_listers["gitlab"]
|
||||
|
||||
lister.run()
|
||||
|
||||
r = lister.scheduler.search_tasks(task_type='load-git')
|
||||
r = lister.scheduler.search_tasks(task_type="load-git")
|
||||
assert len(r) == 10
|
||||
|
||||
for row in r:
|
||||
assert row['type'] == 'load-git'
|
||||
assert row["type"] == "load-git"
|
||||
# arguments check
|
||||
args = row['arguments']['args']
|
||||
args = row["arguments"]["args"]
|
||||
assert len(args) == 0
|
||||
|
||||
# kwargs
|
||||
kwargs = row['arguments']['kwargs']
|
||||
url = kwargs['url']
|
||||
assert url.startswith('https://gitlab.com')
|
||||
kwargs = row["arguments"]["kwargs"]
|
||||
url = kwargs["url"]
|
||||
assert url.startswith("https://gitlab.com")
|
||||
|
||||
assert row['policy'] == 'recurring'
|
||||
assert row['priority'] is None
|
||||
assert row["policy"] == "recurring"
|
||||
assert row["priority"] is None
|
||||
|
|
|
@ -5,43 +5,40 @@ from unittest.mock import patch
|
|||
|
||||
|
||||
def test_ping(swh_app, celery_session_worker):
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.gitlab.tasks.ping')
|
||||
res = swh_app.send_task("swh.lister.gitlab.tasks.ping")
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
assert res.result == 'OK'
|
||||
assert res.result == "OK"
|
||||
|
||||
|
||||
@patch('swh.lister.gitlab.tasks.GitLabLister')
|
||||
@patch("swh.lister.gitlab.tasks.GitLabLister")
|
||||
def test_incremental(lister, swh_app, celery_session_worker):
|
||||
# setup the mocked GitlabLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
lister.get_pages_information.return_value = (None, 10, None)
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.gitlab.tasks.IncrementalGitLabLister')
|
||||
res = swh_app.send_task("swh.lister.gitlab.tasks.IncrementalGitLabLister")
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(sort='desc')
|
||||
lister.assert_called_once_with(sort="desc")
|
||||
lister.db_last_index.assert_not_called()
|
||||
lister.get_pages_information.assert_called_once_with()
|
||||
lister.run.assert_called_once_with(
|
||||
min_bound=1, max_bound=10, check_existence=True)
|
||||
lister.run.assert_called_once_with(min_bound=1, max_bound=10, check_existence=True)
|
||||
|
||||
|
||||
@patch('swh.lister.gitlab.tasks.GitLabLister')
|
||||
@patch("swh.lister.gitlab.tasks.GitLabLister")
|
||||
def test_range(lister, swh_app, celery_session_worker):
|
||||
# setup the mocked GitlabLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.gitlab.tasks.RangeGitLabLister',
|
||||
kwargs=dict(start=12, end=42))
|
||||
"swh.lister.gitlab.tasks.RangeGitLabLister", kwargs=dict(start=12, end=42)
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
|
@ -51,17 +48,17 @@ def test_range(lister, swh_app, celery_session_worker):
|
|||
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
|
||||
|
||||
|
||||
@patch('swh.lister.gitlab.tasks.GitLabLister')
|
||||
@patch("swh.lister.gitlab.tasks.GitLabLister")
|
||||
def test_relister(lister, swh_app, celery_session_worker):
|
||||
# setup the mocked GitlabLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
lister.get_pages_information.return_value = (None, 85, None)
|
||||
lister.db_partition_indices.return_value = [
|
||||
(i, i+9) for i in range(0, 80, 10)] + [(80, 85)]
|
||||
(i, i + 9) for i in range(0, 80, 10)
|
||||
] + [(80, 85)]
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.gitlab.tasks.FullGitLabRelister')
|
||||
res = swh_app.send_task("swh.lister.gitlab.tasks.FullGitLabRelister")
|
||||
assert res
|
||||
|
||||
res.wait()
|
||||
|
@ -90,24 +87,26 @@ def test_relister(lister, swh_app, celery_session_worker):
|
|||
# lister.run should have been called once per partition interval
|
||||
for i in range(8):
|
||||
# XXX inconsistent behavior: max_bound is EXCLUDED here
|
||||
assert (dict(min_bound=10*i, max_bound=10*i + 10),) \
|
||||
in lister.run.call_args_list
|
||||
assert (dict(min_bound=80, max_bound=85),) \
|
||||
in lister.run.call_args_list
|
||||
assert (
|
||||
dict(min_bound=10 * i, max_bound=10 * i + 10),
|
||||
) in lister.run.call_args_list
|
||||
assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list
|
||||
|
||||
|
||||
@patch('swh.lister.gitlab.tasks.GitLabLister')
|
||||
@patch("swh.lister.gitlab.tasks.GitLabLister")
|
||||
def test_relister_instance(lister, swh_app, celery_session_worker):
|
||||
# setup the mocked GitlabLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
lister.get_pages_information.return_value = (None, 85, None)
|
||||
lister.db_partition_indices.return_value = [
|
||||
(i, i+9) for i in range(0, 80, 10)] + [(80, 85)]
|
||||
(i, i + 9) for i in range(0, 80, 10)
|
||||
] + [(80, 85)]
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.gitlab.tasks.FullGitLabRelister',
|
||||
kwargs=dict(url='https://0xacab.org/api/v4'))
|
||||
"swh.lister.gitlab.tasks.FullGitLabRelister",
|
||||
kwargs=dict(url="https://0xacab.org/api/v4"),
|
||||
)
|
||||
assert res
|
||||
|
||||
res.wait()
|
||||
|
@ -123,7 +122,7 @@ def test_relister_instance(lister, swh_app, celery_session_worker):
|
|||
break
|
||||
sleep(1)
|
||||
|
||||
lister.assert_called_with(url='https://0xacab.org/api/v4')
|
||||
lister.assert_called_with(url="https://0xacab.org/api/v4")
|
||||
|
||||
# one by the FullGitlabRelister task
|
||||
# + 9 for the RangeGitlabLister subtasks
|
||||
|
@ -136,7 +135,7 @@ def test_relister_instance(lister, swh_app, celery_session_worker):
|
|||
# lister.run should have been called once per partition interval
|
||||
for i in range(8):
|
||||
# XXX inconsistent behavior: max_bound is EXCLUDED here
|
||||
assert (dict(min_bound=10*i, max_bound=10*i + 10),) \
|
||||
in lister.run.call_args_list
|
||||
assert (dict(min_bound=80, max_bound=85),) \
|
||||
in lister.run.call_args_list
|
||||
assert (
|
||||
dict(min_bound=10 * i, max_bound=10 * i + 10),
|
||||
) in lister.run.call_args_list
|
||||
assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue