Enable black

- blackify all the Python files,
- enable black in pre-commit,
- add a black tox environment.
This commit is contained in:
David Douard 2020-04-08 16:31:22 +02:00
parent 1ae75166c7
commit 93a4d8b784
97 changed files with 1734 additions and 1642 deletions

View file

@@ -7,7 +7,8 @@ def register():
from .models import GitLabModel
from .lister import GitLabLister
return {'models': [GitLabModel],
'lister': GitLabLister,
'task_modules': ['%s.tasks' % __name__],
}
return {
"models": [GitLabModel],
"lister": GitLabLister,
"task_modules": ["%s.tasks" % __name__],
}

View file

@@ -15,77 +15,83 @@ from requests import Response
class GitLabLister(PageByPageHttpLister):
# Template path expecting an integer that represents the page id
PATH_TEMPLATE = '/projects?page=%d&order_by=id'
DEFAULT_URL = 'https://gitlab.com/api/v4/'
PATH_TEMPLATE = "/projects?page=%d&order_by=id"
DEFAULT_URL = "https://gitlab.com/api/v4/"
MODEL = GitLabModel
LISTER_NAME = 'gitlab'
LISTER_NAME = "gitlab"
def __init__(self, url=None, instance=None,
override_config=None, sort='asc', per_page=20):
def __init__(
self, url=None, instance=None, override_config=None, sort="asc", per_page=20
):
super().__init__(url=url, override_config=override_config)
if instance is None:
instance = parse_url(self.url).host
self.instance = instance
self.PATH_TEMPLATE = '%s&sort=%s&per_page=%s' % (
self.PATH_TEMPLATE, sort, per_page)
self.PATH_TEMPLATE = "%s&sort=%s&per_page=%s" % (
self.PATH_TEMPLATE,
sort,
per_page,
)
def uid(self, repo: Dict[str, Any]) -> str:
return '%s/%s' % (self.instance, repo['path_with_namespace'])
return "%s/%s" % (self.instance, repo["path_with_namespace"])
def get_model_from_repo(self, repo: Dict[str, Any]) -> Dict[str, Any]:
return {
'instance': self.instance,
'uid': self.uid(repo),
'name': repo['name'],
'full_name': repo['path_with_namespace'],
'html_url': repo['web_url'],
'origin_url': repo['http_url_to_repo'],
'origin_type': 'git',
"instance": self.instance,
"uid": self.uid(repo),
"name": repo["name"],
"full_name": repo["path_with_namespace"],
"html_url": repo["web_url"],
"origin_url": repo["http_url_to_repo"],
"origin_type": "git",
}
def transport_quota_check(self, response: Response
) -> Tuple[bool, Union[int, float]]:
def transport_quota_check(
self, response: Response
) -> Tuple[bool, Union[int, float]]:
"""Deal with rate limit if any.
"""
# not all gitlab instance have rate limit
if 'RateLimit-Remaining' in response.headers:
reqs_remaining = int(response.headers['RateLimit-Remaining'])
if "RateLimit-Remaining" in response.headers:
reqs_remaining = int(response.headers["RateLimit-Remaining"])
if response.status_code == 403 and reqs_remaining == 0:
reset_at = int(response.headers['RateLimit-Reset'])
reset_at = int(response.headers["RateLimit-Reset"])
delay = min(reset_at - time.time(), 3600)
return True, delay
return False, 0
def _get_int(self, headers: MutableMapping[str, Any],
key: str) -> Optional[int]:
def _get_int(self, headers: MutableMapping[str, Any], key: str) -> Optional[int]:
_val = headers.get(key)
if _val:
return int(_val)
return None
def get_next_target_from_response(
self, response: Response) -> Optional[int]:
def get_next_target_from_response(self, response: Response) -> Optional[int]:
"""Determine the next page identifier.
"""
return self._get_int(response.headers, 'x-next-page')
return self._get_int(response.headers, "x-next-page")
def get_pages_information(self) -> Tuple[Optional[int],
Optional[int], Optional[int]]:
def get_pages_information(
self,
) -> Tuple[Optional[int], Optional[int], Optional[int]]:
"""Determine pages information.
"""
response = self.transport_head(identifier=1) # type: ignore
if not response.ok:
raise ValueError(
'Problem during information fetch: %s' % response.status_code)
"Problem during information fetch: %s" % response.status_code
)
h = response.headers
return (self._get_int(h, 'x-total'),
self._get_int(h, 'x-total-pages'),
self._get_int(h, 'x-per-page'))
return (
self._get_int(h, "x-total"),
self._get_int(h, "x-total-pages"),
self._get_int(h, "x-per-page"),
)
def transport_response_simplified(self, response: Response
) -> List[Dict[str, Any]]:
def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
repos = response.json()
return [self.get_model_from_repo(repo) for repo in repos]

View file

@@ -11,7 +11,8 @@ class GitLabModel(ModelBase):
"""a Gitlab repository from a gitlab instance
"""
__tablename__ = 'gitlab_repo'
__tablename__ = "gitlab_repo"
uid = Column(String, primary_key=True)
instance = Column(String, index=True)

View file

@@ -13,40 +13,41 @@ from .lister import GitLabLister
NBPAGES = 10
@shared_task(name=__name__ + '.IncrementalGitLabLister')
@shared_task(name=__name__ + ".IncrementalGitLabLister")
def list_gitlab_incremental(**lister_args):
"""Incremental update of a GitLab instance"""
lister_args['sort'] = 'desc'
lister_args["sort"] = "desc"
lister = GitLabLister(**lister_args)
total_pages = lister.get_pages_information()[1]
# stopping as soon as existing origins for that instance are detected
return lister.run(min_bound=1, max_bound=total_pages, check_existence=True)
@shared_task(name=__name__ + '.RangeGitLabLister')
@shared_task(name=__name__ + ".RangeGitLabLister")
def _range_gitlab_lister(start, end, **lister_args):
lister = GitLabLister(**lister_args)
return lister.run(min_bound=start, max_bound=end)
@shared_task(name=__name__ + '.FullGitLabRelister', bind=True)
@shared_task(name=__name__ + ".FullGitLabRelister", bind=True)
def list_gitlab_full(self, **lister_args):
"""Full update of a GitLab instance"""
lister = GitLabLister(**lister_args)
_, total_pages, _ = lister.get_pages_information()
ranges = list(utils.split_range(total_pages, NBPAGES))
random.shuffle(ranges)
promise = group(_range_gitlab_lister.s(minv, maxv, **lister_args)
for minv, maxv in ranges)()
self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
promise = group(
_range_gitlab_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
)()
self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges)))
try:
promise.save()
except (NotImplementedError, AttributeError):
self.log.info('Unable to call save_group with current result backend.')
self.log.info("Unable to call save_group with current result backend.")
# FIXME: what to do in terms of return here?
return promise.id
@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
return 'OK'
return "OK"

View file

@@ -17,50 +17,50 @@ logger = logging.getLogger(__name__)
class GitLabListerTester(HttpListerTesterBase, unittest.TestCase):
Lister = GitLabLister
test_re = re.compile(r'^.*/projects.*page=(\d+).*')
lister_subdir = 'gitlab'
good_api_response_file = 'data/gitlab.com/api_response.json'
bad_api_response_file = 'data/gitlab.com/api_empty_response.json'
test_re = re.compile(r"^.*/projects.*page=(\d+).*")
lister_subdir = "gitlab"
good_api_response_file = "data/gitlab.com/api_response.json"
bad_api_response_file = "data/gitlab.com/api_empty_response.json"
first_index = 1
entries_per_page = 10
convert_type = int
def response_headers(self, request):
headers = {'RateLimit-Remaining': '1'}
headers = {"RateLimit-Remaining": "1"}
if self.request_index(request) == self.first_index:
headers.update({
'x-next-page': '3',
})
headers.update(
{"x-next-page": "3",}
)
return headers
def mock_rate_quota(self, n, request, context):
self.rate_limit += 1
context.status_code = 403
context.headers['RateLimit-Remaining'] = '0'
context.headers["RateLimit-Remaining"] = "0"
one_second = int((datetime.now() + timedelta(seconds=1.5)).timestamp())
context.headers['RateLimit-Reset'] = str(one_second)
context.headers["RateLimit-Reset"] = str(one_second)
return '{"error":"dummy"}'
def test_lister_gitlab(swh_listers, requests_mock_datadir):
lister = swh_listers['gitlab']
lister = swh_listers["gitlab"]
lister.run()
r = lister.scheduler.search_tasks(task_type='load-git')
r = lister.scheduler.search_tasks(task_type="load-git")
assert len(r) == 10
for row in r:
assert row['type'] == 'load-git'
assert row["type"] == "load-git"
# arguments check
args = row['arguments']['args']
args = row["arguments"]["args"]
assert len(args) == 0
# kwargs
kwargs = row['arguments']['kwargs']
url = kwargs['url']
assert url.startswith('https://gitlab.com')
kwargs = row["arguments"]["kwargs"]
url = kwargs["url"]
assert url.startswith("https://gitlab.com")
assert row['policy'] == 'recurring'
assert row['priority'] is None
assert row["policy"] == "recurring"
assert row["priority"] is None

View file

@@ -5,43 +5,40 @@ from unittest.mock import patch
def test_ping(swh_app, celery_session_worker):
res = swh_app.send_task(
'swh.lister.gitlab.tasks.ping')
res = swh_app.send_task("swh.lister.gitlab.tasks.ping")
assert res
res.wait()
assert res.successful()
assert res.result == 'OK'
assert res.result == "OK"
@patch('swh.lister.gitlab.tasks.GitLabLister')
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_incremental(lister, swh_app, celery_session_worker):
# setup the mocked GitlabLister
lister.return_value = lister
lister.run.return_value = None
lister.get_pages_information.return_value = (None, 10, None)
res = swh_app.send_task(
'swh.lister.gitlab.tasks.IncrementalGitLabLister')
res = swh_app.send_task("swh.lister.gitlab.tasks.IncrementalGitLabLister")
assert res
res.wait()
assert res.successful()
lister.assert_called_once_with(sort='desc')
lister.assert_called_once_with(sort="desc")
lister.db_last_index.assert_not_called()
lister.get_pages_information.assert_called_once_with()
lister.run.assert_called_once_with(
min_bound=1, max_bound=10, check_existence=True)
lister.run.assert_called_once_with(min_bound=1, max_bound=10, check_existence=True)
@patch('swh.lister.gitlab.tasks.GitLabLister')
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_range(lister, swh_app, celery_session_worker):
# setup the mocked GitlabLister
lister.return_value = lister
lister.run.return_value = None
res = swh_app.send_task(
'swh.lister.gitlab.tasks.RangeGitLabLister',
kwargs=dict(start=12, end=42))
"swh.lister.gitlab.tasks.RangeGitLabLister", kwargs=dict(start=12, end=42)
)
assert res
res.wait()
assert res.successful()
@@ -51,17 +48,17 @@ def test_range(lister, swh_app, celery_session_worker):
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@patch('swh.lister.gitlab.tasks.GitLabLister')
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_relister(lister, swh_app, celery_session_worker):
# setup the mocked GitlabLister
lister.return_value = lister
lister.run.return_value = None
lister.get_pages_information.return_value = (None, 85, None)
lister.db_partition_indices.return_value = [
(i, i+9) for i in range(0, 80, 10)] + [(80, 85)]
(i, i + 9) for i in range(0, 80, 10)
] + [(80, 85)]
res = swh_app.send_task(
'swh.lister.gitlab.tasks.FullGitLabRelister')
res = swh_app.send_task("swh.lister.gitlab.tasks.FullGitLabRelister")
assert res
res.wait()
@@ -90,24 +87,26 @@ def test_relister(lister, swh_app, celery_session_worker):
# lister.run should have been called once per partition interval
for i in range(8):
# XXX inconsistent behavior: max_bound is EXCLUDED here
assert (dict(min_bound=10*i, max_bound=10*i + 10),) \
in lister.run.call_args_list
assert (dict(min_bound=80, max_bound=85),) \
in lister.run.call_args_list
assert (
dict(min_bound=10 * i, max_bound=10 * i + 10),
) in lister.run.call_args_list
assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list
@patch('swh.lister.gitlab.tasks.GitLabLister')
@patch("swh.lister.gitlab.tasks.GitLabLister")
def test_relister_instance(lister, swh_app, celery_session_worker):
# setup the mocked GitlabLister
lister.return_value = lister
lister.run.return_value = None
lister.get_pages_information.return_value = (None, 85, None)
lister.db_partition_indices.return_value = [
(i, i+9) for i in range(0, 80, 10)] + [(80, 85)]
(i, i + 9) for i in range(0, 80, 10)
] + [(80, 85)]
res = swh_app.send_task(
'swh.lister.gitlab.tasks.FullGitLabRelister',
kwargs=dict(url='https://0xacab.org/api/v4'))
"swh.lister.gitlab.tasks.FullGitLabRelister",
kwargs=dict(url="https://0xacab.org/api/v4"),
)
assert res
res.wait()
@@ -123,7 +122,7 @@ def test_relister_instance(lister, swh_app, celery_session_worker):
break
sleep(1)
lister.assert_called_with(url='https://0xacab.org/api/v4')
lister.assert_called_with(url="https://0xacab.org/api/v4")
# one by the FullGitlabRelister task
# + 9 for the RangeGitlabLister subtasks
@@ -136,7 +135,7 @@ def test_relister_instance(lister, swh_app, celery_session_worker):
# lister.run should have been called once per partition interval
for i in range(8):
# XXX inconsistent behavior: max_bound is EXCLUDED here
assert (dict(min_bound=10*i, max_bound=10*i + 10),) \
in lister.run.call_args_list
assert (dict(min_bound=80, max_bound=85),) \
in lister.run.call_args_list
assert (
dict(min_bound=10 * i, max_bound=10 * i + 10),
) in lister.run.call_args_list
assert (dict(min_bound=80, max_bound=85),) in lister.run.call_args_list