Enable black

- blackify all the python files,
- enable black in pre-commit,
- add a black tox environment.
This commit is contained in:
David Douard 2020-04-08 16:31:22 +02:00
parent 1ae75166c7
commit 93a4d8b784
97 changed files with 1734 additions and 1642 deletions

View file

@ -7,7 +7,8 @@ def register():
from .models import GitHubModel
from .lister import GitHubLister
return {'models': [GitHubModel],
'lister': GitHubLister,
'task_modules': ['%s.tasks' % __name__],
}
return {
"models": [GitHubModel],
"lister": GitHubLister,
"task_modules": ["%s.tasks" % __name__],
}

View file

@ -14,60 +14,57 @@ from requests import Response
class GitHubLister(IndexingHttpLister):
PATH_TEMPLATE = '/repositories?since=%d'
PATH_TEMPLATE = "/repositories?since=%d"
MODEL = GitHubModel
DEFAULT_URL = 'https://api.github.com'
API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)')
LISTER_NAME = 'github'
instance = 'github' # There is only 1 instance of such lister
DEFAULT_URL = "https://api.github.com"
API_URL_INDEX_RE = re.compile(r"^.*/repositories\?since=(\d+)")
LISTER_NAME = "github"
instance = "github" # There is only 1 instance of such lister
default_min_bound = 0 # type: Any
def get_model_from_repo(self, repo: Dict[str, Any]) -> Dict[str, Any]:
return {
'uid': repo['id'],
'indexable': repo['id'],
'name': repo['name'],
'full_name': repo['full_name'],
'html_url': repo['html_url'],
'origin_url': repo['html_url'],
'origin_type': 'git',
'fork': repo['fork'],
"uid": repo["id"],
"indexable": repo["id"],
"name": repo["name"],
"full_name": repo["full_name"],
"html_url": repo["html_url"],
"origin_url": repo["html_url"],
"origin_type": "git",
"fork": repo["fork"],
}
def transport_quota_check(self, response: Response) -> Tuple[bool, int]:
x_rate_limit_remaining = response.headers.get('X-RateLimit-Remaining')
x_rate_limit_remaining = response.headers.get("X-RateLimit-Remaining")
if not x_rate_limit_remaining:
return False, 0
reqs_remaining = int(x_rate_limit_remaining)
if response.status_code == 403 and reqs_remaining == 0:
delay = int(response.headers['Retry-After'])
delay = int(response.headers["Retry-After"])
return True, delay
return False, 0
def get_next_target_from_response(self,
response: Response) -> Optional[int]:
if 'next' in response.links:
next_url = response.links['next']['url']
return int(
self.API_URL_INDEX_RE.match(next_url).group(1)) # type: ignore
def get_next_target_from_response(self, response: Response) -> Optional[int]:
if "next" in response.links:
next_url = response.links["next"]["url"]
return int(self.API_URL_INDEX_RE.match(next_url).group(1)) # type: ignore
return None
def transport_response_simplified(self, response: Response
) -> List[Dict[str, Any]]:
def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
repos = response.json()
return [self.get_model_from_repo(repo)
for repo in repos if repo and 'id' in repo]
return [
self.get_model_from_repo(repo) for repo in repos if repo and "id" in repo
]
def request_headers(self) -> Dict[str, Any]:
"""(Override) Set requests headers to send when querying the GitHub API
"""
headers = super().request_headers()
headers['Accept'] = 'application/vnd.github.v3+json'
headers["Accept"] = "application/vnd.github.v3+json"
return headers
def disable_deleted_repo_tasks(self, index: int,
next_index: int, keep_these: int):
def disable_deleted_repo_tasks(self, index: int, next_index: int, keep_these: int):
""" (Overrides) Fix provided index value to avoid erroneously disabling
some scheduler tasks
"""
@ -75,5 +72,4 @@ class GitHubLister(IndexingHttpLister):
# parameter, so increment the index to avoid disabling the latest
# created task when processing a new repositories page returned by
# the Github API
return super().disable_deleted_repo_tasks(index + 1, next_index,
keep_these)
return super().disable_deleted_repo_tasks(index + 1, next_index, keep_these)

View file

@ -9,7 +9,8 @@ from swh.lister.core.models import IndexingModelBase
class GitHubModel(IndexingModelBase):
"""a GitHub repository"""
__tablename__ = 'github_repo'
__tablename__ = "github_repo"
uid = Column(Integer, primary_key=True)
indexable = Column(Integer, index=True)

View file

@ -11,20 +11,20 @@ from swh.lister.github.lister import GitHubLister
GROUP_SPLIT = 10000
@shared_task(name=__name__ + '.IncrementalGitHubLister')
@shared_task(name=__name__ + ".IncrementalGitHubLister")
def list_github_incremental(**lister_args):
'Incremental update of GitHub'
"Incremental update of GitHub"
lister = GitHubLister(**lister_args)
return lister.run(min_bound=lister.db_last_index(), max_bound=None)
@shared_task(name=__name__ + '.RangeGitHubLister')
@shared_task(name=__name__ + ".RangeGitHubLister")
def _range_github_lister(start, end, **lister_args):
lister = GitHubLister(**lister_args)
return lister.run(min_bound=start, max_bound=end)
@shared_task(name=__name__ + '.FullGitHubRelister', bind=True)
@shared_task(name=__name__ + ".FullGitHubRelister", bind=True)
def list_github_full(self, split=None, **lister_args):
"""Full update of GitHub
@ -34,20 +34,21 @@ def list_github_full(self, split=None, **lister_args):
lister = GitHubLister(**lister_args)
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
if not ranges:
self.log.info('Nothing to list')
self.log.info("Nothing to list")
return
random.shuffle(ranges)
promise = group(_range_github_lister.s(minv, maxv, **lister_args)
for minv, maxv in ranges)()
self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
promise = group(
_range_github_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
)()
self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges)))
try:
promise.save() # so that we can restore the GroupResult in tests
except (NotImplementedError, AttributeError):
self.log.info('Unable to call save_group with current result backend.')
self.log.info("Unable to call save_group with current result backend.")
# FIXME: what to do in terms of return here?
return promise.id
@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
return 'OK'
return "OK"

View file

@ -14,65 +14,70 @@ from swh.lister.github.lister import GitHubLister
class GitHubListerTester(HttpListerTester, unittest.TestCase):
Lister = GitHubLister
test_re = re.compile(r'/repositories\?since=([^?&]+)')
lister_subdir = 'github'
good_api_response_file = 'data/https_api.github.com/first_response.json'
bad_api_response_file = 'data/https_api.github.com/empty_response.json'
test_re = re.compile(r"/repositories\?since=([^?&]+)")
lister_subdir = "github"
good_api_response_file = "data/https_api.github.com/first_response.json"
bad_api_response_file = "data/https_api.github.com/empty_response.json"
first_index = 0
last_index = 369
entries_per_page = 100
convert_type = int
def response_headers(self, request):
headers = {'X-RateLimit-Remaining': '1'}
headers = {"X-RateLimit-Remaining": "1"}
if self.request_index(request) == self.first_index:
headers.update({
'Link': '<https://api.github.com/repositories?since=%s>;'
' rel="next",'
'<https://api.github.com/repositories{?since}>;'
' rel="first"' % self.last_index
})
headers.update(
{
"Link": "<https://api.github.com/repositories?since=%s>;"
' rel="next",'
"<https://api.github.com/repositories{?since}>;"
' rel="first"' % self.last_index
}
)
else:
headers.update({
'Link': '<https://api.github.com/repositories{?since}>;'
' rel="first"'
})
headers.update(
{
"Link": "<https://api.github.com/repositories{?since}>;"
' rel="first"'
}
)
return headers
def mock_rate_quota(self, n, request, context):
self.rate_limit += 1
context.status_code = 403
context.headers['X-RateLimit-Remaining'] = '0'
context.headers['Retry-After'] = '1' # 1 second
context.headers["X-RateLimit-Remaining"] = "0"
context.headers["Retry-After"] = "1" # 1 second
return '{"error":"dummy"}'
@requests_mock.Mocker()
def test_scheduled_tasks(self, http_mocker):
self.scheduled_tasks_test(
'data/https_api.github.com/next_response.json', 876, http_mocker)
"data/https_api.github.com/next_response.json", 876, http_mocker
)
def test_lister_github(swh_listers, requests_mock_datadir):
"""Simple github listing should create scheduled tasks
"""
lister = swh_listers['github']
lister = swh_listers["github"]
lister.run()
r = lister.scheduler.search_tasks(task_type='load-git')
r = lister.scheduler.search_tasks(task_type="load-git")
assert len(r) == 100
for row in r:
assert row['type'] == 'load-git'
assert row["type"] == "load-git"
# arguments check
args = row['arguments']['args']
args = row["arguments"]["args"]
assert len(args) == 0
# kwargs
kwargs = row['arguments']['kwargs']
url = kwargs['url']
assert url.startswith('https://github.com')
kwargs = row["arguments"]["kwargs"]
url = kwargs["url"]
assert url.startswith("https://github.com")
assert row['policy'] == 'recurring'
assert row['priority'] is None
assert row["policy"] == "recurring"
assert row["priority"] is None

View file

@ -5,23 +5,21 @@ from unittest.mock import patch
def test_ping(swh_app, celery_session_worker):
res = swh_app.send_task(
'swh.lister.github.tasks.ping')
res = swh_app.send_task("swh.lister.github.tasks.ping")
assert res
res.wait()
assert res.successful()
assert res.result == 'OK'
assert res.result == "OK"
@patch('swh.lister.github.tasks.GitHubLister')
@patch("swh.lister.github.tasks.GitHubLister")
def test_incremental(lister, swh_app, celery_session_worker):
# setup the mocked GitHubLister
lister.return_value = lister
lister.db_last_index.return_value = 42
lister.run.return_value = None
res = swh_app.send_task(
'swh.lister.github.tasks.IncrementalGitHubLister')
res = swh_app.send_task("swh.lister.github.tasks.IncrementalGitHubLister")
assert res
res.wait()
assert res.successful()
@ -31,15 +29,15 @@ def test_incremental(lister, swh_app, celery_session_worker):
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
@patch('swh.lister.github.tasks.GitHubLister')
@patch("swh.lister.github.tasks.GitHubLister")
def test_range(lister, swh_app, celery_session_worker):
# setup the mocked GitHubLister
lister.return_value = lister
lister.run.return_value = None
res = swh_app.send_task(
'swh.lister.github.tasks.RangeGitHubLister',
kwargs=dict(start=12, end=42))
"swh.lister.github.tasks.RangeGitHubLister", kwargs=dict(start=12, end=42)
)
assert res
res.wait()
assert res.successful()
@ -49,16 +47,14 @@ def test_range(lister, swh_app, celery_session_worker):
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@patch('swh.lister.github.tasks.GitHubLister')
@patch("swh.lister.github.tasks.GitHubLister")
def test_relister(lister, swh_app, celery_session_worker):
# setup the mocked GitHubLister
lister.return_value = lister
lister.run.return_value = None
lister.db_partition_indices.return_value = [
(i, i+9) for i in range(0, 50, 10)]
lister.db_partition_indices.return_value = [(i, i + 9) for i in range(0, 50, 10)]
res = swh_app.send_task(
'swh.lister.github.tasks.FullGitHubRelister')
res = swh_app.send_task("swh.lister.github.tasks.FullGitHubRelister")
assert res
res.wait()
@ -86,5 +82,6 @@ def test_relister(lister, swh_app, celery_session_worker):
# lister.run should have been called once per partition interval
for i in range(5):
# XXX inconsistent behavior: max_bound is INCLUDED here
assert (dict(min_bound=10*i, max_bound=10*i + 9),) \
in lister.run.call_args_list
assert (
dict(min_bound=10 * i, max_bound=10 * i + 9),
) in lister.run.call_args_list