Enable black
- blackify all the python files, - enable black in pre-commit, - add a black tox environment.
This commit is contained in:
parent
1ae75166c7
commit
93a4d8b784
97 changed files with 1734 additions and 1642 deletions
|
@ -7,7 +7,8 @@ def register():
|
|||
from .models import GitHubModel
|
||||
from .lister import GitHubLister
|
||||
|
||||
return {'models': [GitHubModel],
|
||||
'lister': GitHubLister,
|
||||
'task_modules': ['%s.tasks' % __name__],
|
||||
}
|
||||
return {
|
||||
"models": [GitHubModel],
|
||||
"lister": GitHubLister,
|
||||
"task_modules": ["%s.tasks" % __name__],
|
||||
}
|
||||
|
|
|
@ -14,60 +14,57 @@ from requests import Response
|
|||
|
||||
|
||||
class GitHubLister(IndexingHttpLister):
|
||||
PATH_TEMPLATE = '/repositories?since=%d'
|
||||
PATH_TEMPLATE = "/repositories?since=%d"
|
||||
MODEL = GitHubModel
|
||||
DEFAULT_URL = 'https://api.github.com'
|
||||
API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)')
|
||||
LISTER_NAME = 'github'
|
||||
instance = 'github' # There is only 1 instance of such lister
|
||||
DEFAULT_URL = "https://api.github.com"
|
||||
API_URL_INDEX_RE = re.compile(r"^.*/repositories\?since=(\d+)")
|
||||
LISTER_NAME = "github"
|
||||
instance = "github" # There is only 1 instance of such lister
|
||||
default_min_bound = 0 # type: Any
|
||||
|
||||
def get_model_from_repo(self, repo: Dict[str, Any]) -> Dict[str, Any]:
|
||||
return {
|
||||
'uid': repo['id'],
|
||||
'indexable': repo['id'],
|
||||
'name': repo['name'],
|
||||
'full_name': repo['full_name'],
|
||||
'html_url': repo['html_url'],
|
||||
'origin_url': repo['html_url'],
|
||||
'origin_type': 'git',
|
||||
'fork': repo['fork'],
|
||||
"uid": repo["id"],
|
||||
"indexable": repo["id"],
|
||||
"name": repo["name"],
|
||||
"full_name": repo["full_name"],
|
||||
"html_url": repo["html_url"],
|
||||
"origin_url": repo["html_url"],
|
||||
"origin_type": "git",
|
||||
"fork": repo["fork"],
|
||||
}
|
||||
|
||||
def transport_quota_check(self, response: Response) -> Tuple[bool, int]:
|
||||
x_rate_limit_remaining = response.headers.get('X-RateLimit-Remaining')
|
||||
x_rate_limit_remaining = response.headers.get("X-RateLimit-Remaining")
|
||||
if not x_rate_limit_remaining:
|
||||
return False, 0
|
||||
reqs_remaining = int(x_rate_limit_remaining)
|
||||
if response.status_code == 403 and reqs_remaining == 0:
|
||||
delay = int(response.headers['Retry-After'])
|
||||
delay = int(response.headers["Retry-After"])
|
||||
return True, delay
|
||||
return False, 0
|
||||
|
||||
def get_next_target_from_response(self,
|
||||
response: Response) -> Optional[int]:
|
||||
if 'next' in response.links:
|
||||
next_url = response.links['next']['url']
|
||||
return int(
|
||||
self.API_URL_INDEX_RE.match(next_url).group(1)) # type: ignore
|
||||
def get_next_target_from_response(self, response: Response) -> Optional[int]:
|
||||
if "next" in response.links:
|
||||
next_url = response.links["next"]["url"]
|
||||
return int(self.API_URL_INDEX_RE.match(next_url).group(1)) # type: ignore
|
||||
return None
|
||||
|
||||
def transport_response_simplified(self, response: Response
|
||||
) -> List[Dict[str, Any]]:
|
||||
def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
|
||||
repos = response.json()
|
||||
return [self.get_model_from_repo(repo)
|
||||
for repo in repos if repo and 'id' in repo]
|
||||
return [
|
||||
self.get_model_from_repo(repo) for repo in repos if repo and "id" in repo
|
||||
]
|
||||
|
||||
def request_headers(self) -> Dict[str, Any]:
|
||||
"""(Override) Set requests headers to send when querying the GitHub API
|
||||
|
||||
"""
|
||||
headers = super().request_headers()
|
||||
headers['Accept'] = 'application/vnd.github.v3+json'
|
||||
headers["Accept"] = "application/vnd.github.v3+json"
|
||||
return headers
|
||||
|
||||
def disable_deleted_repo_tasks(self, index: int,
|
||||
next_index: int, keep_these: int):
|
||||
def disable_deleted_repo_tasks(self, index: int, next_index: int, keep_these: int):
|
||||
""" (Overrides) Fix provided index value to avoid erroneously disabling
|
||||
some scheduler tasks
|
||||
"""
|
||||
|
@ -75,5 +72,4 @@ class GitHubLister(IndexingHttpLister):
|
|||
# parameter, so increment the index to avoid disabling the latest
|
||||
# created task when processing a new repositories page returned by
|
||||
# the Github API
|
||||
return super().disable_deleted_repo_tasks(index + 1, next_index,
|
||||
keep_these)
|
||||
return super().disable_deleted_repo_tasks(index + 1, next_index, keep_these)
|
||||
|
|
|
@ -9,7 +9,8 @@ from swh.lister.core.models import IndexingModelBase
|
|||
|
||||
class GitHubModel(IndexingModelBase):
|
||||
"""a GitHub repository"""
|
||||
__tablename__ = 'github_repo'
|
||||
|
||||
__tablename__ = "github_repo"
|
||||
|
||||
uid = Column(Integer, primary_key=True)
|
||||
indexable = Column(Integer, index=True)
|
||||
|
|
|
@ -11,20 +11,20 @@ from swh.lister.github.lister import GitHubLister
|
|||
GROUP_SPLIT = 10000
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.IncrementalGitHubLister')
|
||||
@shared_task(name=__name__ + ".IncrementalGitHubLister")
|
||||
def list_github_incremental(**lister_args):
|
||||
'Incremental update of GitHub'
|
||||
"Incremental update of GitHub"
|
||||
lister = GitHubLister(**lister_args)
|
||||
return lister.run(min_bound=lister.db_last_index(), max_bound=None)
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.RangeGitHubLister')
|
||||
@shared_task(name=__name__ + ".RangeGitHubLister")
|
||||
def _range_github_lister(start, end, **lister_args):
|
||||
lister = GitHubLister(**lister_args)
|
||||
return lister.run(min_bound=start, max_bound=end)
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.FullGitHubRelister', bind=True)
|
||||
@shared_task(name=__name__ + ".FullGitHubRelister", bind=True)
|
||||
def list_github_full(self, split=None, **lister_args):
|
||||
"""Full update of GitHub
|
||||
|
||||
|
@ -34,20 +34,21 @@ def list_github_full(self, split=None, **lister_args):
|
|||
lister = GitHubLister(**lister_args)
|
||||
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
|
||||
if not ranges:
|
||||
self.log.info('Nothing to list')
|
||||
self.log.info("Nothing to list")
|
||||
return
|
||||
random.shuffle(ranges)
|
||||
promise = group(_range_github_lister.s(minv, maxv, **lister_args)
|
||||
for minv, maxv in ranges)()
|
||||
self.log.debug('%s OK (spawned %s subtasks)' % (self.name, len(ranges)))
|
||||
promise = group(
|
||||
_range_github_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
|
||||
)()
|
||||
self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges)))
|
||||
try:
|
||||
promise.save() # so that we can restore the GroupResult in tests
|
||||
except (NotImplementedError, AttributeError):
|
||||
self.log.info('Unable to call save_group with current result backend.')
|
||||
self.log.info("Unable to call save_group with current result backend.")
|
||||
# FIXME: what to do in terms of return here?
|
||||
return promise.id
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.ping')
|
||||
@shared_task(name=__name__ + ".ping")
|
||||
def _ping():
|
||||
return 'OK'
|
||||
return "OK"
|
||||
|
|
|
@ -14,65 +14,70 @@ from swh.lister.github.lister import GitHubLister
|
|||
|
||||
class GitHubListerTester(HttpListerTester, unittest.TestCase):
|
||||
Lister = GitHubLister
|
||||
test_re = re.compile(r'/repositories\?since=([^?&]+)')
|
||||
lister_subdir = 'github'
|
||||
good_api_response_file = 'data/https_api.github.com/first_response.json'
|
||||
bad_api_response_file = 'data/https_api.github.com/empty_response.json'
|
||||
test_re = re.compile(r"/repositories\?since=([^?&]+)")
|
||||
lister_subdir = "github"
|
||||
good_api_response_file = "data/https_api.github.com/first_response.json"
|
||||
bad_api_response_file = "data/https_api.github.com/empty_response.json"
|
||||
first_index = 0
|
||||
last_index = 369
|
||||
entries_per_page = 100
|
||||
convert_type = int
|
||||
|
||||
def response_headers(self, request):
|
||||
headers = {'X-RateLimit-Remaining': '1'}
|
||||
headers = {"X-RateLimit-Remaining": "1"}
|
||||
if self.request_index(request) == self.first_index:
|
||||
headers.update({
|
||||
'Link': '<https://api.github.com/repositories?since=%s>;'
|
||||
' rel="next",'
|
||||
'<https://api.github.com/repositories{?since}>;'
|
||||
' rel="first"' % self.last_index
|
||||
})
|
||||
headers.update(
|
||||
{
|
||||
"Link": "<https://api.github.com/repositories?since=%s>;"
|
||||
' rel="next",'
|
||||
"<https://api.github.com/repositories{?since}>;"
|
||||
' rel="first"' % self.last_index
|
||||
}
|
||||
)
|
||||
else:
|
||||
headers.update({
|
||||
'Link': '<https://api.github.com/repositories{?since}>;'
|
||||
' rel="first"'
|
||||
})
|
||||
headers.update(
|
||||
{
|
||||
"Link": "<https://api.github.com/repositories{?since}>;"
|
||||
' rel="first"'
|
||||
}
|
||||
)
|
||||
return headers
|
||||
|
||||
def mock_rate_quota(self, n, request, context):
|
||||
self.rate_limit += 1
|
||||
context.status_code = 403
|
||||
context.headers['X-RateLimit-Remaining'] = '0'
|
||||
context.headers['Retry-After'] = '1' # 1 second
|
||||
context.headers["X-RateLimit-Remaining"] = "0"
|
||||
context.headers["Retry-After"] = "1" # 1 second
|
||||
return '{"error":"dummy"}'
|
||||
|
||||
@requests_mock.Mocker()
|
||||
def test_scheduled_tasks(self, http_mocker):
|
||||
self.scheduled_tasks_test(
|
||||
'data/https_api.github.com/next_response.json', 876, http_mocker)
|
||||
"data/https_api.github.com/next_response.json", 876, http_mocker
|
||||
)
|
||||
|
||||
|
||||
def test_lister_github(swh_listers, requests_mock_datadir):
|
||||
"""Simple github listing should create scheduled tasks
|
||||
|
||||
"""
|
||||
lister = swh_listers['github']
|
||||
lister = swh_listers["github"]
|
||||
|
||||
lister.run()
|
||||
|
||||
r = lister.scheduler.search_tasks(task_type='load-git')
|
||||
r = lister.scheduler.search_tasks(task_type="load-git")
|
||||
assert len(r) == 100
|
||||
|
||||
for row in r:
|
||||
assert row['type'] == 'load-git'
|
||||
assert row["type"] == "load-git"
|
||||
# arguments check
|
||||
args = row['arguments']['args']
|
||||
args = row["arguments"]["args"]
|
||||
assert len(args) == 0
|
||||
|
||||
# kwargs
|
||||
kwargs = row['arguments']['kwargs']
|
||||
url = kwargs['url']
|
||||
assert url.startswith('https://github.com')
|
||||
kwargs = row["arguments"]["kwargs"]
|
||||
url = kwargs["url"]
|
||||
assert url.startswith("https://github.com")
|
||||
|
||||
assert row['policy'] == 'recurring'
|
||||
assert row['priority'] is None
|
||||
assert row["policy"] == "recurring"
|
||||
assert row["priority"] is None
|
||||
|
|
|
@ -5,23 +5,21 @@ from unittest.mock import patch
|
|||
|
||||
|
||||
def test_ping(swh_app, celery_session_worker):
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.github.tasks.ping')
|
||||
res = swh_app.send_task("swh.lister.github.tasks.ping")
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
assert res.result == 'OK'
|
||||
assert res.result == "OK"
|
||||
|
||||
|
||||
@patch('swh.lister.github.tasks.GitHubLister')
|
||||
@patch("swh.lister.github.tasks.GitHubLister")
|
||||
def test_incremental(lister, swh_app, celery_session_worker):
|
||||
# setup the mocked GitHubLister
|
||||
lister.return_value = lister
|
||||
lister.db_last_index.return_value = 42
|
||||
lister.run.return_value = None
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.github.tasks.IncrementalGitHubLister')
|
||||
res = swh_app.send_task("swh.lister.github.tasks.IncrementalGitHubLister")
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
|
@ -31,15 +29,15 @@ def test_incremental(lister, swh_app, celery_session_worker):
|
|||
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
|
||||
|
||||
|
||||
@patch('swh.lister.github.tasks.GitHubLister')
|
||||
@patch("swh.lister.github.tasks.GitHubLister")
|
||||
def test_range(lister, swh_app, celery_session_worker):
|
||||
# setup the mocked GitHubLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.github.tasks.RangeGitHubLister',
|
||||
kwargs=dict(start=12, end=42))
|
||||
"swh.lister.github.tasks.RangeGitHubLister", kwargs=dict(start=12, end=42)
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
|
@ -49,16 +47,14 @@ def test_range(lister, swh_app, celery_session_worker):
|
|||
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
|
||||
|
||||
|
||||
@patch('swh.lister.github.tasks.GitHubLister')
|
||||
@patch("swh.lister.github.tasks.GitHubLister")
|
||||
def test_relister(lister, swh_app, celery_session_worker):
|
||||
# setup the mocked GitHubLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
lister.db_partition_indices.return_value = [
|
||||
(i, i+9) for i in range(0, 50, 10)]
|
||||
lister.db_partition_indices.return_value = [(i, i + 9) for i in range(0, 50, 10)]
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.github.tasks.FullGitHubRelister')
|
||||
res = swh_app.send_task("swh.lister.github.tasks.FullGitHubRelister")
|
||||
assert res
|
||||
|
||||
res.wait()
|
||||
|
@ -86,5 +82,6 @@ def test_relister(lister, swh_app, celery_session_worker):
|
|||
# lister.run should have been called once per partition interval
|
||||
for i in range(5):
|
||||
# XXX inconsistent behavior: max_bound is INCLUDED here
|
||||
assert (dict(min_bound=10*i, max_bound=10*i + 9),) \
|
||||
in lister.run.call_args_list
|
||||
assert (
|
||||
dict(min_bound=10 * i, max_bound=10 * i + 9),
|
||||
) in lister.run.call_args_list
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue