Enable black

- blackify all the python files,
- enable black in pre-commit,
- add a black tox environment.
This commit is contained in:
David Douard 2020-04-08 16:31:22 +02:00
parent 1ae75166c7
commit 93a4d8b784
97 changed files with 1734 additions and 1642 deletions

View file

@ -7,7 +7,8 @@ def register():
from .models import CGitModel
from .lister import CGitLister
return {'models': [CGitModel],
'lister': CGitLister,
'task_modules': ['%s.tasks' % __name__],
}
return {
"models": [CGitModel],
"lister": CGitLister,
"task_modules": ["%s.tasks" % __name__],
}

View file

@ -50,13 +50,13 @@ class CGitLister(ListerBase):
Args:
'https://git.savannah.gnu.org/git/elisp-es.git'
"""
MODEL = CGitModel
DEFAULT_URL = 'https://git.savannah.gnu.org/cgit/'
LISTER_NAME = 'cgit'
DEFAULT_URL = "https://git.savannah.gnu.org/cgit/"
LISTER_NAME = "cgit"
url_prefix_present = True
def __init__(self, url=None, instance=None,
override_config=None):
def __init__(self, url=None, instance=None, override_config=None):
"""Lister class for CGit repositories.
Args:
@ -69,7 +69,7 @@ class CGitLister(ListerBase):
super().__init__(override_config=override_config)
if url is None:
url = self.config.get('url', self.DEFAULT_URL)
url = self.config.get("url", self.DEFAULT_URL)
self.url = url
if not instance:
@ -78,23 +78,22 @@ class CGitLister(ListerBase):
self.session = Session()
self.session.mount(self.url, HTTPAdapter(max_retries=3))
self.session.headers = {
'User-Agent': USER_AGENT,
"User-Agent": USER_AGENT,
}
def run(self) -> Dict[str, str]:
status = 'uneventful'
status = "uneventful"
total = 0
for repos in grouper(self.get_repos(), 10):
models = list(filter(None, (self.build_model(repo)
for repo in repos)))
models = list(filter(None, (self.build_model(repo) for repo in repos)))
injected_repos = self.inject_repo_data_into_db(models)
self.schedule_missing_tasks(models, injected_repos)
self.db_session.commit()
total += len(injected_repos)
logger.debug('Scheduled %s tasks for %s', total, self.url)
status = 'eventful'
logger.debug("Scheduled %s tasks for %s", total, self.url)
status = "eventful"
return {'status': status}
return {"status": status}
def get_repos(self) -> Generator[str, None, None]:
"""Generate git 'project' URLs found on the current CGit server
@ -103,16 +102,16 @@ class CGitLister(ListerBase):
next_page = self.url
while next_page:
bs_idx = self.get_and_parse(next_page)
for tr in bs_idx.find(
'div', {"class": "content"}).find_all(
"tr", {"class": ""}):
yield urljoin(self.url, tr.find('a')['href'])
for tr in bs_idx.find("div", {"class": "content"}).find_all(
"tr", {"class": ""}
):
yield urljoin(self.url, tr.find("a")["href"])
try:
pager = bs_idx.find('ul', {'class': 'pager'})
current_page = pager.find('a', {'class': 'current'})
pager = bs_idx.find("ul", {"class": "pager"})
current_page = pager.find("a", {"class": "current"})
if current_page:
next_page = current_page.parent.next_sibling.a['href']
next_page = current_page.parent.next_sibling.a["href"]
next_page = urljoin(self.url, next_page)
except (AttributeError, KeyError):
# no pager, or no next page
@ -123,28 +122,28 @@ class CGitLister(ListerBase):
return the repo description (dict) suitable for insertion in the db.
"""
bs = self.get_and_parse(repo_url)
urls = [x['href'] for x in bs.find_all('a', {'rel': 'vcs-git'})]
urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
if not urls:
return None
# look for the http/https url, if any, and use it as origin_url
for url in urls:
if urlparse(url).scheme in ('http', 'https'):
if urlparse(url).scheme in ("http", "https"):
origin_url = url
break
else:
# otherwise, choose the first one
origin_url = urls[0]
return {'uid': repo_url,
'name': bs.find('a', title=re.compile('.+'))['title'],
'origin_type': 'git',
'instance': self.instance,
'origin_url': origin_url,
}
return {
"uid": repo_url,
"name": bs.find("a", title=re.compile(".+"))["title"],
"origin_type": "git",
"instance": self.instance,
"origin_url": origin_url,
}
def get_and_parse(self, url: str) -> BeautifulSoup:
"Get the given url and parse the retrieved HTML using BeautifulSoup"
return BeautifulSoup(self.session.get(url).text,
features='html.parser')
return BeautifulSoup(self.session.get(url).text, features="html.parser")

View file

@ -11,7 +11,8 @@ class CGitModel(ModelBase):
"""a CGit repository representation
"""
__tablename__ = 'cgit_repo'
__tablename__ = "cgit_repo"
uid = Column(String, primary_key=True)
instance = Column(String, index=True)

View file

@ -7,12 +7,12 @@ from celery import shared_task
from .lister import CGitLister
@shared_task(name=__name__ + '.CGitListerTask')
@shared_task(name=__name__ + ".CGitListerTask")
def list_cgit(**lister_args):
'''Lister task for CGit instances'''
"""Lister task for CGit instances"""
return CGitLister(**lister_args).run()
@shared_task(name=__name__ + '.ping')
@shared_task(name=__name__ + ".ping")
def _ping():
return 'OK'
return "OK"

View file

@ -7,38 +7,38 @@ from swh.lister import __version__
def test_lister_no_page(requests_mock_datadir, swh_listers):
lister = swh_listers['cgit']
lister = swh_listers["cgit"]
assert lister.url == 'https://git.savannah.gnu.org/cgit/'
assert lister.url == "https://git.savannah.gnu.org/cgit/"
repos = list(lister.get_repos())
assert len(repos) == 977
assert repos[0] == 'https://git.savannah.gnu.org/cgit/elisp-es.git/'
assert repos[0] == "https://git.savannah.gnu.org/cgit/elisp-es.git/"
# note the url below is NOT a subpath of /cgit/
assert repos[-1] == 'https://git.savannah.gnu.org/path/to/yetris.git/' # noqa
assert repos[-1] == "https://git.savannah.gnu.org/path/to/yetris.git/" # noqa
# note the url below is NOT on the same server
assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
assert repos[-2] == "http://example.org/cgit/xstarcastle.git/"
def test_lister_model(requests_mock_datadir, swh_listers):
lister = swh_listers['cgit']
lister = swh_listers["cgit"]
repo = next(lister.get_repos())
model = lister.build_model(repo)
assert model == {
'uid': 'https://git.savannah.gnu.org/cgit/elisp-es.git/',
'name': 'elisp-es.git',
'origin_type': 'git',
'instance': 'git.savannah.gnu.org',
'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
}
"uid": "https://git.savannah.gnu.org/cgit/elisp-es.git/",
"name": "elisp-es.git",
"origin_type": "git",
"instance": "git.savannah.gnu.org",
"origin_url": "https://git.savannah.gnu.org/git/elisp-es.git",
}
def test_lister_with_pages(requests_mock_datadir, swh_listers):
lister = swh_listers['cgit']
lister.url = 'https://git.tizen/cgit/'
lister = swh_listers["cgit"]
lister.url = "https://git.tizen/cgit/"
repos = list(lister.get_repos())
# we should have 16 repos (listed on 3 pages)
@ -46,37 +46,37 @@ def test_lister_with_pages(requests_mock_datadir, swh_listers):
def test_lister_run(requests_mock_datadir, swh_listers):
lister = swh_listers['cgit']
lister.url = 'https://git.tizen/cgit/'
lister = swh_listers["cgit"]
lister.url = "https://git.tizen/cgit/"
lister.run()
r = lister.scheduler.search_tasks(task_type='load-git')
r = lister.scheduler.search_tasks(task_type="load-git")
assert len(r) == 16
for row in r:
assert row['type'] == 'load-git'
assert row["type"] == "load-git"
# arguments check
args = row['arguments']['args']
args = row["arguments"]["args"]
assert len(args) == 0
# kwargs
kwargs = row['arguments']['kwargs']
kwargs = row["arguments"]["kwargs"]
assert len(kwargs) == 1
url = kwargs['url']
assert url.startswith('https://git.tizen')
url = kwargs["url"]
assert url.startswith("https://git.tizen")
assert row['policy'] == 'recurring'
assert row['priority'] is None
assert row["policy"] == "recurring"
assert row["priority"] is None
def test_lister_requests(requests_mock_datadir, swh_listers):
lister = swh_listers['cgit']
lister.url = 'https://git.tizen/cgit/'
lister = swh_listers["cgit"]
lister.url = "https://git.tizen/cgit/"
lister.run()
assert len(requests_mock_datadir.request_history) != 0
for request in requests_mock_datadir.request_history:
assert 'User-Agent' in request.headers
user_agent = request.headers['User-Agent']
assert 'Software Heritage Lister' in user_agent
assert "User-Agent" in request.headers
user_agent = request.headers["User-Agent"]
assert "Software Heritage Lister" in user_agent
assert __version__ in user_agent

View file

@ -2,29 +2,27 @@ from unittest.mock import patch
def test_ping(swh_app, celery_session_worker):
res = swh_app.send_task(
'swh.lister.cgit.tasks.ping')
res = swh_app.send_task("swh.lister.cgit.tasks.ping")
assert res
res.wait()
assert res.successful()
assert res.result == 'OK'
assert res.result == "OK"
@patch('swh.lister.cgit.tasks.CGitLister')
@patch("swh.lister.cgit.tasks.CGitLister")
def test_lister(lister, swh_app, celery_session_worker):
# setup the mocked CGitLister
lister.return_value = lister
lister.run.return_value = None
res = swh_app.send_task(
'swh.lister.cgit.tasks.CGitListerTask',
kwargs=dict(url='https://git.kernel.org/', instance='kernel'))
"swh.lister.cgit.tasks.CGitListerTask",
kwargs=dict(url="https://git.kernel.org/", instance="kernel"),
)
assert res
res.wait()
assert res.successful()
lister.assert_called_once_with(
url='https://git.kernel.org/',
instance='kernel')
lister.assert_called_once_with(url="https://git.kernel.org/", instance="kernel")
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with()