Enable black
- Blackify all the Python files.
- Enable black in pre-commit.
- Add a black tox environment.
This commit is contained in:
parent
1ae75166c7
commit
93a4d8b784
97 changed files with 1734 additions and 1642 deletions
|
@ -7,7 +7,8 @@ def register():
|
|||
from .models import CGitModel
|
||||
from .lister import CGitLister
|
||||
|
||||
return {'models': [CGitModel],
|
||||
'lister': CGitLister,
|
||||
'task_modules': ['%s.tasks' % __name__],
|
||||
}
|
||||
return {
|
||||
"models": [CGitModel],
|
||||
"lister": CGitLister,
|
||||
"task_modules": ["%s.tasks" % __name__],
|
||||
}
|
||||
|
|
|
@ -50,13 +50,13 @@ class CGitLister(ListerBase):
|
|||
Args:
|
||||
'https://git.savannah.gnu.org/git/elisp-es.git'
|
||||
"""
|
||||
|
||||
MODEL = CGitModel
|
||||
DEFAULT_URL = 'https://git.savannah.gnu.org/cgit/'
|
||||
LISTER_NAME = 'cgit'
|
||||
DEFAULT_URL = "https://git.savannah.gnu.org/cgit/"
|
||||
LISTER_NAME = "cgit"
|
||||
url_prefix_present = True
|
||||
|
||||
def __init__(self, url=None, instance=None,
|
||||
override_config=None):
|
||||
def __init__(self, url=None, instance=None, override_config=None):
|
||||
"""Lister class for CGit repositories.
|
||||
|
||||
Args:
|
||||
|
@ -69,7 +69,7 @@ class CGitLister(ListerBase):
|
|||
super().__init__(override_config=override_config)
|
||||
|
||||
if url is None:
|
||||
url = self.config.get('url', self.DEFAULT_URL)
|
||||
url = self.config.get("url", self.DEFAULT_URL)
|
||||
self.url = url
|
||||
|
||||
if not instance:
|
||||
|
@ -78,23 +78,22 @@ class CGitLister(ListerBase):
|
|||
self.session = Session()
|
||||
self.session.mount(self.url, HTTPAdapter(max_retries=3))
|
||||
self.session.headers = {
|
||||
'User-Agent': USER_AGENT,
|
||||
"User-Agent": USER_AGENT,
|
||||
}
|
||||
|
||||
def run(self) -> Dict[str, str]:
|
||||
status = 'uneventful'
|
||||
status = "uneventful"
|
||||
total = 0
|
||||
for repos in grouper(self.get_repos(), 10):
|
||||
models = list(filter(None, (self.build_model(repo)
|
||||
for repo in repos)))
|
||||
models = list(filter(None, (self.build_model(repo) for repo in repos)))
|
||||
injected_repos = self.inject_repo_data_into_db(models)
|
||||
self.schedule_missing_tasks(models, injected_repos)
|
||||
self.db_session.commit()
|
||||
total += len(injected_repos)
|
||||
logger.debug('Scheduled %s tasks for %s', total, self.url)
|
||||
status = 'eventful'
|
||||
logger.debug("Scheduled %s tasks for %s", total, self.url)
|
||||
status = "eventful"
|
||||
|
||||
return {'status': status}
|
||||
return {"status": status}
|
||||
|
||||
def get_repos(self) -> Generator[str, None, None]:
|
||||
"""Generate git 'project' URLs found on the current CGit server
|
||||
|
@ -103,16 +102,16 @@ class CGitLister(ListerBase):
|
|||
next_page = self.url
|
||||
while next_page:
|
||||
bs_idx = self.get_and_parse(next_page)
|
||||
for tr in bs_idx.find(
|
||||
'div', {"class": "content"}).find_all(
|
||||
"tr", {"class": ""}):
|
||||
yield urljoin(self.url, tr.find('a')['href'])
|
||||
for tr in bs_idx.find("div", {"class": "content"}).find_all(
|
||||
"tr", {"class": ""}
|
||||
):
|
||||
yield urljoin(self.url, tr.find("a")["href"])
|
||||
|
||||
try:
|
||||
pager = bs_idx.find('ul', {'class': 'pager'})
|
||||
current_page = pager.find('a', {'class': 'current'})
|
||||
pager = bs_idx.find("ul", {"class": "pager"})
|
||||
current_page = pager.find("a", {"class": "current"})
|
||||
if current_page:
|
||||
next_page = current_page.parent.next_sibling.a['href']
|
||||
next_page = current_page.parent.next_sibling.a["href"]
|
||||
next_page = urljoin(self.url, next_page)
|
||||
except (AttributeError, KeyError):
|
||||
# no pager, or no next page
|
||||
|
@ -123,28 +122,28 @@ class CGitLister(ListerBase):
|
|||
return the repo description (dict) suitable for insertion in the db.
|
||||
"""
|
||||
bs = self.get_and_parse(repo_url)
|
||||
urls = [x['href'] for x in bs.find_all('a', {'rel': 'vcs-git'})]
|
||||
urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
|
||||
|
||||
if not urls:
|
||||
return None
|
||||
|
||||
# look for the http/https url, if any, and use it as origin_url
|
||||
for url in urls:
|
||||
if urlparse(url).scheme in ('http', 'https'):
|
||||
if urlparse(url).scheme in ("http", "https"):
|
||||
origin_url = url
|
||||
break
|
||||
else:
|
||||
# otherwise, choose the first one
|
||||
origin_url = urls[0]
|
||||
|
||||
return {'uid': repo_url,
|
||||
'name': bs.find('a', title=re.compile('.+'))['title'],
|
||||
'origin_type': 'git',
|
||||
'instance': self.instance,
|
||||
'origin_url': origin_url,
|
||||
}
|
||||
return {
|
||||
"uid": repo_url,
|
||||
"name": bs.find("a", title=re.compile(".+"))["title"],
|
||||
"origin_type": "git",
|
||||
"instance": self.instance,
|
||||
"origin_url": origin_url,
|
||||
}
|
||||
|
||||
def get_and_parse(self, url: str) -> BeautifulSoup:
|
||||
"Get the given url and parse the retrieved HTML using BeautifulSoup"
|
||||
return BeautifulSoup(self.session.get(url).text,
|
||||
features='html.parser')
|
||||
return BeautifulSoup(self.session.get(url).text, features="html.parser")
|
||||
|
|
|
@ -11,7 +11,8 @@ class CGitModel(ModelBase):
|
|||
"""a CGit repository representation
|
||||
|
||||
"""
|
||||
__tablename__ = 'cgit_repo'
|
||||
|
||||
__tablename__ = "cgit_repo"
|
||||
|
||||
uid = Column(String, primary_key=True)
|
||||
instance = Column(String, index=True)
|
||||
|
|
|
@ -7,12 +7,12 @@ from celery import shared_task
|
|||
from .lister import CGitLister
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.CGitListerTask')
|
||||
@shared_task(name=__name__ + ".CGitListerTask")
|
||||
def list_cgit(**lister_args):
|
||||
'''Lister task for CGit instances'''
|
||||
"""Lister task for CGit instances"""
|
||||
return CGitLister(**lister_args).run()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.ping')
|
||||
@shared_task(name=__name__ + ".ping")
|
||||
def _ping():
|
||||
return 'OK'
|
||||
return "OK"
|
||||
|
|
|
@ -7,38 +7,38 @@ from swh.lister import __version__
|
|||
|
||||
|
||||
def test_lister_no_page(requests_mock_datadir, swh_listers):
|
||||
lister = swh_listers['cgit']
|
||||
lister = swh_listers["cgit"]
|
||||
|
||||
assert lister.url == 'https://git.savannah.gnu.org/cgit/'
|
||||
assert lister.url == "https://git.savannah.gnu.org/cgit/"
|
||||
|
||||
repos = list(lister.get_repos())
|
||||
assert len(repos) == 977
|
||||
|
||||
assert repos[0] == 'https://git.savannah.gnu.org/cgit/elisp-es.git/'
|
||||
assert repos[0] == "https://git.savannah.gnu.org/cgit/elisp-es.git/"
|
||||
# note the url below is NOT a subpath of /cgit/
|
||||
assert repos[-1] == 'https://git.savannah.gnu.org/path/to/yetris.git/' # noqa
|
||||
assert repos[-1] == "https://git.savannah.gnu.org/path/to/yetris.git/" # noqa
|
||||
# note the url below is NOT on the same server
|
||||
assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
|
||||
assert repos[-2] == "http://example.org/cgit/xstarcastle.git/"
|
||||
|
||||
|
||||
def test_lister_model(requests_mock_datadir, swh_listers):
|
||||
lister = swh_listers['cgit']
|
||||
lister = swh_listers["cgit"]
|
||||
|
||||
repo = next(lister.get_repos())
|
||||
|
||||
model = lister.build_model(repo)
|
||||
assert model == {
|
||||
'uid': 'https://git.savannah.gnu.org/cgit/elisp-es.git/',
|
||||
'name': 'elisp-es.git',
|
||||
'origin_type': 'git',
|
||||
'instance': 'git.savannah.gnu.org',
|
||||
'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
|
||||
}
|
||||
"uid": "https://git.savannah.gnu.org/cgit/elisp-es.git/",
|
||||
"name": "elisp-es.git",
|
||||
"origin_type": "git",
|
||||
"instance": "git.savannah.gnu.org",
|
||||
"origin_url": "https://git.savannah.gnu.org/git/elisp-es.git",
|
||||
}
|
||||
|
||||
|
||||
def test_lister_with_pages(requests_mock_datadir, swh_listers):
|
||||
lister = swh_listers['cgit']
|
||||
lister.url = 'https://git.tizen/cgit/'
|
||||
lister = swh_listers["cgit"]
|
||||
lister.url = "https://git.tizen/cgit/"
|
||||
|
||||
repos = list(lister.get_repos())
|
||||
# we should have 16 repos (listed on 3 pages)
|
||||
|
@ -46,37 +46,37 @@ def test_lister_with_pages(requests_mock_datadir, swh_listers):
|
|||
|
||||
|
||||
def test_lister_run(requests_mock_datadir, swh_listers):
|
||||
lister = swh_listers['cgit']
|
||||
lister.url = 'https://git.tizen/cgit/'
|
||||
lister = swh_listers["cgit"]
|
||||
lister.url = "https://git.tizen/cgit/"
|
||||
lister.run()
|
||||
|
||||
r = lister.scheduler.search_tasks(task_type='load-git')
|
||||
r = lister.scheduler.search_tasks(task_type="load-git")
|
||||
assert len(r) == 16
|
||||
|
||||
for row in r:
|
||||
assert row['type'] == 'load-git'
|
||||
assert row["type"] == "load-git"
|
||||
# arguments check
|
||||
args = row['arguments']['args']
|
||||
args = row["arguments"]["args"]
|
||||
assert len(args) == 0
|
||||
|
||||
# kwargs
|
||||
kwargs = row['arguments']['kwargs']
|
||||
kwargs = row["arguments"]["kwargs"]
|
||||
assert len(kwargs) == 1
|
||||
url = kwargs['url']
|
||||
assert url.startswith('https://git.tizen')
|
||||
url = kwargs["url"]
|
||||
assert url.startswith("https://git.tizen")
|
||||
|
||||
assert row['policy'] == 'recurring'
|
||||
assert row['priority'] is None
|
||||
assert row["policy"] == "recurring"
|
||||
assert row["priority"] is None
|
||||
|
||||
|
||||
def test_lister_requests(requests_mock_datadir, swh_listers):
|
||||
lister = swh_listers['cgit']
|
||||
lister.url = 'https://git.tizen/cgit/'
|
||||
lister = swh_listers["cgit"]
|
||||
lister.url = "https://git.tizen/cgit/"
|
||||
lister.run()
|
||||
|
||||
assert len(requests_mock_datadir.request_history) != 0
|
||||
for request in requests_mock_datadir.request_history:
|
||||
assert 'User-Agent' in request.headers
|
||||
user_agent = request.headers['User-Agent']
|
||||
assert 'Software Heritage Lister' in user_agent
|
||||
assert "User-Agent" in request.headers
|
||||
user_agent = request.headers["User-Agent"]
|
||||
assert "Software Heritage Lister" in user_agent
|
||||
assert __version__ in user_agent
|
||||
|
|
|
@ -2,29 +2,27 @@ from unittest.mock import patch
|
|||
|
||||
|
||||
def test_ping(swh_app, celery_session_worker):
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.cgit.tasks.ping')
|
||||
res = swh_app.send_task("swh.lister.cgit.tasks.ping")
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
assert res.result == 'OK'
|
||||
assert res.result == "OK"
|
||||
|
||||
|
||||
@patch('swh.lister.cgit.tasks.CGitLister')
|
||||
@patch("swh.lister.cgit.tasks.CGitLister")
|
||||
def test_lister(lister, swh_app, celery_session_worker):
|
||||
# setup the mocked CGitLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.cgit.tasks.CGitListerTask',
|
||||
kwargs=dict(url='https://git.kernel.org/', instance='kernel'))
|
||||
"swh.lister.cgit.tasks.CGitListerTask",
|
||||
kwargs=dict(url="https://git.kernel.org/", instance="kernel"),
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(
|
||||
url='https://git.kernel.org/',
|
||||
instance='kernel')
|
||||
lister.assert_called_once_with(url="https://git.kernel.org/", instance="kernel")
|
||||
lister.db_last_index.assert_not_called()
|
||||
lister.run.assert_called_once_with()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue