python: Reformat code with black 22.3.0

Related to T3922
Antoine Lambert 2022-04-08 15:15:09 +02:00
parent 00f1b99ad9
commit d38e05cff7
37 changed files with 265 additions and 144 deletions
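
The hunks below follow a handful of mechanical patterns applied by this black upgrade: one-line docstrings whose closing quotes sat on their own line are collapsed, calls and literals that keep a trailing comma are exploded onto one element per line (the "magic trailing comma"), and spaces around ** are dropped when both operands are simple. The snippet below is a minimal before/after sketch of those patterns; the names (fetch, BASE, result, delay) are invented for illustration and do not come from the changed files.

# Sketch of the formatting patterns visible in the hunks below
# (hypothetical names, not taken from swh.lister).

BASE = 2


def fetch(url, *, retries, timeout):
    """Pretend to fetch a URL."""  # one-line docstring, closing quotes kept inline
    return (url, retries, timeout)


# Before: the trailing comma let the call sit on a single line, e.g.
#     result = fetch("https://example.org", retries=3, timeout=10,)
# After: black keeps the trailing comma and explodes the call, one argument per line:
result = fetch(
    "https://example.org",
    retries=3,
    timeout=10,
)

# Before: delay = float(BASE ** 3)
# After: spaces around ** are removed when both operands are simple:
delay = float(BASE**3)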


@@ -26,9 +26,7 @@ NewForgeListerPage = List[Dict[str, Any]]
@dataclass
class NewForgeListerState:
"""The NewForgeLister instance state. This is used for incremental listing.
"""
"""The NewForgeLister instance state. This is used for incremental listing."""
current: str = ""
"""Id of the last origin listed on an incremental pass"""
@@ -36,9 +34,7 @@ class NewForgeListerState:
# If there is no need to keep state, subclass StatelessLister[NewForgeListerPage]
class NewForgeLister(Lister[NewForgeListerState, NewForgeListerPage]):
"""List origins from the "NewForge" forge.
"""
"""List origins from the "NewForge" forge."""
# Part of the lister API, that identifies this lister
LISTER_NAME = ""
@@ -63,7 +59,10 @@ class NewForgeLister(Lister[NewForgeListerState, NewForgeListerPage]):
credentials: CredentialsType = None,
):
super().__init__(
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
scheduler=scheduler,
credentials=credentials,
url=url,
instance=instance,
)
self.session = requests.Session()


@@ -155,9 +155,7 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
def get_origins_from_page(
self, page: List[Dict[str, Any]]
) -> Iterator[ListedOrigin]:
"""Convert a page of Bitbucket repositories into a list of ListedOrigins.
"""
"""Convert a page of Bitbucket repositories into a list of ListedOrigins."""
assert self.lister_obj.id is not None
for repo in page:


@@ -51,7 +51,10 @@ def test_bitbucket_incremental_lister(
requests_mock.get(
BitbucketLister.API_URL,
[{"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2},],
[
{"json": bb_api_repositories_page1},
{"json": bb_api_repositories_page2},
],
)
lister = BitbucketLister(scheduler=swh_scheduler, page_size=10)


@@ -25,7 +25,11 @@ def test_incremental_listing(
res = swh_scheduler_celery_app.send_task(
"swh.lister.bitbucket.tasks.IncrementalBitBucketLister",
kwargs=dict(page_size=100, username="username", password="password",),
kwargs=dict(
page_size=100,
username="username",
password="password",
),
)
assert res
res.wait()
@@ -41,7 +45,11 @@ def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_wor
res = swh_scheduler_celery_app.send_task(
"swh.lister.bitbucket.tasks.FullBitBucketRelister",
kwargs=dict(page_size=100, username="username", password="password",),
kwargs=dict(
page_size=100,
username="username",
password="password",
),
)
assert res
res.wait()


@@ -67,7 +67,10 @@ class CGitLister(StatelessLister[Repositories]):
"""
super().__init__(
scheduler=scheduler, url=url, instance=instance, credentials=credentials,
scheduler=scheduler,
url=url,
instance=instance,
credentials=credentials,
)
self.session = requests.Session()
@@ -85,8 +88,8 @@ class CGitLister(StatelessLister[Repositories]):
def get_pages(self) -> Iterator[Repositories]:
"""Generate git 'project' URLs found on the current CGit server
The last_update date is retrieved on the list of repo page to avoid
to compute it on the repository details which only give a date per branch
The last_update date is retrieved on the list of repo page to avoid
to compute it on the repository details which only give a date per branch
"""
next_page: Optional[str] = self.url
while next_page:
@@ -206,7 +209,9 @@ def _parse_last_updated_date(repository: Dict[str, Any]) -> Optional[datetime]:
if not parsed_date:
logger.warning(
"Could not parse %s last_updated date: %s", repository["url"], date,
"Could not parse %s last_updated date: %s",
repository["url"],
date,
)
return parsed_date


@@ -80,7 +80,11 @@ def test_lister_cgit_run_populates_last_update(requests_mock_datadir, swh_schedu
urls_without_date = [
f"https://git.tizen.org/cgit/{suffix_url}"
for suffix_url in ["All-Projects", "All-Users", "Lock-Projects",]
for suffix_url in [
"All-Projects",
"All-Users",
"Lock-Projects",
]
]
lister_cgit = CGitLister(swh_scheduler, url=url)
@@ -145,7 +149,9 @@ def test_lister_cgit_date_parsing(date_str, expected_date):
requests_mock_datadir_missing_url = requests_mock_datadir_factory(
ignore_urls=["https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12",]
ignore_urls=[
"https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12",
]
)
@@ -208,10 +214,12 @@ def test_lister_cgit_from_configfile(swh_scheduler_config, mocker):
def test_lister_cgit_with_base_git_url(
url, base_git_url, expected_nb_origins, requests_mock_datadir, swh_scheduler
):
"""With base git url provided, listed urls should be the computed origin urls
"""
lister_cgit = CGitLister(swh_scheduler, url=url, base_git_url=base_git_url,)
"""With base git url provided, listed urls should be the computed origin urls"""
lister_cgit = CGitLister(
swh_scheduler,
url=url,
base_git_url=base_git_url,
)
stats = lister_cgit.run()


@@ -25,7 +25,8 @@ def test_cgit_lister_task(
kwargs = dict(url="https://git.kernel.org/", instance="kernel", base_git_url=None)
res = swh_scheduler_celery_app.send_task(
"swh.lister.cgit.tasks.CGitListerTask", kwargs=kwargs,
"swh.lister.cgit.tasks.CGitListerTask",
kwargs=kwargs,
)
assert res
res.wait()


@@ -24,7 +24,10 @@ logger = logging.getLogger(__name__)
"--config-file",
"-C",
default=None,
type=click.Path(exists=True, dir_okay=False,),
type=click.Path(
exists=True,
dir_okay=False,
),
help="Configuration file.",
)
@click.pass_context


@@ -132,7 +132,8 @@ def parse_packaged_date(package_info: Dict[str, str]) -> Optional[datetime]:
):
try:
packaged_at = datetime.strptime(
packaged_at_str.split(";")[0], date_format,
packaged_at_str.split(";")[0],
date_format,
).replace(tzinfo=timezone.utc)
break
except Exception:


@@ -20,7 +20,12 @@ from swh.lister.cran.lister import (
def test_cran_compute_origin_urls():
pack = "something"
vers = "0.0.1"
origin_url, artifact_url = compute_origin_urls({"Package": pack, "Version": vers,})
origin_url, artifact_url = compute_origin_urls(
{
"Package": pack,
"Version": vers,
}
)
assert origin_url == f"{CRAN_MIRROR}/package={pack}"
assert artifact_url == f"{CRAN_MIRROR}/src/contrib/{pack}_{vers}.tar.gz"


@@ -42,7 +42,9 @@ class CratesLister(StatelessLister[CratesListerPage]):
)
def __init__(
self, scheduler: SchedulerInterface, credentials: CredentialsType = None,
self,
scheduler: SchedulerInterface,
credentials: CredentialsType = None,
):
super().__init__(
scheduler=scheduler,
@@ -55,7 +57,12 @@ class CratesLister(StatelessLister[CratesListerPage]):
"""Get crates.io-index repository up to date running git command."""
subprocess.check_call(
["git", "clone", self.INDEX_REPOSITORY_URL, self.DESTINATION_PATH,]
[
"git",
"clone",
self.INDEX_REPOSITORY_URL,
self.DESTINATION_PATH,
]
)
def get_crates_index(self) -> List[Path]:


@@ -87,7 +87,9 @@ def _init_test(
requests_mock.get(idx_url, status_code=404)
else:
requests_mock.get(
idx_url, text=sources, headers={"Last-Modified": last_modified},
idx_url,
text=sources,
headers={"Last-Modified": last_modified},
)
for idx_url, _ in lister.debian_index_urls(suite, _components[1]):
@@ -186,7 +188,11 @@ def test_lister_debian_all_suites(
@pytest.mark.parametrize(
"suites_params",
[[_suites[:1]], [_suites[:1], _suites[:2]], [_suites[:1], _suites[:2], _suites],],
[
[_suites[:1]],
[_suites[:1], _suites[:2]],
[_suites[:1], _suites[:2], _suites],
],
)
def test_lister_debian_updated_packages(
swh_scheduler: SchedulerInterface,


@@ -47,7 +47,10 @@ class GiteaLister(StatelessLister[RepoListPage]):
credentials: CredentialsType = None,
):
super().__init__(
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
scheduler=scheduler,
credentials=credentials,
url=url,
instance=instance,
)
self.query_params = {
@@ -59,7 +62,10 @@ class GiteaLister(StatelessLister[RepoListPage]):
self.session = requests.Session()
self.session.headers.update(
{"Accept": "application/json", "User-Agent": USER_AGENT,}
{
"Accept": "application/json",
"User-Agent": USER_AGENT,
}
)
if api_token is None:
@@ -122,9 +128,7 @@ class GiteaLister(StatelessLister[RepoListPage]):
response = self.page_request(url, {})
def get_origins_from_page(self, page: RepoListPage) -> Iterator[ListedOrigin]:
"""Convert a page of Gitea repositories into a list of ListedOrigins.
"""
"""Convert a page of Gitea repositories into a list of ListedOrigins."""
assert self.lister_obj.id is not None
for repo in page:


@@ -23,7 +23,8 @@ def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_wor
kwargs = dict(url="https://try.gitea.io/api/v1")
res = swh_scheduler_celery_app.send_task(
"swh.lister.gitea.tasks.FullGiteaRelister", kwargs=kwargs,
"swh.lister.gitea.tasks.FullGiteaRelister",
kwargs=kwargs,
)
assert res
res.wait()
@@ -49,7 +50,8 @@ def test_full_listing_params(
page_size=50,
)
res = swh_scheduler_celery_app.send_task(
"swh.lister.gitea.tasks.FullGiteaRelister", kwargs=kwargs,
"swh.lister.gitea.tasks.FullGiteaRelister",
kwargs=kwargs,
)
assert res
res.wait()


@@ -138,7 +138,7 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
first_id: the id of the first repo to list
last_id: stop listing after seeing a repo with an id higher than this value.
""" # noqa: E501
""" # noqa: B950
LISTER_NAME = "github"


@@ -29,16 +29,16 @@ def _match_request(request):
def test_lister_gitlab(datadir, swh_scheduler, requests_mock):
"""Gitlab lister supports full listing
"""
"""Gitlab lister supports full listing"""
instance = "gitlab.com"
lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance)
response = gitlab_page_response(datadir, instance, 1)
requests_mock.get(
lister.page_url(), [{"json": response}], additional_matcher=_match_request,
lister.page_url(),
[{"json": response}],
additional_matcher=_match_request,
)
listed_result = lister.run()
@@ -57,9 +57,7 @@ def test_lister_gitlab(datadir, swh_scheduler, requests_mock):
def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock):
"""Heptapod lister happily lists hg, hg_git as hg and git origins
"""
"""Heptapod lister happily lists hg, hg_git as hg and git origins"""
name = "heptapod"
instance = "foss.heptapod.net"
lister = GitLabLister(
@@ -70,7 +68,9 @@ def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock):
response = gitlab_page_response(datadir, instance, 1)
requests_mock.get(
lister.page_url(), [{"json": response}], additional_matcher=_match_request,
lister.page_url(),
[{"json": response}],
additional_matcher=_match_request,
)
listed_result = lister.run()
@@ -99,9 +99,7 @@ def gitlab_page_response(datadir, instance: str, id_after: int) -> List[Dict]:
def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
"""Gitlab lister supports pagination
"""
"""Gitlab lister supports pagination"""
instance = "gite.lirmm.fr"
lister = GitLabLister(swh_scheduler, url=api_url(instance))
@@ -115,7 +113,9 @@ def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
)
requests_mock.get(
lister.page_url(2), [{"json": response2}], additional_matcher=_match_request,
lister.page_url(2),
[{"json": response2}],
additional_matcher=_match_request,
)
listed_result = lister.run()
@@ -135,9 +135,7 @@ def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
"""Gitlab lister supports incremental visits
"""
"""Gitlab lister supports incremental visits"""
instance = "gite.lirmm.fr"
url = api_url(instance)
lister = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True)
@@ -155,7 +153,9 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
additional_matcher=_match_request,
)
requests_mock.get(
url_page2, [{"json": response2}], additional_matcher=_match_request,
url_page2,
[{"json": response2}],
additional_matcher=_match_request,
)
listed_result = lister.run()
@@ -173,7 +173,9 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
additional_matcher=_match_request,
)
requests_mock.get(
url_page3, [{"json": response3}], additional_matcher=_match_request,
url_page3,
[{"json": response3}],
additional_matcher=_match_request,
)
listed_result2 = lister2.run()
@@ -197,9 +199,7 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker):
"""Gitlab lister supports rate-limit
"""
"""Gitlab lister supports rate-limit"""
instance = "gite.lirmm.fr"
url = api_url(instance)
lister = GitLabLister(swh_scheduler, url=url, instance=instance)
@@ -241,9 +241,7 @@ def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker)
def test_lister_gitlab_http_errors(
swh_scheduler, requests_mock, datadir, mocker, status_code
):
"""Gitlab lister should retry requests when encountering HTTP 50x errors
"""
"""Gitlab lister should retry requests when encountering HTTP 50x errors"""
instance = "gite.lirmm.fr"
url = api_url(instance)
lister = GitLabLister(swh_scheduler, url=url, instance=instance)
@@ -281,9 +279,7 @@ def test_lister_gitlab_http_errors(
def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
"""Gitlab lister should skip buggy URL and move to next page.
"""
"""Gitlab lister should skip buggy URL and move to next page."""
instance = "gite.lirmm.fr"
url = api_url(instance)
lister = GitLabLister(swh_scheduler, url=url, instance=instance)
@@ -300,11 +296,17 @@ def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
additional_matcher=_match_request,
)
requests_mock.get(
url_page2, [{"status_code": 500},], additional_matcher=_match_request,
url_page2,
[
{"status_code": 500},
],
additional_matcher=_match_request,
)
requests_mock.get(
url_page3, [{"json": response3}], additional_matcher=_match_request,
url_page3,
[{"json": response3}],
additional_matcher=_match_request,
)
listed_result = lister.run()
@@ -314,9 +316,7 @@ def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
def test_lister_gitlab_credentials(swh_scheduler):
"""Gitlab lister supports credentials configuration
"""
"""Gitlab lister supports credentials configuration"""
instance = "gitlab"
credentials = {
"gitlab": {instance: [{"username": "user", "password": "api-token"}]}
@@ -328,7 +328,13 @@ def test_lister_gitlab_credentials(swh_scheduler):
assert lister.session.headers["Authorization"] == "Bearer api-token"
@pytest.mark.parametrize("url", [api_url("gitlab").rstrip("/"), api_url("gitlab"),])
@pytest.mark.parametrize(
"url",
[
api_url("gitlab").rstrip("/"),
api_url("gitlab"),
],
)
def test_lister_gitlab_url_computation(url, swh_scheduler):
lister = GitLabLister(scheduler=swh_scheduler, url=url)
assert not lister.url.endswith("/")


@@ -34,7 +34,8 @@ def test_task_lister_gitlab(
kwargs = dict(url="https://gitweb.torproject.org/")
res = swh_scheduler_celery_app.send_task(
f"swh.lister.gitlab.tasks.{task_name}", kwargs=kwargs,
f"swh.lister.gitlab.tasks.{task_name}",
kwargs=kwargs,
)
assert res
res.wait()


@@ -28,7 +28,9 @@ class GNULister(StatelessLister[GNUPageType]):
GNU_FTP_URL = "https://ftp.gnu.org"
def __init__(
self, scheduler: SchedulerInterface, credentials: CredentialsType = None,
self,
scheduler: SchedulerInterface,
credentials: CredentialsType = None,
):
super().__init__(
scheduler=scheduler,


@@ -19,9 +19,7 @@ logger = logging.getLogger(__name__)
class GNUTree:
"""Gnu Tree's representation
"""
"""Gnu Tree's representation"""
def __init__(self, url: str):
self.url = url # filepath or uri
@@ -330,7 +328,5 @@ def load_raw_data(url: str) -> Sequence[Mapping]:
def format_date(timestamp: str) -> str:
"""Format a string timestamp to an isoformat string
"""
"""Format a string timestamp to an isoformat string"""
return datetime.fromtimestamp(int(timestamp), tz=timezone.utc).isoformat()


@@ -117,7 +117,8 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
}
return get_vcs_fns[vcs_type](
order_by="most neglected first", modified_since_date=date_last_modified,
order_by="most neglected first",
modified_since_date=date_last_modified,
)
def get_pages(self) -> Iterator[LaunchpadPageType]:


@@ -192,9 +192,16 @@ def test_launchpad_incremental_lister(
def test_launchpad_lister_invalid_url_filtering(
swh_scheduler, mocker,
swh_scheduler,
mocker,
):
invalid_origin = [_Repo({"git_https_url": "tag:launchpad.net:2008:redacted",})]
invalid_origin = [
_Repo(
{
"git_https_url": "tag:launchpad.net:2008:redacted",
}
)
]
_mock_launchpad(mocker, invalid_origin)
lister = LaunchpadLister(scheduler=swh_scheduler)
stats = lister.run()
@@ -205,7 +212,8 @@ def test_launchpad_lister_invalid_url_filtering(
def test_launchpad_lister_duplicated_origin(
swh_scheduler, mocker,
swh_scheduler,
mocker,
):
origin = _Repo(
{


@@ -87,12 +87,18 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
instance = parse_url(url).host
super().__init__(
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
scheduler=scheduler,
credentials=credentials,
url=url,
instance=instance,
)
self.session = requests.Session()
self.session.headers.update(
{"Accept": "application/json", "User-Agent": USER_AGENT,}
{
"Accept": "application/json",
"User-Agent": USER_AGENT,
}
)
def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState:
@@ -119,7 +125,7 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
return response
def get_pages(self) -> Iterator[RepoPage]:
""" Retrieve and parse exported maven indexes to
"""Retrieve and parse exported maven indexes to
identify all pom files and src archives.
"""
@@ -213,7 +219,10 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
):
continue
url_path = f"{path}/{aid}/{version}/{aid}-{version}.{ext}"
url_pom = urljoin(self.BASE_URL, url_path,)
url_pom = urljoin(
self.BASE_URL,
url_path,
)
out_pom[url_pom] = doc_id
elif (
classifier.lower() == "sources" or ("src" in classifier)
@@ -271,9 +280,7 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
logger.info("Could not parse POM %s XML: %s. Next.", pom, error)
def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]:
"""Convert a page of Maven repositories into a list of ListedOrigins.
"""
"""Convert a page of Maven repositories into a list of ListedOrigins."""
assert self.lister_obj.id is not None
scm_types_ok = ("git", "svn", "hg", "cvs", "bzr")
if page["type"] == "scm":
@@ -288,13 +295,17 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
if scm_type in scm_types_ok:
scm_url = m_scm.group("url")
origin = ListedOrigin(
lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
lister_id=self.lister_obj.id,
url=scm_url,
visit_type=scm_type,
)
yield origin
else:
if page["url"].endswith(".git"):
origin = ListedOrigin(
lister_id=self.lister_obj.id, url=page["url"], visit_type="git",
lister_id=self.lister_obj.id,
url=page["url"],
visit_type="git",
)
yield origin
else:


@@ -84,7 +84,12 @@ def maven_pom_3(datadir) -> str:
def test_maven_full_listing(
swh_scheduler, requests_mock, mocker, maven_index, maven_pom_1, maven_pom_2,
swh_scheduler,
requests_mock,
mocker,
maven_index,
maven_pom_1,
maven_pom_2,
):
"""Covers full listing of multiple pages, checking page results and listed
origins, statelessness."""


@@ -35,7 +35,8 @@ def test_task_lister_maven(
url="https://repo1.maven.org/maven2/", index_url="http://indexes/export.fld"
)
res = swh_scheduler_celery_app.send_task(
f"swh.lister.maven.tasks.{task_name}", kwargs=kwargs,
f"swh.lister.maven.tasks.{task_name}",
kwargs=kwargs,
)
assert res
res.wait()


@@ -66,7 +66,10 @@ def test_npm_lister_full(
requests_mock.get(
lister.API_FULL_LISTING_URL,
[{"json": npm_full_listing_page1}, {"json": npm_full_listing_page2},],
[
{"json": npm_full_listing_page1},
{"json": npm_full_listing_page2},
],
additional_matcher=_match_request,
)
@@ -169,7 +172,9 @@ def test_npm_lister_incremental(
def test_npm_lister_incremental_restart(
swh_scheduler, requests_mock, mocker,
swh_scheduler,
requests_mock,
mocker,
):
"""Check incremental npm listing will restart from saved state"""
page_size = 2
@@ -190,7 +195,9 @@ def test_npm_lister_incremental_restart(
def test_npm_lister_http_error(
swh_scheduler, requests_mock, mocker,
swh_scheduler,
requests_mock,
mocker,
):
lister = NpmLister(scheduler=swh_scheduler)


@@ -48,7 +48,10 @@ class OpamLister(StatelessLister[PageType]):
opam_root: str = "/tmp/opam/",
):
super().__init__(
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
scheduler=scheduler,
credentials=credentials,
url=url,
instance=instance,
)
self.env = os.environ.copy()
# Opam root folder is initialized in the :meth:`get_pages` method as no


@@ -17,9 +17,7 @@ module_name = "swh.lister.opam.lister"
@pytest.fixture
def mock_opam(mocker):
"""Fixture to bypass the actual opam calls within the test context.
"""
"""Fixture to bypass the actual opam calls within the test context."""
# inhibits the real `subprocess.call` which prepares the required internal opam
# state
mock_init = mocker.patch(f"{module_name}.call", return_value=None)
@@ -31,9 +29,7 @@ def mock_opam(mocker):
def test_mock_init_repository_init(mock_opam, tmp_path, datadir):
"""Initializing opam root directory with an instance should be ok
"""
"""Initializing opam root directory with an instance should be ok"""
mock_init, mock_popen = mock_opam
instance = "fake"
@@ -48,9 +44,7 @@ def test_mock_init_repository_init(mock_opam, tmp_path, datadir):
def test_mock_init_repository_update(mock_opam, tmp_path, datadir):
"""Updating opam root directory with another instance should be ok
"""
"""Updating opam root directory with another instance should be ok"""
mock_init, mock_popen = mock_opam
instance = "fake_opam_repo"
@@ -74,7 +68,10 @@ def test_lister_opam_optional_instance(swh_scheduler):
netloc = "opam.ocaml.org"
instance_url = f"https://{netloc}"
lister = OpamLister(swh_scheduler, url=instance_url,)
lister = OpamLister(
swh_scheduler,
url=instance_url,
)
assert lister.instance == netloc
assert lister.opam_root == "/tmp/opam/"
@@ -85,7 +82,10 @@ def test_urls(swh_scheduler, mock_opam, tmp_path):
tmp_folder = mkdtemp(dir=tmp_path, prefix="swh_opam_lister")
lister = OpamLister(
swh_scheduler, url=instance_url, instance="opam", opam_root=tmp_folder,
swh_scheduler,
url=instance_url,
instance="opam",
opam_root=tmp_folder,
)
assert lister.instance == "opam"
assert lister.opam_root == tmp_folder


@@ -51,7 +51,9 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
PACKAGIST_REPO_BASE_URL = "https://repo.packagist.org/p"
def __init__(
self, scheduler: SchedulerInterface, credentials: CredentialsType = None,
self,
scheduler: SchedulerInterface,
credentials: CredentialsType = None,
):
super().__init__(
scheduler=scheduler,


@@ -112,7 +112,9 @@ class PhabricatorLister(StatelessLister[PageType]):
if response_data.get("result") is None:
logger.warning(
"Got unexpected response on %s: %s", response.url, response_data,
"Got unexpected response on %s: %s",
response.url,
response_data,
)
break


@@ -115,7 +115,9 @@ def test_lister(
def test_lister_request_error(
swh_scheduler, requests_mock, phabricator_repositories_page1,
swh_scheduler,
requests_mock,
phabricator_repositories_page1,
):
FORGE_BASE_URL = "https://forge.softwareheritage.org"


@@ -54,16 +54,12 @@ def _if_rate_limited(retry_state) -> bool:
def pypi_url(package_name: str) -> str:
"""Build pypi url out of a package name.
"""
"""Build pypi url out of a package name."""
return PyPILister.PACKAGE_URL.format(package_name=package_name)
class PyPILister(Lister[PyPIListerState, PackageListPage]):
"""List origins from PyPI.
"""
"""List origins from PyPI."""
LISTER_NAME = "pypi"
INSTANCE = "pypi" # As of today only the main pypi.org is used
@@ -168,7 +164,7 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]):
def finalize(self):
"""Finalize the visit state by updating with the new last_serial if updates
actually happened.
actually happened.
"""
self.updated = (


@@ -55,8 +55,7 @@ LastModifiedT = datetime.date
@dataclass
class SourceForgeListerState:
"""Current state of the SourceForge lister in incremental runs
"""
"""Current state of the SourceForge lister in incremental runs"""
"""If the subsitemap does not exist, we assume a full run of this subsitemap
is needed. If the date is the same, we skip the subsitemap, otherwise we
@@ -105,9 +104,7 @@ ProjectsLastModifiedCache = Dict[Tuple[str, str], LastModifiedT]
class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
"""List origins from the "SourceForge" forge.
"""
"""List origins from the "SourceForge" forge."""
# Part of the lister API, that identifies this lister
LISTER_NAME = "sourceforge"
@@ -386,7 +383,7 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
bs = BeautifulSoup(response.text, features="html.parser")
cvs_base_url = "rsync://a.cvs.sourceforge.net/cvsroot"
for text in [b.text for b in bs.find_all("b")]:
match = re.search(fr".*/cvsroot/{project} co -P (.+)", text)
match = re.search(rf".*/cvsroot/{project} co -P (.+)", text)
if match is not None:
module = match.group(1)
url = f"{cvs_base_url}/{project}/{module}"


@@ -8,15 +8,24 @@ import pytest
from swh.lister.cli import SUPPORTED_LISTERS, get_lister
lister_args = {
"cgit": {"url": "https://git.eclipse.org/c/",},
"cgit": {
"url": "https://git.eclipse.org/c/",
},
"phabricator": {
"instance": "softwareheritage",
"url": "https://forge.softwareheritage.org/api/diffusion.repository.search",
"api_token": "bogus",
},
"gitea": {"url": "https://try.gitea.io/api/v1/",},
"tuleap": {"url": "https://tuleap.net",},
"gitlab": {"url": "https://gitlab.ow2.org/api/v4", "instance": "ow2",},
"gitea": {
"url": "https://try.gitea.io/api/v1/",
},
"tuleap": {
"url": "https://tuleap.net",
},
"gitlab": {
"url": "https://gitlab.ow2.org/api/v4",
"instance": "ow2",
},
"opam": {"url": "https://opam.ocaml.org", "instance": "opam"},
"maven": {
"url": "https://repo1.maven.org/maven2/",
@@ -34,9 +43,7 @@ def test_get_lister_wrong_input():
def test_get_lister(swh_scheduler_config):
"""Instantiating a supported lister should be ok
"""
"""Instantiating a supported lister should be ok"""
# Drop launchpad lister from the lister to check, its test setup is more involved
# than the other listers and it's not currently done here
for lister_name in SUPPORTED_LISTERS:


@@ -154,7 +154,9 @@ class InstantiableStatelessLister(pattern.StatelessLister[PageType]):
def test_stateless_instantiation(swh_scheduler):
lister = InstantiableStatelessLister(
scheduler=swh_scheduler, url="https://example.com", instance="example.com",
scheduler=swh_scheduler,
url="https://example.com",
instance="example.com",
)
# check the lister was registered in the scheduler backend


@@ -21,7 +21,19 @@ from swh.lister.utils import (
(14, 5, [(0, 4), (5, 9), (10, 14)]),
(19, 10, [(0, 9), (10, 19)]),
(20, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 20)]),
(21, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 21),],),
(
21,
3,
[
(0, 2),
(3, 5),
(6, 8),
(9, 11),
(12, 14),
(15, 17),
(18, 21),
],
),
],
)
def test_split_range(total_pages, nb_pages, expected_ranges):
@@ -72,7 +84,8 @@ def test_throttling_retry(requests_mock, mocker):
def test_throttling_retry_max_attemps(requests_mock, mocker):
requests_mock.get(
TEST_URL, [{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS),
TEST_URL,
[{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS),
)
mock_sleep = mocker.patch.object(make_request.retry, "sleep")
@@ -85,7 +98,7 @@ def test_throttling_retry_max_attemps(requests_mock, mocker):
assert_sleep_calls(
mocker,
mock_sleep,
[float(WAIT_EXP_BASE ** i) for i in range(MAX_NUMBER_ATTEMPTS - 1)],
[float(WAIT_EXP_BASE**i) for i in range(MAX_NUMBER_ATTEMPTS - 1)],
)


@@ -51,12 +51,18 @@ class TuleapLister(StatelessLister[RepoPage]):
credentials: CredentialsType = None,
):
super().__init__(
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
scheduler=scheduler,
credentials=credentials,
url=url,
instance=instance,
)
self.session = requests.Session()
self.session.headers.update(
{"Accept": "application/json", "User-Agent": USER_AGENT,}
{
"Accept": "application/json",
"User-Agent": USER_AGENT,
}
)
@throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
@@ -133,9 +139,7 @@ class TuleapLister(StatelessLister[RepoPage]):
yield self.results_simplified(url_api, "git", repo)
def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]:
"""Convert a page of Tuleap repositories into a list of ListedOrigins.
"""
"""Convert a page of Tuleap repositories into a list of ListedOrigins."""
assert self.lister_obj.id is not None
yield ListedOrigin(


@@ -21,7 +21,8 @@ def test_full_listing(swh_scheduler_celery_app, swh_scheduler_celery_worker, moc
kwargs = dict(url="https://tuleap.net")
res = swh_scheduler_celery_app.send_task(
"swh.lister.tuleap.tasks.FullTuleapLister", kwargs=kwargs,
"swh.lister.tuleap.tasks.FullTuleapLister",
kwargs=kwargs,
)
assert res
res.wait()
@@ -38,9 +39,13 @@ def test_full_listing_params(
lister.from_configfile.return_value = lister
lister.run.return_value = ListerStats(pages=10, origins=500)
kwargs = dict(url="https://tuleap.net", instance="tuleap.net",)
kwargs = dict(
url="https://tuleap.net",
instance="tuleap.net",
)
res = swh_scheduler_celery_app.send_task(
"swh.lister.tuleap.tasks.FullTuleapLister", kwargs=kwargs,
"swh.lister.tuleap.tasks.FullTuleapLister",
kwargs=kwargs,
)
assert res
res.wait()