From d38e05cff7253de9259b3a0b6cdb59856f2afb92 Mon Sep 17 00:00:00 2001 From: Antoine Lambert Date: Fri, 8 Apr 2022 15:15:09 +0200 Subject: [PATCH] python: Reformat code with black 22.3.0 Related to T3922 --- docs/new_lister_template.py | 13 ++-- swh/lister/bitbucket/lister.py | 4 +- swh/lister/bitbucket/tests/test_lister.py | 5 +- swh/lister/bitbucket/tests/test_tasks.py | 12 +++- swh/lister/cgit/lister.py | 13 ++-- swh/lister/cgit/tests/test_lister.py | 20 ++++-- swh/lister/cgit/tests/test_tasks.py | 3 +- swh/lister/cli.py | 5 +- swh/lister/cran/lister.py | 3 +- swh/lister/cran/tests/test_lister.py | 7 ++- swh/lister/crates/lister.py | 11 +++- swh/lister/debian/tests/test_lister.py | 10 ++- swh/lister/gitea/lister.py | 14 +++-- swh/lister/gitea/tests/test_tasks.py | 6 +- swh/lister/github/lister.py | 2 +- swh/lister/gitlab/tests/test_lister.py | 70 +++++++++++---------- swh/lister/gitlab/tests/test_tasks.py | 3 +- swh/lister/gnu/lister.py | 4 +- swh/lister/gnu/tree.py | 8 +-- swh/lister/launchpad/lister.py | 3 +- swh/lister/launchpad/tests/test_lister.py | 14 ++++- swh/lister/maven/lister.py | 29 ++++++--- swh/lister/maven/tests/test_lister.py | 7 ++- swh/lister/maven/tests/test_tasks.py | 3 +- swh/lister/npm/tests/test_lister.py | 13 +++- swh/lister/opam/lister.py | 5 +- swh/lister/opam/tests/test_lister.py | 22 +++---- swh/lister/packagist/lister.py | 4 +- swh/lister/phabricator/lister.py | 4 +- swh/lister/phabricator/tests/test_lister.py | 4 +- swh/lister/pypi/lister.py | 10 +-- swh/lister/sourceforge/lister.py | 9 +-- swh/lister/tests/test_cli.py | 21 ++++--- swh/lister/tests/test_pattern.py | 4 +- swh/lister/tests/test_utils.py | 19 +++++- swh/lister/tuleap/lister.py | 14 +++-- swh/lister/tuleap/tests/test_tasks.py | 11 +++- 37 files changed, 265 insertions(+), 144 deletions(-) diff --git a/docs/new_lister_template.py b/docs/new_lister_template.py index 32d2152..20e3e90 100644 --- a/docs/new_lister_template.py +++ b/docs/new_lister_template.py @@ -26,9 +26,7 @@ NewForgeListerPage = List[Dict[str, Any]] @dataclass class NewForgeListerState: - """The NewForgeLister instance state. This is used for incremental listing. - - """ + """The NewForgeLister instance state. This is used for incremental listing.""" current: str = "" """Id of the last origin listed on an incremental pass""" @@ -36,9 +34,7 @@ class NewForgeListerState: # If there is no need to keep state, subclass StatelessLister[NewForgeListerPage] class NewForgeLister(Lister[NewForgeListerState, NewForgeListerPage]): - """List origins from the "NewForge" forge. - - """ + """List origins from the "NewForge" forge.""" # Part of the lister API, that identifies this lister LISTER_NAME = "" @@ -63,7 +59,10 @@ class NewForgeLister(Lister[NewForgeListerState, NewForgeListerPage]): credentials: CredentialsType = None, ): super().__init__( - scheduler=scheduler, credentials=credentials, url=url, instance=instance, + scheduler=scheduler, + credentials=credentials, + url=url, + instance=instance, ) self.session = requests.Session() diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py index ab364a9..6a99699 100644 --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -155,9 +155,7 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]): def get_origins_from_page( self, page: List[Dict[str, Any]] ) -> Iterator[ListedOrigin]: - """Convert a page of Bitbucket repositories into a list of ListedOrigins. - - """ + """Convert a page of Bitbucket repositories into a list of ListedOrigins.""" assert self.lister_obj.id is not None for repo in page: diff --git a/swh/lister/bitbucket/tests/test_lister.py b/swh/lister/bitbucket/tests/test_lister.py index ee5d79e..c568dbf 100644 --- a/swh/lister/bitbucket/tests/test_lister.py +++ b/swh/lister/bitbucket/tests/test_lister.py @@ -51,7 +51,10 @@ def test_bitbucket_incremental_lister( requests_mock.get( BitbucketLister.API_URL, - [{"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2},], + [ + {"json": bb_api_repositories_page1}, + {"json": bb_api_repositories_page2}, + ], ) lister = BitbucketLister(scheduler=swh_scheduler, page_size=10) diff --git a/swh/lister/bitbucket/tests/test_tasks.py b/swh/lister/bitbucket/tests/test_tasks.py index a646aa7..892e7db 100644 --- a/swh/lister/bitbucket/tests/test_tasks.py +++ b/swh/lister/bitbucket/tests/test_tasks.py @@ -25,7 +25,11 @@ def test_incremental_listing( res = swh_scheduler_celery_app.send_task( "swh.lister.bitbucket.tasks.IncrementalBitBucketLister", - kwargs=dict(page_size=100, username="username", password="password",), + kwargs=dict( + page_size=100, + username="username", + password="password", + ), ) assert res res.wait() @@ -41,7 +45,11 @@ def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_wor res = swh_scheduler_celery_app.send_task( "swh.lister.bitbucket.tasks.FullBitBucketRelister", - kwargs=dict(page_size=100, username="username", password="password",), + kwargs=dict( + page_size=100, + username="username", + password="password", + ), ) assert res res.wait() diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py index 3195f2a..c0d9113 100644 --- a/swh/lister/cgit/lister.py +++ b/swh/lister/cgit/lister.py @@ -67,7 +67,10 @@ class CGitLister(StatelessLister[Repositories]): """ super().__init__( - scheduler=scheduler, url=url, instance=instance, credentials=credentials, + scheduler=scheduler, + url=url, + instance=instance, + credentials=credentials, ) self.session = requests.Session() @@ -85,8 +88,8 @@ class CGitLister(StatelessLister[Repositories]): def get_pages(self) -> Iterator[Repositories]: """Generate git 'project' URLs found on the current CGit server - The last_update date is retrieved on the list of repo page to avoid - to compute it on the repository details which only give a date per branch + The last_update date is retrieved on the list of repo page to avoid + to compute it on the repository details which only give a date per branch """ next_page: Optional[str] = self.url while next_page: @@ -206,7 +209,9 @@ def _parse_last_updated_date(repository: Dict[str, Any]) -> Optional[datetime]: if not parsed_date: logger.warning( - "Could not parse %s last_updated date: %s", repository["url"], date, + "Could not parse %s last_updated date: %s", + repository["url"], + date, ) return parsed_date diff --git a/swh/lister/cgit/tests/test_lister.py b/swh/lister/cgit/tests/test_lister.py index 7f402c0..f996333 100644 --- a/swh/lister/cgit/tests/test_lister.py +++ b/swh/lister/cgit/tests/test_lister.py @@ -80,7 +80,11 @@ def test_lister_cgit_run_populates_last_update(requests_mock_datadir, swh_schedu urls_without_date = [ f"https://git.tizen.org/cgit/{suffix_url}" - for suffix_url in ["All-Projects", "All-Users", "Lock-Projects",] + for suffix_url in [ + "All-Projects", + "All-Users", + "Lock-Projects", + ] ] lister_cgit = CGitLister(swh_scheduler, url=url) @@ -145,7 +149,9 @@ def test_lister_cgit_date_parsing(date_str, expected_date): requests_mock_datadir_missing_url = requests_mock_datadir_factory( - ignore_urls=["https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12",] + ignore_urls=[ + "https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12", + ] ) @@ -208,10 +214,12 @@ def test_lister_cgit_from_configfile(swh_scheduler_config, mocker): def test_lister_cgit_with_base_git_url( url, base_git_url, expected_nb_origins, requests_mock_datadir, swh_scheduler ): - """With base git url provided, listed urls should be the computed origin urls - - """ - lister_cgit = CGitLister(swh_scheduler, url=url, base_git_url=base_git_url,) + """With base git url provided, listed urls should be the computed origin urls""" + lister_cgit = CGitLister( + swh_scheduler, + url=url, + base_git_url=base_git_url, + ) stats = lister_cgit.run() diff --git a/swh/lister/cgit/tests/test_tasks.py b/swh/lister/cgit/tests/test_tasks.py index b9a00cd..ce08c69 100644 --- a/swh/lister/cgit/tests/test_tasks.py +++ b/swh/lister/cgit/tests/test_tasks.py @@ -25,7 +25,8 @@ def test_cgit_lister_task( kwargs = dict(url="https://git.kernel.org/", instance="kernel", base_git_url=None) res = swh_scheduler_celery_app.send_task( - "swh.lister.cgit.tasks.CGitListerTask", kwargs=kwargs, + "swh.lister.cgit.tasks.CGitListerTask", + kwargs=kwargs, ) assert res res.wait() diff --git a/swh/lister/cli.py b/swh/lister/cli.py index 770eeeb..ec7655e 100644 --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -24,7 +24,10 @@ logger = logging.getLogger(__name__) "--config-file", "-C", default=None, - type=click.Path(exists=True, dir_okay=False,), + type=click.Path( + exists=True, + dir_okay=False, + ), help="Configuration file.", ) @click.pass_context diff --git a/swh/lister/cran/lister.py b/swh/lister/cran/lister.py index 97d24cf..e9f937a 100644 --- a/swh/lister/cran/lister.py +++ b/swh/lister/cran/lister.py @@ -132,7 +132,8 @@ def parse_packaged_date(package_info: Dict[str, str]) -> Optional[datetime]: ): try: packaged_at = datetime.strptime( - packaged_at_str.split(";")[0], date_format, + packaged_at_str.split(";")[0], + date_format, ).replace(tzinfo=timezone.utc) break except Exception: diff --git a/swh/lister/cran/tests/test_lister.py b/swh/lister/cran/tests/test_lister.py index fa0b463..a0bebfc 100644 --- a/swh/lister/cran/tests/test_lister.py +++ b/swh/lister/cran/tests/test_lister.py @@ -20,7 +20,12 @@ from swh.lister.cran.lister import ( def test_cran_compute_origin_urls(): pack = "something" vers = "0.0.1" - origin_url, artifact_url = compute_origin_urls({"Package": pack, "Version": vers,}) + origin_url, artifact_url = compute_origin_urls( + { + "Package": pack, + "Version": vers, + } + ) assert origin_url == f"{CRAN_MIRROR}/package={pack}" assert artifact_url == f"{CRAN_MIRROR}/src/contrib/{pack}_{vers}.tar.gz" diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py index 5a95049..d0c6984 100644 --- a/swh/lister/crates/lister.py +++ b/swh/lister/crates/lister.py @@ -42,7 +42,9 @@ class CratesLister(StatelessLister[CratesListerPage]): ) def __init__( - self, scheduler: SchedulerInterface, credentials: CredentialsType = None, + self, + scheduler: SchedulerInterface, + credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, @@ -55,7 +57,12 @@ class CratesLister(StatelessLister[CratesListerPage]): """Get crates.io-index repository up to date running git command.""" subprocess.check_call( - ["git", "clone", self.INDEX_REPOSITORY_URL, self.DESTINATION_PATH,] + [ + "git", + "clone", + self.INDEX_REPOSITORY_URL, + self.DESTINATION_PATH, + ] ) def get_crates_index(self) -> List[Path]: diff --git a/swh/lister/debian/tests/test_lister.py b/swh/lister/debian/tests/test_lister.py index 695d7f1..6f2711d 100644 --- a/swh/lister/debian/tests/test_lister.py +++ b/swh/lister/debian/tests/test_lister.py @@ -87,7 +87,9 @@ def _init_test( requests_mock.get(idx_url, status_code=404) else: requests_mock.get( - idx_url, text=sources, headers={"Last-Modified": last_modified}, + idx_url, + text=sources, + headers={"Last-Modified": last_modified}, ) for idx_url, _ in lister.debian_index_urls(suite, _components[1]): @@ -186,7 +188,11 @@ def test_lister_debian_all_suites( @pytest.mark.parametrize( "suites_params", - [[_suites[:1]], [_suites[:1], _suites[:2]], [_suites[:1], _suites[:2], _suites],], + [ + [_suites[:1]], + [_suites[:1], _suites[:2]], + [_suites[:1], _suites[:2], _suites], + ], ) def test_lister_debian_updated_packages( swh_scheduler: SchedulerInterface, diff --git a/swh/lister/gitea/lister.py b/swh/lister/gitea/lister.py index 19ca4aa..25bea4e 100644 --- a/swh/lister/gitea/lister.py +++ b/swh/lister/gitea/lister.py @@ -47,7 +47,10 @@ class GiteaLister(StatelessLister[RepoListPage]): credentials: CredentialsType = None, ): super().__init__( - scheduler=scheduler, credentials=credentials, url=url, instance=instance, + scheduler=scheduler, + credentials=credentials, + url=url, + instance=instance, ) self.query_params = { @@ -59,7 +62,10 @@ class GiteaLister(StatelessLister[RepoListPage]): self.session = requests.Session() self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT,} + { + "Accept": "application/json", + "User-Agent": USER_AGENT, + } ) if api_token is None: @@ -122,9 +128,7 @@ class GiteaLister(StatelessLister[RepoListPage]): response = self.page_request(url, {}) def get_origins_from_page(self, page: RepoListPage) -> Iterator[ListedOrigin]: - """Convert a page of Gitea repositories into a list of ListedOrigins. - - """ + """Convert a page of Gitea repositories into a list of ListedOrigins.""" assert self.lister_obj.id is not None for repo in page: diff --git a/swh/lister/gitea/tests/test_tasks.py b/swh/lister/gitea/tests/test_tasks.py index 458bc7e..a204cb1 100644 --- a/swh/lister/gitea/tests/test_tasks.py +++ b/swh/lister/gitea/tests/test_tasks.py @@ -23,7 +23,8 @@ def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_wor kwargs = dict(url="https://try.gitea.io/api/v1") res = swh_scheduler_celery_app.send_task( - "swh.lister.gitea.tasks.FullGiteaRelister", kwargs=kwargs, + "swh.lister.gitea.tasks.FullGiteaRelister", + kwargs=kwargs, ) assert res res.wait() @@ -49,7 +50,8 @@ def test_full_listing_params( page_size=50, ) res = swh_scheduler_celery_app.send_task( - "swh.lister.gitea.tasks.FullGiteaRelister", kwargs=kwargs, + "swh.lister.gitea.tasks.FullGiteaRelister", + kwargs=kwargs, ) assert res res.wait() diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py index f4246a2..2e051f1 100644 --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -138,7 +138,7 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]): first_id: the id of the first repo to list last_id: stop listing after seeing a repo with an id higher than this value. - """ # noqa: E501 + """ # noqa: B950 LISTER_NAME = "github" diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py index 10144a7..80650b8 100644 --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -29,16 +29,16 @@ def _match_request(request): def test_lister_gitlab(datadir, swh_scheduler, requests_mock): - """Gitlab lister supports full listing - - """ + """Gitlab lister supports full listing""" instance = "gitlab.com" lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance) response = gitlab_page_response(datadir, instance, 1) requests_mock.get( - lister.page_url(), [{"json": response}], additional_matcher=_match_request, + lister.page_url(), + [{"json": response}], + additional_matcher=_match_request, ) listed_result = lister.run() @@ -57,9 +57,7 @@ def test_lister_gitlab(datadir, swh_scheduler, requests_mock): def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock): - """Heptapod lister happily lists hg, hg_git as hg and git origins - - """ + """Heptapod lister happily lists hg, hg_git as hg and git origins""" name = "heptapod" instance = "foss.heptapod.net" lister = GitLabLister( @@ -70,7 +68,9 @@ def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock): response = gitlab_page_response(datadir, instance, 1) requests_mock.get( - lister.page_url(), [{"json": response}], additional_matcher=_match_request, + lister.page_url(), + [{"json": response}], + additional_matcher=_match_request, ) listed_result = lister.run() @@ -99,9 +99,7 @@ def gitlab_page_response(datadir, instance: str, id_after: int) -> List[Dict]: def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir): - """Gitlab lister supports pagination - - """ + """Gitlab lister supports pagination""" instance = "gite.lirmm.fr" lister = GitLabLister(swh_scheduler, url=api_url(instance)) @@ -115,7 +113,9 @@ def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir): ) requests_mock.get( - lister.page_url(2), [{"json": response2}], additional_matcher=_match_request, + lister.page_url(2), + [{"json": response2}], + additional_matcher=_match_request, ) listed_result = lister.run() @@ -135,9 +135,7 @@ def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir): def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir): - """Gitlab lister supports incremental visits - - """ + """Gitlab lister supports incremental visits""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True) @@ -155,7 +153,9 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir): additional_matcher=_match_request, ) requests_mock.get( - url_page2, [{"json": response2}], additional_matcher=_match_request, + url_page2, + [{"json": response2}], + additional_matcher=_match_request, ) listed_result = lister.run() @@ -173,7 +173,9 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir): additional_matcher=_match_request, ) requests_mock.get( - url_page3, [{"json": response3}], additional_matcher=_match_request, + url_page3, + [{"json": response3}], + additional_matcher=_match_request, ) listed_result2 = lister2.run() @@ -197,9 +199,7 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir): def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker): - """Gitlab lister supports rate-limit - - """ + """Gitlab lister supports rate-limit""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) @@ -241,9 +241,7 @@ def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker) def test_lister_gitlab_http_errors( swh_scheduler, requests_mock, datadir, mocker, status_code ): - """Gitlab lister should retry requests when encountering HTTP 50x errors - - """ + """Gitlab lister should retry requests when encountering HTTP 50x errors""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) @@ -281,9 +279,7 @@ def test_lister_gitlab_http_errors( def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir): - """Gitlab lister should skip buggy URL and move to next page. - - """ + """Gitlab lister should skip buggy URL and move to next page.""" instance = "gite.lirmm.fr" url = api_url(instance) lister = GitLabLister(swh_scheduler, url=url, instance=instance) @@ -300,11 +296,17 @@ def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir): additional_matcher=_match_request, ) requests_mock.get( - url_page2, [{"status_code": 500},], additional_matcher=_match_request, + url_page2, + [ + {"status_code": 500}, + ], + additional_matcher=_match_request, ) requests_mock.get( - url_page3, [{"json": response3}], additional_matcher=_match_request, + url_page3, + [{"json": response3}], + additional_matcher=_match_request, ) listed_result = lister.run() @@ -314,9 +316,7 @@ def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir): def test_lister_gitlab_credentials(swh_scheduler): - """Gitlab lister supports credentials configuration - - """ + """Gitlab lister supports credentials configuration""" instance = "gitlab" credentials = { "gitlab": {instance: [{"username": "user", "password": "api-token"}]} @@ -328,7 +328,13 @@ def test_lister_gitlab_credentials(swh_scheduler): assert lister.session.headers["Authorization"] == "Bearer api-token" -@pytest.mark.parametrize("url", [api_url("gitlab").rstrip("/"), api_url("gitlab"),]) +@pytest.mark.parametrize( + "url", + [ + api_url("gitlab").rstrip("/"), + api_url("gitlab"), + ], +) def test_lister_gitlab_url_computation(url, swh_scheduler): lister = GitLabLister(scheduler=swh_scheduler, url=url) assert not lister.url.endswith("/") diff --git a/swh/lister/gitlab/tests/test_tasks.py b/swh/lister/gitlab/tests/test_tasks.py index 38d5e92..3d0b2a3 100644 --- a/swh/lister/gitlab/tests/test_tasks.py +++ b/swh/lister/gitlab/tests/test_tasks.py @@ -34,7 +34,8 @@ def test_task_lister_gitlab( kwargs = dict(url="https://gitweb.torproject.org/") res = swh_scheduler_celery_app.send_task( - f"swh.lister.gitlab.tasks.{task_name}", kwargs=kwargs, + f"swh.lister.gitlab.tasks.{task_name}", + kwargs=kwargs, ) assert res res.wait() diff --git a/swh/lister/gnu/lister.py b/swh/lister/gnu/lister.py index 3d35829..65eca1f 100644 --- a/swh/lister/gnu/lister.py +++ b/swh/lister/gnu/lister.py @@ -28,7 +28,9 @@ class GNULister(StatelessLister[GNUPageType]): GNU_FTP_URL = "https://ftp.gnu.org" def __init__( - self, scheduler: SchedulerInterface, credentials: CredentialsType = None, + self, + scheduler: SchedulerInterface, + credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, diff --git a/swh/lister/gnu/tree.py b/swh/lister/gnu/tree.py index ba74e04..f414ef3 100644 --- a/swh/lister/gnu/tree.py +++ b/swh/lister/gnu/tree.py @@ -19,9 +19,7 @@ logger = logging.getLogger(__name__) class GNUTree: - """Gnu Tree's representation - - """ + """Gnu Tree's representation""" def __init__(self, url: str): self.url = url # filepath or uri @@ -330,7 +328,5 @@ def load_raw_data(url: str) -> Sequence[Mapping]: def format_date(timestamp: str) -> str: - """Format a string timestamp to an isoformat string - - """ + """Format a string timestamp to an isoformat string""" return datetime.fromtimestamp(int(timestamp), tz=timezone.utc).isoformat() diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py index 8078175..b134303 100644 --- a/swh/lister/launchpad/lister.py +++ b/swh/lister/launchpad/lister.py @@ -117,7 +117,8 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): } return get_vcs_fns[vcs_type]( - order_by="most neglected first", modified_since_date=date_last_modified, + order_by="most neglected first", + modified_since_date=date_last_modified, ) def get_pages(self) -> Iterator[LaunchpadPageType]: diff --git a/swh/lister/launchpad/tests/test_lister.py b/swh/lister/launchpad/tests/test_lister.py index 59fe605..0768dd5 100644 --- a/swh/lister/launchpad/tests/test_lister.py +++ b/swh/lister/launchpad/tests/test_lister.py @@ -192,9 +192,16 @@ def test_launchpad_incremental_lister( def test_launchpad_lister_invalid_url_filtering( - swh_scheduler, mocker, + swh_scheduler, + mocker, ): - invalid_origin = [_Repo({"git_https_url": "tag:launchpad.net:2008:redacted",})] + invalid_origin = [ + _Repo( + { + "git_https_url": "tag:launchpad.net:2008:redacted", + } + ) + ] _mock_launchpad(mocker, invalid_origin) lister = LaunchpadLister(scheduler=swh_scheduler) stats = lister.run() @@ -205,7 +212,8 @@ def test_launchpad_lister_invalid_url_filtering( def test_launchpad_lister_duplicated_origin( - swh_scheduler, mocker, + swh_scheduler, + mocker, ): origin = _Repo( { diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py index 2d57550..a940a4d 100644 --- a/swh/lister/maven/lister.py +++ b/swh/lister/maven/lister.py @@ -87,12 +87,18 @@ class MavenLister(Lister[MavenListerState, RepoPage]): instance = parse_url(url).host super().__init__( - scheduler=scheduler, credentials=credentials, url=url, instance=instance, + scheduler=scheduler, + credentials=credentials, + url=url, + instance=instance, ) self.session = requests.Session() self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT,} + { + "Accept": "application/json", + "User-Agent": USER_AGENT, + } ) def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState: @@ -119,7 +125,7 @@ class MavenLister(Lister[MavenListerState, RepoPage]): return response def get_pages(self) -> Iterator[RepoPage]: - """ Retrieve and parse exported maven indexes to + """Retrieve and parse exported maven indexes to identify all pom files and src archives. """ @@ -213,7 +219,10 @@ class MavenLister(Lister[MavenListerState, RepoPage]): ): continue url_path = f"{path}/{aid}/{version}/{aid}-{version}.{ext}" - url_pom = urljoin(self.BASE_URL, url_path,) + url_pom = urljoin( + self.BASE_URL, + url_path, + ) out_pom[url_pom] = doc_id elif ( classifier.lower() == "sources" or ("src" in classifier) @@ -271,9 +280,7 @@ class MavenLister(Lister[MavenListerState, RepoPage]): logger.info("Could not parse POM %s XML: %s. Next.", pom, error) def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]: - """Convert a page of Maven repositories into a list of ListedOrigins. - - """ + """Convert a page of Maven repositories into a list of ListedOrigins.""" assert self.lister_obj.id is not None scm_types_ok = ("git", "svn", "hg", "cvs", "bzr") if page["type"] == "scm": @@ -288,13 +295,17 @@ class MavenLister(Lister[MavenListerState, RepoPage]): if scm_type in scm_types_ok: scm_url = m_scm.group("url") origin = ListedOrigin( - lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type, + lister_id=self.lister_obj.id, + url=scm_url, + visit_type=scm_type, ) yield origin else: if page["url"].endswith(".git"): origin = ListedOrigin( - lister_id=self.lister_obj.id, url=page["url"], visit_type="git", + lister_id=self.lister_obj.id, + url=page["url"], + visit_type="git", ) yield origin else: diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py index c81ee96..c8142ef 100644 --- a/swh/lister/maven/tests/test_lister.py +++ b/swh/lister/maven/tests/test_lister.py @@ -84,7 +84,12 @@ def maven_pom_3(datadir) -> str: def test_maven_full_listing( - swh_scheduler, requests_mock, mocker, maven_index, maven_pom_1, maven_pom_2, + swh_scheduler, + requests_mock, + mocker, + maven_index, + maven_pom_1, + maven_pom_2, ): """Covers full listing of multiple pages, checking page results and listed origins, statelessness.""" diff --git a/swh/lister/maven/tests/test_tasks.py b/swh/lister/maven/tests/test_tasks.py index 864c00d..b95dfda 100644 --- a/swh/lister/maven/tests/test_tasks.py +++ b/swh/lister/maven/tests/test_tasks.py @@ -35,7 +35,8 @@ def test_task_lister_maven( url="https://repo1.maven.org/maven2/", index_url="http://indexes/export.fld" ) res = swh_scheduler_celery_app.send_task( - f"swh.lister.maven.tasks.{task_name}", kwargs=kwargs, + f"swh.lister.maven.tasks.{task_name}", + kwargs=kwargs, ) assert res res.wait() diff --git a/swh/lister/npm/tests/test_lister.py b/swh/lister/npm/tests/test_lister.py index 8ceb86c..1c20b33 100644 --- a/swh/lister/npm/tests/test_lister.py +++ b/swh/lister/npm/tests/test_lister.py @@ -66,7 +66,10 @@ def test_npm_lister_full( requests_mock.get( lister.API_FULL_LISTING_URL, - [{"json": npm_full_listing_page1}, {"json": npm_full_listing_page2},], + [ + {"json": npm_full_listing_page1}, + {"json": npm_full_listing_page2}, + ], additional_matcher=_match_request, ) @@ -169,7 +172,9 @@ def test_npm_lister_incremental( def test_npm_lister_incremental_restart( - swh_scheduler, requests_mock, mocker, + swh_scheduler, + requests_mock, + mocker, ): """Check incremental npm listing will restart from saved state""" page_size = 2 @@ -190,7 +195,9 @@ def test_npm_lister_incremental_restart( def test_npm_lister_http_error( - swh_scheduler, requests_mock, mocker, + swh_scheduler, + requests_mock, + mocker, ): lister = NpmLister(scheduler=swh_scheduler) diff --git a/swh/lister/opam/lister.py b/swh/lister/opam/lister.py index 4ad510e..724d198 100644 --- a/swh/lister/opam/lister.py +++ b/swh/lister/opam/lister.py @@ -48,7 +48,10 @@ class OpamLister(StatelessLister[PageType]): opam_root: str = "/tmp/opam/", ): super().__init__( - scheduler=scheduler, credentials=credentials, url=url, instance=instance, + scheduler=scheduler, + credentials=credentials, + url=url, + instance=instance, ) self.env = os.environ.copy() # Opam root folder is initialized in the :meth:`get_pages` method as no diff --git a/swh/lister/opam/tests/test_lister.py b/swh/lister/opam/tests/test_lister.py index b39c501..26dc753 100644 --- a/swh/lister/opam/tests/test_lister.py +++ b/swh/lister/opam/tests/test_lister.py @@ -17,9 +17,7 @@ module_name = "swh.lister.opam.lister" @pytest.fixture def mock_opam(mocker): - """Fixture to bypass the actual opam calls within the test context. - - """ + """Fixture to bypass the actual opam calls within the test context.""" # inhibits the real `subprocess.call` which prepares the required internal opam # state mock_init = mocker.patch(f"{module_name}.call", return_value=None) @@ -31,9 +29,7 @@ def mock_opam(mocker): def test_mock_init_repository_init(mock_opam, tmp_path, datadir): - """Initializing opam root directory with an instance should be ok - - """ + """Initializing opam root directory with an instance should be ok""" mock_init, mock_popen = mock_opam instance = "fake" @@ -48,9 +44,7 @@ def test_mock_init_repository_init(mock_opam, tmp_path, datadir): def test_mock_init_repository_update(mock_opam, tmp_path, datadir): - """Updating opam root directory with another instance should be ok - - """ + """Updating opam root directory with another instance should be ok""" mock_init, mock_popen = mock_opam instance = "fake_opam_repo" @@ -74,7 +68,10 @@ def test_lister_opam_optional_instance(swh_scheduler): netloc = "opam.ocaml.org" instance_url = f"https://{netloc}" - lister = OpamLister(swh_scheduler, url=instance_url,) + lister = OpamLister( + swh_scheduler, + url=instance_url, + ) assert lister.instance == netloc assert lister.opam_root == "/tmp/opam/" @@ -85,7 +82,10 @@ def test_urls(swh_scheduler, mock_opam, tmp_path): tmp_folder = mkdtemp(dir=tmp_path, prefix="swh_opam_lister") lister = OpamLister( - swh_scheduler, url=instance_url, instance="opam", opam_root=tmp_folder, + swh_scheduler, + url=instance_url, + instance="opam", + opam_root=tmp_folder, ) assert lister.instance == "opam" assert lister.opam_root == tmp_folder diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py index 9378691..19b4721 100644 --- a/swh/lister/packagist/lister.py +++ b/swh/lister/packagist/lister.py @@ -51,7 +51,9 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]): PACKAGIST_REPO_BASE_URL = "https://repo.packagist.org/p" def __init__( - self, scheduler: SchedulerInterface, credentials: CredentialsType = None, + self, + scheduler: SchedulerInterface, + credentials: CredentialsType = None, ): super().__init__( scheduler=scheduler, diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py index 1dbee37..83ddc31 100644 --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -112,7 +112,9 @@ class PhabricatorLister(StatelessLister[PageType]): if response_data.get("result") is None: logger.warning( - "Got unexpected response on %s: %s", response.url, response_data, + "Got unexpected response on %s: %s", + response.url, + response_data, ) break diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py index a21d302..a638c40 100644 --- a/swh/lister/phabricator/tests/test_lister.py +++ b/swh/lister/phabricator/tests/test_lister.py @@ -115,7 +115,9 @@ def test_lister( def test_lister_request_error( - swh_scheduler, requests_mock, phabricator_repositories_page1, + swh_scheduler, + requests_mock, + phabricator_repositories_page1, ): FORGE_BASE_URL = "https://forge.softwareheritage.org" diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py index abc27ec..eefd797 100644 --- a/swh/lister/pypi/lister.py +++ b/swh/lister/pypi/lister.py @@ -54,16 +54,12 @@ def _if_rate_limited(retry_state) -> bool: def pypi_url(package_name: str) -> str: - """Build pypi url out of a package name. - - """ + """Build pypi url out of a package name.""" return PyPILister.PACKAGE_URL.format(package_name=package_name) class PyPILister(Lister[PyPIListerState, PackageListPage]): - """List origins from PyPI. - - """ + """List origins from PyPI.""" LISTER_NAME = "pypi" INSTANCE = "pypi" # As of today only the main pypi.org is used @@ -168,7 +164,7 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]): def finalize(self): """Finalize the visit state by updating with the new last_serial if updates - actually happened. + actually happened. """ self.updated = ( diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py index 6199240..c95a089 100644 --- a/swh/lister/sourceforge/lister.py +++ b/swh/lister/sourceforge/lister.py @@ -55,8 +55,7 @@ LastModifiedT = datetime.date @dataclass class SourceForgeListerState: - """Current state of the SourceForge lister in incremental runs - """ + """Current state of the SourceForge lister in incremental runs""" """If the subsitemap does not exist, we assume a full run of this subsitemap is needed. If the date is the same, we skip the subsitemap, otherwise we @@ -105,9 +104,7 @@ ProjectsLastModifiedCache = Dict[Tuple[str, str], LastModifiedT] class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]): - """List origins from the "SourceForge" forge. - - """ + """List origins from the "SourceForge" forge.""" # Part of the lister API, that identifies this lister LISTER_NAME = "sourceforge" @@ -386,7 +383,7 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]): bs = BeautifulSoup(response.text, features="html.parser") cvs_base_url = "rsync://a.cvs.sourceforge.net/cvsroot" for text in [b.text for b in bs.find_all("b")]: - match = re.search(fr".*/cvsroot/{project} co -P (.+)", text) + match = re.search(rf".*/cvsroot/{project} co -P (.+)", text) if match is not None: module = match.group(1) url = f"{cvs_base_url}/{project}/{module}" diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py index e2585df..dfaf72b 100644 --- a/swh/lister/tests/test_cli.py +++ b/swh/lister/tests/test_cli.py @@ -8,15 +8,24 @@ import pytest from swh.lister.cli import SUPPORTED_LISTERS, get_lister lister_args = { - "cgit": {"url": "https://git.eclipse.org/c/",}, + "cgit": { + "url": "https://git.eclipse.org/c/", + }, "phabricator": { "instance": "softwareheritage", "url": "https://forge.softwareheritage.org/api/diffusion.repository.search", "api_token": "bogus", }, - "gitea": {"url": "https://try.gitea.io/api/v1/",}, - "tuleap": {"url": "https://tuleap.net",}, - "gitlab": {"url": "https://gitlab.ow2.org/api/v4", "instance": "ow2",}, + "gitea": { + "url": "https://try.gitea.io/api/v1/", + }, + "tuleap": { + "url": "https://tuleap.net", + }, + "gitlab": { + "url": "https://gitlab.ow2.org/api/v4", + "instance": "ow2", + }, "opam": {"url": "https://opam.ocaml.org", "instance": "opam"}, "maven": { "url": "https://repo1.maven.org/maven2/", @@ -34,9 +43,7 @@ def test_get_lister_wrong_input(): def test_get_lister(swh_scheduler_config): - """Instantiating a supported lister should be ok - - """ + """Instantiating a supported lister should be ok""" # Drop launchpad lister from the lister to check, its test setup is more involved # than the other listers and it's not currently done here for lister_name in SUPPORTED_LISTERS: diff --git a/swh/lister/tests/test_pattern.py b/swh/lister/tests/test_pattern.py index 795b715..192f8f7 100644 --- a/swh/lister/tests/test_pattern.py +++ b/swh/lister/tests/test_pattern.py @@ -154,7 +154,9 @@ class InstantiableStatelessLister(pattern.StatelessLister[PageType]): def test_stateless_instantiation(swh_scheduler): lister = InstantiableStatelessLister( - scheduler=swh_scheduler, url="https://example.com", instance="example.com", + scheduler=swh_scheduler, + url="https://example.com", + instance="example.com", ) # check the lister was registered in the scheduler backend diff --git a/swh/lister/tests/test_utils.py b/swh/lister/tests/test_utils.py index acb73fe..6d9b50d 100644 --- a/swh/lister/tests/test_utils.py +++ b/swh/lister/tests/test_utils.py @@ -21,7 +21,19 @@ from swh.lister.utils import ( (14, 5, [(0, 4), (5, 9), (10, 14)]), (19, 10, [(0, 9), (10, 19)]), (20, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 20)]), - (21, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 21),],), + ( + 21, + 3, + [ + (0, 2), + (3, 5), + (6, 8), + (9, 11), + (12, 14), + (15, 17), + (18, 21), + ], + ), ], ) def test_split_range(total_pages, nb_pages, expected_ranges): @@ -72,7 +84,8 @@ def test_throttling_retry(requests_mock, mocker): def test_throttling_retry_max_attemps(requests_mock, mocker): requests_mock.get( - TEST_URL, [{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS), + TEST_URL, + [{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS), ) mock_sleep = mocker.patch.object(make_request.retry, "sleep") @@ -85,7 +98,7 @@ def test_throttling_retry_max_attemps(requests_mock, mocker): assert_sleep_calls( mocker, mock_sleep, - [float(WAIT_EXP_BASE ** i) for i in range(MAX_NUMBER_ATTEMPTS - 1)], + [float(WAIT_EXP_BASE**i) for i in range(MAX_NUMBER_ATTEMPTS - 1)], ) diff --git a/swh/lister/tuleap/lister.py b/swh/lister/tuleap/lister.py index b630508..179329a 100644 --- a/swh/lister/tuleap/lister.py +++ b/swh/lister/tuleap/lister.py @@ -51,12 +51,18 @@ class TuleapLister(StatelessLister[RepoPage]): credentials: CredentialsType = None, ): super().__init__( - scheduler=scheduler, credentials=credentials, url=url, instance=instance, + scheduler=scheduler, + credentials=credentials, + url=url, + instance=instance, ) self.session = requests.Session() self.session.headers.update( - {"Accept": "application/json", "User-Agent": USER_AGENT,} + { + "Accept": "application/json", + "User-Agent": USER_AGENT, + } ) @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) @@ -133,9 +139,7 @@ class TuleapLister(StatelessLister[RepoPage]): yield self.results_simplified(url_api, "git", repo) def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]: - """Convert a page of Tuleap repositories into a list of ListedOrigins. - - """ + """Convert a page of Tuleap repositories into a list of ListedOrigins.""" assert self.lister_obj.id is not None yield ListedOrigin( diff --git a/swh/lister/tuleap/tests/test_tasks.py b/swh/lister/tuleap/tests/test_tasks.py index a9b3cf2..2f394b9 100644 --- a/swh/lister/tuleap/tests/test_tasks.py +++ b/swh/lister/tuleap/tests/test_tasks.py @@ -21,7 +21,8 @@ def test_full_listing(swh_scheduler_celery_app, swh_scheduler_celery_worker, moc kwargs = dict(url="https://tuleap.net") res = swh_scheduler_celery_app.send_task( - "swh.lister.tuleap.tasks.FullTuleapLister", kwargs=kwargs, + "swh.lister.tuleap.tasks.FullTuleapLister", + kwargs=kwargs, ) assert res res.wait() @@ -38,9 +39,13 @@ def test_full_listing_params( lister.from_configfile.return_value = lister lister.run.return_value = ListerStats(pages=10, origins=500) - kwargs = dict(url="https://tuleap.net", instance="tuleap.net",) + kwargs = dict( + url="https://tuleap.net", + instance="tuleap.net", + ) res = swh_scheduler_celery_app.send_task( - "swh.lister.tuleap.tasks.FullTuleapLister", kwargs=kwargs, + "swh.lister.tuleap.tasks.FullTuleapLister", + kwargs=kwargs, ) assert res res.wait()