python: Reformat code with black 22.3.0
Related to T3922
This commit is contained in:
parent
00f1b99ad9
commit
d38e05cff7
37 changed files with 265 additions and 144 deletions
|
@ -26,9 +26,7 @@ NewForgeListerPage = List[Dict[str, Any]]
|
|||
|
||||
@dataclass
|
||||
class NewForgeListerState:
|
||||
"""The NewForgeLister instance state. This is used for incremental listing.
|
||||
|
||||
"""
|
||||
"""The NewForgeLister instance state. This is used for incremental listing."""
|
||||
|
||||
current: str = ""
|
||||
"""Id of the last origin listed on an incremental pass"""
|
||||
|
@ -36,9 +34,7 @@ class NewForgeListerState:
|
|||
|
||||
# If there is no need to keep state, subclass StatelessLister[NewForgeListerPage]
|
||||
class NewForgeLister(Lister[NewForgeListerState, NewForgeListerPage]):
|
||||
"""List origins from the "NewForge" forge.
|
||||
|
||||
"""
|
||||
"""List origins from the "NewForge" forge."""
|
||||
|
||||
# Part of the lister API, that identifies this lister
|
||||
LISTER_NAME = ""
|
||||
|
@ -63,7 +59,10 @@ class NewForgeLister(Lister[NewForgeListerState, NewForgeListerPage]):
|
|||
credentials: CredentialsType = None,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=url,
|
||||
instance=instance,
|
||||
)
|
||||
|
||||
self.session = requests.Session()
|
||||
|
|
|
@ -155,9 +155,7 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
|
|||
def get_origins_from_page(
|
||||
self, page: List[Dict[str, Any]]
|
||||
) -> Iterator[ListedOrigin]:
|
||||
"""Convert a page of Bitbucket repositories into a list of ListedOrigins.
|
||||
|
||||
"""
|
||||
"""Convert a page of Bitbucket repositories into a list of ListedOrigins."""
|
||||
assert self.lister_obj.id is not None
|
||||
|
||||
for repo in page:
|
||||
|
|
|
@ -51,7 +51,10 @@ def test_bitbucket_incremental_lister(
|
|||
|
||||
requests_mock.get(
|
||||
BitbucketLister.API_URL,
|
||||
[{"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2},],
|
||||
[
|
||||
{"json": bb_api_repositories_page1},
|
||||
{"json": bb_api_repositories_page2},
|
||||
],
|
||||
)
|
||||
|
||||
lister = BitbucketLister(scheduler=swh_scheduler, page_size=10)
|
||||
|
|
|
@ -25,7 +25,11 @@ def test_incremental_listing(
|
|||
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.bitbucket.tasks.IncrementalBitBucketLister",
|
||||
kwargs=dict(page_size=100, username="username", password="password",),
|
||||
kwargs=dict(
|
||||
page_size=100,
|
||||
username="username",
|
||||
password="password",
|
||||
),
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
@ -41,7 +45,11 @@ def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_wor
|
|||
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.bitbucket.tasks.FullBitBucketRelister",
|
||||
kwargs=dict(page_size=100, username="username", password="password",),
|
||||
kwargs=dict(
|
||||
page_size=100,
|
||||
username="username",
|
||||
password="password",
|
||||
),
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
|
|
@ -67,7 +67,10 @@ class CGitLister(StatelessLister[Repositories]):
|
|||
|
||||
"""
|
||||
super().__init__(
|
||||
scheduler=scheduler, url=url, instance=instance, credentials=credentials,
|
||||
scheduler=scheduler,
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
)
|
||||
|
||||
self.session = requests.Session()
|
||||
|
@ -85,8 +88,8 @@ class CGitLister(StatelessLister[Repositories]):
|
|||
|
||||
def get_pages(self) -> Iterator[Repositories]:
|
||||
"""Generate git 'project' URLs found on the current CGit server
|
||||
The last_update date is retrieved on the list of repo page to avoid
|
||||
to compute it on the repository details which only give a date per branch
|
||||
The last_update date is retrieved on the list of repo page to avoid
|
||||
to compute it on the repository details which only give a date per branch
|
||||
"""
|
||||
next_page: Optional[str] = self.url
|
||||
while next_page:
|
||||
|
@ -206,7 +209,9 @@ def _parse_last_updated_date(repository: Dict[str, Any]) -> Optional[datetime]:
|
|||
|
||||
if not parsed_date:
|
||||
logger.warning(
|
||||
"Could not parse %s last_updated date: %s", repository["url"], date,
|
||||
"Could not parse %s last_updated date: %s",
|
||||
repository["url"],
|
||||
date,
|
||||
)
|
||||
|
||||
return parsed_date
|
||||
|
|
|
@ -80,7 +80,11 @@ def test_lister_cgit_run_populates_last_update(requests_mock_datadir, swh_schedu
|
|||
|
||||
urls_without_date = [
|
||||
f"https://git.tizen.org/cgit/{suffix_url}"
|
||||
for suffix_url in ["All-Projects", "All-Users", "Lock-Projects",]
|
||||
for suffix_url in [
|
||||
"All-Projects",
|
||||
"All-Users",
|
||||
"Lock-Projects",
|
||||
]
|
||||
]
|
||||
|
||||
lister_cgit = CGitLister(swh_scheduler, url=url)
|
||||
|
@ -145,7 +149,9 @@ def test_lister_cgit_date_parsing(date_str, expected_date):
|
|||
|
||||
|
||||
requests_mock_datadir_missing_url = requests_mock_datadir_factory(
|
||||
ignore_urls=["https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12",]
|
||||
ignore_urls=[
|
||||
"https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
|
@ -208,10 +214,12 @@ def test_lister_cgit_from_configfile(swh_scheduler_config, mocker):
|
|||
def test_lister_cgit_with_base_git_url(
|
||||
url, base_git_url, expected_nb_origins, requests_mock_datadir, swh_scheduler
|
||||
):
|
||||
"""With base git url provided, listed urls should be the computed origin urls
|
||||
|
||||
"""
|
||||
lister_cgit = CGitLister(swh_scheduler, url=url, base_git_url=base_git_url,)
|
||||
"""With base git url provided, listed urls should be the computed origin urls"""
|
||||
lister_cgit = CGitLister(
|
||||
swh_scheduler,
|
||||
url=url,
|
||||
base_git_url=base_git_url,
|
||||
)
|
||||
|
||||
stats = lister_cgit.run()
|
||||
|
||||
|
|
|
@ -25,7 +25,8 @@ def test_cgit_lister_task(
|
|||
kwargs = dict(url="https://git.kernel.org/", instance="kernel", base_git_url=None)
|
||||
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.cgit.tasks.CGitListerTask", kwargs=kwargs,
|
||||
"swh.lister.cgit.tasks.CGitListerTask",
|
||||
kwargs=kwargs,
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
|
|
@ -24,7 +24,10 @@ logger = logging.getLogger(__name__)
|
|||
"--config-file",
|
||||
"-C",
|
||||
default=None,
|
||||
type=click.Path(exists=True, dir_okay=False,),
|
||||
type=click.Path(
|
||||
exists=True,
|
||||
dir_okay=False,
|
||||
),
|
||||
help="Configuration file.",
|
||||
)
|
||||
@click.pass_context
|
||||
|
|
|
@ -132,7 +132,8 @@ def parse_packaged_date(package_info: Dict[str, str]) -> Optional[datetime]:
|
|||
):
|
||||
try:
|
||||
packaged_at = datetime.strptime(
|
||||
packaged_at_str.split(";")[0], date_format,
|
||||
packaged_at_str.split(";")[0],
|
||||
date_format,
|
||||
).replace(tzinfo=timezone.utc)
|
||||
break
|
||||
except Exception:
|
||||
|
|
|
@ -20,7 +20,12 @@ from swh.lister.cran.lister import (
|
|||
def test_cran_compute_origin_urls():
|
||||
pack = "something"
|
||||
vers = "0.0.1"
|
||||
origin_url, artifact_url = compute_origin_urls({"Package": pack, "Version": vers,})
|
||||
origin_url, artifact_url = compute_origin_urls(
|
||||
{
|
||||
"Package": pack,
|
||||
"Version": vers,
|
||||
}
|
||||
)
|
||||
|
||||
assert origin_url == f"{CRAN_MIRROR}/package={pack}"
|
||||
assert artifact_url == f"{CRAN_MIRROR}/src/contrib/{pack}_{vers}.tar.gz"
|
||||
|
|
|
@ -42,7 +42,9 @@ class CratesLister(StatelessLister[CratesListerPage]):
|
|||
)
|
||||
|
||||
def __init__(
|
||||
self, scheduler: SchedulerInterface, credentials: CredentialsType = None,
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
credentials: CredentialsType = None,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
|
@ -55,7 +57,12 @@ class CratesLister(StatelessLister[CratesListerPage]):
|
|||
"""Get crates.io-index repository up to date running git command."""
|
||||
|
||||
subprocess.check_call(
|
||||
["git", "clone", self.INDEX_REPOSITORY_URL, self.DESTINATION_PATH,]
|
||||
[
|
||||
"git",
|
||||
"clone",
|
||||
self.INDEX_REPOSITORY_URL,
|
||||
self.DESTINATION_PATH,
|
||||
]
|
||||
)
|
||||
|
||||
def get_crates_index(self) -> List[Path]:
|
||||
|
|
|
@ -87,7 +87,9 @@ def _init_test(
|
|||
requests_mock.get(idx_url, status_code=404)
|
||||
else:
|
||||
requests_mock.get(
|
||||
idx_url, text=sources, headers={"Last-Modified": last_modified},
|
||||
idx_url,
|
||||
text=sources,
|
||||
headers={"Last-Modified": last_modified},
|
||||
)
|
||||
|
||||
for idx_url, _ in lister.debian_index_urls(suite, _components[1]):
|
||||
|
@ -186,7 +188,11 @@ def test_lister_debian_all_suites(
|
|||
|
||||
@pytest.mark.parametrize(
|
||||
"suites_params",
|
||||
[[_suites[:1]], [_suites[:1], _suites[:2]], [_suites[:1], _suites[:2], _suites],],
|
||||
[
|
||||
[_suites[:1]],
|
||||
[_suites[:1], _suites[:2]],
|
||||
[_suites[:1], _suites[:2], _suites],
|
||||
],
|
||||
)
|
||||
def test_lister_debian_updated_packages(
|
||||
swh_scheduler: SchedulerInterface,
|
||||
|
|
|
@ -47,7 +47,10 @@ class GiteaLister(StatelessLister[RepoListPage]):
|
|||
credentials: CredentialsType = None,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=url,
|
||||
instance=instance,
|
||||
)
|
||||
|
||||
self.query_params = {
|
||||
|
@ -59,7 +62,10 @@ class GiteaLister(StatelessLister[RepoListPage]):
|
|||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
{"Accept": "application/json", "User-Agent": USER_AGENT,}
|
||||
{
|
||||
"Accept": "application/json",
|
||||
"User-Agent": USER_AGENT,
|
||||
}
|
||||
)
|
||||
|
||||
if api_token is None:
|
||||
|
@ -122,9 +128,7 @@ class GiteaLister(StatelessLister[RepoListPage]):
|
|||
response = self.page_request(url, {})
|
||||
|
||||
def get_origins_from_page(self, page: RepoListPage) -> Iterator[ListedOrigin]:
|
||||
"""Convert a page of Gitea repositories into a list of ListedOrigins.
|
||||
|
||||
"""
|
||||
"""Convert a page of Gitea repositories into a list of ListedOrigins."""
|
||||
assert self.lister_obj.id is not None
|
||||
|
||||
for repo in page:
|
||||
|
|
|
@ -23,7 +23,8 @@ def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_wor
|
|||
|
||||
kwargs = dict(url="https://try.gitea.io/api/v1")
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.gitea.tasks.FullGiteaRelister", kwargs=kwargs,
|
||||
"swh.lister.gitea.tasks.FullGiteaRelister",
|
||||
kwargs=kwargs,
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
@ -49,7 +50,8 @@ def test_full_listing_params(
|
|||
page_size=50,
|
||||
)
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.gitea.tasks.FullGiteaRelister", kwargs=kwargs,
|
||||
"swh.lister.gitea.tasks.FullGiteaRelister",
|
||||
kwargs=kwargs,
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
|
|
@ -138,7 +138,7 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
|
|||
first_id: the id of the first repo to list
|
||||
last_id: stop listing after seeing a repo with an id higher than this value.
|
||||
|
||||
""" # noqa: E501
|
||||
""" # noqa: B950
|
||||
|
||||
LISTER_NAME = "github"
|
||||
|
||||
|
|
|
@ -29,16 +29,16 @@ def _match_request(request):
|
|||
|
||||
|
||||
def test_lister_gitlab(datadir, swh_scheduler, requests_mock):
|
||||
"""Gitlab lister supports full listing
|
||||
|
||||
"""
|
||||
"""Gitlab lister supports full listing"""
|
||||
instance = "gitlab.com"
|
||||
lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance)
|
||||
|
||||
response = gitlab_page_response(datadir, instance, 1)
|
||||
|
||||
requests_mock.get(
|
||||
lister.page_url(), [{"json": response}], additional_matcher=_match_request,
|
||||
lister.page_url(),
|
||||
[{"json": response}],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
listed_result = lister.run()
|
||||
|
@ -57,9 +57,7 @@ def test_lister_gitlab(datadir, swh_scheduler, requests_mock):
|
|||
|
||||
|
||||
def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock):
|
||||
"""Heptapod lister happily lists hg, hg_git as hg and git origins
|
||||
|
||||
"""
|
||||
"""Heptapod lister happily lists hg, hg_git as hg and git origins"""
|
||||
name = "heptapod"
|
||||
instance = "foss.heptapod.net"
|
||||
lister = GitLabLister(
|
||||
|
@ -70,7 +68,9 @@ def test_lister_gitlab_heptapod(datadir, swh_scheduler, requests_mock):
|
|||
response = gitlab_page_response(datadir, instance, 1)
|
||||
|
||||
requests_mock.get(
|
||||
lister.page_url(), [{"json": response}], additional_matcher=_match_request,
|
||||
lister.page_url(),
|
||||
[{"json": response}],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
listed_result = lister.run()
|
||||
|
@ -99,9 +99,7 @@ def gitlab_page_response(datadir, instance: str, id_after: int) -> List[Dict]:
|
|||
|
||||
|
||||
def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
|
||||
"""Gitlab lister supports pagination
|
||||
|
||||
"""
|
||||
"""Gitlab lister supports pagination"""
|
||||
instance = "gite.lirmm.fr"
|
||||
lister = GitLabLister(swh_scheduler, url=api_url(instance))
|
||||
|
||||
|
@ -115,7 +113,9 @@ def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
|
|||
)
|
||||
|
||||
requests_mock.get(
|
||||
lister.page_url(2), [{"json": response2}], additional_matcher=_match_request,
|
||||
lister.page_url(2),
|
||||
[{"json": response2}],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
listed_result = lister.run()
|
||||
|
@ -135,9 +135,7 @@ def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
|
|||
|
||||
|
||||
def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
|
||||
"""Gitlab lister supports incremental visits
|
||||
|
||||
"""
|
||||
"""Gitlab lister supports incremental visits"""
|
||||
instance = "gite.lirmm.fr"
|
||||
url = api_url(instance)
|
||||
lister = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True)
|
||||
|
@ -155,7 +153,9 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
|
|||
additional_matcher=_match_request,
|
||||
)
|
||||
requests_mock.get(
|
||||
url_page2, [{"json": response2}], additional_matcher=_match_request,
|
||||
url_page2,
|
||||
[{"json": response2}],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
listed_result = lister.run()
|
||||
|
@ -173,7 +173,9 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
|
|||
additional_matcher=_match_request,
|
||||
)
|
||||
requests_mock.get(
|
||||
url_page3, [{"json": response3}], additional_matcher=_match_request,
|
||||
url_page3,
|
||||
[{"json": response3}],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
listed_result2 = lister2.run()
|
||||
|
@ -197,9 +199,7 @@ def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
|
|||
|
||||
|
||||
def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker):
|
||||
"""Gitlab lister supports rate-limit
|
||||
|
||||
"""
|
||||
"""Gitlab lister supports rate-limit"""
|
||||
instance = "gite.lirmm.fr"
|
||||
url = api_url(instance)
|
||||
lister = GitLabLister(swh_scheduler, url=url, instance=instance)
|
||||
|
@ -241,9 +241,7 @@ def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker)
|
|||
def test_lister_gitlab_http_errors(
|
||||
swh_scheduler, requests_mock, datadir, mocker, status_code
|
||||
):
|
||||
"""Gitlab lister should retry requests when encountering HTTP 50x errors
|
||||
|
||||
"""
|
||||
"""Gitlab lister should retry requests when encountering HTTP 50x errors"""
|
||||
instance = "gite.lirmm.fr"
|
||||
url = api_url(instance)
|
||||
lister = GitLabLister(swh_scheduler, url=url, instance=instance)
|
||||
|
@ -281,9 +279,7 @@ def test_lister_gitlab_http_errors(
|
|||
|
||||
|
||||
def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
|
||||
"""Gitlab lister should skip buggy URL and move to next page.
|
||||
|
||||
"""
|
||||
"""Gitlab lister should skip buggy URL and move to next page."""
|
||||
instance = "gite.lirmm.fr"
|
||||
url = api_url(instance)
|
||||
lister = GitLabLister(swh_scheduler, url=url, instance=instance)
|
||||
|
@ -300,11 +296,17 @@ def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
|
|||
additional_matcher=_match_request,
|
||||
)
|
||||
requests_mock.get(
|
||||
url_page2, [{"status_code": 500},], additional_matcher=_match_request,
|
||||
url_page2,
|
||||
[
|
||||
{"status_code": 500},
|
||||
],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
requests_mock.get(
|
||||
url_page3, [{"json": response3}], additional_matcher=_match_request,
|
||||
url_page3,
|
||||
[{"json": response3}],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
listed_result = lister.run()
|
||||
|
@ -314,9 +316,7 @@ def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
|
|||
|
||||
|
||||
def test_lister_gitlab_credentials(swh_scheduler):
|
||||
"""Gitlab lister supports credentials configuration
|
||||
|
||||
"""
|
||||
"""Gitlab lister supports credentials configuration"""
|
||||
instance = "gitlab"
|
||||
credentials = {
|
||||
"gitlab": {instance: [{"username": "user", "password": "api-token"}]}
|
||||
|
@ -328,7 +328,13 @@ def test_lister_gitlab_credentials(swh_scheduler):
|
|||
assert lister.session.headers["Authorization"] == "Bearer api-token"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("url", [api_url("gitlab").rstrip("/"), api_url("gitlab"),])
|
||||
@pytest.mark.parametrize(
|
||||
"url",
|
||||
[
|
||||
api_url("gitlab").rstrip("/"),
|
||||
api_url("gitlab"),
|
||||
],
|
||||
)
|
||||
def test_lister_gitlab_url_computation(url, swh_scheduler):
|
||||
lister = GitLabLister(scheduler=swh_scheduler, url=url)
|
||||
assert not lister.url.endswith("/")
|
||||
|
|
|
@ -34,7 +34,8 @@ def test_task_lister_gitlab(
|
|||
|
||||
kwargs = dict(url="https://gitweb.torproject.org/")
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
f"swh.lister.gitlab.tasks.{task_name}", kwargs=kwargs,
|
||||
f"swh.lister.gitlab.tasks.{task_name}",
|
||||
kwargs=kwargs,
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
|
|
@ -28,7 +28,9 @@ class GNULister(StatelessLister[GNUPageType]):
|
|||
GNU_FTP_URL = "https://ftp.gnu.org"
|
||||
|
||||
def __init__(
|
||||
self, scheduler: SchedulerInterface, credentials: CredentialsType = None,
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
credentials: CredentialsType = None,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
|
|
|
@ -19,9 +19,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
|
||||
class GNUTree:
|
||||
"""Gnu Tree's representation
|
||||
|
||||
"""
|
||||
"""Gnu Tree's representation"""
|
||||
|
||||
def __init__(self, url: str):
|
||||
self.url = url # filepath or uri
|
||||
|
@ -330,7 +328,5 @@ def load_raw_data(url: str) -> Sequence[Mapping]:
|
|||
|
||||
|
||||
def format_date(timestamp: str) -> str:
|
||||
"""Format a string timestamp to an isoformat string
|
||||
|
||||
"""
|
||||
"""Format a string timestamp to an isoformat string"""
|
||||
return datetime.fromtimestamp(int(timestamp), tz=timezone.utc).isoformat()
|
||||
|
|
|
@ -117,7 +117,8 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
|
|||
}
|
||||
|
||||
return get_vcs_fns[vcs_type](
|
||||
order_by="most neglected first", modified_since_date=date_last_modified,
|
||||
order_by="most neglected first",
|
||||
modified_since_date=date_last_modified,
|
||||
)
|
||||
|
||||
def get_pages(self) -> Iterator[LaunchpadPageType]:
|
||||
|
|
|
@ -192,9 +192,16 @@ def test_launchpad_incremental_lister(
|
|||
|
||||
|
||||
def test_launchpad_lister_invalid_url_filtering(
|
||||
swh_scheduler, mocker,
|
||||
swh_scheduler,
|
||||
mocker,
|
||||
):
|
||||
invalid_origin = [_Repo({"git_https_url": "tag:launchpad.net:2008:redacted",})]
|
||||
invalid_origin = [
|
||||
_Repo(
|
||||
{
|
||||
"git_https_url": "tag:launchpad.net:2008:redacted",
|
||||
}
|
||||
)
|
||||
]
|
||||
_mock_launchpad(mocker, invalid_origin)
|
||||
lister = LaunchpadLister(scheduler=swh_scheduler)
|
||||
stats = lister.run()
|
||||
|
@ -205,7 +212,8 @@ def test_launchpad_lister_invalid_url_filtering(
|
|||
|
||||
|
||||
def test_launchpad_lister_duplicated_origin(
|
||||
swh_scheduler, mocker,
|
||||
swh_scheduler,
|
||||
mocker,
|
||||
):
|
||||
origin = _Repo(
|
||||
{
|
||||
|
|
|
@ -87,12 +87,18 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
|
|||
instance = parse_url(url).host
|
||||
|
||||
super().__init__(
|
||||
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=url,
|
||||
instance=instance,
|
||||
)
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
{"Accept": "application/json", "User-Agent": USER_AGENT,}
|
||||
{
|
||||
"Accept": "application/json",
|
||||
"User-Agent": USER_AGENT,
|
||||
}
|
||||
)
|
||||
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState:
|
||||
|
@ -119,7 +125,7 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
|
|||
return response
|
||||
|
||||
def get_pages(self) -> Iterator[RepoPage]:
|
||||
""" Retrieve and parse exported maven indexes to
|
||||
"""Retrieve and parse exported maven indexes to
|
||||
identify all pom files and src archives.
|
||||
"""
|
||||
|
||||
|
@ -213,7 +219,10 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
|
|||
):
|
||||
continue
|
||||
url_path = f"{path}/{aid}/{version}/{aid}-{version}.{ext}"
|
||||
url_pom = urljoin(self.BASE_URL, url_path,)
|
||||
url_pom = urljoin(
|
||||
self.BASE_URL,
|
||||
url_path,
|
||||
)
|
||||
out_pom[url_pom] = doc_id
|
||||
elif (
|
||||
classifier.lower() == "sources" or ("src" in classifier)
|
||||
|
@ -271,9 +280,7 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
|
|||
logger.info("Could not parse POM %s XML: %s. Next.", pom, error)
|
||||
|
||||
def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]:
|
||||
"""Convert a page of Maven repositories into a list of ListedOrigins.
|
||||
|
||||
"""
|
||||
"""Convert a page of Maven repositories into a list of ListedOrigins."""
|
||||
assert self.lister_obj.id is not None
|
||||
scm_types_ok = ("git", "svn", "hg", "cvs", "bzr")
|
||||
if page["type"] == "scm":
|
||||
|
@ -288,13 +295,17 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
|
|||
if scm_type in scm_types_ok:
|
||||
scm_url = m_scm.group("url")
|
||||
origin = ListedOrigin(
|
||||
lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
|
||||
lister_id=self.lister_obj.id,
|
||||
url=scm_url,
|
||||
visit_type=scm_type,
|
||||
)
|
||||
yield origin
|
||||
else:
|
||||
if page["url"].endswith(".git"):
|
||||
origin = ListedOrigin(
|
||||
lister_id=self.lister_obj.id, url=page["url"], visit_type="git",
|
||||
lister_id=self.lister_obj.id,
|
||||
url=page["url"],
|
||||
visit_type="git",
|
||||
)
|
||||
yield origin
|
||||
else:
|
||||
|
|
|
@ -84,7 +84,12 @@ def maven_pom_3(datadir) -> str:
|
|||
|
||||
|
||||
def test_maven_full_listing(
|
||||
swh_scheduler, requests_mock, mocker, maven_index, maven_pom_1, maven_pom_2,
|
||||
swh_scheduler,
|
||||
requests_mock,
|
||||
mocker,
|
||||
maven_index,
|
||||
maven_pom_1,
|
||||
maven_pom_2,
|
||||
):
|
||||
"""Covers full listing of multiple pages, checking page results and listed
|
||||
origins, statelessness."""
|
||||
|
|
|
@ -35,7 +35,8 @@ def test_task_lister_maven(
|
|||
url="https://repo1.maven.org/maven2/", index_url="http://indexes/export.fld"
|
||||
)
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
f"swh.lister.maven.tasks.{task_name}", kwargs=kwargs,
|
||||
f"swh.lister.maven.tasks.{task_name}",
|
||||
kwargs=kwargs,
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
|
|
@ -66,7 +66,10 @@ def test_npm_lister_full(
|
|||
|
||||
requests_mock.get(
|
||||
lister.API_FULL_LISTING_URL,
|
||||
[{"json": npm_full_listing_page1}, {"json": npm_full_listing_page2},],
|
||||
[
|
||||
{"json": npm_full_listing_page1},
|
||||
{"json": npm_full_listing_page2},
|
||||
],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
|
@ -169,7 +172,9 @@ def test_npm_lister_incremental(
|
|||
|
||||
|
||||
def test_npm_lister_incremental_restart(
|
||||
swh_scheduler, requests_mock, mocker,
|
||||
swh_scheduler,
|
||||
requests_mock,
|
||||
mocker,
|
||||
):
|
||||
"""Check incremental npm listing will restart from saved state"""
|
||||
page_size = 2
|
||||
|
@ -190,7 +195,9 @@ def test_npm_lister_incremental_restart(
|
|||
|
||||
|
||||
def test_npm_lister_http_error(
|
||||
swh_scheduler, requests_mock, mocker,
|
||||
swh_scheduler,
|
||||
requests_mock,
|
||||
mocker,
|
||||
):
|
||||
lister = NpmLister(scheduler=swh_scheduler)
|
||||
|
||||
|
|
|
@ -48,7 +48,10 @@ class OpamLister(StatelessLister[PageType]):
|
|||
opam_root: str = "/tmp/opam/",
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=url,
|
||||
instance=instance,
|
||||
)
|
||||
self.env = os.environ.copy()
|
||||
# Opam root folder is initialized in the :meth:`get_pages` method as no
|
||||
|
|
|
@ -17,9 +17,7 @@ module_name = "swh.lister.opam.lister"
|
|||
|
||||
@pytest.fixture
|
||||
def mock_opam(mocker):
|
||||
"""Fixture to bypass the actual opam calls within the test context.
|
||||
|
||||
"""
|
||||
"""Fixture to bypass the actual opam calls within the test context."""
|
||||
# inhibits the real `subprocess.call` which prepares the required internal opam
|
||||
# state
|
||||
mock_init = mocker.patch(f"{module_name}.call", return_value=None)
|
||||
|
@ -31,9 +29,7 @@ def mock_opam(mocker):
|
|||
|
||||
|
||||
def test_mock_init_repository_init(mock_opam, tmp_path, datadir):
|
||||
"""Initializing opam root directory with an instance should be ok
|
||||
|
||||
"""
|
||||
"""Initializing opam root directory with an instance should be ok"""
|
||||
mock_init, mock_popen = mock_opam
|
||||
|
||||
instance = "fake"
|
||||
|
@ -48,9 +44,7 @@ def test_mock_init_repository_init(mock_opam, tmp_path, datadir):
|
|||
|
||||
|
||||
def test_mock_init_repository_update(mock_opam, tmp_path, datadir):
|
||||
"""Updating opam root directory with another instance should be ok
|
||||
|
||||
"""
|
||||
"""Updating opam root directory with another instance should be ok"""
|
||||
mock_init, mock_popen = mock_opam
|
||||
|
||||
instance = "fake_opam_repo"
|
||||
|
@ -74,7 +68,10 @@ def test_lister_opam_optional_instance(swh_scheduler):
|
|||
netloc = "opam.ocaml.org"
|
||||
instance_url = f"https://{netloc}"
|
||||
|
||||
lister = OpamLister(swh_scheduler, url=instance_url,)
|
||||
lister = OpamLister(
|
||||
swh_scheduler,
|
||||
url=instance_url,
|
||||
)
|
||||
assert lister.instance == netloc
|
||||
assert lister.opam_root == "/tmp/opam/"
|
||||
|
||||
|
@ -85,7 +82,10 @@ def test_urls(swh_scheduler, mock_opam, tmp_path):
|
|||
tmp_folder = mkdtemp(dir=tmp_path, prefix="swh_opam_lister")
|
||||
|
||||
lister = OpamLister(
|
||||
swh_scheduler, url=instance_url, instance="opam", opam_root=tmp_folder,
|
||||
swh_scheduler,
|
||||
url=instance_url,
|
||||
instance="opam",
|
||||
opam_root=tmp_folder,
|
||||
)
|
||||
assert lister.instance == "opam"
|
||||
assert lister.opam_root == tmp_folder
|
||||
|
|
|
@ -51,7 +51,9 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
|
|||
PACKAGIST_REPO_BASE_URL = "https://repo.packagist.org/p"
|
||||
|
||||
def __init__(
|
||||
self, scheduler: SchedulerInterface, credentials: CredentialsType = None,
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
credentials: CredentialsType = None,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
|
|
|
@ -112,7 +112,9 @@ class PhabricatorLister(StatelessLister[PageType]):
|
|||
|
||||
if response_data.get("result") is None:
|
||||
logger.warning(
|
||||
"Got unexpected response on %s: %s", response.url, response_data,
|
||||
"Got unexpected response on %s: %s",
|
||||
response.url,
|
||||
response_data,
|
||||
)
|
||||
break
|
||||
|
||||
|
|
|
@ -115,7 +115,9 @@ def test_lister(
|
|||
|
||||
|
||||
def test_lister_request_error(
|
||||
swh_scheduler, requests_mock, phabricator_repositories_page1,
|
||||
swh_scheduler,
|
||||
requests_mock,
|
||||
phabricator_repositories_page1,
|
||||
):
|
||||
FORGE_BASE_URL = "https://forge.softwareheritage.org"
|
||||
|
||||
|
|
|
@ -54,16 +54,12 @@ def _if_rate_limited(retry_state) -> bool:
|
|||
|
||||
|
||||
def pypi_url(package_name: str) -> str:
|
||||
"""Build pypi url out of a package name.
|
||||
|
||||
"""
|
||||
"""Build pypi url out of a package name."""
|
||||
return PyPILister.PACKAGE_URL.format(package_name=package_name)
|
||||
|
||||
|
||||
class PyPILister(Lister[PyPIListerState, PackageListPage]):
|
||||
"""List origins from PyPI.
|
||||
|
||||
"""
|
||||
"""List origins from PyPI."""
|
||||
|
||||
LISTER_NAME = "pypi"
|
||||
INSTANCE = "pypi" # As of today only the main pypi.org is used
|
||||
|
@ -168,7 +164,7 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]):
|
|||
|
||||
def finalize(self):
|
||||
"""Finalize the visit state by updating with the new last_serial if updates
|
||||
actually happened.
|
||||
actually happened.
|
||||
|
||||
"""
|
||||
self.updated = (
|
||||
|
|
|
@ -55,8 +55,7 @@ LastModifiedT = datetime.date
|
|||
|
||||
@dataclass
|
||||
class SourceForgeListerState:
|
||||
"""Current state of the SourceForge lister in incremental runs
|
||||
"""
|
||||
"""Current state of the SourceForge lister in incremental runs"""
|
||||
|
||||
"""If the subsitemap does not exist, we assume a full run of this subsitemap
|
||||
is needed. If the date is the same, we skip the subsitemap, otherwise we
|
||||
|
@ -105,9 +104,7 @@ ProjectsLastModifiedCache = Dict[Tuple[str, str], LastModifiedT]
|
|||
|
||||
|
||||
class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
||||
"""List origins from the "SourceForge" forge.
|
||||
|
||||
"""
|
||||
"""List origins from the "SourceForge" forge."""
|
||||
|
||||
# Part of the lister API, that identifies this lister
|
||||
LISTER_NAME = "sourceforge"
|
||||
|
@ -386,7 +383,7 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
|||
bs = BeautifulSoup(response.text, features="html.parser")
|
||||
cvs_base_url = "rsync://a.cvs.sourceforge.net/cvsroot"
|
||||
for text in [b.text for b in bs.find_all("b")]:
|
||||
match = re.search(fr".*/cvsroot/{project} co -P (.+)", text)
|
||||
match = re.search(rf".*/cvsroot/{project} co -P (.+)", text)
|
||||
if match is not None:
|
||||
module = match.group(1)
|
||||
url = f"{cvs_base_url}/{project}/{module}"
|
||||
|
|
|
@ -8,15 +8,24 @@ import pytest
|
|||
from swh.lister.cli import SUPPORTED_LISTERS, get_lister
|
||||
|
||||
lister_args = {
|
||||
"cgit": {"url": "https://git.eclipse.org/c/",},
|
||||
"cgit": {
|
||||
"url": "https://git.eclipse.org/c/",
|
||||
},
|
||||
"phabricator": {
|
||||
"instance": "softwareheritage",
|
||||
"url": "https://forge.softwareheritage.org/api/diffusion.repository.search",
|
||||
"api_token": "bogus",
|
||||
},
|
||||
"gitea": {"url": "https://try.gitea.io/api/v1/",},
|
||||
"tuleap": {"url": "https://tuleap.net",},
|
||||
"gitlab": {"url": "https://gitlab.ow2.org/api/v4", "instance": "ow2",},
|
||||
"gitea": {
|
||||
"url": "https://try.gitea.io/api/v1/",
|
||||
},
|
||||
"tuleap": {
|
||||
"url": "https://tuleap.net",
|
||||
},
|
||||
"gitlab": {
|
||||
"url": "https://gitlab.ow2.org/api/v4",
|
||||
"instance": "ow2",
|
||||
},
|
||||
"opam": {"url": "https://opam.ocaml.org", "instance": "opam"},
|
||||
"maven": {
|
||||
"url": "https://repo1.maven.org/maven2/",
|
||||
|
@ -34,9 +43,7 @@ def test_get_lister_wrong_input():
|
|||
|
||||
|
||||
def test_get_lister(swh_scheduler_config):
|
||||
"""Instantiating a supported lister should be ok
|
||||
|
||||
"""
|
||||
"""Instantiating a supported lister should be ok"""
|
||||
# Drop launchpad lister from the lister to check, its test setup is more involved
|
||||
# than the other listers and it's not currently done here
|
||||
for lister_name in SUPPORTED_LISTERS:
|
||||
|
|
|
@ -154,7 +154,9 @@ class InstantiableStatelessLister(pattern.StatelessLister[PageType]):
|
|||
|
||||
def test_stateless_instantiation(swh_scheduler):
|
||||
lister = InstantiableStatelessLister(
|
||||
scheduler=swh_scheduler, url="https://example.com", instance="example.com",
|
||||
scheduler=swh_scheduler,
|
||||
url="https://example.com",
|
||||
instance="example.com",
|
||||
)
|
||||
|
||||
# check the lister was registered in the scheduler backend
|
||||
|
|
|
@ -21,7 +21,19 @@ from swh.lister.utils import (
|
|||
(14, 5, [(0, 4), (5, 9), (10, 14)]),
|
||||
(19, 10, [(0, 9), (10, 19)]),
|
||||
(20, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 20)]),
|
||||
(21, 3, [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 21),],),
|
||||
(
|
||||
21,
|
||||
3,
|
||||
[
|
||||
(0, 2),
|
||||
(3, 5),
|
||||
(6, 8),
|
||||
(9, 11),
|
||||
(12, 14),
|
||||
(15, 17),
|
||||
(18, 21),
|
||||
],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_split_range(total_pages, nb_pages, expected_ranges):
|
||||
|
@ -72,7 +84,8 @@ def test_throttling_retry(requests_mock, mocker):
|
|||
|
||||
def test_throttling_retry_max_attemps(requests_mock, mocker):
|
||||
requests_mock.get(
|
||||
TEST_URL, [{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS),
|
||||
TEST_URL,
|
||||
[{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS),
|
||||
)
|
||||
|
||||
mock_sleep = mocker.patch.object(make_request.retry, "sleep")
|
||||
|
@ -85,7 +98,7 @@ def test_throttling_retry_max_attemps(requests_mock, mocker):
|
|||
assert_sleep_calls(
|
||||
mocker,
|
||||
mock_sleep,
|
||||
[float(WAIT_EXP_BASE ** i) for i in range(MAX_NUMBER_ATTEMPTS - 1)],
|
||||
[float(WAIT_EXP_BASE**i) for i in range(MAX_NUMBER_ATTEMPTS - 1)],
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -51,12 +51,18 @@ class TuleapLister(StatelessLister[RepoPage]):
|
|||
credentials: CredentialsType = None,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=url,
|
||||
instance=instance,
|
||||
)
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
{"Accept": "application/json", "User-Agent": USER_AGENT,}
|
||||
{
|
||||
"Accept": "application/json",
|
||||
"User-Agent": USER_AGENT,
|
||||
}
|
||||
)
|
||||
|
||||
@throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
|
||||
|
@ -133,9 +139,7 @@ class TuleapLister(StatelessLister[RepoPage]):
|
|||
yield self.results_simplified(url_api, "git", repo)
|
||||
|
||||
def get_origins_from_page(self, page: RepoPage) -> Iterator[ListedOrigin]:
|
||||
"""Convert a page of Tuleap repositories into a list of ListedOrigins.
|
||||
|
||||
"""
|
||||
"""Convert a page of Tuleap repositories into a list of ListedOrigins."""
|
||||
assert self.lister_obj.id is not None
|
||||
|
||||
yield ListedOrigin(
|
||||
|
|
|
@ -21,7 +21,8 @@ def test_full_listing(swh_scheduler_celery_app, swh_scheduler_celery_worker, moc
|
|||
|
||||
kwargs = dict(url="https://tuleap.net")
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.tuleap.tasks.FullTuleapLister", kwargs=kwargs,
|
||||
"swh.lister.tuleap.tasks.FullTuleapLister",
|
||||
kwargs=kwargs,
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
@ -38,9 +39,13 @@ def test_full_listing_params(
|
|||
lister.from_configfile.return_value = lister
|
||||
lister.run.return_value = ListerStats(pages=10, origins=500)
|
||||
|
||||
kwargs = dict(url="https://tuleap.net", instance="tuleap.net",)
|
||||
kwargs = dict(
|
||||
url="https://tuleap.net",
|
||||
instance="tuleap.net",
|
||||
)
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.tuleap.tasks.FullTuleapLister", kwargs=kwargs,
|
||||
"swh.lister.tuleap.tasks.FullTuleapLister",
|
||||
kwargs=kwargs,
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue