Harmonize listers parameters and add test to check mandatory ones
Ensure that all lister classes have the same set of mandatory parameters in their constructors, notably: scheduler, url, instance and credentials. Add a new test checking that lister classes declare the mandatory parameters in their constructors. The purpose is to avoid deployment issues in the staging or production environments, as celery tasks can fail to execute if mandatory parameters are not handled by listers. Related to swh/infra/sysadm-environment#5030.
This commit is contained in:
parent
5f717e311d
commit
6e7bc49ec7
27 changed files with 231 additions and 73 deletions
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -80,6 +80,7 @@ class ArchLister(StatelessLister[ArchListerPage]):
|
|||
VISIT_TYPE = "arch"
|
||||
INSTANCE = "arch"
|
||||
|
||||
BASE_URL = "https://archlinux.org"
|
||||
ARCH_PACKAGE_URL_PATTERN = "{base_url}/packages/{repo}/{arch}/{pkgname}"
|
||||
ARCH_PACKAGE_VERSIONS_URL_PATTERN = "{base_url}/packages/{pkgname[0]}/{pkgname}"
|
||||
ARCH_PACKAGE_DOWNLOAD_URL_PATTERN = (
|
||||
|
@ -93,6 +94,8 @@ class ArchLister(StatelessLister[ArchListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = BASE_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -119,8 +122,8 @@ class ArchLister(StatelessLister[ArchListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=flavours["official"]["base_info_url"],
|
||||
instance=self.INSTANCE,
|
||||
url=url,
|
||||
instance=instance,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -46,6 +46,8 @@ class AurLister(StatelessLister[AurListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = BASE_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -54,8 +56,8 @@ class AurLister(StatelessLister[AurListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
url=self.BASE_URL,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -51,6 +51,8 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = API_URL,
|
||||
instance: str = INSTANCE,
|
||||
page_size: int = 1000,
|
||||
incremental: bool = True,
|
||||
credentials: CredentialsType = None,
|
||||
|
@ -61,8 +63,8 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=self.API_URL,
|
||||
instance=self.INSTANCE,
|
||||
url=url,
|
||||
instance=instance,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -29,6 +29,8 @@ class BowerLister(StatelessLister[BowerListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = API_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -37,8 +39,8 @@ class BowerLister(StatelessLister[BowerListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
url=self.API_URL,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -39,6 +39,7 @@ class CondaLister(StatelessLister[CondaListerPage]):
|
|||
scheduler: SchedulerInterface,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
url: str = BASE_REPO_URL,
|
||||
instance: str = INSTANCE,
|
||||
channel: str = "",
|
||||
archs: List = [],
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
|
@ -48,7 +49,7 @@ class CondaLister(StatelessLister[CondaListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -80,6 +80,8 @@ class CpanLister(StatelessLister[CpanListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = API_BASE_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -88,8 +90,8 @@ class CpanLister(StatelessLister[CpanListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
url=self.API_BASE_URL,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -34,10 +34,13 @@ class CRANLister(StatelessLister[PageType]):
|
|||
"""
|
||||
|
||||
LISTER_NAME = "cran"
|
||||
INSTANCE = "cran"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = CRAN_MIRROR_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -45,8 +48,8 @@ class CRANLister(StatelessLister[PageType]):
|
|||
):
|
||||
super().__init__(
|
||||
scheduler,
|
||||
url=CRAN_MIRROR_URL,
|
||||
instance="cran",
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -65,6 +65,8 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = BASE_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: CredentialsType = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -73,8 +75,8 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=self.BASE_URL,
|
||||
instance=self.INSTANCE,
|
||||
url=url,
|
||||
instance=instance,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2017-2022 The Software Heritage developers
|
||||
# Copyright (C) 2017-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -68,12 +68,14 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
|
|||
"""
|
||||
|
||||
LISTER_NAME = "debian"
|
||||
MIRROR_URL = "http://deb.debian.org/debian/"
|
||||
INSTANCE = "Debian"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
distribution: str = "Debian",
|
||||
mirror_url: str = "http://deb.debian.org/debian/",
|
||||
url: str = MIRROR_URL,
|
||||
instance: str = INSTANCE,
|
||||
suites: List[Suite] = ["stretch", "buster", "bullseye"],
|
||||
components: List[Component] = ["main", "contrib", "non-free"],
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
|
@ -83,8 +85,8 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
|
|||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
url=mirror_url,
|
||||
instance=distribution,
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
@ -95,7 +97,7 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
|
|||
if not self.url.endswith("/"):
|
||||
self.url += "/"
|
||||
|
||||
self.distribution = distribution
|
||||
self.distribution = instance
|
||||
self.suites = suites
|
||||
self.components = components
|
||||
|
||||
|
|
|
@ -10,6 +10,11 @@ from .lister import DebianLister
|
|||
@shared_task(name=__name__ + ".DebianListerTask")
def list_debian_distribution(**lister_args):
    """List a Debian distribution.

    The keyword arguments are forwarded to ``DebianLister.from_configfile``.
    The previous parameter names ``mirror_url`` and ``distribution`` are still
    accepted and are renamed to ``url`` and ``instance`` before forwarding.

    Returns:
        The result of the lister run, converted with its ``dict()`` method.
    """
    # for backward compatibility with previous parameter names
    renamed_parameters = (("mirror_url", "url"), ("distribution", "instance"))
    for old_name, new_name in renamed_parameters:
        if old_name in lister_args:
            lister_args[new_name] = lister_args.pop(old_name)

    debian_lister = DebianLister.from_configfile(**lister_args)
    return debian_lister.run().dict()
|
||||
|
||||
|
||||
|
|
|
@ -65,7 +65,7 @@ def _init_test(
|
|||
|
||||
lister = DebianLister(
|
||||
scheduler=swh_scheduler,
|
||||
mirror_url=_mirror_url,
|
||||
url=_mirror_url,
|
||||
suites=list(debian_sources.keys()),
|
||||
components=_components,
|
||||
)
|
||||
|
|
|
@ -24,8 +24,8 @@ def test_lister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
|||
lister.run.return_value = stats
|
||||
|
||||
kwargs = dict(
|
||||
mirror_url="http://www-ftp.lip6.fr/pub/linux/distributions/Ubuntu/archive/",
|
||||
distribution="Ubuntu",
|
||||
url="http://www-ftp.lip6.fr/pub/linux/distributions/Ubuntu/archive/",
|
||||
instance="Ubuntu",
|
||||
suites=["xenial", "bionic", "focal"],
|
||||
components=["main", "multiverse", "restricted", "universe"],
|
||||
)
|
||||
|
@ -41,3 +41,35 @@ def test_lister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
|||
lister.run.assert_called_once_with()
|
||||
|
||||
assert res.result == stats.dict()
|
||||
|
||||
|
||||
@patch("swh.lister.debian.tasks.DebianLister")
def test_lister_old_parameter_names(
    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
    """Check the Debian lister task still accepts the previous parameter names.

    The task is sent with ``mirror_url`` and ``distribution``; the mocked
    lister must be configured with the same values under ``url`` and
    ``instance``.
    """
    # setup the mocked DebianLister
    expected_stats = ListerStats(pages=12, origins=35618)
    lister.from_configfile.return_value = lister
    lister.run.return_value = expected_stats

    legacy_kwargs = {
        "mirror_url": "http://www-ftp.lip6.fr/pub/linux/distributions/Ubuntu/archive/",
        "distribution": "Ubuntu",
        "suites": ["xenial", "bionic", "focal"],
        "components": ["main", "multiverse", "restricted", "universe"],
    }

    res = swh_scheduler_celery_app.send_task(
        "swh.lister.debian.tasks.DebianListerTask", kwargs=legacy_kwargs
    )
    assert res
    res.wait()
    assert res.successful()

    # the task is expected to hand the lister the renamed parameters
    renamed_kwargs = dict(legacy_kwargs)
    renamed_kwargs["url"] = renamed_kwargs.pop("mirror_url")
    renamed_kwargs["instance"] = renamed_kwargs.pop("distribution")

    lister.from_configfile.assert_called_once_with(**renamed_kwargs)
    lister.run.assert_called_once_with()

    assert res.result == expected_stats.dict()
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2020-2022 The Software Heritage developers
|
||||
# Copyright (C) 2020-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -62,6 +62,7 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
|
|||
""" # noqa: B950
|
||||
|
||||
LISTER_NAME = "github"
|
||||
INSTANCE = "github"
|
||||
|
||||
API_URL = "https://api.github.com/repositories"
|
||||
PAGE_SIZE = 1000
|
||||
|
@ -69,6 +70,8 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = API_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: CredentialsType = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -79,8 +82,8 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=self.API_URL,
|
||||
instance="github",
|
||||
url=url,
|
||||
instance=instance,
|
||||
with_github_session=True,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2019-2021 The Software Heritage developers
|
||||
# Copyright (C) 2019-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -25,11 +25,14 @@ class GNULister(StatelessLister[GNUPageType]):
|
|||
"""
|
||||
|
||||
LISTER_NAME = "GNU"
|
||||
INSTANCE = "GNU"
|
||||
GNU_FTP_URL = "https://ftp.gnu.org"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = GNU_FTP_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: CredentialsType = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -37,8 +40,8 @@ class GNULister(StatelessLister[GNUPageType]):
|
|||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
url=self.GNU_FTP_URL,
|
||||
instance="GNU",
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -45,6 +45,8 @@ class GolangLister(Lister[GolangStateType, GolangPageType]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = GOLANG_MODULES_INDEX_URL,
|
||||
instance: str = LISTER_NAME,
|
||||
incremental: bool = False,
|
||||
credentials: CredentialsType = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
|
@ -53,8 +55,8 @@ class GolangLister(Lister[GolangStateType, GolangPageType]):
|
|||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
url=self.GOLANG_MODULES_INDEX_URL,
|
||||
instance=self.LISTER_NAME,
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -43,17 +43,18 @@ class HackageLister(Lister[HackageListerState, HackageListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = BASE_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
enable_origins: bool = True,
|
||||
url: Optional[str] = None,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
url=url if url else self.BASE_URL,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
# Copyright (C) 2021-2022 The Software Heritage developers
|
||||
# Copyright (C) 2021-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
import logging
|
||||
from typing import Any, Dict, Iterator, List
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import iso8601
|
||||
|
@ -46,15 +46,22 @@ class HexLister(Lister[HexListerState, HexListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
instance: str = "hex",
|
||||
url: str = HEX_API_URL,
|
||||
instance: str = LISTER_NAME,
|
||||
page_size: int = 100,
|
||||
credentials: CredentialsType = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
enable_origins: bool = True,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=self.HEX_API_URL,
|
||||
url=url,
|
||||
instance=instance,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
)
|
||||
# TODO: Add authentication support
|
||||
self.page_size = page_size
|
||||
|
|
|
@ -59,11 +59,14 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
|
|||
will be returned
|
||||
"""
|
||||
|
||||
LAUNCHPAD_URL = "https://launchpad.net/"
|
||||
LISTER_NAME = "launchpad"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = LAUNCHPAD_URL,
|
||||
instance: str = LISTER_NAME,
|
||||
incremental: bool = False,
|
||||
credentials: CredentialsType = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
|
@ -72,8 +75,8 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
|
|||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
url="https://launchpad.net/",
|
||||
instance="launchpad",
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2018-2022 the Software Heritage developers
|
||||
# Copyright (C) 2018-2023 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
@ -50,6 +50,8 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = API_FULL_LISTING_URL,
|
||||
instance: str = INSTANCE,
|
||||
page_size: int = 1000,
|
||||
incremental: bool = False,
|
||||
credentials: CredentialsType = None,
|
||||
|
@ -60,10 +62,8 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
url=self.API_INCREMENTAL_LISTING_URL
|
||||
if incremental
|
||||
else self.API_FULL_LISTING_URL,
|
||||
instance=self.INSTANCE,
|
||||
url=url,
|
||||
instance=instance,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
@ -75,6 +75,8 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
|
|||
# provided as the startkey query parameter value, so we increment the page
|
||||
# size by one to avoid double package processing
|
||||
self.page_size += 1
|
||||
else:
|
||||
self.url = self.API_INCREMENTAL_LISTING_URL
|
||||
self.incremental = incremental
|
||||
|
||||
self.session.headers.update({"Accept": "application/json"})
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -43,6 +43,8 @@ class NugetLister(Lister[NugetListerState, NugetListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = API_INDEX_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -51,8 +53,8 @@ class NugetLister(Lister[NugetListerState, NugetListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
url=self.API_INDEX_URL,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -55,6 +55,7 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
|
|||
"""
|
||||
|
||||
LISTER_NAME = "Packagist"
|
||||
INSTANCE = "packagist"
|
||||
PACKAGIST_PACKAGES_LIST_URL = "https://packagist.org/packages/list.json"
|
||||
PACKAGIST_PACKAGE_URL_FORMATS = [
|
||||
# preferred, static, efficient on their side as it can be cached
|
||||
|
@ -72,6 +73,8 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = PACKAGIST_PACKAGES_LIST_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: CredentialsType = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -80,8 +83,8 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
|
|||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
url=self.PACKAGIST_PACKAGES_LIST_URL,
|
||||
instance="packagist",
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
with_github_session=True,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -35,6 +35,8 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = BASE_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -43,8 +45,8 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
url=self.BASE_URL,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -42,6 +42,8 @@ class PuppetLister(Lister[PuppetListerState, PuppetListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = BASE_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -50,8 +52,8 @@ class PuppetLister(Lister[PuppetListerState, PuppetListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
url=self.BASE_URL,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -69,6 +69,8 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = PACKAGE_LIST_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -76,8 +78,8 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]):
|
|||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
url=self.PACKAGE_LIST_URL,
|
||||
instance=self.INSTANCE,
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -62,6 +62,8 @@ class RubyGemsLister(StatelessLister[RubyGemsListerPage]):
|
|||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = RUBY_GEMS_POSTGRES_DUMP_BASE_URL,
|
||||
instance: str = INSTANCE,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
max_pages: Optional[int] = None,
|
||||
|
@ -70,8 +72,8 @@ class RubyGemsLister(StatelessLister[RubyGemsListerPage]):
|
|||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
credentials=credentials,
|
||||
instance=self.INSTANCE,
|
||||
url=self.RUBY_GEMS_POSTGRES_DUMP_BASE_URL,
|
||||
instance=instance,
|
||||
url=url,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
enable_origins=enable_origins,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2021-2022 The Software Heritage developers
|
||||
# Copyright (C) 2021-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -105,12 +105,16 @@ ProjectsLastModifiedCache = Dict[Tuple[str, str], LastModifiedT]
|
|||
class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
||||
"""List origins from the "SourceForge" forge."""
|
||||
|
||||
SOURCEFORGE_URL = "https://sourceforge.net"
|
||||
# Part of the lister API, that identifies this lister
|
||||
LISTER_NAME = "sourceforge"
|
||||
INSTANCE = "main"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
url: str = SOURCEFORGE_URL,
|
||||
instance: str = INSTANCE,
|
||||
incremental: bool = False,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
max_origins_per_page: Optional[int] = None,
|
||||
|
@ -119,8 +123,8 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
|||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
url="https://sourceforge.net",
|
||||
instance="main",
|
||||
url=url,
|
||||
instance=instance,
|
||||
credentials=credentials,
|
||||
max_origins_per_page=max_origins_per_page,
|
||||
max_pages=max_pages,
|
||||
|
|
64
swh/lister/tests/test_lister_packages.py
Normal file
64
swh/lister/tests/test_lister_packages.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import importlib
|
||||
import inspect
|
||||
import pkgutil
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def lister_packages():
    """Return the names of the sub-packages found in ``swh.lister``.

    Plain modules are ignored, and so is the ``tests`` package.
    """
    import swh.lister

    package_names = []
    for module_info in pkgutil.iter_modules(swh.lister.__path__):
        if not module_info.ispkg or module_info.name == "tests":
            continue
        package_names.append(module_info.name)
    return package_names
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lister_package", lister_packages())
def test_lister_has_mandatory_parameters(lister_package):
    """Check that lister constructors declare all the mandatory parameters.

    Every class found in ``swh.lister.<lister_package>.lister`` that derives
    from ``Lister`` (the ``Lister`` and ``StatelessLister`` base classes
    excepted) must name each mandatory parameter in its ``__init__``
    signature, either as a regular or as a keyword-only parameter.
    """
    from swh.lister.pattern import Lister, StatelessLister

    lister_mandatory_params = {
        "scheduler",
        "url",
        "instance",
        "credentials",
        "max_origins_per_page",
        "max_pages",
        "enable_origins",
    }

    lister_module = importlib.import_module(f"swh.lister.{lister_package}.lister")
    for name, obj in inspect.getmembers(lister_module, inspect.isclass):
        if obj in (Lister, StatelessLister) or not issubclass(obj, Lister):
            continue

        # inspect.signature also reports keyword-only parameters, which
        # inspect.getfullargspec(...).args leaves out; a constructor declaring
        # the mandatory parameters after a bare ``*`` must not be flagged.
        lister_params = set(inspect.signature(obj.__init__).parameters)

        # sorted to keep the failure message stable from one run to the next
        missing_params = sorted(lister_mandatory_params - lister_params)

        assert not missing_params, (
            f"swh.lister.{lister_package}.{name} class is missing the following "
            f"parameters in its constructor: {', '.join(missing_params)}.\n"
            "Please add them and transmit them to the base lister class constructor "
            f"to avoid bad surprises when deploying\nthe {lister_package} lister in "
            "staging or production environment."
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lister_package", lister_packages())
def test_lister_package_has_register_function(lister_package):
    """Check that the lister package exposes a ``register`` attribute."""
    package = importlib.import_module(f"swh.lister.{lister_package}")
    failure_message = (
        f"swh.lister.{lister_package} module is missing the register function required "
        "to register its celery tasks in scheduler database."
    )
    assert hasattr(package, "register"), failure_message
|
Loading…
Add table
Add a link
Reference in a new issue