Hook up the recently introduced max_origins_per_page, max_pages and enable_origins options to all listers

Hopefully one day we'll be able to replace all of this mess with PEP 692
(TypedDict-typed **kwargs), but that's only on track for Python 3.12.
This commit is contained in:
Nicolas Dandrimont 2022-12-05 16:33:45 +01:00
parent 5ea79ee3e0
commit e785e67315
31 changed files with 195 additions and 3 deletions

View file

@ -94,6 +94,9 @@ class ArchLister(StatelessLister[ArchListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
flavours: Dict[str, Any] = {
"official": {
"archs": ["x86_64"],
@ -118,6 +121,9 @@ class ArchLister(StatelessLister[ArchListerPage]):
credentials=credentials,
url=flavours["official"]["base_info_url"],
instance=self.INSTANCE,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.flavours = flavours

View file

@ -47,12 +47,18 @@ class AurLister(StatelessLister[AurListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
instance=self.INSTANCE,
url=self.BASE_URL,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
def download_packages_index(self) -> List[Dict[str, Any]]:

View file

@ -53,12 +53,18 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
page_size: int = 1000,
incremental: bool = True,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
url=self.API_URL,
instance=self.INSTANCE,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.incremental = incremental

View file

@ -30,12 +30,18 @@ class BowerLister(StatelessLister[BowerListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
instance=self.INSTANCE,
url=self.API_URL,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.session.headers.update({"Accept": "application/json"})

View file

@ -50,6 +50,9 @@ class CGitLister(StatelessLister[Repositories]):
instance: Optional[str] = None,
credentials: Optional[CredentialsType] = None,
base_git_url: Optional[str] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
"""Lister class for CGit repositories.
@ -67,6 +70,9 @@ class CGitLister(StatelessLister[Repositories]):
url=url,
instance=instance,
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.session.headers.update({"Accept": "application/html"})

View file

@ -41,12 +41,18 @@ class CondaLister(StatelessLister[CondaListerPage]):
url: str = BASE_REPO_URL,
channel: str = "",
archs: List = [],
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
instance=self.INSTANCE,
url=url,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.channel: str = channel
self.archs: List[str] = archs

View file

@ -81,12 +81,18 @@ class CpanLister(StatelessLister[CpanListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
instance=self.INSTANCE,
url=self.API_BASE_URL,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.artifacts: Dict[str, List[Dict[str, Any]]] = defaultdict(list)

View file

@ -32,9 +32,18 @@ class CRANLister(StatelessLister[PageType]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler, url=CRAN_MIRROR, instance="cran", credentials=credentials
scheduler,
url=CRAN_MIRROR,
instance="cran",
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
def get_pages(self) -> Iterator[PageType]:

View file

@ -66,12 +66,18 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
self,
scheduler: SchedulerInterface,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
url=self.BASE_URL,
instance=self.INSTANCE,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.index_metadata: Dict[str, str] = {}

View file

@ -77,12 +77,18 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
suites: List[Suite] = ["stretch", "buster", "bullseye"],
components: List[Component] = ["main", "contrib", "non-free"],
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
url=mirror_url,
instance=distribution,
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
# to ensure urljoin will produce valid Sources URL

View file

@ -6,7 +6,7 @@
from dataclasses import dataclass, field
from datetime import datetime, timezone
import logging
from typing import Any, Dict, Iterator, List, Set, Type
from typing import Any, Dict, Iterator, List, Optional, Set, Type
from urllib.error import HTTPError
from urllib.parse import urljoin
@ -91,12 +91,18 @@ class FedoraLister(Lister[FedoraListerState, FedoraPageType]):
instance: str = "fedora",
url: str = "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
releases: List[Release] = [34, 35, 36],
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
url=url,
instance=instance,
credentials={},
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.releases = releases

View file

@ -70,6 +70,9 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
self,
scheduler: SchedulerInterface,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
first_id: Optional[int] = None,
last_id: Optional[int] = None,
):
@ -79,6 +82,9 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]):
url=self.API_URL,
instance="github",
with_github_session=True,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.first_id = first_id

View file

@ -103,6 +103,9 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
name: Optional[str] = "gitlab",
instance: Optional[str] = None,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
incremental: bool = False,
ignored_project_prefixes: Optional[List[str]] = None,
):
@ -113,6 +116,9 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
url=url.rstrip("/"),
instance=instance,
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.incremental = incremental
self.last_page: Optional[str] = None

View file

@ -31,12 +31,18 @@ class GNULister(StatelessLister[GNUPageType]):
self,
scheduler: SchedulerInterface,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
url=self.GNU_FTP_URL,
instance="GNU",
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
# no side-effect calls in constructor, if extra state is needed, as preconized
# by the pattern docstring, this must happen in the get_pages method.

View file

@ -75,12 +75,18 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
api_token: Optional[str] = None,
page_size: int = 50,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
url=url,
instance=instance,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.query_params = {

View file

@ -47,12 +47,18 @@ class GolangLister(Lister[GolangStateType, GolangPageType]):
scheduler: SchedulerInterface,
incremental: bool = False,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
url=self.GOLANG_MODULES_INDEX_URL,
instance=self.LISTER_NAME,
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.session.headers.update({"Accept": "application/json"})

View file

@ -44,6 +44,9 @@ class HackageLister(Lister[HackageListerState, HackageListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
url: Optional[str] = None,
):
super().__init__(
@ -51,6 +54,9 @@ class HackageLister(Lister[HackageListerState, HackageListerPage]):
credentials=credentials,
instance=self.INSTANCE,
url=url if url else self.BASE_URL,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
# Ensure to set this with same value as the http api search endpoint use
# (50 as of august 2022)

View file

@ -66,12 +66,18 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
scheduler: SchedulerInterface,
incremental: bool = False,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
url="https://launchpad.net/",
instance="launchpad",
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.incremental = incremental
self.date_last_modified: Dict[str, Optional[datetime]] = {

View file

@ -61,6 +61,9 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
index_url: str = None,
instance: Optional[str] = None,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
incremental: bool = True,
):
"""Lister class for Maven repositories.
@ -88,6 +91,9 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
url=url,
instance=instance,
with_github_session=True,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.session.headers.update({"Accept": "application/json"})

View file

@ -320,6 +320,9 @@ class NixGuixLister(StatelessLister[PageResult]):
origin_upstream: str,
instance: Optional[str] = None,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
# canonicalize urls, can be turned off during docker runs
canonicalize: bool = True,
extensions_to_ignore: List[str] = [],
@ -331,6 +334,9 @@ class NixGuixLister(StatelessLister[PageResult]):
instance=instance,
credentials=credentials,
with_github_session=canonicalize,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
# either full fqdn NixOS/nixpkgs or guix repository urls
# maybe add an assert on those specific urls?

View file

@ -53,6 +53,9 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
page_size: int = 1000,
incremental: bool = False,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
@ -61,6 +64,9 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
if incremental
else self.API_FULL_LISTING_URL,
instance=self.INSTANCE,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.page_size = page_size

View file

@ -44,12 +44,18 @@ class NugetLister(Lister[NugetListerState, NugetListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
instance=self.INSTANCE,
url=self.API_INDEX_URL,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.listing_date: Optional[datetime] = None

View file

@ -45,6 +45,9 @@ class OpamLister(StatelessLister[PageType]):
url: str,
instance: Optional[str] = None,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
opam_root: str = "/tmp/opam/",
):
super().__init__(
@ -52,6 +55,9 @@ class OpamLister(StatelessLister[PageType]):
credentials=credentials,
url=url,
instance=instance,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.env = os.environ.copy()
# Opam root folder is initialized in the :meth:`get_pages` method as no

View file

@ -53,6 +53,9 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
self,
scheduler: SchedulerInterface,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
@ -60,6 +63,9 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
instance="packagist",
credentials=credentials,
with_github_session=True,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.session.headers.update({"Accept": "application/json"})

View file

@ -40,9 +40,18 @@ class PhabricatorLister(StatelessLister[PageType]):
instance: Optional[str] = None,
api_token: Optional[str] = None,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler, urljoin(url, self.API_REPOSITORY_PATH), instance, credentials
scheduler=scheduler,
url=urljoin(url, self.API_REPOSITORY_PATH),
instance=instance,
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.session.headers.update({"Accept": "application/json"})

View file

@ -36,12 +36,18 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
instance=self.INSTANCE,
url=self.BASE_URL,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.session.headers.update({"Accept": "application/json"})

View file

@ -43,12 +43,18 @@ class PuppetLister(Lister[PuppetListerState, PuppetListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
instance=self.INSTANCE,
url=self.BASE_URL,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
# Store the datetime the lister runs for incremental purpose
self.listing_date = datetime.now()

View file

@ -70,12 +70,18 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
url=self.PACKAGE_LIST_URL,
instance=self.INSTANCE,
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
# used as termination condition and if useful, becomes the new state when the

View file

@ -63,12 +63,18 @@ class RubyGemsLister(StatelessLister[RubyGemsListerPage]):
self,
scheduler: SchedulerInterface,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
instance=self.INSTANCE,
url=self.RUBY_GEMS_POSTGRES_DUMP_BASE_URL,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
def get_latest_dump_file(self) -> str:

View file

@ -113,12 +113,18 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
scheduler: SchedulerInterface,
incremental: bool = False,
credentials: Optional[CredentialsType] = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
url="https://sourceforge.net",
instance="main",
credentials=credentials,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
# Will hold the currently saved "last modified" dates to compare against our

View file

@ -45,12 +45,18 @@ class TuleapLister(StatelessLister[RepoPage]):
url: str,
instance: Optional[str] = None,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
enable_origins: bool = True,
):
super().__init__(
scheduler=scheduler,
credentials=credentials,
url=url,
instance=instance,
max_origins_per_page=max_origins_per_page,
max_pages=max_pages,
enable_origins=enable_origins,
)
self.session.headers.update({"Accept": "application/json"})