diff --git a/swh/lister/arch/lister.py b/swh/lister/arch/lister.py index 563fa18..c281f22 100644 --- a/swh/lister/arch/lister.py +++ b/swh/lister/arch/lister.py @@ -94,6 +94,9 @@ class ArchLister(StatelessLister[ArchListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, flavours: Dict[str, Any] = { "official": { "archs": ["x86_64"], @@ -118,6 +121,9 @@ class ArchLister(StatelessLister[ArchListerPage]): credentials=credentials, url=flavours["official"]["base_info_url"], instance=self.INSTANCE, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.flavours = flavours diff --git a/swh/lister/aur/lister.py b/swh/lister/aur/lister.py index 9bbdf37..dc43d7d 100644 --- a/swh/lister/aur/lister.py +++ b/swh/lister/aur/lister.py @@ -47,12 +47,18 @@ class AurLister(StatelessLister[AurListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.BASE_URL, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) def download_packages_index(self) -> List[Dict[str, Any]]: diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py index 7bcec03..05720c9 100644 --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -53,12 +53,18 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]): page_size: int = 1000, incremental: bool = True, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, url=self.API_URL, instance=self.INSTANCE, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.incremental = incremental diff --git a/swh/lister/bower/lister.py b/swh/lister/bower/lister.py index 5b488e4..cc440dc 100644 --- a/swh/lister/bower/lister.py +++ b/swh/lister/bower/lister.py @@ -30,12 +30,18 @@ class BowerLister(StatelessLister[BowerListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.API_URL, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.session.headers.update({"Accept": "application/json"}) diff --git a/swh/lister/cgit/lister.py b/swh/lister/cgit/lister.py index 49458d0..4a9aeab 100644 --- a/swh/lister/cgit/lister.py +++ b/swh/lister/cgit/lister.py @@ -50,6 +50,9 @@ class CGitLister(StatelessLister[Repositories]): instance: Optional[str] = None, credentials: Optional[CredentialsType] = None, base_git_url: Optional[str] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): """Lister class for CGit repositories. @@ -67,6 +70,9 @@ class CGitLister(StatelessLister[Repositories]): url=url, instance=instance, credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.session.headers.update({"Accept": "application/html"}) diff --git a/swh/lister/conda/lister.py b/swh/lister/conda/lister.py index ab0190f..4f5cb40 100644 --- a/swh/lister/conda/lister.py +++ b/swh/lister/conda/lister.py @@ -41,12 +41,18 @@ class CondaLister(StatelessLister[CondaListerPage]): url: str = BASE_REPO_URL, channel: str = "", archs: List = [], + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=url, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.channel: str = channel self.archs: List[str] = archs diff --git a/swh/lister/cpan/lister.py b/swh/lister/cpan/lister.py index 32f7479..80669eb 100644 --- a/swh/lister/cpan/lister.py +++ b/swh/lister/cpan/lister.py @@ -81,12 +81,18 @@ class CpanLister(StatelessLister[CpanListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.API_BASE_URL, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.artifacts: Dict[str, List[Dict[str, Any]]] = defaultdict(list) diff --git a/swh/lister/cran/lister.py b/swh/lister/cran/lister.py index 35e3d2b..728c6d3 100644 --- a/swh/lister/cran/lister.py +++ b/swh/lister/cran/lister.py @@ -32,9 +32,18 @@ class CRANLister(StatelessLister[PageType]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( - scheduler, url=CRAN_MIRROR, instance="cran", credentials=credentials + scheduler, + url=CRAN_MIRROR, + instance="cran", + credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) def get_pages(self) -> Iterator[PageType]: diff --git a/swh/lister/crates/lister.py b/swh/lister/crates/lister.py index eca9f10..6b8c94a 100644 --- a/swh/lister/crates/lister.py +++ b/swh/lister/crates/lister.py @@ -66,12 +66,18 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]): self, scheduler: SchedulerInterface, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, url=self.BASE_URL, instance=self.INSTANCE, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.index_metadata: Dict[str, str] = {} diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py index 940e453..23d520a 100644 --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -77,12 +77,18 @@ class DebianLister(Lister[DebianListerState, DebianPageType]): suites: List[Suite] = ["stretch", "buster", "bullseye"], components: List[Component] = ["main", "contrib", "non-free"], credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, url=mirror_url, instance=distribution, credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) # to ensure urljoin will produce valid Sources URL diff --git a/swh/lister/fedora/lister.py b/swh/lister/fedora/lister.py index 8f3dced..34712b3 100644 --- a/swh/lister/fedora/lister.py +++ b/swh/lister/fedora/lister.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from datetime import datetime, timezone import logging -from typing import Any, Dict, Iterator, List, Set, Type +from typing import Any, Dict, Iterator, List, Optional, Set, Type from urllib.error import HTTPError from urllib.parse import urljoin @@ -91,12 +91,18 @@ class FedoraLister(Lister[FedoraListerState, FedoraPageType]): instance: str = "fedora", url: str = "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/", releases: List[Release] = [34, 35, 36], + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, url=url, instance=instance, credentials={}, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.releases = releases diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py index 5728727..738c516 100644 --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -70,6 +70,9 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]): self, scheduler: SchedulerInterface, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, first_id: Optional[int] = None, last_id: Optional[int] = None, ): @@ -79,6 +82,9 @@ class GitHubLister(Lister[GitHubListerState, List[Dict[str, Any]]]): url=self.API_URL, instance="github", with_github_session=True, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.first_id = first_id diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index 7823ee2..3ad2bfd 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -103,6 +103,9 @@ class GitLabLister(Lister[GitLabListerState, PageResult]): name: Optional[str] = "gitlab", instance: Optional[str] = None, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, incremental: bool = False, ignored_project_prefixes: Optional[List[str]] = None, ): @@ -113,6 +116,9 @@ class GitLabLister(Lister[GitLabListerState, PageResult]): url=url.rstrip("/"), instance=instance, credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.incremental = incremental self.last_page: Optional[str] = None diff --git a/swh/lister/gnu/lister.py b/swh/lister/gnu/lister.py index 65eca1f..721bdc2 100644 --- a/swh/lister/gnu/lister.py +++ b/swh/lister/gnu/lister.py @@ -31,12 +31,18 @@ class GNULister(StatelessLister[GNUPageType]): self, scheduler: SchedulerInterface, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, url=self.GNU_FTP_URL, instance="GNU", credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) # no side-effect calls in constructor, if extra state is needed, as preconized # by the pattern docstring, this must happen in the get_pages method. diff --git a/swh/lister/gogs/lister.py b/swh/lister/gogs/lister.py index ce8a398..cdc5576 100644 --- a/swh/lister/gogs/lister.py +++ b/swh/lister/gogs/lister.py @@ -75,12 +75,18 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]): api_token: Optional[str] = None, page_size: int = 50, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, url=url, instance=instance, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.query_params = { diff --git a/swh/lister/golang/lister.py b/swh/lister/golang/lister.py index 10e5935..36a247b 100644 --- a/swh/lister/golang/lister.py +++ b/swh/lister/golang/lister.py @@ -47,12 +47,18 @@ class GolangLister(Lister[GolangStateType, GolangPageType]): scheduler: SchedulerInterface, incremental: bool = False, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, url=self.GOLANG_MODULES_INDEX_URL, instance=self.LISTER_NAME, credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.session.headers.update({"Accept": "application/json"}) diff --git a/swh/lister/hackage/lister.py b/swh/lister/hackage/lister.py index 04fb6f2..a86ff67 100644 --- a/swh/lister/hackage/lister.py +++ b/swh/lister/hackage/lister.py @@ -44,6 +44,9 @@ class HackageLister(Lister[HackageListerState, HackageListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, url: Optional[str] = None, ): super().__init__( @@ -51,6 +54,9 @@ class HackageLister(Lister[HackageListerState, HackageListerPage]): credentials=credentials, instance=self.INSTANCE, url=url if url else self.BASE_URL, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) # Ensure to set this with same value as the http api search endpoint use # (50 as of august 2022) diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py index e9c36fa..b9daa18 100644 --- a/swh/lister/launchpad/lister.py +++ b/swh/lister/launchpad/lister.py @@ -66,12 +66,18 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): scheduler: SchedulerInterface, incremental: bool = False, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, url="https://launchpad.net/", instance="launchpad", credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.incremental = incremental self.date_last_modified: Dict[str, Optional[datetime]] = { diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py index 195a8a3..8dc702c 100644 --- a/swh/lister/maven/lister.py +++ b/swh/lister/maven/lister.py @@ -61,6 +61,9 @@ class MavenLister(Lister[MavenListerState, RepoPage]): index_url: str = None, instance: Optional[str] = None, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, incremental: bool = True, ): """Lister class for Maven repositories. @@ -88,6 +91,9 @@ class MavenLister(Lister[MavenListerState, RepoPage]): url=url, instance=instance, with_github_session=True, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.session.headers.update({"Accept": "application/json"}) diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py index 3e410aa..3440a8e 100644 --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -320,6 +320,9 @@ class NixGuixLister(StatelessLister[PageResult]): origin_upstream: str, instance: Optional[str] = None, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, # canonicalize urls, can be turned off during docker runs canonicalize: bool = True, extensions_to_ignore: List[str] = [], @@ -331,6 +334,9 @@ class NixGuixLister(StatelessLister[PageResult]): instance=instance, credentials=credentials, with_github_session=canonicalize, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) # either full fqdn NixOS/nixpkgs or guix repository urls # maybe add an assert on those specific urls? diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py index b940699..f10c02d 100644 --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -53,6 +53,9 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]): page_size: int = 1000, incremental: bool = False, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, @@ -61,6 +64,9 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]): if incremental else self.API_FULL_LISTING_URL, instance=self.INSTANCE, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.page_size = page_size diff --git a/swh/lister/nuget/lister.py b/swh/lister/nuget/lister.py index 54a6c22..98f9fc9 100644 --- a/swh/lister/nuget/lister.py +++ b/swh/lister/nuget/lister.py @@ -44,12 +44,18 @@ class NugetLister(Lister[NugetListerState, NugetListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.API_INDEX_URL, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.listing_date: Optional[datetime] = None diff --git a/swh/lister/opam/lister.py b/swh/lister/opam/lister.py index 724d198..6b54e66 100644 --- a/swh/lister/opam/lister.py +++ b/swh/lister/opam/lister.py @@ -45,6 +45,9 @@ class OpamLister(StatelessLister[PageType]): url: str, instance: Optional[str] = None, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, opam_root: str = "/tmp/opam/", ): super().__init__( @@ -52,6 +55,9 @@ class OpamLister(StatelessLister[PageType]): credentials=credentials, url=url, instance=instance, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.env = os.environ.copy() # Opam root folder is initialized in the :meth:`get_pages` method as no diff --git a/swh/lister/packagist/lister.py b/swh/lister/packagist/lister.py index e9fa296..af57b55 100644 --- a/swh/lister/packagist/lister.py +++ b/swh/lister/packagist/lister.py @@ -53,6 +53,9 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]): self, scheduler: SchedulerInterface, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, @@ -60,6 +63,9 @@ class PackagistLister(Lister[PackagistListerState, PackagistPageType]): instance="packagist", credentials=credentials, with_github_session=True, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.session.headers.update({"Accept": "application/json"}) diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py index 4556178..651dc8e 100644 --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -40,9 +40,18 @@ class PhabricatorLister(StatelessLister[PageType]): instance: Optional[str] = None, api_token: Optional[str] = None, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( - scheduler, urljoin(url, self.API_REPOSITORY_PATH), instance, credentials + scheduler=scheduler, + url=urljoin(url, self.API_REPOSITORY_PATH), + instance=instance, + credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.session.headers.update({"Accept": "application/json"}) diff --git a/swh/lister/pubdev/lister.py b/swh/lister/pubdev/lister.py index fd1dc45..50e4f15 100644 --- a/swh/lister/pubdev/lister.py +++ b/swh/lister/pubdev/lister.py @@ -36,12 +36,18 @@ class PubDevLister(StatelessLister[PubDevListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.BASE_URL, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.session.headers.update({"Accept": "application/json"}) diff --git a/swh/lister/puppet/lister.py b/swh/lister/puppet/lister.py index 39deecf..6e84b27 100644 --- a/swh/lister/puppet/lister.py +++ b/swh/lister/puppet/lister.py @@ -43,12 +43,18 @@ class PuppetLister(Lister[PuppetListerState, PuppetListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.BASE_URL, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) # Store the datetime the lister runs for incremental purpose self.listing_date = datetime.now() diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py index 443c21d..64f14fa 100644 --- a/swh/lister/pypi/lister.py +++ b/swh/lister/pypi/lister.py @@ -70,12 +70,18 @@ class PyPILister(Lister[PyPIListerState, PackageListPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, url=self.PACKAGE_LIST_URL, instance=self.INSTANCE, credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) # used as termination condition and if useful, becomes the new state when the diff --git a/swh/lister/rubygems/lister.py b/swh/lister/rubygems/lister.py index 917a2d6..bb317ea 100644 --- a/swh/lister/rubygems/lister.py +++ b/swh/lister/rubygems/lister.py @@ -63,12 +63,18 @@ class RubyGemsLister(StatelessLister[RubyGemsListerPage]): self, scheduler: SchedulerInterface, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, instance=self.INSTANCE, url=self.RUBY_GEMS_POSTGRES_DUMP_BASE_URL, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) def get_latest_dump_file(self) -> str: diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py index ba8c412..234e198 100644 --- a/swh/lister/sourceforge/lister.py +++ b/swh/lister/sourceforge/lister.py @@ -113,12 +113,18 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]): scheduler: SchedulerInterface, incremental: bool = False, credentials: Optional[CredentialsType] = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, url="https://sourceforge.net", instance="main", credentials=credentials, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) # Will hold the currently saved "last modified" dates to compare against our diff --git a/swh/lister/tuleap/lister.py b/swh/lister/tuleap/lister.py index 4a55499..ce5cadf 100644 --- a/swh/lister/tuleap/lister.py +++ b/swh/lister/tuleap/lister.py @@ -45,12 +45,18 @@ class TuleapLister(StatelessLister[RepoPage]): url: str, instance: Optional[str] = None, credentials: CredentialsType = None, + max_origins_per_page: Optional[int] = None, + max_pages: Optional[int] = None, + enable_origins: bool = True, ): super().__init__( scheduler=scheduler, credentials=credentials, url=url, instance=instance, + max_origins_per_page=max_origins_per_page, + max_pages=max_pages, + enable_origins=enable_origins, ) self.session.headers.update({"Accept": "application/json"})