Add built-in page and origin count limit to listers

This will allow more automation of the staging add forge now process:
for known-good listers, we can limit the number of origins being
processed and reduce the amount of manual steps taken for each instance.
This commit is contained in:
Nicolas Dandrimont 2022-12-05 14:20:19 +01:00
parent a66e24bfa2
commit b815737054
2 changed files with 88 additions and 1 deletions

View file

@ -84,6 +84,8 @@ class Lister(Generic[StateType, PageType]):
identifies the :attr:`LISTER_NAME`, the second level the lister
:attr:`instance`. The final level is a list of dicts containing the
expected credentials for the given instance of that lister.
max_pages: the maximum number of pages listed in a full listing operation
max_origins_per_page: the maximum number of origins processed per page
Generic types:
- *StateType*: concrete lister type; should usually be a :class:`dataclass` for
@ -102,6 +104,8 @@ class Lister(Generic[StateType, PageType]):
url: str,
instance: Optional[str] = None,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
max_pages: Optional[int] = None,
with_github_session: bool = False,
):
if not self.LISTER_NAME:
@ -140,6 +144,8 @@ class Lister(Generic[StateType, PageType]):
)
self.recorded_origins: Set[str] = set()
self.max_pages = max_pages
self.max_origins_per_page = max_origins_per_page
@http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def http_request(self, url: str, method="GET", **kwargs) -> requests.Response:
@ -172,11 +178,25 @@ class Lister(Generic[StateType, PageType]):
try:
for page in self.get_pages():
full_stats.pages += 1
origins = self.get_origins_from_page(page)
origins = list(self.get_origins_from_page(page))
if (
self.max_origins_per_page
and len(origins) > self.max_origins_per_page
):
logger.info(
"Max origins per page set, truncated %s page results down to %s",
len(origins),
self.max_origins_per_page,
)
origins = origins[: self.max_origins_per_page]
sent_origins = self.send_origins(origins)
self.recorded_origins.update(sent_origins)
full_stats.origins = len(self.recorded_origins)
self.commit_page(page)
if self.max_pages and full_stats.pages >= self.max_pages:
logger.info("Reached page limit of %s, terminating", self.max_pages)
break
finally:
self.finalize()
if self.updated: