From 5426883c49ce0fc4a442b705ae573f868b9f7a62 Mon Sep 17 00:00:00 2001 From: Antoine Lambert Date: Thu, 29 Sep 2022 11:14:35 +0200 Subject: [PATCH] debian: Remove no longer needed code to get accurate origins count The base lister class now ensures the count of listed origins will be accurate. --- swh/lister/debian/lister.py | 42 +++++-------------------------------- 1 file changed, 5 insertions(+), 37 deletions(-) diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py index c37d48f..940e453 100644 --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -95,12 +95,7 @@ class DebianLister(Lister[DebianListerState, DebianPageType]): # will hold all listed origins info self.listed_origins: Dict[DebianOrigin, ListedOrigin] = {} - # will contain origin urls that have already been listed - # in a previous page - self.sent_origins: Set[DebianOrigin] = set() - # will contain already listed package info that need to be sent - # to the scheduler for update in the commit_page method - self.origins_to_update: Dict[DebianOrigin, ListedOrigin] = {} + # will contain the lister state after a call to run self.package_versions: Dict[PkgName, Set[PkgVersion]] = {} @@ -185,7 +180,6 @@ class DebianLister(Lister[DebianListerState, DebianPageType]): assert self.lister_obj.id is not None origins_to_send = {} - self.origins_to_update = {} # iterate on each package source info for src_pkg in page: @@ -228,17 +222,11 @@ class DebianLister(Lister[DebianListerState, DebianPageType]): extra_loader_arguments={"packages": {}}, last_update=self.last_sources_update, ) - # origin will be yielded at the end of that method - origins_to_send[origin_url] = self.listed_origins[origin_url] # init set that will contain all listed package versions self.package_versions[package_name] = set() - # package has already been listed in a previous page or current page - elif origin_url not in origins_to_send: - # if package has been listed in a previous page, its new versions - # will be added to its ListedOrigin object but the update will - # be sent to the scheduler in the commit_page method - self.origins_to_update[origin_url] = self.listed_origins[origin_url] + # origin will be yielded at the end of that method + origins_to_send[origin_url] = self.listed_origins[origin_url] # update package versions data in parameter that will be provided # to the debian loader @@ -273,20 +261,8 @@ class DebianLister(Lister[DebianListerState, DebianPageType]): # no new versions so far, no need to send the origin to the scheduler if not new_versions: origins_to_send.pop(origin_url, None) - self.origins_to_update.pop(origin_url, None) - # new versions found, ensure the origin will be sent to the scheduler - elif origin_url not in self.sent_origins: - self.origins_to_update.pop(origin_url, None) - origins_to_send[origin_url] = self.listed_origins[origin_url] - # update already counted origins with changes since last page - self.sent_origins.update(origins_to_send.keys()) - - logger.debug( - "Found %s new packages, %s packages with new versions.", - len(origins_to_send), - len(self.origins_to_update), - ) + logger.debug("Found %s new packages.", len(origins_to_send)) logger.debug( "Current total number of listed packages is equal to %s.", len(self.listed_origins), @@ -294,15 +270,7 @@ class DebianLister(Lister[DebianListerState, DebianPageType]): yield from origins_to_send.values() - def get_origins_to_update(self) -> Iterator[ListedOrigin]: - yield from self.origins_to_update.values() - - def commit_page(self, page: DebianPageType): - """Send to scheduler already listed origins where new versions have been found - in current page.""" - self.send_origins(self.get_origins_to_update()) - def finalize(self): # set mapping between listed package names and versions as lister state self.state.package_versions = self.package_versions - self.updated = len(self.sent_origins) > 0 + self.updated = len(self.listed_origins) > 0