From 5ab6b0040806844762e6ae229c95555564a6b2ea Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" Date: Tue, 21 Sep 2021 10:25:31 +0200 Subject: [PATCH] gnu: Respect the pattern docstring about state initialization Any extra state initialization (outside the scheduler scope) is to happen in the get_pages method. --- swh/lister/gnu/lister.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/swh/lister/gnu/lister.py b/swh/lister/gnu/lister.py index 7d976ee..3d35829 100644 --- a/swh/lister/gnu/lister.py +++ b/swh/lister/gnu/lister.py @@ -4,7 +4,7 @@ # See top-level LICENSE file for more information import logging -from typing import Any, Iterator, Mapping +from typing import Any, Iterator, Mapping, Optional import iso8601 @@ -36,12 +36,16 @@ class GNULister(StatelessLister[GNUPageType]): instance="GNU", credentials=credentials, ) - self.gnu_tree = GNUTree(f"{self.url}/tree.json.gz") + # no side-effect calls in constructor, if extra state is needed, as recommended + # by the pattern docstring, this must happen in the get_pages method. + self.gnu_tree: Optional[GNUTree] = None def get_pages(self) -> Iterator[GNUPageType]: """ Yield a single page listing all GNU projects. """ + # first fetch the manifest to parse + self.gnu_tree = GNUTree(f"{self.url}/tree.json.gz") yield self.gnu_tree.projects def get_origins_from_page(self, page: GNUPageType) -> Iterator[ListedOrigin]: @@ -49,6 +53,7 @@ class GNULister(StatelessLister[GNUPageType]): Iterate on all GNU projects and yield ListedOrigin instances. """ assert self.lister_obj.id is not None + assert self.gnu_tree is not None artifacts = self.gnu_tree.artifacts