From fc2edd24aa4b71376e2bfba6dbcab1fa68af6f72 Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" Date: Thu, 17 Feb 2022 09:52:19 +0100 Subject: [PATCH] launchpad: Manage unhandled exceptions when listing Prior to this commit, the listing could fail when either reading a page or the page of results (lauchpad api raises RestfulError). This now retries when those kind of exceptions happen. If the error persists (after multiple tryouts and exponential backoff), the listing continues nonetheless (with warning logs). Note that if the page ends up being empty, it's no longer accounted for. This actually allows the listing to finish in case of issues. Related to T3945 --- swh/lister/launchpad/lister.py | 47 ++++++++++++++----- swh/lister/launchpad/tests/test_lister.py | 55 ++++++++++++++++++++--- 2 files changed, 85 insertions(+), 17 deletions(-) diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py index d79dd43..9bfe306 100644 --- a/swh/lister/launchpad/lister.py +++ b/swh/lister/launchpad/lister.py @@ -10,8 +10,10 @@ from typing import Any, Dict, Iterator, Optional, Tuple import iso8601 from launchpadlib.launchpad import Launchpad +from lazr.restfulclient.errors import RestfulError from lazr.restfulclient.resource import Collection +from swh.lister.utils import retry_if_exception, throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin @@ -38,6 +40,10 @@ def origin(vcs_type: str, repo: Any) -> str: return repo.git_https_url if vcs_type == "git" else repo.web_link +def retry_if_restful_error(retry_state): + return retry_if_exception(retry_state, lambda e: isinstance(e, RestfulError)) + + class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): """ List repositories from Launchpad (git or bzr). @@ -90,6 +96,23 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): d[attribute_name] = date_last_modified.isoformat() return d + @throttling_retry(retry=retry_if_restful_error) + def _page_request( + self, launchpad, vcs_type: str, date_last_modified: Optional[datetime] + ) -> Optional[Collection]: + """Querying the page of results for a given vcs_type since the date_last_modified. If + some issues occurs, this will deal with the retrying policy. + + """ + get_vcs_fns = { + "git": launchpad.git_repositories.getRepositories, + "bzr": launchpad.branches.getBranches, + } + + return get_vcs_fns[vcs_type]( + order_by="most neglected first", modified_since_date=date_last_modified, + ) + def get_pages(self) -> Iterator[LaunchpadPageType]: """ Yields an iterator on all git/bzr repositories hosted on Launchpad sorted @@ -103,27 +126,31 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): "git": self.state.git_date_last_modified, "bzr": self.state.bzr_date_last_modified, } - for vcs_type, get_vcs_fn in [ - ("git", launchpad.git_repositories.getRepositories), - ("bzr", launchpad.branches.getBranches), - ]: - yield vcs_type, get_vcs_fn( - order_by="most neglected first", - modified_since_date=self.date_last_modified[vcs_type], - ) + for vcs_type in ["git", "bzr"]: + try: + result = self._page_request( + launchpad, vcs_type, self.date_last_modified[vcs_type] + ) + except RestfulError as e: + logger.warning("Listing %s origins raised %s", vcs_type, e) + result = None + if not result: + continue + yield vcs_type, result + @throttling_retry(retry=retry_if_restful_error) def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]: """ Iterate on all git repositories and yield ListedOrigin instances. """ assert self.lister_obj.id is not None - prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None} - vcs_type, repos = page assert vcs_type in {"git", "bzr"} + prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None} + for repo in repos: origin_url = origin(vcs_type, repo) diff --git a/swh/lister/launchpad/tests/test_lister.py b/swh/lister/launchpad/tests/test_lister.py index 7473b6c..59fe605 100644 --- a/swh/lister/launchpad/tests/test_lister.py +++ b/swh/lister/launchpad/tests/test_lister.py @@ -8,6 +8,7 @@ import json from pathlib import Path from typing import List +from lazr.restfulclient.errors import RestfulError import pytest from ..lister import LaunchpadLister, origin @@ -57,11 +58,18 @@ def launchpad_bzr_response(datadir): def _mock_launchpad(mocker, launchpad_response, launchpad_bzr_response=None): mock_launchpad = mocker.patch("swh.lister.launchpad.lister.Launchpad") mock_getRepositories = mock_launchpad.git_repositories.getRepositories - mock_getRepositories.return_value = launchpad_response + if isinstance(launchpad_response, Exception): + mock_getRepositories.side_effect = launchpad_response + else: + mock_getRepositories.return_value = launchpad_response mock_getBranches = mock_launchpad.branches.getBranches - mock_getBranches.return_value = ( - [] if launchpad_bzr_response is None else launchpad_bzr_response - ) + if launchpad_bzr_response is not None: + if isinstance(launchpad_bzr_response, Exception): + mock_getBranches.side_effect = launchpad_bzr_response + else: + mock_getBranches.return_value = launchpad_bzr_response + else: + mock_getBranches.return_value = [] # empty page mock_launchpad.login_anonymously.return_value = mock_launchpad return mock_getRepositories, mock_getBranches @@ -166,7 +174,7 @@ def test_launchpad_incremental_lister( assert lister.incremental assert lister.updated - assert stats.pages == 2, "Empty bzr response still accounts for 1 page" + assert stats.pages == 1, "Empty bzr page response is ignored" assert stats.origins == len(launchpad_response2) mock_getRepositories.assert_called_once_with( @@ -192,7 +200,7 @@ def test_launchpad_lister_invalid_url_filtering( stats = lister.run() assert not lister.updated - assert stats.pages == 1 + 1, "Empty pages are still accounted for (1 git, 1 bzr)" + assert stats.pages == 1, "Empty pages are ignored(only 1 git page of results)" assert stats.origins == 0 @@ -211,5 +219,38 @@ def test_launchpad_lister_duplicated_origin( stats = lister.run() assert lister.updated - assert stats.pages == 1 + 1, "Empty bzr page is still accounted for (1 git, 1 bzr)" + assert stats.pages == 1, "Empty bzr page are ignored (only 1 git page of results)" assert stats.origins == 1 + + +def test_launchpad_lister_raise_during_listing( + swh_scheduler, mocker, launchpad_response1, launchpad_bzr_response +): + lister = LaunchpadLister(scheduler=swh_scheduler) + # Exponential retries take a long time, so stub time.sleep + mocker.patch.object(lister._page_request.retry, "sleep") + + mock_getRepositories, mock_getBranches = _mock_launchpad( + mocker, + RestfulError("Refuse to list git page"), # breaks git page listing + launchpad_bzr_response, + ) + + stats = lister.run() + + assert lister.updated + assert stats.pages == 1 + assert stats.origins == len(launchpad_bzr_response) + + mock_getRepositories, mock_getBranches = _mock_launchpad( + mocker, + launchpad_response1, + RestfulError("Refuse to list bzr"), # breaks bzr page listing + ) + + lister = LaunchpadLister(scheduler=swh_scheduler) + stats = lister.run() + + assert lister.updated + assert stats.pages == 1 + assert stats.origins == len(launchpad_response1)