launchpad: Manage unhandled exceptions when listing

Prior to this commit, the listing could fail when either reading a page or the page of
results (lauchpad api raises RestfulError). This now retries when those kind of
exceptions happen. If the error persists (after multiple tryouts and exponential
backoff), the listing continues nonetheless (with warning logs).

Note that if the page ends up being empty, it's no longer accounted for.

This actually allows the listing to finish in case of issues.

Related to T3945
This commit is contained in:
Antoine R. Dumont (@ardumont) 2022-02-17 09:52:19 +01:00
parent 262f9369c8
commit fc2edd24aa
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
2 changed files with 85 additions and 17 deletions

View file

@ -10,8 +10,10 @@ from typing import Any, Dict, Iterator, Optional, Tuple
import iso8601
from launchpadlib.launchpad import Launchpad
from lazr.restfulclient.errors import RestfulError
from lazr.restfulclient.resource import Collection
from swh.lister.utils import retry_if_exception, throttling_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
@ -38,6 +40,10 @@ def origin(vcs_type: str, repo: Any) -> str:
return repo.git_https_url if vcs_type == "git" else repo.web_link
def retry_if_restful_error(retry_state):
return retry_if_exception(retry_state, lambda e: isinstance(e, RestfulError))
class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
"""
List repositories from Launchpad (git or bzr).
@ -90,6 +96,23 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
d[attribute_name] = date_last_modified.isoformat()
return d
@throttling_retry(retry=retry_if_restful_error)
def _page_request(
self, launchpad, vcs_type: str, date_last_modified: Optional[datetime]
) -> Optional[Collection]:
"""Querying the page of results for a given vcs_type since the date_last_modified. If
some issues occurs, this will deal with the retrying policy.
"""
get_vcs_fns = {
"git": launchpad.git_repositories.getRepositories,
"bzr": launchpad.branches.getBranches,
}
return get_vcs_fns[vcs_type](
order_by="most neglected first", modified_since_date=date_last_modified,
)
def get_pages(self) -> Iterator[LaunchpadPageType]:
"""
Yields an iterator on all git/bzr repositories hosted on Launchpad sorted
@ -103,27 +126,31 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
"git": self.state.git_date_last_modified,
"bzr": self.state.bzr_date_last_modified,
}
for vcs_type, get_vcs_fn in [
("git", launchpad.git_repositories.getRepositories),
("bzr", launchpad.branches.getBranches),
]:
yield vcs_type, get_vcs_fn(
order_by="most neglected first",
modified_since_date=self.date_last_modified[vcs_type],
)
for vcs_type in ["git", "bzr"]:
try:
result = self._page_request(
launchpad, vcs_type, self.date_last_modified[vcs_type]
)
except RestfulError as e:
logger.warning("Listing %s origins raised %s", vcs_type, e)
result = None
if not result:
continue
yield vcs_type, result
@throttling_retry(retry=retry_if_restful_error)
def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]:
"""
Iterate on all git repositories and yield ListedOrigin instances.
"""
assert self.lister_obj.id is not None
prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
vcs_type, repos = page
assert vcs_type in {"git", "bzr"}
prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
for repo in repos:
origin_url = origin(vcs_type, repo)

View file

@ -8,6 +8,7 @@ import json
from pathlib import Path
from typing import List
from lazr.restfulclient.errors import RestfulError
import pytest
from ..lister import LaunchpadLister, origin
@ -57,11 +58,18 @@ def launchpad_bzr_response(datadir):
def _mock_launchpad(mocker, launchpad_response, launchpad_bzr_response=None):
mock_launchpad = mocker.patch("swh.lister.launchpad.lister.Launchpad")
mock_getRepositories = mock_launchpad.git_repositories.getRepositories
mock_getRepositories.return_value = launchpad_response
if isinstance(launchpad_response, Exception):
mock_getRepositories.side_effect = launchpad_response
else:
mock_getRepositories.return_value = launchpad_response
mock_getBranches = mock_launchpad.branches.getBranches
mock_getBranches.return_value = (
[] if launchpad_bzr_response is None else launchpad_bzr_response
)
if launchpad_bzr_response is not None:
if isinstance(launchpad_bzr_response, Exception):
mock_getBranches.side_effect = launchpad_bzr_response
else:
mock_getBranches.return_value = launchpad_bzr_response
else:
mock_getBranches.return_value = [] # empty page
mock_launchpad.login_anonymously.return_value = mock_launchpad
return mock_getRepositories, mock_getBranches
@ -166,7 +174,7 @@ def test_launchpad_incremental_lister(
assert lister.incremental
assert lister.updated
assert stats.pages == 2, "Empty bzr response still accounts for 1 page"
assert stats.pages == 1, "Empty bzr page response is ignored"
assert stats.origins == len(launchpad_response2)
mock_getRepositories.assert_called_once_with(
@ -192,7 +200,7 @@ def test_launchpad_lister_invalid_url_filtering(
stats = lister.run()
assert not lister.updated
assert stats.pages == 1 + 1, "Empty pages are still accounted for (1 git, 1 bzr)"
assert stats.pages == 1, "Empty pages are ignored(only 1 git page of results)"
assert stats.origins == 0
@ -211,5 +219,38 @@ def test_launchpad_lister_duplicated_origin(
stats = lister.run()
assert lister.updated
assert stats.pages == 1 + 1, "Empty bzr page is still accounted for (1 git, 1 bzr)"
assert stats.pages == 1, "Empty bzr page are ignored (only 1 git page of results)"
assert stats.origins == 1
def test_launchpad_lister_raise_during_listing(
swh_scheduler, mocker, launchpad_response1, launchpad_bzr_response
):
lister = LaunchpadLister(scheduler=swh_scheduler)
# Exponential retries take a long time, so stub time.sleep
mocker.patch.object(lister._page_request.retry, "sleep")
mock_getRepositories, mock_getBranches = _mock_launchpad(
mocker,
RestfulError("Refuse to list git page"), # breaks git page listing
launchpad_bzr_response,
)
stats = lister.run()
assert lister.updated
assert stats.pages == 1
assert stats.origins == len(launchpad_bzr_response)
mock_getRepositories, mock_getBranches = _mock_launchpad(
mocker,
launchpad_response1,
RestfulError("Refuse to list bzr"), # breaks bzr page listing
)
lister = LaunchpadLister(scheduler=swh_scheduler)
stats = lister.run()
assert lister.updated
assert stats.pages == 1
assert stats.origins == len(launchpad_response1)