launchpad: Manage unhandled exceptions when listing
Prior to this commit, the listing could fail when either reading a page or the page of results (lauchpad api raises RestfulError). This now retries when those kind of exceptions happen. If the error persists (after multiple tryouts and exponential backoff), the listing continues nonetheless (with warning logs). Note that if the page ends up being empty, it's no longer accounted for. This actually allows the listing to finish in case of issues. Related to T3945
This commit is contained in:
parent
262f9369c8
commit
fc2edd24aa
2 changed files with 85 additions and 17 deletions
|
@ -10,8 +10,10 @@ from typing import Any, Dict, Iterator, Optional, Tuple
|
|||
|
||||
import iso8601
|
||||
from launchpadlib.launchpad import Launchpad
|
||||
from lazr.restfulclient.errors import RestfulError
|
||||
from lazr.restfulclient.resource import Collection
|
||||
|
||||
from swh.lister.utils import retry_if_exception, throttling_retry
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
|
@ -38,6 +40,10 @@ def origin(vcs_type: str, repo: Any) -> str:
|
|||
return repo.git_https_url if vcs_type == "git" else repo.web_link
|
||||
|
||||
|
||||
def retry_if_restful_error(retry_state):
|
||||
return retry_if_exception(retry_state, lambda e: isinstance(e, RestfulError))
|
||||
|
||||
|
||||
class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
|
||||
"""
|
||||
List repositories from Launchpad (git or bzr).
|
||||
|
@ -90,6 +96,23 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
|
|||
d[attribute_name] = date_last_modified.isoformat()
|
||||
return d
|
||||
|
||||
@throttling_retry(retry=retry_if_restful_error)
|
||||
def _page_request(
|
||||
self, launchpad, vcs_type: str, date_last_modified: Optional[datetime]
|
||||
) -> Optional[Collection]:
|
||||
"""Querying the page of results for a given vcs_type since the date_last_modified. If
|
||||
some issues occurs, this will deal with the retrying policy.
|
||||
|
||||
"""
|
||||
get_vcs_fns = {
|
||||
"git": launchpad.git_repositories.getRepositories,
|
||||
"bzr": launchpad.branches.getBranches,
|
||||
}
|
||||
|
||||
return get_vcs_fns[vcs_type](
|
||||
order_by="most neglected first", modified_since_date=date_last_modified,
|
||||
)
|
||||
|
||||
def get_pages(self) -> Iterator[LaunchpadPageType]:
|
||||
"""
|
||||
Yields an iterator on all git/bzr repositories hosted on Launchpad sorted
|
||||
|
@ -103,27 +126,31 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
|
|||
"git": self.state.git_date_last_modified,
|
||||
"bzr": self.state.bzr_date_last_modified,
|
||||
}
|
||||
for vcs_type, get_vcs_fn in [
|
||||
("git", launchpad.git_repositories.getRepositories),
|
||||
("bzr", launchpad.branches.getBranches),
|
||||
]:
|
||||
yield vcs_type, get_vcs_fn(
|
||||
order_by="most neglected first",
|
||||
modified_since_date=self.date_last_modified[vcs_type],
|
||||
)
|
||||
for vcs_type in ["git", "bzr"]:
|
||||
try:
|
||||
result = self._page_request(
|
||||
launchpad, vcs_type, self.date_last_modified[vcs_type]
|
||||
)
|
||||
except RestfulError as e:
|
||||
logger.warning("Listing %s origins raised %s", vcs_type, e)
|
||||
result = None
|
||||
if not result:
|
||||
continue
|
||||
yield vcs_type, result
|
||||
|
||||
@throttling_retry(retry=retry_if_restful_error)
|
||||
def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]:
|
||||
"""
|
||||
Iterate on all git repositories and yield ListedOrigin instances.
|
||||
"""
|
||||
assert self.lister_obj.id is not None
|
||||
|
||||
prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
|
||||
|
||||
vcs_type, repos = page
|
||||
|
||||
assert vcs_type in {"git", "bzr"}
|
||||
|
||||
prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
|
||||
|
||||
for repo in repos:
|
||||
origin_url = origin(vcs_type, repo)
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ import json
|
|||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
from lazr.restfulclient.errors import RestfulError
|
||||
import pytest
|
||||
|
||||
from ..lister import LaunchpadLister, origin
|
||||
|
@ -57,11 +58,18 @@ def launchpad_bzr_response(datadir):
|
|||
def _mock_launchpad(mocker, launchpad_response, launchpad_bzr_response=None):
|
||||
mock_launchpad = mocker.patch("swh.lister.launchpad.lister.Launchpad")
|
||||
mock_getRepositories = mock_launchpad.git_repositories.getRepositories
|
||||
mock_getRepositories.return_value = launchpad_response
|
||||
if isinstance(launchpad_response, Exception):
|
||||
mock_getRepositories.side_effect = launchpad_response
|
||||
else:
|
||||
mock_getRepositories.return_value = launchpad_response
|
||||
mock_getBranches = mock_launchpad.branches.getBranches
|
||||
mock_getBranches.return_value = (
|
||||
[] if launchpad_bzr_response is None else launchpad_bzr_response
|
||||
)
|
||||
if launchpad_bzr_response is not None:
|
||||
if isinstance(launchpad_bzr_response, Exception):
|
||||
mock_getBranches.side_effect = launchpad_bzr_response
|
||||
else:
|
||||
mock_getBranches.return_value = launchpad_bzr_response
|
||||
else:
|
||||
mock_getBranches.return_value = [] # empty page
|
||||
mock_launchpad.login_anonymously.return_value = mock_launchpad
|
||||
|
||||
return mock_getRepositories, mock_getBranches
|
||||
|
@ -166,7 +174,7 @@ def test_launchpad_incremental_lister(
|
|||
|
||||
assert lister.incremental
|
||||
assert lister.updated
|
||||
assert stats.pages == 2, "Empty bzr response still accounts for 1 page"
|
||||
assert stats.pages == 1, "Empty bzr page response is ignored"
|
||||
assert stats.origins == len(launchpad_response2)
|
||||
|
||||
mock_getRepositories.assert_called_once_with(
|
||||
|
@ -192,7 +200,7 @@ def test_launchpad_lister_invalid_url_filtering(
|
|||
stats = lister.run()
|
||||
|
||||
assert not lister.updated
|
||||
assert stats.pages == 1 + 1, "Empty pages are still accounted for (1 git, 1 bzr)"
|
||||
assert stats.pages == 1, "Empty pages are ignored(only 1 git page of results)"
|
||||
assert stats.origins == 0
|
||||
|
||||
|
||||
|
@ -211,5 +219,38 @@ def test_launchpad_lister_duplicated_origin(
|
|||
stats = lister.run()
|
||||
|
||||
assert lister.updated
|
||||
assert stats.pages == 1 + 1, "Empty bzr page is still accounted for (1 git, 1 bzr)"
|
||||
assert stats.pages == 1, "Empty bzr page are ignored (only 1 git page of results)"
|
||||
assert stats.origins == 1
|
||||
|
||||
|
||||
def test_launchpad_lister_raise_during_listing(
|
||||
swh_scheduler, mocker, launchpad_response1, launchpad_bzr_response
|
||||
):
|
||||
lister = LaunchpadLister(scheduler=swh_scheduler)
|
||||
# Exponential retries take a long time, so stub time.sleep
|
||||
mocker.patch.object(lister._page_request.retry, "sleep")
|
||||
|
||||
mock_getRepositories, mock_getBranches = _mock_launchpad(
|
||||
mocker,
|
||||
RestfulError("Refuse to list git page"), # breaks git page listing
|
||||
launchpad_bzr_response,
|
||||
)
|
||||
|
||||
stats = lister.run()
|
||||
|
||||
assert lister.updated
|
||||
assert stats.pages == 1
|
||||
assert stats.origins == len(launchpad_bzr_response)
|
||||
|
||||
mock_getRepositories, mock_getBranches = _mock_launchpad(
|
||||
mocker,
|
||||
launchpad_response1,
|
||||
RestfulError("Refuse to list bzr"), # breaks bzr page listing
|
||||
)
|
||||
|
||||
lister = LaunchpadLister(scheduler=swh_scheduler)
|
||||
stats = lister.run()
|
||||
|
||||
assert lister.updated
|
||||
assert stats.pages == 1
|
||||
assert stats.origins == len(launchpad_response1)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue