From 73f85c0b8a429c973334606263722fc3086178c1 Mon Sep 17 00:00:00 2001 From: Antoine Lambert Date: Fri, 23 Jul 2021 13:51:10 +0200 Subject: [PATCH] gitlab: Adapt requests retry policy to consider HTTP 50x status codes Temporarily server failures can happen when listing a GitLab instance, HTTP status codes 502, 503 or 520 are returned in that case. So adapt lister requests retry policy to execute requests again when such errors are encountered. Related to T3442 --- swh/lister/gitlab/lister.py | 4 +-- swh/lister/gitlab/tests/test_lister.py | 43 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index 7adf73b..8a0dd1d 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -17,7 +17,7 @@ from tenacity.before_sleep import before_sleep_log from swh.lister import USER_AGENT from swh.lister.pattern import CredentialsType, Lister -from swh.lister.utils import retry_attempt, throttling_retry +from swh.lister.utils import is_retryable_exception, retry_attempt, throttling_retry from swh.scheduler.model import ListedOrigin logger = logging.getLogger(__name__) @@ -56,7 +56,7 @@ def _if_rate_limited(retry_state) -> bool: isinstance(exc, HTTPError) and exc.response.status_code == codes.forbidden and int(exc.response.headers.get("RateLimit-Remaining", "0")) == 0 - ) + ) or is_retryable_exception(exc) return False diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py index 2a381ae..4520aaa 100644 --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -201,6 +201,49 @@ def test_lister_gitlab_rate_limit(swh_scheduler, requests_mock, datadir, mocker) assert_sleep_calls(mocker, mock_sleep, [1, WAIT_EXP_BASE]) +@pytest.mark.parametrize("status_code", [502, 503, 520]) +def test_lister_gitlab_http_errors( + swh_scheduler, requests_mock, datadir, mocker, status_code +): + """Gitlab lister should retry requests when encountering HTTP 50x errors + + """ + instance = "gite.lirmm.fr" + url = api_url(instance) + lister = GitLabLister(swh_scheduler, url=url, instance=instance) + + url_page1 = lister.page_url() + response1 = gitlab_page_response(datadir, instance, 1) + url_page2 = lister.page_url(2) + response2 = gitlab_page_response(datadir, instance, 2) + + requests_mock.get( + url_page1, + [{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}], + additional_matcher=_match_request, + ) + requests_mock.get( + url_page2, + [ + # first request ends up with error + {"status_code": status_code}, + # second request is ok + {"json": response2}, + ], + additional_matcher=_match_request, + ) + + # To avoid this test being too slow, we mock sleep within the retry behavior + mock_sleep = mocker.patch.object(lister.get_page_result.retry, "sleep") + + listed_result = lister.run() + + expected_nb_origins = len(response1) + len(response2) + assert listed_result == ListerStats(pages=2, origins=expected_nb_origins) + + assert_sleep_calls(mocker, mock_sleep, [1]) + + def test_lister_gitlab_credentials(swh_scheduler): """Gitlab lister supports credentials configuration