From 9ca5295a408b7f1f5be3a39ca7c4e00524dab4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Gom=C3=A8s?= Date: Wed, 26 May 2021 12:05:39 +0200 Subject: [PATCH] sourceforge: retry for all retryable exceptions Since this lister is doing a lot more requests than most other, it makes sense that issues would arise more often. We want the lister to continue even if the website is having issues and not break on the first 500 or closed connection it encounters. This change introduces a mechanism to retry all exceptions worth retrying and uses it for the SourceForge lister. Other listers might benefit from this, but this is out of scope here. Tests had to be adjusted to stub the sleep function since retries happened way more often. --- swh/lister/sourceforge/lister.py | 7 +++- swh/lister/sourceforge/tests/test_lister.py | 17 +++++++- swh/lister/utils.py | 43 +++++++++++++++++---- 3 files changed, 56 insertions(+), 11 deletions(-) diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py index ae8c05f..15ad7f2 100644 --- a/swh/lister/sourceforge/lister.py +++ b/swh/lister/sourceforge/lister.py @@ -15,7 +15,7 @@ import requests from tenacity.before_sleep import before_sleep_log from swh.core.api.classes import stream_results -from swh.lister.utils import throttling_retry +from swh.lister.utils import retry_policy_generic, throttling_retry from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin @@ -186,7 +186,10 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]): self._project_last_modified = listed_origins return listed_origins - @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) + @throttling_retry( + retry=retry_policy_generic, + before_sleep=before_sleep_log(logger, logging.WARNING), + ) def page_request(self, url, params) -> requests.Response: # Log listed URL to ease debugging logger.debug("Fetching URL %s with params %s", url, params) diff --git a/swh/lister/sourceforge/tests/test_lister.py b/swh/lister/sourceforge/tests/test_lister.py index 6c701cd..0b7b199 100644 --- a/swh/lister/sourceforge/tests/test_lister.py +++ b/swh/lister/sourceforge/tests/test_lister.py @@ -331,20 +331,33 @@ def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir) @pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404]) -def test_sourceforge_lister_http_error(swh_scheduler, requests_mock, status_code): +def test_sourceforge_lister_http_error( + swh_scheduler, requests_mock, status_code, mocker +): lister = SourceForgeLister(scheduler=swh_scheduler) + # Exponential retries take a long time, so stub time.sleep + mocked_sleep = mocker.patch.object(lister.page_request.retry, "sleep") + requests_mock.get(MAIN_SITEMAP_URL, status_code=status_code) with pytest.raises(HTTPError): lister.run() + exp_retries = [] + if status_code >= 500: + exp_retries = [1.0, 10.0, 100.0, 1000.0] + + assert_sleep_calls(mocker, mocked_sleep, exp_retries) + @pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404]) def test_sourceforge_lister_project_error( - datadir, swh_scheduler, requests_mock, status_code, + datadir, swh_scheduler, requests_mock, status_code, mocker ): lister = SourceForgeLister(scheduler=swh_scheduler) + # Exponential retries take a long time, so stub time.sleep + mocker.patch.object(lister.page_request.retry, "sleep") requests_mock.get( MAIN_SITEMAP_URL, diff --git a/swh/lister/utils.py b/swh/lister/utils.py index c7b6a4c..9df6907 100644 --- a/swh/lister/utils.py +++ b/swh/lister/utils.py @@ -2,9 +2,9 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Iterator, Tuple +from typing import Callable, Iterator, Tuple -from requests.exceptions import HTTPError +from requests.exceptions import ConnectionError, HTTPError from requests.status_codes import codes from tenacity import retry as tenacity_retry from tenacity.stop import stop_after_attempt @@ -45,6 +45,16 @@ def is_throttling_exception(e: Exception) -> bool: ) +def is_retryable_exception(e: Exception) -> bool: + """ + Checks if an exception is worth retrying (connection, throttling or a server error). + """ + is_connection_error = isinstance(e, ConnectionError) + is_500_error = isinstance(e, HTTPError) and e.response.status_code >= 500 + + return is_connection_error or is_throttling_exception(e) or is_500_error + + def retry_attempt(retry_state): """ Utility function to get last retry attempt info based on the @@ -58,16 +68,35 @@ def retry_attempt(retry_state): return attempt +def retry_if_exception(retry_state, predicate: Callable[[Exception], bool]) -> bool: + """ + Custom tenacity retry predicate for handling exceptions with the given predicate. + """ + attempt = retry_attempt(retry_state) + if attempt.failed: + exception = attempt.exception() + return predicate(exception) + return False + + def retry_if_throttling(retry_state) -> bool: """ Custom tenacity retry predicate for handling HTTP responses with status code 429 (too many requests). """ - attempt = retry_attempt(retry_state) - if attempt.failed: - exception = attempt.exception() - return is_throttling_exception(exception) - return False + return retry_if_exception(retry_state, is_throttling_exception) + + +def retry_policy_generic(retry_state) -> bool: + """ + Custom tenacity retry predicate for handling failed requests: + - ConnectionError + - Server errors (status >= 500) + - Throttling errors (status == 429) + + This does not handle 404, 403 or other status codes. + """ + return retry_if_exception(retry_state, is_retryable_exception) WAIT_EXP_BASE = 10