Use http_retry decorator from swh.core.retry module
The http_retry decorator has been moved to swh-core package in order to ease its reuse across swh packages.
This commit is contained in:
parent
1ee549fc9d
commit
4f57e84450
13 changed files with 32 additions and 201 deletions
|
@ -318,7 +318,7 @@ then immediately stop the listing by doing an equivalent of
|
|||
:py:meth:`Response.raise_for_status` from the ``requests`` library. As for rate-limiting
|
||||
errors, we have a strategy of using a flexible decorator to handle the retrying for us.
|
||||
It is based on the ``tenacity`` library and accessible as :py:func:`http_retry` from
|
||||
:py:mod:`swh.lister.utils`.
|
||||
:py:mod:`swh.core.retry`.
|
||||
|
||||
Pagination
|
||||
^^^^^^^^^^
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
swh.core[db,github] >= 2.16.1
|
||||
swh.core[db,github] >= 2.22.0
|
||||
swh.scheduler >= 0.8
|
||||
|
|
|
@ -9,8 +9,8 @@ import os
|
|||
|
||||
import pytest
|
||||
|
||||
from swh.core.retry import MAX_NUMBER_ATTEMPTS
|
||||
from swh.lister.bitbucket.lister import BitbucketLister
|
||||
from swh.lister.utils import MAX_NUMBER_ATTEMPTS
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -188,7 +188,6 @@ def test_bitbucket_lister_buggy_page(
|
|||
bb_api_repositories_page1,
|
||||
bb_api_repositories_page2,
|
||||
):
|
||||
|
||||
requests_mock.get(
|
||||
BitbucketLister.API_URL,
|
||||
[
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2018-2022 The Software Heritage developers
|
||||
# Copyright (C) 2018-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -14,8 +14,8 @@ from requests.exceptions import HTTPError
|
|||
from requests.status_codes import codes
|
||||
from tenacity.before_sleep import before_sleep_log
|
||||
|
||||
from swh.core.retry import http_retry, is_retryable_exception
|
||||
from swh.lister.pattern import CredentialsType, Lister
|
||||
from swh.lister.utils import http_retry, is_retryable_exception
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
|
@ -12,11 +12,11 @@ from typing import Dict, List
|
|||
import pytest
|
||||
from requests.status_codes import codes
|
||||
|
||||
from swh.core.retry import WAIT_EXP_BASE
|
||||
from swh.lister import USER_AGENT_TEMPLATE
|
||||
from swh.lister.gitlab.lister import GitLabLister, _parse_id_after
|
||||
from swh.lister.pattern import ListerStats
|
||||
from swh.lister.tests.test_utils import assert_sleep_calls
|
||||
from swh.lister.utils import WAIT_EXP_BASE
|
||||
from swh.lister.tests.utils import assert_sleep_calls
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -8,9 +8,9 @@ from pathlib import Path
|
|||
|
||||
import iso8601
|
||||
|
||||
from swh.core.retry import WAIT_EXP_BASE
|
||||
from swh.lister.golang.lister import GolangLister, GolangStateType
|
||||
from swh.lister.tests.test_utils import assert_sleep_calls
|
||||
from swh.lister.utils import WAIT_EXP_BASE
|
||||
from swh.lister.tests.utils import assert_sleep_calls
|
||||
|
||||
# https://pkg.go.dev prefix omitted
|
||||
expected_listed = [
|
||||
|
@ -98,7 +98,6 @@ def _generate_responses(datadir, requests_mock):
|
|||
|
||||
|
||||
def test_golang_lister(swh_scheduler, mocker, requests_mock, datadir):
|
||||
|
||||
# Exponential retries take a long time, so stub time.sleep
|
||||
mocked_sleep = mocker.patch.object(GolangLister.http_request.retry, "sleep")
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2020-2022 The Software Heritage developers
|
||||
# Copyright (C) 2020-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -14,7 +14,7 @@ from lazr.restfulclient.errors import RestfulError
|
|||
from lazr.restfulclient.resource import Collection
|
||||
from tenacity.before_sleep import before_sleep_log
|
||||
|
||||
from swh.lister.utils import http_retry, retry_if_exception
|
||||
from swh.core.retry import http_retry, retry_if_exception
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2020-2022 The Software Heritage developers
|
||||
# Copyright (C) 2020-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -16,12 +16,13 @@ from tenacity.before_sleep import before_sleep_log
|
|||
|
||||
from swh.core.config import load_from_envvar
|
||||
from swh.core.github.utils import GitHubSession
|
||||
from swh.core.retry import http_retry
|
||||
from swh.core.utils import grouper
|
||||
from swh.scheduler import get_scheduler, model
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
|
||||
from . import USER_AGENT_TEMPLATE
|
||||
from .utils import http_retry, is_valid_origin_url
|
||||
from .utils import is_valid_origin_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -153,7 +154,6 @@ class Lister(Generic[StateType, PageType]):
|
|||
|
||||
@http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
|
||||
def http_request(self, url: str, method="GET", **kwargs) -> requests.Response:
|
||||
|
||||
logger.debug("Fetching URL %s with params %s", url, kwargs.get("params"))
|
||||
|
||||
response = self.session.request(method, url, **kwargs)
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2018-2021 The Software Heritage developers
|
||||
# Copyright (C) 2018-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -13,7 +13,7 @@ from xmlrpc.client import Fault, ServerProxy
|
|||
|
||||
from tenacity.before_sleep import before_sleep_log
|
||||
|
||||
from swh.lister.utils import http_retry
|
||||
from swh.core.retry import http_retry
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2021-2022 The Software Heritage developers
|
||||
# Copyright (C) 2021-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -13,6 +13,7 @@ from iso8601 import iso8601
|
|||
import pytest
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from swh.core.retry import WAIT_EXP_BASE
|
||||
from swh.lister import USER_AGENT_TEMPLATE
|
||||
from swh.lister.sourceforge.lister import (
|
||||
MAIN_SITEMAP_URL,
|
||||
|
@ -20,8 +21,7 @@ from swh.lister.sourceforge.lister import (
|
|||
SourceForgeLister,
|
||||
SourceForgeListerState,
|
||||
)
|
||||
from swh.lister.tests.test_utils import assert_sleep_calls
|
||||
from swh.lister.utils import WAIT_EXP_BASE
|
||||
from swh.lister.tests.utils import assert_sleep_calls
|
||||
|
||||
# Mapping of project name to namespace
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
@ -368,7 +368,6 @@ def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, m
|
|||
|
||||
|
||||
def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir):
|
||||
|
||||
lister = SourceForgeLister(scheduler=swh_scheduler)
|
||||
|
||||
# Exponential retries take a long time, so stub time.sleep
|
||||
|
|
|
@ -1,13 +1,10 @@
|
|||
# Copyright (C) 2018-2022 the Software Heritage developers
|
||||
# Copyright (C) 2018-2023 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import pytest
|
||||
import requests
|
||||
from requests.status_codes import codes
|
||||
from tenacity.wait import wait_fixed
|
||||
|
||||
from swh.lister.utils import MAX_NUMBER_ATTEMPTS, WAIT_EXP_BASE, http_retry, split_range
|
||||
from swh.lister.utils import split_range
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
@ -41,90 +38,3 @@ def test_split_range_errors(total_pages, nb_pages):
|
|||
for total_pages, nb_pages in [(None, 1), (100, None)]:
|
||||
with pytest.raises(TypeError):
|
||||
next(split_range(total_pages, nb_pages))
|
||||
|
||||
|
||||
TEST_URL = "https://example.og/api/repositories"
|
||||
|
||||
|
||||
@http_retry()
|
||||
def make_request():
|
||||
response = requests.get(TEST_URL)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
|
||||
def assert_sleep_calls(mocker, mock_sleep, sleep_params):
|
||||
mock_sleep.assert_has_calls([mocker.call(param) for param in sleep_params])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"status_code",
|
||||
[
|
||||
codes.too_many_requests,
|
||||
codes.internal_server_error,
|
||||
codes.bad_gateway,
|
||||
codes.service_unavailable,
|
||||
],
|
||||
)
|
||||
def test_http_retry(requests_mock, mocker, status_code):
|
||||
data = {"result": {}}
|
||||
requests_mock.get(
|
||||
TEST_URL,
|
||||
[
|
||||
{"status_code": status_code},
|
||||
{"status_code": status_code},
|
||||
{"status_code": codes.ok, "json": data},
|
||||
],
|
||||
)
|
||||
|
||||
mock_sleep = mocker.patch.object(make_request.retry, "sleep")
|
||||
|
||||
response = make_request()
|
||||
|
||||
assert_sleep_calls(mocker, mock_sleep, [1, WAIT_EXP_BASE])
|
||||
|
||||
assert response.json() == data
|
||||
|
||||
|
||||
def test_http_retry_max_attemps(requests_mock, mocker):
|
||||
requests_mock.get(
|
||||
TEST_URL,
|
||||
[{"status_code": codes.too_many_requests}] * (MAX_NUMBER_ATTEMPTS),
|
||||
)
|
||||
|
||||
mock_sleep = mocker.patch.object(make_request.retry, "sleep")
|
||||
|
||||
with pytest.raises(requests.exceptions.HTTPError) as e:
|
||||
make_request()
|
||||
|
||||
assert e.value.response.status_code == codes.too_many_requests
|
||||
|
||||
assert_sleep_calls(
|
||||
mocker,
|
||||
mock_sleep,
|
||||
[float(WAIT_EXP_BASE**i) for i in range(MAX_NUMBER_ATTEMPTS - 1)],
|
||||
)
|
||||
|
||||
|
||||
@http_retry(wait=wait_fixed(WAIT_EXP_BASE))
|
||||
def make_request_wait_fixed():
|
||||
response = requests.get(TEST_URL)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
|
||||
def test_http_retry_wait_fixed(requests_mock, mocker):
|
||||
requests_mock.get(
|
||||
TEST_URL,
|
||||
[
|
||||
{"status_code": codes.too_many_requests},
|
||||
{"status_code": codes.too_many_requests},
|
||||
{"status_code": codes.ok},
|
||||
],
|
||||
)
|
||||
|
||||
mock_sleep = mocker.patch.object(make_request_wait_fixed.retry, "sleep")
|
||||
|
||||
make_request_wait_fixed()
|
||||
|
||||
assert_sleep_calls(mocker, mock_sleep, [WAIT_EXP_BASE] * 2)
|
||||
|
|
8
swh/lister/tests/utils.py
Normal file
8
swh/lister/tests/utils.py
Normal file
|
@ -0,0 +1,8 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
def assert_sleep_calls(mocker, mock_sleep, sleep_params):
|
||||
mock_sleep.assert_has_calls([mocker.call(param) for param in sleep_params])
|
|
@ -1,16 +1,10 @@
|
|||
# Copyright (C) 2018-2022 the Software Heritage developers
|
||||
# Copyright (C) 2018-2023 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from typing import Callable, Iterator, Optional, Tuple
|
||||
from typing import Iterator, Optional, Tuple
|
||||
import urllib.parse
|
||||
|
||||
from requests.exceptions import ConnectionError, HTTPError
|
||||
from requests.status_codes import codes
|
||||
from tenacity import retry as tenacity_retry
|
||||
from tenacity.stop import stop_after_attempt
|
||||
from tenacity.wait import wait_exponential
|
||||
|
||||
|
||||
def split_range(total_pages: int, nb_pages: int) -> Iterator[Tuple[int, int]]:
|
||||
"""Split `total_pages` into mostly `nb_pages` ranges. In some cases, the last range can
|
||||
|
@ -36,84 +30,6 @@ def split_range(total_pages: int, nb_pages: int) -> Iterator[Tuple[int, int]]:
|
|||
yield index, total_pages
|
||||
|
||||
|
||||
def is_throttling_exception(e: Exception) -> bool:
|
||||
"""
|
||||
Checks if an exception is a requests.exception.HTTPError for
|
||||
a response with status code 429 (too many requests).
|
||||
"""
|
||||
return (
|
||||
isinstance(e, HTTPError) and e.response.status_code == codes.too_many_requests
|
||||
)
|
||||
|
||||
|
||||
def is_retryable_exception(e: Exception) -> bool:
|
||||
"""
|
||||
Checks if an exception is worth retrying (connection, throttling or a server error).
|
||||
"""
|
||||
is_connection_error = isinstance(e, ConnectionError)
|
||||
is_500_error = isinstance(e, HTTPError) and e.response.status_code >= 500
|
||||
|
||||
return is_connection_error or is_throttling_exception(e) or is_500_error
|
||||
|
||||
|
||||
def retry_if_exception(retry_state, predicate: Callable[[Exception], bool]) -> bool:
|
||||
"""
|
||||
Custom tenacity retry predicate for handling exceptions with the given predicate.
|
||||
"""
|
||||
attempt = retry_state.outcome
|
||||
if attempt.failed:
|
||||
exception = attempt.exception()
|
||||
return predicate(exception)
|
||||
return False
|
||||
|
||||
|
||||
def retry_policy_generic(retry_state) -> bool:
|
||||
"""
|
||||
Custom tenacity retry predicate for handling failed requests:
|
||||
- ConnectionError
|
||||
- Server errors (status >= 500)
|
||||
- Throttling errors (status == 429)
|
||||
|
||||
This does not handle 404, 403 or other status codes.
|
||||
"""
|
||||
return retry_if_exception(retry_state, is_retryable_exception)
|
||||
|
||||
|
||||
WAIT_EXP_BASE = 10
|
||||
MAX_NUMBER_ATTEMPTS = 5
|
||||
|
||||
|
||||
def http_retry(
|
||||
retry=retry_policy_generic,
|
||||
wait=wait_exponential(exp_base=WAIT_EXP_BASE),
|
||||
stop=stop_after_attempt(max_attempt_number=MAX_NUMBER_ATTEMPTS),
|
||||
**retry_args,
|
||||
):
|
||||
"""
|
||||
Decorator based on `tenacity` for retrying a function possibly raising
|
||||
requests.exception.HTTPError for status code 429 (too many requests).
|
||||
|
||||
It provides a default configuration that should work properly in most
|
||||
cases but all `tenacity.retry` parameters can also be overridden in client
|
||||
code.
|
||||
|
||||
When the mmaximum of attempts is reached, the HTTPError exception will then
|
||||
be reraised.
|
||||
|
||||
Args:
|
||||
retry: function defining request retry condition (default to 429 status code)
|
||||
https://tenacity.readthedocs.io/en/latest/#whether-to-retry
|
||||
|
||||
wait: function defining wait strategy before retrying (default to exponential
|
||||
backoff) https://tenacity.readthedocs.io/en/latest/#waiting-before-retrying
|
||||
|
||||
stop: function defining when to stop retrying (default after 5 attempts)
|
||||
https://tenacity.readthedocs.io/en/latest/#stopping
|
||||
|
||||
"""
|
||||
return tenacity_retry(retry=retry, wait=wait, stop=stop, reraise=True, **retry_args)
|
||||
|
||||
|
||||
def is_valid_origin_url(url: Optional[str]) -> bool:
|
||||
"""Returns whether the given string is a valid origin URL.
|
||||
This excludes Git SSH URLs and pseudo-URLs (eg. ``ssh://git@example.org:foo``
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue