Refactor and deduplicate HTTP requests code in listers
Numerous listers were using the same page_request method, or an equivalent, in their implementation, so deduplicate that code by adding an http_request method to the base lister class, swh.lister.pattern.Lister. That method simply wraps a call to requests.Session.request and logs some useful information for debugging and error reporting; an HTTPError is raised if a request ends with an error. All listers using that new method now benefit from request retries when an HTTP error occurs, thanks to the use of the http_retry decorator.
This commit is contained in:
parent
9c55acd286
commit
db6ce12e9e
28 changed files with 174 additions and 449 deletions
|
@ -10,14 +10,10 @@ import logging
|
|||
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
||||
|
||||
import iso8601
|
||||
import requests
|
||||
from tenacity import before_sleep_log
|
||||
|
||||
from swh.lister.utils import http_retry
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from .. import USER_AGENT
|
||||
from ..pattern import CredentialsType, Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -59,10 +55,7 @@ class GolangLister(Lister[GolangStateType, GolangPageType]):
|
|||
credentials=credentials,
|
||||
)
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
{"Accept": "application/json", "User-Agent": USER_AGENT}
|
||||
)
|
||||
self.session.headers.update({"Accept": "application/json"})
|
||||
self.incremental = incremental
|
||||
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> GolangStateType:
|
||||
|
@ -87,24 +80,8 @@ class GolangLister(Lister[GolangStateType, GolangPageType]):
|
|||
):
|
||||
self.updated = True
|
||||
|
||||
@http_retry(
|
||||
before_sleep=before_sleep_log(logger, logging.WARNING),
|
||||
)
|
||||
def api_request(self, url: str) -> List[str]:
|
||||
logger.debug("Fetching URL %s", url)
|
||||
|
||||
response = self.session.get(url)
|
||||
|
||||
if response.status_code not in (200, 304):
|
||||
# Log response content to ease debugging
|
||||
logger.warning(
|
||||
"Unexpected HTTP status code %s for URL %s",
|
||||
response.status_code,
|
||||
response.url,
|
||||
)
|
||||
|
||||
response.raise_for_status()
|
||||
|
||||
response = self.http_request(url)
|
||||
return response.text.split()
|
||||
|
||||
def get_single_page(
|
||||
|
|
|
@ -98,11 +98,12 @@ def _generate_responses(datadir, requests_mock):
|
|||
|
||||
|
||||
def test_golang_lister(swh_scheduler, mocker, requests_mock, datadir):
|
||||
# first listing, should return one origin per package
|
||||
lister = GolangLister(scheduler=swh_scheduler)
|
||||
|
||||
# Exponential retries take a long time, so stub time.sleep
|
||||
mocked_sleep = mocker.patch.object(lister.api_request.retry, "sleep")
|
||||
mocked_sleep = mocker.patch.object(GolangLister.http_request.retry, "sleep")
|
||||
|
||||
# first listing, should return one origin per package
|
||||
lister = GolangLister(scheduler=swh_scheduler)
|
||||
|
||||
_generate_responses(datadir, requests_mock)
|
||||
|
||||
|
@ -131,7 +132,7 @@ def test_golang_lister(swh_scheduler, mocker, requests_mock, datadir):
|
|||
|
||||
# doing it all again (without incremental) should give us the same result
|
||||
lister = GolangLister(scheduler=swh_scheduler)
|
||||
mocked_sleep = mocker.patch.object(lister.api_request.retry, "sleep")
|
||||
|
||||
_generate_responses(datadir, requests_mock)
|
||||
stats = lister.run()
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue