Refactor and deduplicate HTTP requests code in listers
Numerous listers were using the same page_request method or equivalent in their implementation, so prefer to deduplicate that code by adding an http_request method in the base lister class: swh.lister.pattern.Lister. That method simply wraps a call to requests.Session.request and logs some useful info for debugging and error reporting, and an HTTPError is raised if a request ends up with an error. All listers using that new method now benefit from requests retry when an HTTP error occurs, thanks to the use of the http_retry decorator.
This commit is contained in:
parent
9c55acd286
commit
db6ce12e9e
28 changed files with 174 additions and 449 deletions
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2018-2021 the Software Heritage developers
|
||||
# Copyright (C) 2018-2022 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
@ -7,12 +7,8 @@ import logging
|
|||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import iso8601
|
||||
import requests
|
||||
from tenacity.before_sleep import before_sleep_log
|
||||
|
||||
from swh.lister import USER_AGENT
|
||||
from swh.lister.pattern import CredentialsType, Lister
|
||||
from swh.lister.utils import http_retry
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
|
@ -75,10 +71,7 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
|
|||
self.page_size += 1
|
||||
self.incremental = incremental
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
{"Accept": "application/json", "User-Agent": USER_AGENT}
|
||||
)
|
||||
self.session.headers.update({"Accept": "application/json"})
|
||||
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> NpmListerState:
|
||||
return NpmListerState(**d)
|
||||
|
@ -95,21 +88,6 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
|
|||
params["startkey"] = last_package_id
|
||||
return params
|
||||
|
||||
@http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
|
||||
def page_request(self, last_package_id: str) -> requests.Response:
|
||||
params = self.request_params(last_package_id)
|
||||
logger.debug("Fetching URL %s with params %s", self.url, params)
|
||||
response = self.session.get(self.url, params=params)
|
||||
if response.status_code != 200:
|
||||
logger.warning(
|
||||
"Unexpected HTTP status code %s on %s: %s",
|
||||
response.status_code,
|
||||
response.url,
|
||||
response.content,
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
|
||||
def get_pages(self) -> Iterator[List[Dict[str, Any]]]:
|
||||
last_package_id: str = "0" if self.incremental else '""'
|
||||
if (
|
||||
|
@ -121,7 +99,9 @@ class NpmLister(Lister[NpmListerState, List[Dict[str, Any]]]):
|
|||
|
||||
while True:
|
||||
|
||||
response = self.page_request(last_package_id)
|
||||
response = self.http_request(
|
||||
self.url, params=self.request_params(last_package_id)
|
||||
)
|
||||
|
||||
data = response.json()
|
||||
page = data["results"] if self.incremental else data["rows"]
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2018-2021 The Software Heritage developers
|
||||
# Copyright (C) 2018-2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -37,7 +37,7 @@ def npm_incremental_listing_page2(datadir):
|
|||
|
||||
@pytest.fixture(autouse=True)
|
||||
def retry_sleep_mock(mocker):
|
||||
mocker.patch.object(NpmLister.page_request.retry, "sleep")
|
||||
mocker.patch.object(NpmLister.http_request.retry, "sleep")
|
||||
|
||||
|
||||
def _check_listed_npm_packages(lister, packages, scheduler_origins):
|
||||
|
@ -78,19 +78,21 @@ def test_npm_lister_full(
|
|||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
spy_get = mocker.spy(lister.session, "get")
|
||||
spy_request = mocker.spy(lister.session, "request")
|
||||
|
||||
stats = lister.run()
|
||||
assert stats.pages == 2
|
||||
assert stats.origins == page_size * stats.pages
|
||||
|
||||
spy_get.assert_has_calls(
|
||||
spy_request.assert_has_calls(
|
||||
[
|
||||
mocker.call(
|
||||
"GET",
|
||||
lister.API_FULL_LISTING_URL,
|
||||
params=_url_params(page_size + 1, startkey='""'),
|
||||
),
|
||||
mocker.call(
|
||||
"GET",
|
||||
lister.API_FULL_LISTING_URL,
|
||||
params=_url_params(
|
||||
page_size + 1,
|
||||
|
@ -132,7 +134,7 @@ def test_npm_lister_incremental(
|
|||
additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
spy_get = mocker.spy(lister.session, "get")
|
||||
spy_request = mocker.spy(lister.session, "request")
|
||||
|
||||
assert lister.get_state_from_scheduler() == NpmListerState()
|
||||
|
||||
|
@ -142,13 +144,15 @@ def test_npm_lister_incremental(
|
|||
|
||||
last_seq = npm_incremental_listing_page2["results"][-1]["seq"]
|
||||
|
||||
spy_get.assert_has_calls(
|
||||
spy_request.assert_has_calls(
|
||||
[
|
||||
mocker.call(
|
||||
"GET",
|
||||
lister.API_INCREMENTAL_LISTING_URL,
|
||||
params=_url_params(page_size, since="0"),
|
||||
),
|
||||
mocker.call(
|
||||
"GET",
|
||||
lister.API_INCREMENTAL_LISTING_URL,
|
||||
params=_url_params(
|
||||
page_size,
|
||||
|
@ -156,6 +160,7 @@ def test_npm_lister_incremental(
|
|||
),
|
||||
),
|
||||
mocker.call(
|
||||
"GET",
|
||||
lister.API_INCREMENTAL_LISTING_URL,
|
||||
params=_url_params(page_size, since=str(last_seq)),
|
||||
),
|
||||
|
@ -189,11 +194,12 @@ def test_npm_lister_incremental_restart(
|
|||
|
||||
requests_mock.get(lister.API_INCREMENTAL_LISTING_URL, json={"results": []})
|
||||
|
||||
spy_get = mocker.spy(lister.session, "get")
|
||||
spy_request = mocker.spy(lister.session, "request")
|
||||
|
||||
lister.run()
|
||||
|
||||
spy_get.assert_called_with(
|
||||
spy_request.assert_called_with(
|
||||
"GET",
|
||||
lister.API_INCREMENTAL_LISTING_URL,
|
||||
params=_url_params(page_size, since=str(last_seq)),
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue