Refactor and deduplicate HTTP requests code in listers
Numerous listers were using the same page_request method, or an equivalent, in their implementation, so deduplicate that code by adding an http_request method to the base lister class, swh.lister.pattern.Lister. That method simply wraps a call to requests.Session.request and logs some useful information for debugging and error reporting; an HTTPError is raised if a request ends with an error. All listers using that new method now benefit from request retries when an HTTP error occurs, thanks to the use of the http_retry decorator.
This commit is contained in:
parent
9c55acd286
commit
db6ce12e9e
28 changed files with 174 additions and 449 deletions
|
@ -2,15 +2,13 @@
|
|||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from typing import Iterator, List, Optional
|
||||
|
||||
import iso8601
|
||||
import requests
|
||||
from requests.exceptions import HTTPError
|
||||
from tenacity.before_sleep import before_sleep_log
|
||||
|
||||
from swh.lister.utils import http_retry
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
|
@ -52,7 +50,7 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
|
|||
instance=self.INSTANCE,
|
||||
url=self.BASE_URL,
|
||||
)
|
||||
self.session = requests.Session()
|
||||
|
||||
self.session.headers.update(
|
||||
{
|
||||
"Accept": "application/json",
|
||||
|
@ -60,23 +58,6 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
|
|||
}
|
||||
)
|
||||
|
||||
@http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
|
||||
def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
|
||||
|
||||
logger.debug("Fetching URL %s with params %s", url, params)
|
||||
|
||||
response = self.session.get(url, params=params)
|
||||
if response.status_code != 200:
|
||||
logger.warning(
|
||||
"Unexpected HTTP status code %s on %s: %s",
|
||||
response.status_code,
|
||||
response.url,
|
||||
response.content,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
return response
|
||||
|
||||
def get_pages(self) -> Iterator[PubDevListerPage]:
|
||||
"""Yield an iterator which returns 'page'
|
||||
|
||||
|
@ -88,8 +69,8 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
|
|||
|
||||
There is only one page that list all origins url based on "{base_url}packages/{pkgname}"
|
||||
"""
|
||||
response = self.page_request(
|
||||
url=self.PACKAGE_NAMES_URL_PATTERN.format(base_url=self.url), params={}
|
||||
response = self.http_request(
|
||||
url=self.PACKAGE_NAMES_URL_PATTERN.format(base_url=self.url)
|
||||
)
|
||||
yield response.json()["packages"]
|
||||
|
||||
|
@ -102,7 +83,7 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
|
|||
base_url=self.url, pkgname=pkgname
|
||||
)
|
||||
try:
|
||||
response = self.page_request(url=package_info_url, params={})
|
||||
response = self.http_request(url=package_info_url)
|
||||
except HTTPError:
|
||||
logger.warning(
|
||||
"Failed to fetch metadata for package %s, skipping it from listing.",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue