Refactor and deduplicate HTTP requests code in listers
Numerous listers were using the same page_request method, or an equivalent, in their implementation, so deduplicate that code by adding an http_request method to the base lister class, swh.lister.pattern.Lister. That method simply wraps a call to requests.Session.request and logs some useful information for debugging and error reporting; an HTTPError is raised if a request ends in an error. All listers using the new method now benefit from request retries when an HTTP error occurs, thanks to the use of the http_retry decorator.
This commit is contained in:
parent
9c55acd286
commit
db6ce12e9e
28 changed files with 174 additions and 449 deletions
|
@ -1,9 +1,8 @@
|
|||
# Copyright (C) 2017-2021 The Software Heritage developers
|
||||
# Copyright (C) 2017-2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
import bz2
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
|
@ -17,12 +16,11 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
|
|||
from urllib.parse import urljoin
|
||||
|
||||
from debian.deb822 import Sources
|
||||
import requests
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from .. import USER_AGENT
|
||||
from ..pattern import CredentialsType, Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -95,9 +93,6 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
|
|||
self.suites = suites
|
||||
self.components = components
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update({"User-Agent": USER_AGENT})
|
||||
|
||||
# will hold all listed origins info
|
||||
self.listed_origins: Dict[DebianOrigin, ListedOrigin] = {}
|
||||
# will contain origin urls that have already been listed
|
||||
|
@ -132,9 +127,11 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
|
|||
def page_request(self, suite: Suite, component: Component) -> DebianPageType:
|
||||
"""Return parsed package Sources file for a given debian suite and component."""
|
||||
for url, compression in self.debian_index_urls(suite, component):
|
||||
response = requests.get(url, stream=True)
|
||||
logging.debug("Fetched URL: %s, status code: %s", url, response.status_code)
|
||||
if response.status_code == 200:
|
||||
try:
|
||||
response = self.http_request(url, stream=True)
|
||||
except HTTPError:
|
||||
pass
|
||||
else:
|
||||
last_modified = response.headers.get("Last-Modified")
|
||||
self.last_sources_update = (
|
||||
parsedate_to_datetime(last_modified) if last_modified else None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue