Refactor and deduplicate HTTP requests code in listers

Numerous listers were using the same page_request method or equivalent
in their implementation so prefer to deduplicate that code by adding
an http_request method in base lister class: swh.lister.pattern.Lister.

That method simply wraps a call to requests.Session.request and logs
some useful info for debugging and error reporting, also an HTTPError
will be raised if a request ends up with an error.

All listers using that new method now benefit of requests retry when
an HTTP error occurs thanks to the use of the http_retry decorator.
This commit is contained in:
Antoine Lambert 2022-09-21 19:53:22 +02:00
parent 9c55acd286
commit db6ce12e9e
28 changed files with 174 additions and 449 deletions

View file

@ -1,4 +1,4 @@
# Copyright (C) 2018-2021 The Software Heritage developers
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -10,12 +10,10 @@ from typing import Any, Dict, Iterator, Optional, Tuple
from urllib.parse import parse_qs, urlencode, urlparse
import iso8601
import requests
from requests.exceptions import HTTPError
from requests.status_codes import codes
from tenacity.before_sleep import before_sleep_log
from swh.lister import USER_AGENT
from swh.lister.pattern import CredentialsType, Lister
from swh.lister.utils import http_retry, is_retryable_exception
from swh.scheduler.model import ListedOrigin
@ -118,10 +116,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
self.last_page: Optional[str] = None
self.per_page = 100
self.session = requests.Session()
self.session.headers.update(
{"Accept": "application/json", "User-Agent": USER_AGENT}
)
self.session.headers.update({"Accept": "application/json"})
if len(self.credentials) > 0:
cred = random.choice(self.credentials)