cgit: Enable retrying throttled HTTP requests
Related to T3645
This commit is contained in:
parent
20232cc36e
commit
24bc671679
2 changed files with 31 additions and 0 deletions
|
@ -11,9 +11,11 @@ from urllib.parse import urljoin, urlparse
|
|||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from requests.exceptions import HTTPError
|
||||
from tenacity.before_sleep import before_sleep_log
|
||||
|
||||
from swh.lister import USER_AGENT
|
||||
from swh.lister.pattern import CredentialsType, StatelessLister
|
||||
from swh.lister.utils import throttling_retry
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
|
@ -74,6 +76,7 @@ class CGitLister(StatelessLister[Repositories]):
|
|||
)
|
||||
self.base_git_url = base_git_url
|
||||
|
||||
@throttling_retry(before_sleep=before_sleep_log(logger, logging.DEBUG))
|
||||
def _get_and_parse(self, url: str) -> BeautifulSoup:
|
||||
"""Get the given url and parse the retrieved HTML using BeautifulSoup"""
|
||||
response = self.session.get(url)
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
# See top-level LICENSE file for more information
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
@ -229,3 +230,30 @@ def test_lister_cgit_with_base_git_url(
|
|||
assert (
|
||||
listed_origin.url.startswith(url) is False
|
||||
), f"url should be mapped to {base_git_url}"
|
||||
|
||||
|
||||
def test_lister_cgit_get_pages_with_pages_and_retry(
    requests_mock_datadir, requests_mock, datadir, mocker, swh_scheduler
):
    """Check that page listing still succeeds when a page is throttled.

    The ``?ofs=50`` page is mocked to answer with HTTP 429 twice before
    serving the recorded page content. The lister is still expected to
    yield 3 pages holding 16 repositories in total.
    """
    url = "https://git.tizen/cgit/"
    page_path = os.path.join(datadir, "https_git.tizen/cgit,ofs=50")

    # Load the recorded page up front; the mock only needs its bytes.
    with open(page_path, "rb") as page:
        page_content = page.read()

    # Responses are served in order: two throttled answers, then the page.
    mocked_responses = [
        {"content": None, "status_code": 429},
        {"content": None, "status_code": 429},
        {"content": page_content, "status_code": 200},
    ]
    requests_mock.get(f"{url}?ofs=50", mocked_responses)

    lister_cgit = CGitLister(swh_scheduler, url=url)

    # Patch out the retry's sleep (presumably so the retries do not
    # actually wait between attempts during the test).
    mocker.patch.object(lister_cgit._get_and_parse.retry, "sleep")

    repos: List[List[str]] = list(lister_cgit.get_pages())
    flattened_repos = [repo for page_repos in repos for repo in page_repos]

    # we should have 16 repos (listed on 3 pages)
    assert len(repos) == 3
    assert len(flattened_repos) == 16
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue