gogs: Skip pages with error 500
This also affects the gitea lister
This commit is contained in:
parent
fa65f270ed
commit
bd35d54398
3 changed files with 66 additions and 22 deletions
|
@ -9,6 +9,7 @@ from typing import Dict, List, Tuple
|
|||
|
||||
import pytest
|
||||
import requests
|
||||
from requests import HTTPError
|
||||
|
||||
from swh.lister.gitea.lister import GiteaLister
|
||||
from swh.lister.gogs.lister import GogsListerPage
|
||||
|
@ -138,16 +139,38 @@ def test_gitea_auth_instance(swh_scheduler, requests_mock, trygitea_p1):
|
|||
|
||||
|
||||
@pytest.mark.parametrize("http_code", [400, 500, 502])
|
||||
def test_gitea_list_http_error(swh_scheduler, requests_mock, http_code):
|
||||
def test_gitea_list_http_error(
|
||||
swh_scheduler, requests_mock, http_code, trygitea_p1, trygitea_p2
|
||||
):
|
||||
"""Test handling of some HTTP errors commonly encountered"""
|
||||
|
||||
lister = GiteaLister(scheduler=swh_scheduler, url=TRYGITEA_URL, page_size=3)
|
||||
|
||||
p1_text, p1_headers, _, p1_origin_urls = trygitea_p1
|
||||
p3_text, p3_headers, _, p3_origin_urls = trygitea_p2
|
||||
|
||||
base_url = TRYGITEA_URL + lister.REPO_LIST_PATH
|
||||
requests_mock.get(base_url, status_code=http_code)
|
||||
requests_mock.get(
|
||||
base_url,
|
||||
[
|
||||
{"text": p1_text, "headers": p1_headers, "status_code": 200},
|
||||
{"status_code": http_code},
|
||||
{"text": p3_text, "headers": p3_headers, "status_code": 200},
|
||||
],
|
||||
)
|
||||
|
||||
with pytest.raises(requests.HTTPError):
|
||||
# pages with fatal repositories should be skipped (no error raised)
|
||||
# See T4423 for more details
|
||||
if http_code == 500:
|
||||
lister.run()
|
||||
else:
|
||||
with pytest.raises(HTTPError):
|
||||
lister.run()
|
||||
|
||||
# Both P1 and P3 origins should be listed in case of 500 error
|
||||
# While in other cases, only P1 origins should be listed
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
assert len(scheduler_origins) == 0
|
||||
check_listed_origins(
|
||||
(p1_origin_urls + p3_origin_urls) if http_code == 500 else p1_origin_urls,
|
||||
scheduler_origins,
|
||||
)
|
||||
|
|
|
@ -2,12 +2,11 @@
|
|||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
import logging
|
||||
import random
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from urllib.parse import parse_qs, urljoin, urlparse
|
||||
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
||||
from urllib.parse import parse_qs, parse_qsl, urlencode, urljoin, urlparse
|
||||
|
||||
import iso8601
|
||||
import requests
|
||||
|
@ -97,8 +96,6 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
|
|||
# Raises an error on Gogs, or a warning on Gitea
|
||||
self.on_anonymous_mode()
|
||||
|
||||
self.max_page_limit = 2
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
{
|
||||
|
@ -120,7 +117,9 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
|
|||
return asdict(state)
|
||||
|
||||
@throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
|
||||
def page_request(self, url, params) -> requests.Response:
|
||||
def page_request(
|
||||
self, url: str, params: Dict[str, Any]
|
||||
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||
|
||||
logger.debug("Fetching URL %s with params %s", url, params)
|
||||
|
||||
|
@ -133,9 +132,20 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
|
|||
response.url,
|
||||
response.content,
|
||||
)
|
||||
response.raise_for_status()
|
||||
if (
|
||||
response.status_code == 500
|
||||
): # Temporary hack for skipping fatal repos (T4423)
|
||||
url_parts = urlparse(url)
|
||||
query: Dict[str, Any] = dict(parse_qsl(url_parts.query))
|
||||
query.update({"page": _parse_page_id(url) + 1})
|
||||
next_page_link = url_parts._replace(query=urlencode(query)).geturl()
|
||||
body: Dict[str, Any] = {"data": []}
|
||||
links = {"next": {"url": next_page_link}}
|
||||
return body, links
|
||||
else:
|
||||
response.raise_for_status()
|
||||
|
||||
return response
|
||||
return response.json(), response.links
|
||||
|
||||
@classmethod
|
||||
def extract_repos(cls, body: Dict[str, Any]) -> List[Repo]:
|
||||
|
@ -149,21 +159,24 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
|
|||
|
||||
# base with trailing slash, path without leading slash for urljoin
|
||||
next_link: Optional[str] = urljoin(self.url, self.REPO_LIST_PATH)
|
||||
response = self.page_request(next_link, {**self.query_params, "page": page_id})
|
||||
|
||||
body, links = self.page_request(
|
||||
next_link, {**self.query_params, "page": page_id}
|
||||
)
|
||||
|
||||
while next_link is not None:
|
||||
repos = self.extract_repos(response.json())
|
||||
repos = self.extract_repos(body)
|
||||
|
||||
assert len(response.links) > 0, "API changed: no Link header found"
|
||||
if "next" in response.links:
|
||||
next_link = response.links["next"]["url"]
|
||||
assert len(links) > 0, "API changed: no Link header found"
|
||||
if "next" in links:
|
||||
next_link = links["next"]["url"]
|
||||
else:
|
||||
next_link = None # Happens for the last page
|
||||
|
||||
yield GogsListerPage(repos=repos, next_link=next_link)
|
||||
|
||||
if next_link is not None:
|
||||
response = self.page_request(next_link, {})
|
||||
body, links = self.page_request(next_link, {})
|
||||
|
||||
def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
|
||||
"""Convert a page of Gogs repositories into a list of ListedOrigins"""
|
||||
|
|
|
@ -186,7 +186,7 @@ def test_gogs_list_http_error(
|
|||
lister = GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token="secret")
|
||||
|
||||
p1_text, p1_headers, _, p1_origin_urls = trygogs_p1
|
||||
p3_text, p3_headers, _, _ = trygogs_p3_last
|
||||
p3_text, p3_headers, _, p3_origin_urls = trygogs_p3_last
|
||||
|
||||
base_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
|
||||
requests_mock.get(
|
||||
|
@ -198,13 +198,21 @@ def test_gogs_list_http_error(
|
|||
],
|
||||
)
|
||||
|
||||
with pytest.raises(HTTPError):
|
||||
# pages with fatal repositories should be skipped (no error raised)
|
||||
# See T4423 for more details
|
||||
if http_code == 500:
|
||||
lister.run()
|
||||
else:
|
||||
with pytest.raises(HTTPError):
|
||||
lister.run()
|
||||
|
||||
# Both P1 and P3 origins should be listed in case of 500 error
|
||||
# While in other cases, only P1 origins should be listed
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
check_listed_origins(
|
||||
p1_origin_urls, scheduler_origins
|
||||
) # Only the first page is listed
|
||||
(p1_origin_urls + p3_origin_urls) if http_code == 500 else p1_origin_urls,
|
||||
scheduler_origins,
|
||||
)
|
||||
|
||||
|
||||
def test_gogs_incremental_lister(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue