gitlab: Handle HTTP status code 500 when listing projects

The GitLab API can return HTTP 500 errors when listing projects
(see https://gitlab.com/gitlab-org/gitlab/-/issues/262629).

To avoid ending the listing prematurely, skip the failing URLs and move
on to the next pages.

Related to T3442
This commit is contained in:
Antoine Lambert 2021-07-23 14:33:15 +02:00
parent 52c3150155
commit e904f4760e
2 changed files with 54 additions and 2 deletions

View file

@ -111,6 +111,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
)
self.incremental = incremental
self.last_page: Optional[str] = None
self.per_page = 100
self.session = requests.Session()
self.session.headers.update(
@ -145,7 +146,25 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
response.url,
response.content,
)
response.raise_for_status()
# GitLab API can return errors 500 when listing projects.
# https://gitlab.com/gitlab-org/gitlab/-/issues/262629
# To avoid ending the listing prematurely, skip buggy URLs and move
# to next pages.
if response.status_code == 500:
id_after = _parse_id_after(url)
assert id_after is not None
while True:
next_id_after = id_after + self.per_page
url = url.replace(f"id_after={id_after}", f"id_after={next_id_after}")
response = self.session.get(url)
if response.status_code == 200:
break
else:
id_after = next_id_after
else:
response.raise_for_status()
repositories: Tuple[Repository, ...] = tuple(response.json())
if hasattr(response, "links") and response.links.get("next"):
next_page = response.links["next"]["url"]
@ -160,7 +179,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
"order_by": "id",
"sort": "asc",
"simple": "true",
"per_page": "100",
"per_page": f"{self.per_page}",
}
if id_after is not None:
parameters["id_after"] = str(id_after)

View file

@ -244,6 +244,39 @@ def test_lister_gitlab_http_errors(
assert_sleep_calls(mocker, mock_sleep, [1])
def test_lister_gitlab_http_error_500(swh_scheduler, requests_mock, datadir):
    """Check that a page answering HTTP 500 is skipped and listing continues.

    Three consecutive page URLs are mocked: the first succeeds and links to
    the second, the second answers with status 500, and the third succeeds
    without a ``next`` link. The run is expected to report two pages and the
    origins of the first and third pages only.
    """
    instance = "gite.lirmm.fr"
    lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance)

    # Page URLs are derived from the lister's own page size so that the
    # mocked addresses match the ones the lister will request.
    first_url = lister.page_url()
    first_payload = gitlab_page_response(datadir, instance, 1)
    failing_url = lister.page_url(lister.per_page)
    third_url = lister.page_url(2 * lister.per_page)
    third_payload = gitlab_page_response(datadir, instance, 3)

    mocked_replies = (
        (
            first_url,
            {"json": first_payload, "headers": {"Link": f"<{failing_url}>; rel=next"}},
        ),
        (failing_url, {"status_code": 500}),
        (third_url, {"json": third_payload}),
    )
    for target, reply in mocked_replies:
        requests_mock.get(target, [reply], additional_matcher=_match_request)

    stats = lister.run()

    # Only the two successful pages contribute origins.
    assert stats == ListerStats(
        pages=2, origins=len(first_payload) + len(third_payload)
    )
def test_lister_gitlab_credentials(swh_scheduler):
"""Gitlab lister supports credentials configuration