bitbucket: Skip buggy page when listing
Some URLs of the repositories endpoint of the Bitbucket REST API 2.0 can return an error 500. In that case, skip the buggy repositories page and fetch the next one, so that listing continues instead of ending prematurely. Related to #4239.
This commit is contained in:
parent
7da7fa57d0
commit
5d0f35aa69
3 changed files with 55 additions and 6 deletions
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2017-2022 The Software Heritage developers
|
||||
# Copyright (C) 2017-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -11,6 +11,7 @@ from typing import Any, Dict, Iterator, List, Optional
|
|||
from urllib import parse
|
||||
|
||||
import iso8601
|
||||
from requests import HTTPError
|
||||
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
@ -118,9 +119,22 @@ class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]):
|
|||
|
||||
while True:
|
||||
self.url_params["after"] = last_repo_cdate
|
||||
body = self.http_request(self.url, params=self.url_params).json()
|
||||
|
||||
yield body["values"]
|
||||
try:
|
||||
body = self.http_request(self.url, params=self.url_params).json()
|
||||
yield body["values"]
|
||||
except HTTPError as e:
|
||||
if e.response.status_code == 500:
|
||||
logger.warning(
|
||||
"URL %s is buggy (error 500), skip it and get next page.",
|
||||
e.response.url,
|
||||
)
|
||||
body = self.http_request(
|
||||
self.url,
|
||||
params={
|
||||
"pagelen": self.url_params["pagelen"],
|
||||
"fields": "next",
|
||||
},
|
||||
).json()
|
||||
|
||||
next_page_url = body.get("next")
|
||||
if next_page_url is not None:
|
||||
|
|
|
@ -161,5 +161,5 @@
|
|||
}
|
||||
}
|
||||
],
|
||||
"next": "https://api.bitbucket.org/2.0/repositories?pagelen=10&after=2011-09-03T12%3A33%3A16.028393%2B00%3A00&fields=next%2Cvalues.links.clone.href%2Cvalues.slug%2Cvalues.scm%2Cvalues.updated_on%2Cvalues.created_on"
|
||||
"next": "https://api.bitbucket.org/2.0/repositories?pagelen=10&fields=next%2Cvalues.links.clone.href%2Cvalues.scm%2Cvalues.updated_on%2Cvalues.created_on&after=2011-09-03T12%3A33%3A16.028393%2B00%3A00"
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2017-2022 The Software Heritage developers
|
||||
# Copyright (C) 2017-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -10,6 +10,7 @@ import os
|
|||
import pytest
|
||||
|
||||
from swh.lister.bitbucket.lister import BitbucketLister
|
||||
from swh.lister.utils import MAX_NUMBER_ATTEMPTS
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -178,3 +179,37 @@ def test_bitbucket_full_lister(
|
|||
)
|
||||
|
||||
_check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins)
|
||||
|
||||
|
||||
def test_bitbucket_lister_buggy_page(
    swh_scheduler,
    requests_mock,
    mocker,
    bb_api_repositories_page1,
    bb_api_repositories_page2,
):
    """Check that listing goes on after a page that keeps answering HTTP 500.

    The mocked API URL serves, in order: a first valid page,
    ``MAX_NUMBER_ATTEMPTS`` error 500 responses, a body holding only the
    ``next`` link of the first page, and finally a second valid page.
    """
    failing_responses = [{"json": None, "status_code": 500}] * MAX_NUMBER_ATTEMPTS
    next_link_only = {"next": bb_api_repositories_page1["next"]}
    mocked_responses = [
        {"json": bb_api_repositories_page1, "status_code": 200},
        *failing_responses,
        {"json": next_link_only, "status_code": 200},
        {"json": bb_api_repositories_page2, "status_code": 200},
    ]
    requests_mock.get(BitbucketLister.API_URL, mocked_responses)

    lister = BitbucketLister(scheduler=swh_scheduler, page_size=10)

    # Presumably keeps the retried requests from actually waiting between
    # attempts -- confirm against the retry decorator used by http_request.
    mocker.patch.object(lister.http_request.retry, "sleep")

    stats = lister.run()

    assert stats.pages == 2
    assert stats.origins == 20
    listed_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(listed_origins) == 20

    # In the mocked sequence, index MAX_NUMBER_ATTEMPTS + 2 comes right after
    # the first page, the error 500 responses and the "next"-only response.
    resumed_request = requests_mock.request_history[MAX_NUMBER_ATTEMPTS + 2]
    assert resumed_request.url == bb_api_repositories_page1["next"]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue