sourceforge: don't abort on error for project
It's suboptimal, to say the least, to stop the entire lister process if a single project page is somehow broken (a 404, most likely). This change logs the issue as a warning and carries on. It also includes some minor logging changes and comment touch-ups.
This commit is contained in:
parent
2ff549e125
commit
8f3bbacd5e
2 changed files with 64 additions and 5 deletions
|
@ -195,10 +195,9 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
|||
if response.status_code != 200:
|
||||
# Log response content to ease debugging
|
||||
logger.warning(
|
||||
"Unexpected HTTP status code %s on %s: %s",
|
||||
"Unexpected HTTP status code %s for URL %s",
|
||||
response.status_code,
|
||||
response.url,
|
||||
response.content,
|
||||
)
|
||||
# The lister must fail on blocking errors
|
||||
response.raise_for_status()
|
||||
|
@ -294,7 +293,8 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
|||
else:
|
||||
logger.debug("Project '%s' does not have any VCS", project)
|
||||
else:
|
||||
# Should always match, let's log it
|
||||
# Should almost always match, let's log it
|
||||
# The only ones that don't match are mostly specialized one-off URLs.
|
||||
msg = "Project URL '%s' does not match expected pattern"
|
||||
logger.warning(msg, project_url)
|
||||
|
||||
|
@ -324,11 +324,15 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
|||
msg = "New project during an incremental run: %s/%s"
|
||||
logger.debug(msg, namespace, project)
|
||||
|
||||
res = self.page_request(endpoint, {}).json()
|
||||
try:
|
||||
res = self.page_request(endpoint, {}).json()
|
||||
except requests.HTTPError:
|
||||
# We've already logged in `page_request`
|
||||
return []
|
||||
|
||||
tools = res.get("tools")
|
||||
if tools is None:
|
||||
# This probably never happens
|
||||
# This rarely happens, on very old URLs
|
||||
logger.warning("Project '%s' does not have any tools", endpoint)
|
||||
return []
|
||||
|
||||
|
|
|
@ -338,3 +338,58 @@ def test_sourceforge_lister_http_error(swh_scheduler, requests_mock, status_code
|
|||
|
||||
with pytest.raises(HTTPError):
|
||||
lister.run()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404])
def test_sourceforge_lister_project_error(
    datadir, swh_scheduler, requests_mock, status_code,
):
    """Check that a failing project endpoint does not abort the whole listing.

    Every request under the `mramm` REST endpoint is answered with the
    parametrized error ``status_code``; the run must still complete and
    report the origins of the remaining projects.
    """
    lister = SourceForgeLister(scheduler=swh_scheduler)

    # Main sitemap first, then both sub-sitemaps, registered in that order.
    sitemap_mocks = (
        (MAIN_SITEMAP_URL, get_main_sitemap),
        (
            "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
            get_subsitemap_0,
        ),
        (
            "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
            get_subsitemap_1,
        ),
    )
    for sitemap_url, load_sitemap in sitemap_mocks:
        requests_mock.get(
            sitemap_url,
            text=load_sitemap(datadir),
            additional_matcher=_check_request_headers,
        )

    # Request mocks precedence is LIFO: the generic REST handler goes in
    # first so that the more specific failing handler below wins.
    rest_pattern = re.compile("https://sourceforge.net/rest/.*")
    requests_mock.get(
        rest_pattern,
        json=functools.partial(get_project_json, datadir),
        additional_matcher=_check_request_headers,
    )

    # Make all `mramm` requests fail.
    # `mramm` is in subsitemap 0, which ensures we keep listing after an error.
    failing_pattern = re.compile("https://sourceforge.net/rest/p/mramm")
    requests_mock.get(failing_pattern, status_code=status_code)

    run_stats = lister.run()

    # Projects that still get listed:
    # - os3dmodels (2 repos),
    # - mojunk (3 repos),
    # - backapps/website (1 repo).
    # adobe and backapps itself have no repos.
    # mramm must *not* be listed.
    assert run_stats.pages == 3
    assert run_stats.origins == 6

    listed_origins = swh_scheduler.get_listed_origins(
        lister.lister_obj.id
    ).results
    origins_by_url = {
        origin.url: (origin.visit_type, str(origin.last_update.date()))
        for origin in listed_origins
    }

    # Ensure no `mramm` origins are listed, but all others are.
    expected_origins = {
        "svn.code.sf.net/p/backapps/website/code": ("svn", "2021-02-11"),
        "git.code.sf.net/p/os3dmodels/git": ("git", "2017-03-31"),
        "svn.code.sf.net/p/os3dmodels/svn": ("svn", "2017-03-31"),
        "git.code.sf.net/p/mojunk/git": ("git", "2017-12-31"),
        "git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
        "svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
    }
    assert origins_by_url == expected_origins
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue