From 7c8428d01c7346b6d2b1aee84ee1b565f61c07ed Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" Date: Wed, 13 Apr 2022 17:45:34 +0200 Subject: [PATCH] maven: Continue listing if unable to retrieve pom information This aligns the behavior with other listers (e.g. sourceforge, ...) to continue listing if some information is not retrievable at all. Related to T3874 --- swh/lister/maven/lister.py | 9 ++++-- swh/lister/maven/tests/test_lister.py | 46 ++++++++++++++------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py index b303e3f..0217943 100644 --- a/swh/lister/maven/lister.py +++ b/swh/lister/maven/lister.py @@ -258,9 +258,9 @@ class MavenLister(Lister[MavenListerState, RepoPage]): logger.info("Fetching poms..") for pom in out_pom: - text = self.page_request(pom, {}) try: - project = xmltodict.parse(text.content.decode()) + response = self.page_request(pom, {}) + project = xmltodict.parse(response.content.decode()) if "scm" in project["project"]: if "connection" in project["project"]["scm"]: scm = project["project"]["scm"]["connection"] @@ -278,6 +278,11 @@ class MavenLister(Lister[MavenListerState, RepoPage]): logger.debug("No scm.connection in pom %s", pom) else: logger.debug("No scm in pom %s", pom) + except requests.HTTPError: + logger.warning( + "POM info page could not be fetched, skipping project '%s'", + pom, + ) except xmltodict.expat.ExpatError as error: logger.info("Could not parse POM %s XML: %s. 
Next.", pom, error) diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py index c8142ef..267da95 100644 --- a/swh/lister/maven/tests/test_lister.py +++ b/swh/lister/maven/tests/test_lister.py @@ -291,35 +291,37 @@ def test_maven_incremental_listing( @pytest.mark.parametrize("http_code", [400, 404, 500, 502]) -def test_maven_list_http_error( +def test_maven_list_http_error_on_index_read( swh_scheduler, requests_mock, mocker, maven_index, http_code ): - """Test handling of some common HTTP errors: - - 400: Bad request. - - 404: Resource no found. - - 500: Internal server error. - - 502: Bad gateway ou proxy Error. - """ + """should stop listing if the lister fails to retrieve the main index url.""" + + lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) + requests_mock.get(INDEX_URL, status_code=http_code) + with pytest.raises(requests.HTTPError): # listing cannot continue, so stop + lister.run() + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + assert len(scheduler_origins) == 0 + + +@pytest.mark.parametrize("http_code", [400, 404, 500, 502]) +def test_maven_list_http_error_artifacts( + swh_scheduler, requests_mock, mocker, maven_index, http_code, maven_pom_2 +): + """should continue listing when failing to retrieve artifacts.""" + # Test failure of artefacts retrieval. + requests_mock.get(INDEX_URL, text=maven_index) + requests_mock.get(URL_POM_1, status_code=http_code) + requests_mock.get(URL_POM_2, text=maven_pom_2) lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL) - # Test failure of index retrieval. - - requests_mock.get(INDEX_URL, status_code=http_code) - - with pytest.raises(requests.HTTPError): - lister.run() - - # Test failure of artefacts retrieval. 
- - requests_mock.get(INDEX_URL, text=maven_index) - requests_mock.get(URL_POM_1, status_code=http_code) - - with pytest.raises(requests.HTTPError): - lister.run() + # on artifacts though, that raises but listing continues + lister.run() # If the maven_index step succeeded but not the get_pom step, # then we get only the 2 maven-jar origins (and not the 2 additional # src origins). scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results - assert len(scheduler_origins) == 2 + assert len(scheduler_origins) == 3