From 0222a8f5c474910e5968f0646a5aea75c860a961 Mon Sep 17 00:00:00 2001 From: Antoine Lambert Date: Fri, 29 Apr 2022 13:47:30 +0200 Subject: [PATCH] maven: Handle null mtime value in index for jar archive There are cases where the modification time for a jar archive in a maven index is null, which was leading to a processing error by the lister. So handle that case to avoid premature exit of the listing process. Related to T3874 --- swh/lister/maven/lister.py | 4 +-- .../data/http_indexes/export_null_mtime.fld | 21 +++++++++++++++ swh/lister/maven/tests/test_lister.py | 27 +++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 swh/lister/maven/tests/data/http_indexes/export_null_mtime.fld diff --git a/swh/lister/maven/lister.py b/swh/lister/maven/lister.py index 80a17cb..bc1c2b6 100644 --- a/swh/lister/maven/lister.py +++ b/swh/lister/maven/lister.py @@ -304,11 +304,11 @@ class MavenLister(Lister[MavenListerState, RepoPage]): # Origin is gathering source archives: last_update_dt = None last_update_iso = "" - last_update_seconds = str(page["time"])[:-3] try: + last_update_seconds = str(page["time"])[:-3] last_update_dt = datetime.fromtimestamp(int(last_update_seconds)) last_update_dt = last_update_dt.astimezone(timezone.utc) - except OverflowError: + except (OverflowError, ValueError): logger.warning("- Failed to convert datetime %s.", last_update_seconds) if last_update_dt: last_update_iso = last_update_dt.isoformat() diff --git a/swh/lister/maven/tests/data/http_indexes/export_null_mtime.fld b/swh/lister/maven/tests/data/http_indexes/export_null_mtime.fld new file mode 100644 index 0000000..7798a5b --- /dev/null +++ b/swh/lister/maven/tests/data/http_indexes/export_null_mtime.fld @@ -0,0 +1,21 @@ +doc 0 + field 0 + name u + type string + value al.aldi|sprova4j|0.1.0|sources|jar + field 1 + name m + type string + value 1633786348254 + field 2 + name i + type string + value jar|0|14316|2|2|0|jar + field 10 + name n + type string + 
value sprova4j + field 11 + name d + type string + value Java client for Sprova Test Management diff --git a/swh/lister/maven/tests/test_lister.py b/swh/lister/maven/tests/test_lister.py index 32f1a39..d8e30ab 100644 --- a/swh/lister/maven/tests/test_lister.py +++ b/swh/lister/maven/tests/test_lister.py @@ -66,6 +66,11 @@ def maven_pom_1(datadir) -> str: return Path(datadir, "https_maven.org", "sprova4j-0.1.0.pom").read_text() +@pytest.fixture +def maven_index_null_mtime(datadir) -> str: + return Path(datadir, "http_indexes", "export_null_mtime.fld").read_text() + + @pytest.fixture def maven_pom_1_malformed(datadir) -> str: return Path(datadir, "https_maven.org", "sprova4j-0.1.0.malformed.pom").read_text() @@ -290,3 +295,25 @@ def test_maven_list_http_error_artifacts( # then we get only one maven-jar origin and one git origin. scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 3 + + +def test_maven_lister_null_mtime(swh_scheduler, requests_mock, maven_index_null_mtime): + + requests_mock.get(INDEX_URL, text=maven_index_null_mtime) + + # Run the lister. + lister = MavenLister( + scheduler=swh_scheduler, + url=MVN_URL, + instance="maven.org", + index_url=INDEX_URL, + incremental=False, + ) + + stats = lister.run() + + # Start test checks. + assert stats.pages == 1 + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + assert len(scheduler_origins) == 1 + assert scheduler_origins[0].last_update is None