maven: Create one origin per package instead of one per package version
Previously the maven lister was creating an origin for each source archive (jar, zip) it discovered during the listing process. This is not the way Software Heritage decided to archive sources coming from package managers: instead, one origin should be created per package, and all its versions should be found as releases in the snapshot produced by the package loader. So the maven lister is modified to create one origin per package, grouping all its versions. This change also modifies the way incremental listing is handled: ListedOrigin instances will be yielded only if we discovered new versions of a package since the last listing. Tests have been updated to reflect these changes. Related to T3874
This commit is contained in:
parent
985b71e80c
commit
22bcd9deb2
5 changed files with 162 additions and 252 deletions
|
@ -97,6 +97,8 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
|
|||
}
|
||||
)
|
||||
|
||||
self.jar_origins: Dict[str, ListedOrigin] = {}
|
||||
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> MavenListerState:
|
||||
return MavenListerState(**d)
|
||||
|
||||
|
@ -185,22 +187,10 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
|
|||
m_doc = re_doc.match(line)
|
||||
if m_doc is not None:
|
||||
doc_id = int(m_doc.group("doc"))
|
||||
if (
|
||||
self.incremental
|
||||
and self.state
|
||||
and self.state.last_seen_doc
|
||||
and self.state.last_seen_doc >= doc_id
|
||||
):
|
||||
# jar_src["doc"] contains the id of the current document, whatever
|
||||
# its type (scm or jar).
|
||||
jar_src["doc"] = None
|
||||
else:
|
||||
jar_src["doc"] = doc_id
|
||||
# jar_src["doc"] contains the id of the current document, whatever
|
||||
# its type (scm or jar).
|
||||
jar_src["doc"] = doc_id
|
||||
else:
|
||||
# If incremental mode, we don't record any line that is
|
||||
# before our last recorded doc id.
|
||||
if self.incremental and jar_src["doc"] is None:
|
||||
continue
|
||||
m_val = re_val.match(line)
|
||||
if m_val is not None:
|
||||
(gid, aid, version, classifier, ext) = m_val.groups()
|
||||
|
@ -315,35 +305,61 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
|
|||
)
|
||||
yield origin
|
||||
else:
|
||||
# Origin is a source archive:
|
||||
# Origin is gathering source archives:
|
||||
last_update_dt = None
|
||||
last_update_iso = ""
|
||||
last_update_seconds = str(page["time"])[:-3]
|
||||
try:
|
||||
last_update_dt = datetime.fromtimestamp(int(last_update_seconds))
|
||||
last_update_dt_tz = last_update_dt.astimezone(timezone.utc)
|
||||
last_update_dt = last_update_dt.astimezone(timezone.utc)
|
||||
except OverflowError:
|
||||
logger.warning("- Failed to convert datetime %s.", last_update_seconds)
|
||||
if last_update_dt:
|
||||
last_update_iso = last_update_dt_tz.isoformat()
|
||||
origin = ListedOrigin(
|
||||
lister_id=self.lister_obj.id,
|
||||
url=page["url"],
|
||||
visit_type=page["type"],
|
||||
last_update=last_update_dt_tz,
|
||||
extra_loader_arguments={
|
||||
"artifacts": [
|
||||
{
|
||||
"time": last_update_iso,
|
||||
"gid": page["gid"],
|
||||
"aid": page["aid"],
|
||||
"version": page["version"],
|
||||
"base_url": self.BASE_URL,
|
||||
}
|
||||
]
|
||||
},
|
||||
)
|
||||
yield origin
|
||||
last_update_iso = last_update_dt.isoformat()
|
||||
|
||||
# Origin URL will target page holding sources for all versions of
|
||||
# an artifactId (package name) inside a groupId (namespace)
|
||||
path = "/".join(page["gid"].split("."))
|
||||
origin_url = urljoin(self.BASE_URL, f"{path}/{page['aid']}")
|
||||
|
||||
artifact = {
|
||||
**{k: v for k, v in page.items() if k != "doc"},
|
||||
"time": last_update_iso,
|
||||
"base_url": self.BASE_URL,
|
||||
}
|
||||
|
||||
if origin_url not in self.jar_origins:
|
||||
# Create ListedOrigin instance if we did not see that origin yet
|
||||
jar_origin = ListedOrigin(
|
||||
lister_id=self.lister_obj.id,
|
||||
url=origin_url,
|
||||
visit_type=page["type"],
|
||||
last_update=last_update_dt,
|
||||
extra_loader_arguments={"artifacts": [artifact]},
|
||||
)
|
||||
self.jar_origins[origin_url] = jar_origin
|
||||
else:
|
||||
# Update list of source artifacts for that origin otherwise
|
||||
jar_origin = self.jar_origins[origin_url]
|
||||
artifacts = jar_origin.extra_loader_arguments["artifacts"]
|
||||
if artifact not in artifacts:
|
||||
artifacts.append(artifact)
|
||||
|
||||
if (
|
||||
jar_origin.last_update
|
||||
and last_update_dt
|
||||
and last_update_dt > jar_origin.last_update
|
||||
):
|
||||
jar_origin.last_update = last_update_dt
|
||||
|
||||
if not self.incremental or (
|
||||
self.state and page["doc"] > self.state.last_seen_doc
|
||||
):
|
||||
# Yield origin with updated source artifacts, multiple instances of
|
||||
# ListedOrigin for the same origin URL but with different artifacts
|
||||
# list will be sent to the scheduler but it will deduplicate them and
|
||||
# take the latest one to upsert in database
|
||||
yield jar_origin
|
||||
|
||||
def commit_page(self, page: RepoPage) -> None:
|
||||
"""Update currently stored state using the latest listed doc.
|
||||
|
|
|
@ -1,113 +0,0 @@
|
|||
doc 0
|
||||
field 0
|
||||
name u
|
||||
type string
|
||||
value al.aldi|sprova4j|0.1.0|sources|jar
|
||||
field 1
|
||||
name m
|
||||
type string
|
||||
value 1626111735737
|
||||
field 2
|
||||
name i
|
||||
type string
|
||||
value jar|1626109619335|14316|2|2|0|jar
|
||||
field 10
|
||||
name n
|
||||
type string
|
||||
value sprova4j
|
||||
field 11
|
||||
name d
|
||||
type string
|
||||
value Java client for Sprova Test Management
|
||||
doc 1
|
||||
field 0
|
||||
name u
|
||||
type string
|
||||
value al.aldi|sprova4j|0.1.0|NA|pom
|
||||
field 1
|
||||
name m
|
||||
type string
|
||||
value 1626111735764
|
||||
field 2
|
||||
name i
|
||||
type string
|
||||
value jar|1626109636636|-1|1|0|0|pom
|
||||
field 10
|
||||
name n
|
||||
type string
|
||||
value sprova4j
|
||||
field 11
|
||||
name d
|
||||
type string
|
||||
value Java client for Sprova Test Management
|
||||
doc 2
|
||||
field 0
|
||||
name u
|
||||
type string
|
||||
value al.aldi|sprova4j|0.1.1|sources|jar
|
||||
field 1
|
||||
name m
|
||||
type string
|
||||
value 1626111784883
|
||||
field 2
|
||||
name i
|
||||
type string
|
||||
value jar|1626111425534|14510|2|2|0|jar
|
||||
field 10
|
||||
name n
|
||||
type string
|
||||
value sprova4j
|
||||
field 11
|
||||
name d
|
||||
type string
|
||||
value Java client for Sprova Test Management
|
||||
doc 3
|
||||
field 0
|
||||
name u
|
||||
type string
|
||||
value al.aldi|sprova4j|0.1.1|NA|pom
|
||||
field 1
|
||||
name m
|
||||
type string
|
||||
value 1626111784915
|
||||
field 2
|
||||
name i
|
||||
type string
|
||||
value jar|1626111437014|-1|1|0|0|pom
|
||||
field 10
|
||||
name n
|
||||
type string
|
||||
value sprova4j
|
||||
field 11
|
||||
name d
|
||||
type string
|
||||
value Java client for Sprova Test Management
|
||||
doc 4
|
||||
field 14
|
||||
name DESCRIPTOR
|
||||
type string
|
||||
value NexusIndex
|
||||
field 15
|
||||
name IDXINFO
|
||||
type string
|
||||
value 1.0|index
|
||||
doc 5
|
||||
field 16
|
||||
name allGroups
|
||||
type string
|
||||
value allGroups
|
||||
field 17
|
||||
name allGroupsList
|
||||
type string
|
||||
value al.aldi
|
||||
doc 6
|
||||
field 18
|
||||
name rootGroups
|
||||
type string
|
||||
value rootGroups
|
||||
field 19
|
||||
name rootGroupsList
|
||||
type string
|
||||
value al
|
||||
END
|
||||
checksum 00000000003321211082
|
0
swh/lister/maven/tests/data/http_indexes/export_incr.fld → swh/lister/maven/tests/data/http_indexes/export_full.fld
Executable file → Normal file
0
swh/lister/maven/tests/data/http_indexes/export_incr.fld → swh/lister/maven/tests/data/http_indexes/export_full.fld
Executable file → Normal file
|
@ -0,0 +1,42 @@
|
|||
doc 0
|
||||
field 0
|
||||
name u
|
||||
type string
|
||||
value al.aldi|sprova4j|0.1.0|sources|jar
|
||||
field 1
|
||||
name m
|
||||
type string
|
||||
value 1633786348254
|
||||
field 2
|
||||
name i
|
||||
type string
|
||||
value jar|1626109619335|14316|2|2|0|jar
|
||||
field 10
|
||||
name n
|
||||
type string
|
||||
value sprova4j
|
||||
field 11
|
||||
name d
|
||||
type string
|
||||
value Java client for Sprova Test Management
|
||||
doc 1
|
||||
field 0
|
||||
name u
|
||||
type string
|
||||
value al.aldi|sprova4j|0.1.0|NA|pom
|
||||
field 1
|
||||
name m
|
||||
type string
|
||||
value 1633786348271
|
||||
field 2
|
||||
name i
|
||||
type string
|
||||
value jar|1626109636636|-1|1|0|0|pom
|
||||
field 10
|
||||
name n
|
||||
type string
|
||||
value sprova4j
|
||||
field 11
|
||||
name d
|
||||
type string
|
||||
value Java client for Sprova Test Management
|
|
@ -1,9 +1,8 @@
|
|||
# Copyright (C) 2021 The Software Heritage developers
|
||||
# Copyright (C) 2021-2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from datetime import timezone
|
||||
from pathlib import Path
|
||||
|
||||
import iso8601
|
||||
|
@ -26,10 +25,7 @@ LIST_GIT = (
|
|||
|
||||
LIST_GIT_INCR = ("git://github.com/ArangoDB-Community/arangodb-graphql-java.git",)
|
||||
|
||||
LIST_SRC = (
|
||||
MVN_URL + "al/aldi/sprova4j/0.1.0/sprova4j-0.1.0-sources.jar",
|
||||
MVN_URL + "al/aldi/sprova4j/0.1.1/sprova4j-0.1.1-sources.jar",
|
||||
)
|
||||
LIST_SRC = (MVN_URL + "al/aldi/sprova4j",)
|
||||
|
||||
LIST_SRC_DATA = (
|
||||
{
|
||||
|
@ -40,6 +36,7 @@ LIST_SRC_DATA = (
|
|||
"gid": "al.aldi",
|
||||
"aid": "sprova4j",
|
||||
"version": "0.1.0",
|
||||
"base_url": MVN_URL,
|
||||
},
|
||||
{
|
||||
"type": "maven",
|
||||
|
@ -49,18 +46,19 @@ LIST_SRC_DATA = (
|
|||
"gid": "al.aldi",
|
||||
"aid": "sprova4j",
|
||||
"version": "0.1.1",
|
||||
"base_url": MVN_URL,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def maven_index(datadir) -> str:
|
||||
return Path(datadir, "http_indexes", "export.fld").read_text()
|
||||
def maven_index_full(datadir) -> str:
|
||||
return Path(datadir, "http_indexes", "export_full.fld").read_text()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def maven_index_incr(datadir) -> str:
|
||||
return Path(datadir, "http_indexes", "export_incr.fld").read_text()
|
||||
def maven_index_incr_first(datadir) -> str:
|
||||
return Path(datadir, "http_indexes", "export_incr_first.fld").read_text()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -83,17 +81,21 @@ def maven_pom_3(datadir) -> str:
|
|||
return Path(datadir, "https_maven.org", "arangodb-graphql-1.2.pom").read_text()
|
||||
|
||||
|
||||
def test_maven_full_listing(
|
||||
swh_scheduler,
|
||||
requests_mock,
|
||||
mocker,
|
||||
maven_index,
|
||||
maven_pom_1,
|
||||
maven_pom_2,
|
||||
@pytest.fixture(autouse=True)
|
||||
def network_requests_mock(
|
||||
requests_mock, maven_index_full, maven_pom_1, maven_pom_2, maven_pom_3
|
||||
):
|
||||
requests_mock.get(INDEX_URL, text=maven_index_full)
|
||||
requests_mock.get(URL_POM_1, text=maven_pom_1)
|
||||
requests_mock.get(URL_POM_2, text=maven_pom_2)
|
||||
requests_mock.get(URL_POM_3, text=maven_pom_3)
|
||||
|
||||
|
||||
def test_maven_full_listing(swh_scheduler):
|
||||
"""Covers full listing of multiple pages, checking page results and listed
|
||||
origins, statelessness."""
|
||||
|
||||
# Run the lister.
|
||||
lister = MavenLister(
|
||||
scheduler=swh_scheduler,
|
||||
url=MVN_URL,
|
||||
|
@ -102,43 +104,25 @@ def test_maven_full_listing(
|
|||
incremental=False,
|
||||
)
|
||||
|
||||
# Set up test.
|
||||
index_text = maven_index
|
||||
requests_mock.get(INDEX_URL, text=index_text)
|
||||
requests_mock.get(URL_POM_1, text=maven_pom_1)
|
||||
requests_mock.get(URL_POM_2, text=maven_pom_2)
|
||||
|
||||
# Then run the lister.
|
||||
stats = lister.run()
|
||||
|
||||
# Start test checks.
|
||||
assert stats.pages == 4
|
||||
assert stats.origins == 4
|
||||
assert stats.pages == 5
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
|
||||
origin_urls = [origin.url for origin in scheduler_origins]
|
||||
assert sorted(origin_urls) == sorted(LIST_GIT + LIST_SRC)
|
||||
|
||||
# 3 git origins + 1 maven origin with 2 releases (one per jar)
|
||||
assert len(origin_urls) == 4
|
||||
assert sorted(origin_urls) == sorted(LIST_GIT + LIST_GIT_INCR + LIST_SRC)
|
||||
|
||||
for origin in scheduler_origins:
|
||||
if origin.visit_type == "maven":
|
||||
for src in LIST_SRC_DATA:
|
||||
if src.get("url") == origin.url:
|
||||
last_update_src = iso8601.parse_date(src.get("time")).astimezone(
|
||||
tz=timezone.utc
|
||||
)
|
||||
assert last_update_src == origin.last_update
|
||||
artifact = origin.extra_loader_arguments["artifacts"][0]
|
||||
assert src.get("time") == artifact["time"]
|
||||
assert src.get("gid") == artifact["gid"]
|
||||
assert src.get("aid") == artifact["aid"]
|
||||
assert src.get("version") == artifact["version"]
|
||||
assert MVN_URL == artifact["base_url"]
|
||||
break
|
||||
else:
|
||||
raise AssertionError(
|
||||
"Could not find scheduler origin in referenced origins."
|
||||
)
|
||||
last_update_src = iso8601.parse_date(src["time"])
|
||||
assert last_update_src <= origin.last_update
|
||||
assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA)
|
||||
|
||||
scheduler_state = lister.get_state_from_scheduler()
|
||||
assert scheduler_state is not None
|
||||
assert scheduler_state.last_seen_doc == -1
|
||||
|
@ -148,10 +132,7 @@ def test_maven_full_listing(
|
|||
def test_maven_full_listing_malformed(
|
||||
swh_scheduler,
|
||||
requests_mock,
|
||||
mocker,
|
||||
maven_index,
|
||||
maven_pom_1_malformed,
|
||||
maven_pom_2,
|
||||
):
|
||||
"""Covers full listing of multiple pages, checking page results with a malformed
|
||||
scm entry in pom."""
|
||||
|
@ -165,39 +146,28 @@ def test_maven_full_listing_malformed(
|
|||
)
|
||||
|
||||
# Set up test.
|
||||
index_text = maven_index
|
||||
requests_mock.get(INDEX_URL, text=index_text)
|
||||
requests_mock.get(URL_POM_1, text=maven_pom_1_malformed)
|
||||
requests_mock.get(URL_POM_2, text=maven_pom_2)
|
||||
|
||||
# Then run the lister.
|
||||
stats = lister.run()
|
||||
|
||||
# Start test checks.
|
||||
assert stats.pages == 4
|
||||
assert stats.origins == 3
|
||||
assert stats.pages == 5
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
|
||||
origin_urls = [origin.url for origin in scheduler_origins]
|
||||
LIST_SRC_1 = ("https://github.com/aldialimucaj/sprova4j.git",)
|
||||
assert sorted(origin_urls) == sorted(LIST_SRC_1 + LIST_SRC)
|
||||
|
||||
# 2 git origins + 1 maven origin with 2 releases (one per jar)
|
||||
assert len(origin_urls) == 3
|
||||
assert sorted(origin_urls) == sorted((LIST_GIT[1],) + LIST_GIT_INCR + LIST_SRC)
|
||||
|
||||
for origin in scheduler_origins:
|
||||
if origin.visit_type == "maven":
|
||||
for src in LIST_SRC_DATA:
|
||||
if src.get("url") == origin.url:
|
||||
artifact = origin.extra_loader_arguments["artifacts"][0]
|
||||
assert src.get("time") == artifact["time"]
|
||||
assert src.get("gid") == artifact["gid"]
|
||||
assert src.get("aid") == artifact["aid"]
|
||||
assert src.get("version") == artifact["version"]
|
||||
assert MVN_URL == artifact["base_url"]
|
||||
break
|
||||
else:
|
||||
raise AssertionError(
|
||||
"Could not find scheduler origin in referenced origins."
|
||||
)
|
||||
last_update_src = iso8601.parse_date(src["time"])
|
||||
assert last_update_src <= origin.last_update
|
||||
assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA)
|
||||
|
||||
scheduler_state = lister.get_state_from_scheduler()
|
||||
assert scheduler_state is not None
|
||||
assert scheduler_state.last_seen_doc == -1
|
||||
|
@ -207,12 +177,8 @@ def test_maven_full_listing_malformed(
|
|||
def test_maven_incremental_listing(
|
||||
swh_scheduler,
|
||||
requests_mock,
|
||||
mocker,
|
||||
maven_index,
|
||||
maven_index_incr,
|
||||
maven_pom_1,
|
||||
maven_pom_2,
|
||||
maven_pom_3,
|
||||
maven_index_full,
|
||||
maven_index_incr_first,
|
||||
):
|
||||
"""Covers full listing of multiple pages, checking page results and listed
|
||||
origins, with a second updated run for statefulness."""
|
||||
|
@ -226,9 +192,7 @@ def test_maven_incremental_listing(
|
|||
)
|
||||
|
||||
# Set up test.
|
||||
requests_mock.get(INDEX_URL, text=maven_index)
|
||||
requests_mock.get(URL_POM_1, text=maven_pom_1)
|
||||
requests_mock.get(URL_POM_2, text=maven_pom_2)
|
||||
requests_mock.get(INDEX_URL, text=maven_index_incr_first)
|
||||
|
||||
# Then run the lister.
|
||||
stats = lister.run()
|
||||
|
@ -236,8 +200,20 @@ def test_maven_incremental_listing(
|
|||
# Start test checks.
|
||||
assert lister.incremental
|
||||
assert lister.updated
|
||||
assert stats.pages == 4
|
||||
assert stats.origins == 4
|
||||
assert stats.pages == 2
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
origin_urls = [origin.url for origin in scheduler_origins]
|
||||
|
||||
# 1 git origins + 1 maven origin with 1 release (one per jar)
|
||||
assert len(origin_urls) == 2
|
||||
assert sorted(origin_urls) == sorted((LIST_GIT[0],) + LIST_SRC)
|
||||
|
||||
for origin in scheduler_origins:
|
||||
if origin.visit_type == "maven":
|
||||
last_update_src = iso8601.parse_date(LIST_SRC_DATA[0]["time"])
|
||||
assert last_update_src == origin.last_update
|
||||
assert origin.extra_loader_arguments["artifacts"] == [LIST_SRC_DATA[0]]
|
||||
|
||||
# Second execution of the lister, incremental mode
|
||||
lister = MavenLister(
|
||||
|
@ -250,12 +226,11 @@ def test_maven_incremental_listing(
|
|||
|
||||
scheduler_state = lister.get_state_from_scheduler()
|
||||
assert scheduler_state is not None
|
||||
assert scheduler_state.last_seen_doc == 3
|
||||
assert scheduler_state.last_seen_pom == 3
|
||||
assert scheduler_state.last_seen_doc == 1
|
||||
assert scheduler_state.last_seen_pom == 1
|
||||
|
||||
# Set up test.
|
||||
requests_mock.get(INDEX_URL, text=maven_index_incr)
|
||||
requests_mock.get(URL_POM_3, text=maven_pom_3)
|
||||
requests_mock.get(INDEX_URL, text=maven_index_full)
|
||||
|
||||
# Then run the lister.
|
||||
stats = lister.run()
|
||||
|
@ -263,26 +238,19 @@ def test_maven_incremental_listing(
|
|||
# Start test checks.
|
||||
assert lister.incremental
|
||||
assert lister.updated
|
||||
assert stats.pages == 1
|
||||
assert stats.origins == 1
|
||||
assert stats.pages == 4
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
|
||||
origin_urls = [origin.url for origin in scheduler_origins]
|
||||
|
||||
assert sorted(origin_urls) == sorted(LIST_SRC + LIST_GIT + LIST_GIT_INCR)
|
||||
|
||||
for origin in scheduler_origins:
|
||||
if origin.visit_type == "maven":
|
||||
for src in LIST_SRC_DATA:
|
||||
if src.get("url") == origin.url:
|
||||
artifact = origin.extra_loader_arguments["artifacts"][0]
|
||||
assert src.get("time") == artifact["time"]
|
||||
assert src.get("gid") == artifact["gid"]
|
||||
assert src.get("aid") == artifact["aid"]
|
||||
assert src.get("version") == artifact["version"]
|
||||
break
|
||||
else:
|
||||
raise AssertionError
|
||||
last_update_src = iso8601.parse_date(src["time"])
|
||||
assert last_update_src <= origin.last_update
|
||||
assert origin.extra_loader_arguments["artifacts"] == list(LIST_SRC_DATA)
|
||||
|
||||
scheduler_state = lister.get_state_from_scheduler()
|
||||
assert scheduler_state is not None
|
||||
|
@ -291,9 +259,7 @@ def test_maven_incremental_listing(
|
|||
|
||||
|
||||
@pytest.mark.parametrize("http_code", [400, 404, 500, 502])
|
||||
def test_maven_list_http_error_on_index_read(
|
||||
swh_scheduler, requests_mock, mocker, maven_index, http_code
|
||||
):
|
||||
def test_maven_list_http_error_on_index_read(swh_scheduler, requests_mock, http_code):
|
||||
"""should stop listing if the lister fails to retrieve the main index url."""
|
||||
|
||||
lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL)
|
||||
|
@ -307,21 +273,20 @@ def test_maven_list_http_error_on_index_read(
|
|||
|
||||
@pytest.mark.parametrize("http_code", [400, 404, 500, 502])
|
||||
def test_maven_list_http_error_artifacts(
|
||||
swh_scheduler, requests_mock, mocker, maven_index, http_code, maven_pom_2
|
||||
swh_scheduler,
|
||||
requests_mock,
|
||||
http_code,
|
||||
):
|
||||
"""should continue listing when failing to retrieve artifacts."""
|
||||
# Test failure of artefacts retrieval.
|
||||
requests_mock.get(INDEX_URL, text=maven_index)
|
||||
requests_mock.get(URL_POM_1, status_code=http_code)
|
||||
requests_mock.get(URL_POM_2, text=maven_pom_2)
|
||||
|
||||
lister = MavenLister(scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL)
|
||||
|
||||
# on artifacts though, that raises but continue listing
|
||||
lister.run()
|
||||
|
||||
# If the maven_index step succeeded but not the get_pom step,
|
||||
# then we get only the 2 maven-jar origins (and not the 2 additional
|
||||
# src origins).
|
||||
# If the maven_index_full step succeeded but not the get_pom step,
|
||||
# then we get only one maven-jar origin and one git origin.
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
assert len(scheduler_origins) == 3
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue