maven: dismiss origins whose scm entry is malformed (e.g. wrong pom scm format); add a test.

This commit is contained in:
Boris Baldassari 2022-01-31 20:35:57 +01:00
parent a1000dfeb7
commit 24eeabfade
3 changed files with 156 additions and 5 deletions

View file

@ -274,6 +274,7 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
"""
assert self.lister_obj.id is not None
scm_types_ok = ("git", "svn", "hg", "cvs", "bzr")
if page["type"] == "scm":
# If origin is a scm url: detect scm type and yield.
# Note that the official format is:
@ -283,11 +284,12 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
m_scm = re.match(r"^scm:(?P<type>[^:]+):(?P<url>.*)$", page["url"])
if m_scm is not None:
scm_type = m_scm.group("type")
scm_url = m_scm.group("url")
origin = ListedOrigin(
lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
)
yield origin
if scm_type in scm_types_ok:
scm_url = m_scm.group("url")
origin = ListedOrigin(
lister_id=self.lister_obj.id, url=scm_url, visit_type=scm_type,
)
yield origin
else:
if page["url"].endswith(".git"):
origin = ListedOrigin(

View file

@ -0,0 +1,86 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<groupId>al.aldi</groupId>
<artifactId>sprova4j</artifactId>
<version>0.1.0</version>
<name>sprova4j</name>
<description>Java client for Sprova Test Management</description>
<url>https://github.com/aldialimucaj/sprova4j</url>
<inceptionYear>2018</inceptionYear>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<developers>
<developer>
<id>aldi</id>
<name>Aldi Alimucaj</name>
<email>aldi.alimucaj@gmail.com</email>
</developer>
</developers>
<!-- Test fixture: the connection values below are deliberately malformed.
     They lack a valid provider segment after "scm:" (expected shape is
     scm:<provider>:<url>), so they must not be corrected. -->
<scm>
<connection>scm:https://github.com/aldialimucaj/sprova4j.git</connection>
<developerConnection>scm:ghttps://github.com/aldialimucaj/sprova4j.git</developerConnection>
<url>https://github.com/aldialimucaj/sprova4j</url>
</scm>
<dependencies>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.3</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.3</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.10.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>com.squareup.okio</groupId>
<artifactId>okio</artifactId>
<version>1.0.0</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>javax.json</artifactId>
<version>1.1.2</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>javax.json</groupId>
<artifactId>javax.json-api</artifactId>
<version>1.1.2</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>javax.validation</groupId>
<artifactId>validation-api</artifactId>
<version>2.0.1.Final</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>mockwebserver</artifactId>
<version>3.10.0</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View file

@ -69,6 +69,12 @@ def maven_pom_1(datadir) -> str:
return text
@pytest.fixture
def maven_pom_1_malformed(datadir) -> str:
    """Return the content of the malformed sprova4j 0.1.0 pom found under ``datadir``."""
    pom_path = Path(datadir) / "https_maven.org" / "sprova4j-0.1.0.malformed.pom"
    return pom_path.read_text()
@pytest.fixture
def maven_pom_2(datadir) -> str:
text = Path(datadir, "https_maven.org", "sprova4j-0.1.1.pom").read_text()
@ -132,6 +138,63 @@ def test_maven_full_listing(
assert scheduler_state.last_seen_pom == -1
def test_maven_full_listing_malformed(
    swh_scheduler,
    requests_mock,
    mocker,
    maven_index,
    maven_pom_1_malformed,
    maven_pom_2,
):
    """Full (non-incremental) listing over several pages where the first pom
    served to the lister carries a malformed scm entry.

    Checks the page/origin counts, the exact set of listed origin urls, the
    artifact metadata attached to every ``maven`` origin, and the lister state
    saved in the scheduler.
    """
    lister = MavenLister(
        scheduler=swh_scheduler,
        url=MVN_URL,
        instance="maven.org",
        index_url=INDEX_URL,
        incremental=False,
    )

    # Mock the remote index and both poms; the first pom is the malformed one.
    requests_mock.get(INDEX_URL, text=maven_index)
    requests_mock.get(URL_POM_1, text=maven_pom_1_malformed)
    requests_mock.get(URL_POM_2, text=maven_pom_2)

    # Run the lister.
    stats = lister.run()

    # Check the listing statistics.
    assert stats.pages == 4
    assert stats.origins == 3

    # Check the urls recorded in the scheduler.
    listed_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    expected_urls = ("https://github.com/aldialimucaj/sprova4j.git",) + LIST_SRC
    assert sorted(listed.url for listed in listed_origins) == sorted(expected_urls)

    # Every maven origin must match one reference entry, field by field.
    for listed in listed_origins:
        if listed.visit_type != "maven":
            continue
        for expected in LIST_SRC_DATA:
            if expected.get("url") != listed.url:
                continue
            artifact = listed.extra_loader_arguments["artifacts"][0]
            for field in ("time", "gid", "aid", "version"):
                assert expected.get(field) == artifact[field]
            assert MVN_URL == artifact["base_url"]
            break
        else:
            # No reference entry describes this maven origin.
            raise AssertionError

    # A full listing leaves both incremental markers at -1.
    scheduler_state = lister.get_state_from_scheduler()
    assert scheduler_state is not None
    assert scheduler_state.last_seen_doc == -1
    assert scheduler_state.last_seen_pom == -1
def test_maven_incremental_listing(
swh_scheduler,
requests_mock,