maven: Use BeautifulSoup instead of xmltodict for parsing pom files

xmltodict cannot parse POM files that use a multi-byte encoding, so use
the lxml-based XML parser of BeautifulSoup instead.

Also drop the xmltodict requirement, as it is no longer used in the
swh-lister codebase.
This commit is contained in:
Antoine Lambert 2022-08-08 16:30:47 +02:00
parent d51bce0a1c
commit cee6bcb514
5 changed files with 807 additions and 13 deletions

View file

@ -40,8 +40,5 @@ ignore_missing_imports = True
[mypy-urllib3.util.*]
ignore_missing_imports = True
[mypy-xmltodict.*]
ignore_missing_imports = True
[mypy-dulwich.*]
ignore_missing_imports = True

View file

@ -5,6 +5,5 @@ iso8601
beautifulsoup4
launchpadlib
tenacity >= 6.2
xmltodict
lxml
dulwich

View file

@ -10,9 +10,10 @@ import re
from typing import Any, Dict, Iterator, Optional
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import lxml
import requests
from tenacity.before_sleep import before_sleep_log
import xmltodict
from swh.core.github.utils import GitHubSession
from swh.lister.utils import throttling_retry
@ -252,16 +253,18 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
for pom in out_pom:
try:
response = self.page_request(pom, {})
project = xmltodict.parse(response.content)
project_d = project.get("project", {})
scm_d = project_d.get("scm")
if scm_d is not None:
connection = scm_d.get("connection")
parsed_pom = BeautifulSoup(response.content, "xml")
project = parsed_pom.find("project")
if project is None:
continue
scm = project.find("scm")
if scm is not None:
connection = scm.find("connection")
if connection is not None:
artifact_metadata_d = {
"type": "scm",
"doc": out_pom[pom],
"url": connection,
"url": connection.text,
}
logger.debug("* Yielding pom %s: %s", pom, artifact_metadata_d)
yield artifact_metadata_d
@ -274,8 +277,8 @@ class MavenLister(Lister[MavenListerState, RepoPage]):
"POM info page could not be fetched, skipping project '%s'",
pom,
)
except xmltodict.expat.ExpatError as error:
logger.info("Could not parse POM %s XML: %s. Next.", pom, error)
except lxml.etree.Error as error:
logger.info("Could not parse POM %s XML: %s.", pom, error)
def get_scm(self, page: RepoPage) -> Optional[ListedOrigin]:
"""Retrieve scm origin out of the page information. Only called when type of the

View file

@ -0,0 +1,769 @@
<?xml version="1.0" encoding="gb18030"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<parent>
<groupId>org.sonatype.oss</groupId>
<artifactId>oss-parent</artifactId>
<version>7</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>com.alibaba.citrus</groupId>
<artifactId>citrus-parent</artifactId>
<packaging>pom</packaging>
<name>Citrus Parent Project</name>
<version>3.0.7</version>
<description>Another Java-based WEB Framework</description>
<url>http://www.openwebx.org/</url>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<connection>scm:git:https://github.com/webx/citrus</connection>
<developerConnection>scm:git:git@github.com:webx/citrus.git</developerConnection>
<url>http://github.com/webx/citrus</url>
</scm>
<developers>
<developer>
<id>baobao</id>
<name>Michael Zhou</name>
<email>yizhi@taobao.com</email>
<roles>
<role>architect</role>
<role>developer</role>
</roles>
<properties>
<picUrl>http://i54.tinypic.com/2jewmjr.jpg</picUrl>
</properties>
<timezone>-6</timezone>
</developer>
</developers>
<properties>
<java.version>1.5</java.version>
<java.encoding>GBK</java.encoding>
<webx-version>3.0.7</webx-version>
<spring-version>2.5.6.SEC03</spring-version>
<springext-plugin-version>1.0</springext-plugin-version>
<maven-util-version>1.0</maven-util-version>
<jetty-version>6.1.22</jetty-version>
<project.build.sourceEncoding>${java.encoding}</project.build.sourceEncoding>
</properties>
<profiles>
<profile>
<id>spring3</id>
<properties>
<spring-version>3.0.6.RELEASE</spring-version>
</properties>
</profile>
</profiles>
<modules>
<module>dist/webx</module>
<module>dist/test</module>
<module>common/expr</module>
<module>common/logconfig</module>
<module>common/util</module>
<module>common/springext</module>
<module>common/generictype</module>
<module>common/asm</module>
<module>common/hessian</module>
<module>common/codegen</module>
<module>test/util</module>
<module>test/webx</module>
<module>service/base</module>
<module>service/dataresolver</module>
<module>service/form</module>
<module>service/resource</module>
<module>service/upload</module>
<module>service/requestcontext</module>
<module>service/pipeline</module>
<module>service/configuration</module>
<module>service/mappingrule</module>
<module>service/moduleloader</module>
<module>service/pull</module>
<module>service/template</module>
<module>service/jsp</module>
<module>service/velocity</module>
<module>service/freemarker</module>
<module>service/uribroker</module>
<module>service/mail</module>
<module>webx/framework</module>
<module>webx/turbine</module>
<module>webx/dev</module>
</modules>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
</dependency>
</dependencies>
<dependencyManagement>
<dependencies>
<!-- ======================================== -->
<!-- 子项目依赖 -->
<!-- ======================================== -->
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-webx-all</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-test-all</artifactId>
<version>${webx-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-common-logconfig</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-common-expr</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-common-util</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-common-springext</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-common-generictype</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-common-asm</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-common-hessian</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-common-codegen</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-test-util</artifactId>
<version>${webx-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-test-webx</artifactId>
<version>${webx-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-base</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-dataresolver</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-form</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-resource</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-upload</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-requestcontext</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-pipeline</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-configuration</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-mappingrule</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-moduleloader</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-pull</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-template</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-jsp</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-velocity</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-freemarker</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-uribroker</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-service-mail</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-webx-framework</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-webx-turbine</artifactId>
<version>${webx-version}</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>citrus-webx-dev</artifactId>
<version>${webx-version}</version>
</dependency>
<!-- ================================================= -->
<!-- 日志及相关依赖用slf4j+logback代替jcl+log4j -->
<!-- ================================================= -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.6.1</version>
</dependency>
<!-- 将现有的jakarta commons logging的调用转换成lsf4j的调用。 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>1.6.1</version>
</dependency>
<!-- Hack确保commons-logging的jar包不被引入否则将和jcl-over-slf4j冲突 -->
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
<scope>provided</scope>
</dependency>
<!-- slf4j的实现logback用来取代log4j。更快、更强 -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>0.9.24</version>
<scope>runtime</scope>
</dependency>
<!-- log4j的实现备而不用。 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.6.1</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
<scope>runtime</scope>
</dependency>
<!-- ======================================== -->
<!-- 单元测试及相关依赖 -->
<!-- ======================================== -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-library</artifactId>
<version>1.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>httpunit</groupId>
<artifactId>httpunit</artifactId>
<version>1.7</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>jtidy</groupId>
<artifactId>jtidy</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>rhino</groupId>
<artifactId>js</artifactId>
<version>1.7R1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>nekohtml</groupId>
<artifactId>nekohtml</artifactId>
<version>1.9.6</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.9.1</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>xalan</groupId>
<artifactId>xalan</artifactId>
<version>2.7.1</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.easymock</groupId>
<artifactId>easymockclassextension</artifactId>
<version>3.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.tomcat</groupId>
<artifactId>jasper</artifactId>
<version>6.0.33</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jvnet.mock-javamail</groupId>
<artifactId>mock-javamail</artifactId>
<version>1.7</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>oro</groupId>
<artifactId>oro</artifactId>
<version>2.0.8</version>
</dependency>
<!-- ======================================== -->
<!-- 其它第三方依赖 -->
<!-- ======================================== -->
<dependency>
<groupId>ecs</groupId>
<artifactId>ecs</artifactId>
<version>1.4.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-jexl</artifactId>
<version>2.0.1</version>
</dependency>
<dependency>
<groupId>org.apache.velocity</groupId>
<artifactId>velocity</artifactId>
<version>1.6.4</version>
</dependency>
<dependency>
<groupId>org.freemarker</groupId>
<artifactId>freemarker</artifactId>
<version>2.3.16</version>
</dependency>
<dependency>
<groupId>commons-fileupload</groupId>
<artifactId>commons-fileupload</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-all</artifactId>
<version>1.6.3</version>
<scope>runtime</scope>
<exclusions>
<exclusion>
<groupId>org.apache.ant</groupId>
<artifactId>ant</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.ant</groupId>
<artifactId>ant-launcher</artifactId>
</exclusion>
<exclusion>
<groupId>jline</groupId>
<artifactId>jline</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6.1</version>
<exclusions>
<exclusion>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>cglib</groupId>
<artifactId>cglib-nodep</artifactId>
<version>2.2</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.5</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>javax.mail</groupId>
<artifactId>mail</artifactId>
<version>1.4.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>javax.activation</groupId>
<artifactId>activation</artifactId>
<version>1.1</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>janino</groupId>
<artifactId>janino</artifactId>
<version>2.5.10</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
<version>1.3.04</version>
</dependency>
<!-- ======================================== -->
<!-- Spring依赖 -->
<!-- ======================================== -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-beans</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-aop</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context-support</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-tx</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-orm</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-webmvc</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>${spring-version}</version>
<scope>test</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<inherited>false</inherited>
<configuration>
<skip>false</skip>
</configuration>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<phase>compile</phase>
<configuration>
<target>
<echo message="project home: ${basedir}"/>
<echo message="java home: ${java.home}"/>
<echo message="java source version: ${java.version}"/>
<echo message="java target version: ${java.version}"/>
<echo message="java source charset: ${java.encoding}"/>
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<includes>
<include>**/*Tests.java</include>
</includes>
<argLine>-Xmx256m</argLine>
</configuration>
</plugin>
<plugin>
<artifactId>maven-eclipse-plugin</artifactId>
<configuration>
<downloadSources>true</downloadSources>
<classpathContainers>
<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
</classpathContainers>
</configuration>
</plugin>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.mortbay.jetty</groupId>
<artifactId>maven-jetty-plugin</artifactId>
<configuration>
<stopKey>citrus</stopKey>
<stopPort>9999</stopPort>
<systemProperties>
<systemProperty>
<name>productionMode</name>
<value>false</value>
</systemProperty>
</systemProperties>
</configuration>
</plugin>
<plugin>
<groupId>com.alibaba.citrus.tool</groupId>
<artifactId>maven-springext-plugin</artifactId>
</plugin>
<plugin>
<artifactId>maven-gpg-plugin</artifactId>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.6</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<!-- 较低版本有bug导致hessian不能编译通过。 -->
<version>2.3.2</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>2.3.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.7</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<plugin>
<artifactId>maven-eclipse-plugin</artifactId>
<version>2.8</version>
</plugin>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>1.4</version>
</plugin>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.1.2</version>
</plugin>
<plugin>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.8</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.10</version>
</plugin>
<plugin>
<groupId>org.mortbay.jetty</groupId>
<artifactId>maven-jetty-plugin</artifactId>
<version>${jetty-version}</version>
</plugin>
<plugin>
<groupId>com.alibaba.citrus.tool</groupId>
<artifactId>maven-springext-plugin</artifactId>
<version>${springext-plugin-version}</version>
</plugin>
<plugin>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.4</version>
</plugin>
<!-- - This plugin's configuration is used to store Eclipse
m2e settings only. - It has no influence on the Maven build itself. -->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<versionRange>[1.0,)</versionRange>
<goals>
<goal>run</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-enforcer-plugin</artifactId>
<versionRange>[1.0,)</versionRange>
<goals>
<goal>enforce</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

View file

@ -30,6 +30,10 @@ GIT_REPO_URL1_GIT = f"git://github.com/{USER_REPO1}.git"
GIT_REPO_URL1_API = f"https://api.github.com/repos/{USER_REPO1}"
LIST_GIT_INCR = (GIT_REPO_URL1_HTTPS,)
USER_REPO2 = "webx/citrus"
GIT_REPO_URL2_HTTPS = f"https://github.com/{USER_REPO2}"
GIT_REPO_URL2_API = f"https://api.github.com/repos/{USER_REPO2}"
LIST_SRC = (MVN_URL + "al/aldi/sprova4j",)
LIST_SRC_DATA = (
@ -91,12 +95,18 @@ def maven_pom_3(datadir) -> bytes:
return Path(datadir, "https_maven.org", "arangodb-graphql-1.2.pom").read_bytes()
@pytest.fixture
def maven_pom_multi_byte_encoding(datadir) -> bytes:
    """Return the raw bytes of the citrus-parent 3.0.7 POM test file."""
    pom_path = Path(datadir) / "https_maven.org" / "citrus-parent-3.0.7.pom"
    return pom_path.read_bytes()
@pytest.fixture
def requests_mock(requests_mock):
"""If github api calls for the configured scm repository, returns its canonical url."""
for url_api, url_html in [
(GIT_REPO_URL0_API, GIT_REPO_URL0_HTTPS),
(GIT_REPO_URL1_API, GIT_REPO_URL1_HTTPS),
(GIT_REPO_URL2_API, GIT_REPO_URL2_HTTPS),
]:
requests_mock.get(
url_api,
@ -351,3 +361,19 @@ def test_maven_list_pom_bad_encoding(swh_scheduler, requests_mock, maven_pom_1):
# then we get only one maven-jar origin and one git origin.
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
assert len(scheduler_origins) == 2
def test_maven_list_pom_multi_byte_encoding(
    swh_scheduler, requests_mock, maven_pom_multi_byte_encoding
):
    """A POM file using a multi-byte encoding must be parsed by the lister."""

    # Serve the multi-byte encoded POM in place of the first POM file.
    requests_mock.get(URL_POM_1, content=maven_pom_multi_byte_encoding)

    maven_lister = MavenLister(
        scheduler=swh_scheduler, url=MVN_URL, index_url=INDEX_URL
    )
    maven_lister.run()

    listed = swh_scheduler.get_listed_origins(maven_lister.lister_obj.id)
    assert len(listed.results) == 3