cpan: Fix module version extraction for some edge cases

The CPAN API can return versions that are not of str type but
int or float instead.

When the version equals 0, it means that CPAN failed to parse it,
so we try to extract it from the release name in that case.

Otherwise we make sure to convert the version to the str type.

Related to T2833
This commit is contained in:
Antoine Lambert 2022-10-10 15:55:54 +02:00
parent f57b8f3a2c
commit 05cd1de1cd
6 changed files with 342 additions and 7 deletions

View file

@ -6,7 +6,7 @@
from collections import defaultdict
from datetime import datetime
import logging
from typing import Any, Dict, Iterator, List, Optional, Set
from typing import Any, Dict, Iterator, List, Optional, Set, Union
import iso8601
@ -47,6 +47,18 @@ def get_field_value(entry, field_name):
return field_value
def get_module_version(
    module_name: str, module_version: Union[str, float, int], release_name: str
) -> str:
    """Return the version of a CPAN module release as a string.

    Args:
        module_name: name of the module, used to build the
            ``"<module_name>-"`` prefix searched at the start of
            ``release_name``
        module_version: version provided by the caller; it can be a str,
            a float or an int
        release_name: name of the release, e.g. ``"Compiler-a1"``

    Returns:
        The part of ``release_name`` following ``"<module_name>-"`` when
        ``module_version`` equals 0 and that part is not empty, otherwise
        ``str(module_version)``.
    """
    # some old versions fail to be parsed and cpan api set version to 0
    if module_version == 0:
        prefix = f"{module_name}-"
        if release_name.startswith(prefix):
            # extract version from release name; when nothing follows the
            # prefix, keep the original value so that an empty version
            # string is never returned
            module_version = release_name[len(prefix):] or module_version
    return str(module_version)
class CpanLister(StatelessLister[CpanListerPage]):
"""The Cpan lister list origins from 'Cpan', the Comprehensive Perl Archive
Network."""
@ -104,6 +116,10 @@ class CpanLister(StatelessLister[CpanListerPage]):
module_author_fullname = get_field_value(entry, "metadata.author")
release_name = get_field_value(entry, "name")
module_version = get_module_version(
module_name, module_version, release_name
)
self.artifacts[module_name].append(
{
"url": module_download_url,

View file

@ -0,0 +1,39 @@
{
"_shards": {
"successful": 3,
"failed": 0,
"total": 3
},
"hits": {
"max_score": 16.105877,
"hits": [
{
"_id": "FM3U2W_LR4pgKJepBaDKUb4WEy0",
"_index": "cpan_v1_01",
"_type": "release",
"_source": {
"distribution": "UDPServersAndClients",
"date": "2006-04-20T00:03:25",
"checksum_sha256": "763da87c32e65cc7ff72d70a503b4e9497f6b506c174b82c97671af8667c1922",
"stat": {
"size": 5576
},
"author": "ROBINBANK",
"version": 0,
"download_url": "https://cpan.metacpan.org/authors/id/R/RO/ROBINBANK/UDPServersAndClients.zip",
"metadata": {
"author": [
"unknown"
]
},
"name": "UDPServersAndClients"
},
"_score": 16.105877
}
],
"total": 1
},
"took": 2,
"timed_out": false,
"_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw=="
}

View file

@ -0,0 +1,85 @@
{
"took": 3,
"_shards": {
"successful": 3,
"failed": 0,
"total": 3
},
"timed_out": false,
"hits": {
"max_score": 13.962857,
"hits": [
{
"_score": 13.962857,
"_type": "release",
"_source": {
"version": 0,
"checksum_sha256": "a19fa7e735ea3406dfeb9c72f35fb2b64fda1e8035ce6ba0fabc15ce1c1e2f41",
"metadata": {
"author": [
"unknown"
]
},
"author": "MICB",
"name": "Compiler-a3",
"download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a3.tar.gz",
"date": "1996-09-02T14:04:00",
"stat": {
"size": 89134
},
"distribution": "Compiler"
},
"_id": "aBI9p6X_yq6r9e8pk7U17pbZMPM",
"_index": "cpan_v1_01"
},
{
"_score": 13.707853,
"_source": {
"checksum_sha256": "def01b544d23c76ec19cc2288a3295b39abcdbdea6dbded5b7fe6d17cd4525de",
"version": 0,
"name": "Compiler-a2",
"author": "MICB",
"metadata": {
"author": [
"unknown"
]
},
"date": "1996-08-22T14:30:00",
"download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a2.tar.gz",
"distribution": "Compiler",
"stat": {
"size": 85123
}
},
"_type": "release",
"_id": "fG9UelWPReQei13FQ4EAHytuZCo",
"_index": "cpan_v1_01"
},
{
"_source": {
"checksum_sha256": "b1f7afd4fa8825adf2c17a0cbd8706484e6d2da5294786a5e6e49c205708ee41",
"version": 0,
"name": "Compiler-a1",
"metadata": {
"author": [
"unknown"
]
},
"author": "MICB",
"date": "1996-05-13T11:39:00",
"download_url": "https://cpan.metacpan.org/authors/id/M/MI/MICB/Compiler-a1.tar.gz",
"stat": {
"size": 61093
},
"distribution": "Compiler"
},
"_type": "release",
"_id": "8H7BRLllDoyILyqsjjV8sqkBpQY",
"_index": "cpan_v1_01",
"_score": 13.572314
}
],
"total": 3
},
"_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw=="
}

View file

@ -0,0 +1,131 @@
{
"timed_out": false,
"_shards": {
"failed": 0,
"total": 3,
"successful": 3
},
"took": 14,
"hits": {
"total": 5,
"hits": [
{
"_score": 14.460719,
"_type": "release",
"_source": {
"stat": {
"size": 10738
},
"download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03-TRIAL1.tar.gz",
"distribution": "Call-Context",
"author": "FELIPE",
"version": "0.03-TRIAL1",
"checksum_sha256": "82aa854d6ae68342b58361b089c7f480b5b75e94f0c85c1d311f8cace1bfadea",
"metadata": {
"author": [
"Felipe Gasper (FELIPE)"
]
},
"name": "Call-Context-0.03-TRIAL1",
"date": "2018-10-25T03:47:31"
},
"_index": "cpan_v1_01",
"_id": "Cjw1voci7z74uflSPriBTT_A_5c"
},
{
"_id": "VdVDByg5PHxbDh9HnvKAzf8QOws",
"_index": "cpan_v1_01",
"_source": {
"download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.01.tar.gz",
"stat": {
"size": 10019
},
"author": "FELIPE",
"distribution": "Call-Context",
"version": 0.01,
"date": "2016-11-12T23:12:54",
"checksum_sha256": "21bf762ef5b3cbf1047192c2a3c499e9bd315b11e5530bd133856cdf87187b24",
"name": "Call-Context-0.01",
"metadata": {
"author": [
"Felipe Gasper (FELIPE)"
]
}
},
"_type": "release",
"_score": 14.460719
},
{
"_score": 14.314282,
"_id": "_MA6FD8SOhOmTG8JUhvl3CN186I",
"_type": "release",
"_source": {
"stat": {
"size": 10046
},
"download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.02.tar.gz",
"distribution": "Call-Context",
"author": "FELIPE",
"version": 0.02,
"metadata": {
"author": [
"Felipe Gasper (FELIPE)"
]
},
"checksum_sha256": "b80d977f1df0e08bda2808124cd7218ad83f802e1a54aa258e17748ff5c02a0a",
"name": "Call-Context-0.02",
"date": "2016-11-13T01:07:43"
},
"_index": "cpan_v1_01"
},
{
"_id": "veMmCu9wirwpTX7czbuQq6SnKQQ",
"_type": "release",
"_source": {
"stat": {
"size": 10741
},
"download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03-TRIAL2.tar.gz",
"distribution": "Call-Context",
"author": "FELIPE",
"version": "0.03-TRIAL2",
"name": "Call-Context-0.03-TRIAL2",
"metadata": {
"author": [
"Felipe Gasper (FELIPE)"
]
},
"checksum_sha256": "4ca799d81fc96a774f4f315c38eb3e53616322c332d47f1e3f756814b5bf4b5e",
"date": "2018-10-26T13:56:41"
},
"_index": "cpan_v1_01",
"_score": 14.291793
},
{
"_type": "release",
"_source": {
"version": "0.03",
"date": "2018-10-27T00:20:13",
"checksum_sha256": "0ee6bf46bc72755adb7a6b08e79d12e207de5f7809707b3c353b58cb2f0b5a26",
"metadata": {
"author": [
"Felipe Gasper (FELIPE)"
]
},
"name": "Call-Context-0.03",
"download_url": "https://cpan.metacpan.org/authors/id/F/FE/FELIPE/Call-Context-0.03.tar.gz",
"stat": {
"size": 10730
},
"author": "FELIPE",
"distribution": "Call-Context"
},
"_index": "cpan_v1_01",
"_id": "CAAVfGh_7XpKnzpnLVaBKg8IPMM",
"_score": 14.291793
}
],
"max_score": 14.460719
},
"_scroll_id": "cXVlcnlUaGVuRmV0Y2g7Mzs5OTQ2NzY3ODU6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTQ2NzY3ODQ6ZHZIZWxCb3BUZi1Cb3NwRDB5NmRQUTs5OTUyMzQzMTA6eXptdmszQUNUam1XbVJjRjRkRk9UdzswOw=="
}

View file

@ -10,7 +10,7 @@ from pathlib import Path
import pytest
from swh.lister.cpan.lister import CpanLister
from swh.lister.cpan.lister import CpanLister, get_module_version
@pytest.fixture
@ -20,16 +20,42 @@ def release_search_response(datadir):
)
def release_scroll_response(datadir, page):
    """Load the JSON document stored for search scroll page ``page``.

    The file is read from the ``https_fastapi.metacpan.org`` sub-directory
    of ``datadir`` and is named ``v1__search_scroll_page<page>``.
    """
    filename = f"v1__search_scroll_page{page}"
    response_path = Path(datadir, "https_fastapi.metacpan.org", filename)
    raw_content = response_path.read_bytes()
    return json.loads(raw_content)
@pytest.fixture
def release_scroll_first_response(datadir):
return json.loads(
Path(datadir, "https_fastapi.metacpan.org", "v1__search_scroll").read_bytes()
)
return release_scroll_response(datadir, page=1)
@pytest.fixture
def release_scroll_second_response(datadir):
    """Provide the decoded JSON content of search scroll page 2."""
    second_page = release_scroll_response(datadir, page=2)
    return second_page
@pytest.fixture
def release_scroll_third_response(datadir):
    """Provide the decoded JSON content of search scroll page 3."""
    third_page = release_scroll_response(datadir, page=3)
    return third_page
@pytest.fixture
def release_scroll_fourth_response(datadir):
    """Provide the decoded JSON content of search scroll page 4."""
    fourth_page = release_scroll_response(datadir, page=4)
    return fourth_page
@pytest.fixture(autouse=True)
def mock_network_requests(
requests_mock, release_search_response, release_scroll_first_response
requests_mock,
release_search_response,
release_scroll_first_response,
release_scroll_second_response,
release_scroll_third_response,
release_scroll_fourth_response,
):
requests_mock.get(
"https://fastapi.metacpan.org/v1/release/_search",
@ -41,13 +67,45 @@ def mock_network_requests(
{
"json": release_scroll_first_response,
},
{
"json": release_scroll_second_response,
},
{
"json": release_scroll_third_response,
},
{
"json": release_scroll_fourth_response,
},
{"json": {"hits": {"hits": []}, "_scroll_id": ""}},
],
)
@pytest.mark.parametrize(
    "module_name,module_version,release_name,expected_version",
    [
        ("Validator-Custom", "0.1207", "Validator-Custom-0.1207", "0.1207"),
        ("UDPServersAndClients", 0, "UDPServersAndClients", "0"),
        ("Compiler", 0, "Compiler-a1", "a1"),
        ("Call-Context", 0.01, "Call-Context-0.01", "0.01"),
    ],
)
def test_get_module_version(
    module_name, module_version, release_name, expected_version
):
    """Check the version computed by get_module_version for str, int and
    float input versions, with and without a version in the release name."""
    actual_version = get_module_version(module_name, module_version, release_name)
    assert actual_version == expected_version
def test_cpan_lister(
swh_scheduler, release_search_response, release_scroll_first_response
swh_scheduler,
release_search_response,
release_scroll_first_response,
release_scroll_second_response,
release_scroll_third_response,
release_scroll_fourth_response,
):
lister = CpanLister(scheduler=swh_scheduler)
res = lister.run()
@ -58,6 +116,9 @@ def test_cpan_lister(
for release in chain(
release_search_response["hits"]["hits"],
release_scroll_first_response["hits"]["hits"],
release_scroll_second_response["hits"]["hits"],
release_scroll_third_response["hits"]["hits"],
release_scroll_fourth_response["hits"]["hits"],
):
distribution = release["_source"]["distribution"]
release_name = release["_source"]["name"]
@ -69,6 +130,9 @@ def test_cpan_lister(
author_fullname = release["_source"]["metadata"]["author"][0]
date = release["_source"]["date"]
origin_url = f"https://metacpan.org/dist/{distribution}"
version = get_module_version(distribution, version, release_name)
expected_origins.add(origin_url)
expected_artifacts[origin_url].append(
{