pubdev: Modify origin URL for listed packages

Use https://pub.dev/packages/<package_name> as the origin URL of listed
packages instead of the API endpoint https://pub.dev/api/packages/<package_name>.
This commit is contained in:
Antoine Lambert 2022-09-02 16:12:13 +02:00
parent b6c69e5075
commit 49b79b0759
3 changed files with 22 additions and 28 deletions

View file

@ -28,9 +28,10 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
VISIT_TYPE = "pubdev"
INSTANCE = "pubdev"
BASE_URL = "https://pub.dev/api/"
PACKAGE_NAMES_URL_PATTERN = "{base_url}package-names"
PACKAGE_INFO_URL_PATTERN = "{base_url}packages/{pkgname}"
BASE_URL = "https://pub.dev/"
PACKAGE_NAMES_URL_PATTERN = "{base_url}api/package-names"
PACKAGE_INFO_URL_PATTERN = "{base_url}api/packages/{pkgname}"
ORIGIN_URL_PATTERN = "{base_url}packages/{pkgname}"
def __init__(
self,
@ -89,12 +90,12 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
assert self.lister_obj.id is not None
for pkgname in page:
url = self.PACKAGE_INFO_URL_PATTERN.format(
origin_url = self.ORIGIN_URL_PATTERN.format(
base_url=self.url, pkgname=pkgname
)
yield ListedOrigin(
lister_id=self.lister_obj.id,
visit_type=self.VISIT_TYPE,
url=url,
url=origin_url,
last_update=None,
)

View file

@ -1 +1,7 @@
{"packages":["Autolinker","pdf"],"nextUrl":null}
{
"packages": [
"Autolinker",
"Babylon"
],
"nextUrl": null
}

View file

@ -2,16 +2,13 @@
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.pubdev.lister import PubDevLister
expected_origins = [
{
"url": "https://pub.dev/api/packages/Autolinker",
},
{
"url": "https://pub.dev/api/packages/pdf",
},
]
expected_origins = {
"https://pub.dev/packages/Autolinker",
"https://pub.dev/packages/Babylon",
}
def test_pubdev_lister(datadir, requests_mock_datadir, swh_scheduler):
@ -19,22 +16,12 @@ def test_pubdev_lister(datadir, requests_mock_datadir, swh_scheduler):
res = lister.run()
assert res.pages == 1
assert res.origins == 1 + 1
assert res.origins == 2
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
assert len(scheduler_origins) == len(expected_origins)
assert {
(
scheduled.visit_type,
scheduled.url,
)
for scheduled in scheduler_origins
} == {
(
"pubdev",
expected["url"],
)
for expected in expected_origins
}
for origin in scheduler_origins:
assert origin.visit_type == "pubdev"
assert origin.url in expected_origins