pubdev: Retrieve last publication date for each listed package
In order to get a last_update for each ListedOrigin sent to the scheduler database, send an extra HTTP request for each listed package to the /api/packages/<package_name> endpoint of the pub.dev API. A pub.dev developer informed us that this endpoint is heavily used and cached, so there is no particular issue with periodically querying it for every package in a row.
This commit is contained in:
parent
49b79b0759
commit
44560c2383
4 changed files with 128 additions and 1 deletions
|
@ -5,7 +5,9 @@
|
|||
import logging
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import iso8601
|
||||
import requests
|
||||
from requests.exceptions import HTTPError
|
||||
from tenacity.before_sleep import before_sleep_log
|
||||
|
||||
from swh.lister.utils import throttling_retry
|
||||
|
@ -90,6 +92,22 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
|
|||
assert self.lister_obj.id is not None
|
||||
|
||||
for pkgname in page:
|
||||
package_info_url = self.PACKAGE_INFO_URL_PATTERN.format(
|
||||
base_url=self.url, pkgname=pkgname
|
||||
)
|
||||
try:
|
||||
response = self.page_request(url=package_info_url, params={})
|
||||
except HTTPError:
|
||||
logger.warning(
|
||||
"Failed to fetch metadata for package %s, skipping it from listing.",
|
||||
pkgname,
|
||||
)
|
||||
continue
|
||||
package_metadata = response.json()
|
||||
package_versions = package_metadata["versions"]
|
||||
last_published = max(
|
||||
package_version["published"] for package_version in package_versions
|
||||
)
|
||||
origin_url = self.ORIGIN_URL_PATTERN.format(
|
||||
base_url=self.url, pkgname=pkgname
|
||||
)
|
||||
|
@ -97,5 +115,5 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
|
|||
lister_id=self.lister_obj.id,
|
||||
visit_type=self.VISIT_TYPE,
|
||||
url=origin_url,
|
||||
last_update=None,
|
||||
last_update=iso8601.parse_date(last_published),
|
||||
)
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
{
|
||||
"name": "Autolinker",
|
||||
"latest": {
|
||||
"version": "0.1.1",
|
||||
"pubspec": {
|
||||
"version": "0.1.1",
|
||||
"homepage": "https://github.com/hackcave",
|
||||
"description": "Port of Autolinker.js to dart",
|
||||
"name": "Autolinker",
|
||||
"author": "hackcave <hackers@hackcave.org>"
|
||||
},
|
||||
"archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz",
|
||||
"archive_sha256": "0a5209a2d5a292a26fc65d7edb430163f209a7c7c24ba4f301676f1afd79fa3f",
|
||||
"published": "2014-12-24T22:34:02.534090Z"
|
||||
},
|
||||
"versions": [
|
||||
{
|
||||
"version": "0.1.0",
|
||||
"pubspec": {
|
||||
"version": "0.1.0",
|
||||
"homepage": "https://github.com/hackcave",
|
||||
"description": "Port of Autolinker.js to dart",
|
||||
"name": "Autolinker",
|
||||
"author": "hackcave <hackers@hackcave.org>"
|
||||
},
|
||||
"archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.0.tar.gz",
|
||||
"archive_sha256": "717b30e27311c775293d4795ce33d15cedb5e5d21fa140f2cb46b30f3e969041",
|
||||
"published": "2014-12-24T21:16:03.118270Z"
|
||||
},
|
||||
{
|
||||
"version": "0.1.1",
|
||||
"pubspec": {
|
||||
"version": "0.1.1",
|
||||
"homepage": "https://github.com/hackcave",
|
||||
"description": "Port of Autolinker.js to dart",
|
||||
"name": "Autolinker",
|
||||
"author": "hackcave <hackers@hackcave.org>"
|
||||
},
|
||||
"archive_url": "https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz",
|
||||
"archive_sha256": "0a5209a2d5a292a26fc65d7edb430163f209a7c7c24ba4f301676f1afd79fa3f",
|
||||
"published": "2014-12-24T22:34:02.534090Z"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
{
|
||||
"name": "Babylon",
|
||||
"latest": {
|
||||
"version": "0.0.3",
|
||||
"pubspec": {
|
||||
"version": "0.0.3",
|
||||
"name": "Babylon",
|
||||
"dependencies": {
|
||||
"js": ">=0.6.0",
|
||||
"browser": ">=0.10.0+2"
|
||||
},
|
||||
"author": "Cedric Krause <cedric@cedware.com>",
|
||||
"description": "A starting point for Dart libraries or applications.",
|
||||
"homepage": "https://www.cedware.com",
|
||||
"environment": {
|
||||
"sdk": ">=1.0.0 <2.0.0"
|
||||
},
|
||||
"dev_dependencies": {
|
||||
"test": ">=0.12.0 <0.13.0"
|
||||
}
|
||||
},
|
||||
"archive_url": "https://pub.dartlang.org/packages/Babylon/versions/0.0.3.tar.gz",
|
||||
"archive_sha256": "a18166c8082d795f22c38270b7fed0c306d5cb59fe390ce3a34c300770c4a8b3",
|
||||
"published": "2016-06-01T19:15:38.052Z"
|
||||
},
|
||||
"versions": [
|
||||
{
|
||||
"version": "0.0.3",
|
||||
"pubspec": {
|
||||
"version": "0.0.3",
|
||||
"name": "Babylon",
|
||||
"dependencies": {
|
||||
"js": ">=0.6.0",
|
||||
"browser": ">=0.10.0+2"
|
||||
},
|
||||
"author": "Cedric Krause <cedric@cedware.com>",
|
||||
"description": "A starting point for Dart libraries or applications.",
|
||||
"homepage": "https://www.cedware.com",
|
||||
"environment": {
|
||||
"sdk": ">=1.0.0 <2.0.0"
|
||||
},
|
||||
"dev_dependencies": {
|
||||
"test": ">=0.12.0 <0.13.0"
|
||||
}
|
||||
},
|
||||
"archive_url": "https://pub.dartlang.org/packages/Babylon/versions/0.0.3.tar.gz",
|
||||
"archive_sha256": "a18166c8082d795f22c38270b7fed0c306d5cb59fe390ce3a34c300770c4a8b3",
|
||||
"published": "2016-06-01T19:15:38.052Z"
|
||||
}
|
||||
]
|
||||
}
|
|
@ -25,3 +25,17 @@ def test_pubdev_lister(datadir, requests_mock_datadir, swh_scheduler):
|
|||
for origin in scheduler_origins:
|
||||
assert origin.visit_type == "pubdev"
|
||||
assert origin.url in expected_origins
|
||||
assert origin.last_update is not None
|
||||
|
||||
|
||||
def test_pubdev_lister_skip_package(
    datadir, requests_mock_datadir, swh_scheduler, requests_mock
):
    """Run the lister while one package's metadata request answers 404.

    The package info URL for ``Autolinker`` is registered on ``requests_mock``
    with a 404 status before the lister runs. The run is then expected to
    report exactly one page and exactly one origin.
    """
    # NOTE(review): presumably this registration takes precedence over the
    # datadir-backed response for the same URL; confirm against the fixtures.
    requests_mock.get("https://pub.dev/api/packages/Autolinker", status_code=404)

    stats = PubDevLister(scheduler=swh_scheduler).run()

    assert stats.pages == 1
    assert stats.origins == 1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue