cran: Retrieve last update date for each listed package
The last update date of an R package can be found in the "Packaged" field of the package info returned by tools::CRAN_package_db(). Retrieve it and parse it as a datetime to provide as the last_update parameter value in the ListedOrigin model. Closes T2989
This commit is contained in:
parent
6f40ab4c57
commit
22eeb0956e
4 changed files with 83 additions and 10 deletions
|
@ -4,6 +4,6 @@
|
|||
# all the packages of R and their description, then convert the API
|
||||
# response to JSON string and print it
|
||||
|
||||
db <- tools::CRAN_package_db()[, c("Package", "Version")]
|
||||
db <- tools::CRAN_package_db()[, c("Package", "Version", "Packaged")]
|
||||
dbjson <- jsonlite::toJSON(db)
|
||||
print(dbjson)
|
|
@ -2,10 +2,11 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from typing import Dict, Iterator, List, Tuple
|
||||
from typing import Dict, Iterator, List, Optional, Tuple
|
||||
|
||||
import pkg_resources
|
||||
|
||||
|
@ -47,6 +48,7 @@ class CRANLister(StatelessLister[PageType]):
|
|||
lister_id=self.lister_obj.id,
|
||||
url=origin_url,
|
||||
visit_type="tar",
|
||||
last_update=parse_packaged_date(package_info),
|
||||
extra_loader_arguments={
|
||||
"artifacts": [
|
||||
{"url": artifact_url, "version": package_info["Version"]}
|
||||
|
@ -96,3 +98,28 @@ def compute_origin_urls(package_info: Dict[str, str]) -> Tuple[str, str]:
|
|||
origin_url = f"{CRAN_MIRROR}/package={package}"
|
||||
artifact_url = f"{CRAN_MIRROR}/src/contrib/{package}_{version}.tar.gz"
|
||||
return origin_url, artifact_url
|
||||
|
||||
|
||||
def parse_packaged_date(package_info: Dict[str, str]) -> Optional[datetime]:
    """Parse the "Packaged" field of a package info dict into a datetime.

    Two layouts of the field are recognized; in both, the date is followed
    by the packager name:

    - ``"%Y-%m-%d %H:%M:%S UTC; <packager>"``
    - ``"%a %b %d %H:%M:%S %Y; <packager>"`` (found on some old packages)

    Args:
        package_info: package info dict; its "Packaged" entry, when present,
            holds the date string to parse.

    Returns:
        A datetime with its tzinfo set to UTC, or None when the field is
        missing, empty, or matches none of the known layouts.
    """
    packaged_at_str = package_info.get("Packaged", "")
    if not packaged_at_str:
        return None

    # (separator preceding the packager name, strptime format of the date)
    known_layouts = (
        (" UTC;", "%Y-%m-%d %H:%M:%S"),
        (";", "%a %b %d %H:%M:%S %Y"),
    )
    for separator, date_format in known_layouts:
        try:
            date_str = packaged_at_str.split(separator)[0]
            parsed = datetime.strptime(date_str, date_format)
        except (AttributeError, TypeError, ValueError):
            # ValueError: the string does not match this layout.
            # AttributeError/TypeError: the field is not a string at all;
            # parsing stays best-effort, so fall through to the debug log.
            continue
        return parsed.replace(tzinfo=timezone.utc)

    # .get() so that a missing "Package" key cannot turn this best-effort
    # fallback into a KeyError.
    logger.debug(
        "Could not parse %s package release date: %s",
        package_info.get("Package"),
        packaged_at_str,
    )
    return None
|
||||
|
|
|
@ -2,27 +2,39 @@
|
|||
|
||||
{
|
||||
"Package": "SeleMix",
|
||||
"Version": "1.0.1"
|
||||
"Version": "1.0.2",
|
||||
"Packaged": "2020-11-28 22:16:43 UTC; Teresa"
|
||||
},
|
||||
{
|
||||
"Package": "plink",
|
||||
"Version": "1.5-1"
|
||||
"Version": "1.5-1",
|
||||
"Packaged": "2017-04-26 11:36:15 UTC; Jonathan"
|
||||
},
|
||||
{
|
||||
"Package": "justifier",
|
||||
"Version": "0.1.0"
|
||||
"Package": "jsonlite",
|
||||
"Version": "1.7.2",
|
||||
"Packaged": "2020-12-09 13:54:18 UTC; jeroen"
|
||||
|
||||
},
|
||||
{
|
||||
"Package": "Records",
|
||||
"Version": "1.0"
|
||||
"Version": "1.0",
|
||||
"Packaged": "2012-10-29 08:57:37 UTC; ripley"
|
||||
},
|
||||
{
|
||||
"Package": "scRNAtools",
|
||||
"Version": "1.0"
|
||||
"Version": "1.0",
|
||||
"Packaged": "2018-07-04 00:49:45 UTC; dell"
|
||||
},
|
||||
{
|
||||
"Package": "Deriv",
|
||||
"Version": "3.9.0"
|
||||
"Version": "4.1.2",
|
||||
"Packaged": "2020-12-10 11:12:28 UTC; sokol"
|
||||
},
|
||||
{
|
||||
"Package": "BayesValidate",
|
||||
"Version": "0.0",
|
||||
"Packaged": "Thu Mar 30 10:48:35 2006; hornik"
|
||||
}
|
||||
|
||||
]
|
|
@ -3,12 +3,18 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from os import path
|
||||
|
||||
import pytest
|
||||
|
||||
from swh.lister.cran.lister import CRAN_MIRROR, CRANLister, compute_origin_urls
|
||||
from swh.lister.cran.lister import (
|
||||
CRAN_MIRROR,
|
||||
CRANLister,
|
||||
compute_origin_urls,
|
||||
parse_packaged_date,
|
||||
)
|
||||
|
||||
|
||||
def test_cran_compute_origin_urls():
|
||||
|
@ -26,6 +32,32 @@ def test_cran_compute_origin_urls_failure():
|
|||
compute_origin_urls(incomplete_repo)
|
||||
|
||||
|
||||
def test_parse_packaged_date():
    """Check parse_packaged_date on both date layouts and on unusable input."""
    utc = timezone.utc
    # Each case: (value of the "Packaged" field, or None to omit the field,
    #             expected parse result)
    cases = [
        (
            "2017-04-26 11:36:15 UTC; Jonathan",
            datetime(year=2017, month=4, day=26, hour=11, minute=36, second=15, tzinfo=utc),
        ),
        (
            "Thu Mar 30 10:48:35 2006; hornik",
            datetime(year=2006, month=3, day=30, hour=10, minute=48, second=35, tzinfo=utc),
        ),
        ("foo", None),
        (None, None),
    ]

    for packaged, expected in cases:
        package_info = {"Package": "test"}
        if packaged is not None:
            package_info["Packaged"] = packaged

        result = parse_packaged_date(package_info)
        if expected is None:
            assert result is None
        else:
            assert result == expected
|
||||
|
||||
|
||||
def test_cran_lister_cran(datadir, swh_scheduler, mocker):
|
||||
with open(path.join(datadir, "list-r-packages.json")) as f:
|
||||
cran_data = json.loads(f.read())
|
||||
|
@ -55,3 +87,5 @@ def test_cran_lister_cran(datadir, swh_scheduler, mocker):
|
|||
assert filtered_origins[0].extra_loader_arguments == {
|
||||
"artifacts": [{"url": artifact_url, "version": package_info["Version"]}]
|
||||
}
|
||||
|
||||
filtered_origins[0].last_update == parse_packaged_date(package_info)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue