cran.lister: Use cran's canonical url for origin url

Prior to this commit, we sent the versioned artifact url as the origin url.
Now we send CRAN's canonical package url as the origin url, along with the
list of artifacts found there (only 1 today).
This commit is contained in:
Antoine R. Dumont (@ardumont) 2020-01-14 14:24:13 +01:00
parent 767c4c6dc7
commit 4761773631
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
2 changed files with 46 additions and 24 deletions

View file

@ -8,7 +8,7 @@ import logging
import pkg_resources
import subprocess
from typing import List, Mapping
from typing import List, Mapping, Tuple
from swh.lister.cran.models import CRANModel
@ -19,28 +19,39 @@ from swh.scheduler.utils import create_task_dict
logger = logging.getLogger(__name__)
CRAN_MIRROR = 'https://cran.r-project.org'
class CRANLister(SimpleLister):
MODEL = CRANModel
LISTER_NAME = 'cran'
instance = 'cran'
def task_dict(self, origin_type, origin_url, **kwargs):
def task_dict(self, origin_type, origin_url, version=None, html_url=None,
policy=None, **kwargs):
"""Return task format dict. This creates tasks with args and kwargs
set, for example::
args: []
kwargs: {
'url': 'https://cran.r-project.org/...',
'version': '0.0.1'
'url': 'https://cran.r-project.org/Packages/<package>...',
'artifacts': [{
'url': 'https://cran.r-project.org/...',
'version': '0.0.1',
}]
}
"""
policy = kwargs.get('policy', 'oneshot')
version = kwargs.get('version')
if not policy:
policy = 'oneshot'
artifact_url = html_url
assert origin_type == 'tar'
return create_task_dict(
'load-cran', policy,
url=origin_url, version=version, retries_left=3
url=origin_url, artifacts=[{
'url': artifact_url,
'version': version
}], retries_left=3
)
def safely_issue_request(self, identifier):
@ -95,7 +106,7 @@ class CRANLister(SimpleLister):
"""
logger.debug('repo: %s', repo)
project_url = compute_package_url(repo)
origin_url, artifact_url = compute_origin_urls(repo)
package = repo['Package']
version = repo['Version']
return {
@ -103,8 +114,8 @@ class CRANLister(SimpleLister):
'name': package,
'full_name': repo['Title'],
'version': version,
'html_url': project_url,
'origin_url': project_url,
'html_url': artifact_url,
'origin_url': origin_url,
'origin_type': 'tar',
}
@ -120,15 +131,18 @@ def read_cran_data() -> List[Mapping[str, str]]:
return json.loads(response.stdout.decode('utf-8'))
def compute_package_url(repo: Mapping[str, str]) -> str:
def compute_origin_urls(repo: Mapping[str, str]) -> Tuple[str, str]:
"""Compute the package url from the repo dict.
Args:
repo: dict with key 'Package', 'Version'
Returns:
the package url
the tuple project url, artifact url
"""
return 'https://cran.r-project.org/src/contrib' \
'/{Package}_{Version}.tar.gz'.format(**repo)
package = repo['Package']
version = repo['Version']
origin_url = f'{CRAN_MIRROR}/package={package}'
artifact_url = f'{CRAN_MIRROR}/src/contrib/{package}_{version}.tar.gz'
return origin_url, artifact_url

View file

@ -9,22 +9,25 @@ import pytest
from os import path
from unittest.mock import patch
from swh.lister.cran.lister import compute_package_url
from swh.lister.cran.lister import compute_origin_urls, CRAN_MIRROR
def test_cran_compute_package_url():
url = compute_package_url({'Package': 'something', 'Version': '0.0.1'})
def test_cran_compute_origin_urls():
pack = 'something'
vers = '0.0.1'
origin_url, artifact_url = compute_origin_urls({
'Package': pack,
'Version': vers,
})
assert url == 'https://cran.r-project.org/src/contrib/%s_%s.tar.gz' % (
'something',
'0.0.1',
)
assert origin_url == f'{CRAN_MIRROR}/package={pack}'
assert artifact_url == f'{CRAN_MIRROR}/src/contrib/{pack}_{vers}.tar.gz'
def test_cran_compute_package_url_failure():
def test_cran_compute_origin_urls_failure():
for incomplete_repo in [{'Version': '0.0.1'}, {'Package': 'package'}, {}]:
with pytest.raises(KeyError):
compute_package_url(incomplete_repo)
compute_origin_urls(incomplete_repo)
@patch('swh.lister.cran.lister.read_cran_data')
@ -51,7 +54,12 @@ def test_cran_lister_cran(mock_cran, datadir, lister_cran):
# kwargs
kwargs = row['arguments']['kwargs']
assert len(kwargs) == 2
assert set(kwargs.keys()) == {'url', 'version'}
assert set(kwargs.keys()) == {'url', 'artifacts'}
artifacts = kwargs['artifacts']
assert len(artifacts) == 1
assert set(artifacts[0].keys()) == {'url', 'version'}
assert row['policy'] == 'oneshot'
assert row['retries_left'] == 3