swh.lister.packagist

Implement a Packagist lister to list the
names and metadata URLs of all the
packages.

Closes 1776
This commit is contained in:
Archit Agrawal 2019-07-08 00:40:47 +05:30
parent 08ade29e6d
commit 5727f15cf3
12 changed files with 246 additions and 2 deletions

View file

@ -12,7 +12,7 @@ from swh.core.cli import CONTEXT_SETTINGS
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
'npm', 'phabricator', 'gnu', 'cran', 'cgit']
'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist']
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@ -133,6 +133,11 @@ def cli(ctx, db_url, listers, drop_tables):
url_prefix='http://git.savannah.gnu.org/git/',
override_config=override_conf)
elif lister == 'packagist':
from .packagist.models import ModelBase
from .packagist.lister import PackagistLister
_lister = PackagistLister(override_config=override_conf)
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %

View file

@ -13,6 +13,7 @@ def celery_includes():
'swh.lister.gitlab.tasks',
'swh.lister.gnu.tasks',
'swh.lister.npm.tasks',
'swh.lister.pypi.tasks',
'swh.lister.packagist.tasks',
'swh.lister.phabricator.tasks',
'swh.lister.pypi.tasks',
]

View file

View file

@ -0,0 +1,84 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import random
import json
from .models import PackagistModel
from swh.scheduler import utils
from swh.lister.core.simple_lister import SimpleLister
from swh.lister.core.lister_transports import ListerOnePageApiTransport
class PackagistLister(ListerOnePageApiTransport, SimpleLister):
    """List packages available in the Packagist package manager.

    The lister sends the request to the url present in the class
    variable `PAGE`, to receive a list of all the package names
    present in the Packagist package manager. It iterates over all the
    packages, constructs the metadata url of each package from the
    name of the package and creates a loading task.

    Task:
        Type: load-packagist
        Policy: recurring
        Args:
            <package_name>
            <package_metadata_url>

    Example:
        Type: load-packagist
        Policy: recurring
        Args:
            'hypejunction/hypegamemechanics'
            'https://repo.packagist.org/p/hypejunction/hypegamemechanics.json'

    """
    MODEL = PackagistModel
    LISTER_NAME = 'packagist'
    PAGE = 'https://packagist.org/packages/list.json'
    instance = 'packagist'

    def __init__(self, override_config=None):
        """Initialize both parent classes.

        Args:
            override_config: forwarded as-is to `SimpleLister.__init__`.

        """
        ListerOnePageApiTransport.__init__(self)
        SimpleLister.__init__(self, override_config=override_config)

    def task_dict(self, origin_type, origin_url, **kwargs):
        """Return task format dict

        This is overridden from the lister_base as more information is
        needed for the ingestion task creation.

        Args:
            origin_type: suffix of the task type ('load-<origin_type>').
            origin_url: metadata url passed as second task argument.
            **kwargs: only the 'name' entry is read; it is passed as the
                first task argument (None when absent).

        Returns:
            Whatever `utils.create_task_dict` returns for a 'recurring'
            task built from the values above.

        """
        return utils.create_task_dict('load-%s' % origin_type, 'recurring',
                                      kwargs.get('name'), origin_url)

    def list_packages(self, response):
        """List the actual packagist origins from the response.

        Args:
            response: object whose `text` attribute holds a JSON document
                with a 'packageNames' entry.

        Returns:
            A new list with the package names, shuffled.

        """
        payload = json.loads(response.text)
        # Copy before shuffling so the decoded payload is left untouched.
        packages = list(payload['packageNames'])
        random.shuffle(packages)
        return packages

    def get_model_from_repo(self, repo_name):
        """Transform from repository representation to model

        Args:
            repo_name: package name, used both as identifier and to build
                the metadata url.

        Returns:
            dict with the keys 'uid', 'name', 'full_name', 'html_url',
            'origin_url' and 'origin_type'.

        """
        url = 'https://repo.packagist.org/p/%s.json' % repo_name
        return {
            'uid': repo_name,
            'name': repo_name,
            'full_name': repo_name,
            'html_url': url,
            'origin_url': url,
            'origin_type': 'packagist',
        }

    def transport_response_simplified(self, response):
        """Transform response to list for model manipulation

        Args:
            response: iterable of package names.

        Returns:
            list of model dicts, one per package name, in input order.

        """
        return [self.get_model_from_repo(repo_name) for repo_name in response]

View file

@ -0,0 +1,16 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, String
from ..core.models import ModelBase
class PackagistModel(ModelBase):
    """Database model describing one Packagist repository.

    Rows are stored in the ``packagist_repo`` table.

    """
    __tablename__ = 'packagist_repo'

    # String primary key of the row.
    uid = Column(String, primary_key=True)

View file

@ -0,0 +1,17 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.scheduler.celery_backend.config import app
from .lister import PackagistLister
@app.task(name=__name__ + '.PackagistListerTask')
def packagist_lister(**lister_args):
    """Task: instantiate a PackagistLister with the given args and run it."""
    lister = PackagistLister(**lister_args)
    lister.run()
@app.task(name=__name__ + '.ping')
def ping():
    """Task that takes no input and always answers 'OK'."""
    answer = 'OK'
    return answer

View file

View file

@ -0,0 +1,9 @@
{
"packageNames": [
"0.0.0/composer-include-files",
"0.0.0/laravel-env-shim",
"0.0.1/try-make-package",
"0099ff/dialogflowphp",
"00f100/array_dot"
]
}

View file

@ -0,0 +1 @@
from swh.lister.core.tests.conftest import * # noqa

View file

@ -0,0 +1,66 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import unittest
import requests_mock
from unittest.mock import patch
from swh.lister.packagist.lister import PackagistLister
from swh.lister.core.tests.test_lister import HttpSimpleListerTester
# Package names the lister is expected to find in the mocked API response.
expected_packages = [
    '0.0.0/composer-include-files',
    '0.0.0/laravel-env-shim',
    '0.0.1/try-make-package',
    '0099ff/dialogflowphp',
    '00f100/array_dot',
]

# Model dict expected for the package '0099ff/dialogflowphp'.
expected_model = dict(
    uid='0099ff/dialogflowphp',
    name='0099ff/dialogflowphp',
    full_name='0099ff/dialogflowphp',
    html_url='https://repo.packagist.org/p/0099ff/dialogflowphp.json',
    origin_url='https://repo.packagist.org/p/0099ff/dialogflowphp.json',
    origin_type='packagist',
)
class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase):
    """Tests for PackagistLister run against a canned API response."""
    Lister = PackagistLister
    PAGE = 'https://packagist.org/packages/list.json'
    lister_subdir = 'packagist'
    good_api_response_file = 'api_response.json'
    entries = 5

    @requests_mock.Mocker()
    def test_list_packages(self, http_mocker):
        """Listing the single api page should return exactly the packages
        it contains

        """
        http_mocker.get(self.PAGE, text=self.mock_response)
        fl = self.get_fl()
        packages = fl.list_packages(self.get_api_response(0))

        # list_packages shuffles its result, so compare regardless of
        # order; comparing whole lists also rejects unexpected extras.
        assert sorted(packages) == sorted(expected_packages)

    def test_transport_response_simplified(self):
        """Test model created by the lister

        """
        fl = self.get_fl()
        model = fl.transport_response_simplified(['0099ff/dialogflowphp'])
        assert len(model) == 1
        # Compare the whole dict so that a missing key fails as well.
        assert model[0] == expected_model

    def test_task_dict(self):
        """Test the task creation of lister

        """
        fl = self.get_fl()
        with patch(
            'swh.lister.packagist.lister.utils.create_task_dict'
        ) as mock_create_tasks:
            fl.task_dict(origin_type='packagist', origin_url='https://abc',
                         name='test_pack')
            mock_create_tasks.assert_called_once_with(
                'load-packagist', 'recurring', 'test_pack', 'https://abc')

View file

@ -0,0 +1,31 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from unittest.mock import patch
def test_ping(swh_app, celery_session_worker):
    """The packagist ping task should succeed and answer 'OK'."""
    async_result = swh_app.send_task('swh.lister.packagist.tasks.ping')
    assert async_result

    async_result.wait()
    assert async_result.successful()
    assert async_result.result == 'OK'
@patch('swh.lister.packagist.tasks.PackagistLister')
def test_lister(lister, swh_app, celery_session_worker):
    """The lister task should build the lister without args and run it once."""
    # The patched class returns itself when called, so one mock records
    # both the construction and the calls made on the "instance".
    lister.run.return_value = None
    lister.return_value = lister

    async_result = swh_app.send_task(
        'swh.lister.packagist.tasks.PackagistListerTask')
    assert async_result

    async_result.wait()
    assert async_result.successful()

    lister.assert_called_once_with()
    lister.db_last_index.assert_not_called()
    lister.run.assert_called_once_with()