swh.lister.packagist
Implement a Packagist lister to list the names and metadata URLs of all the packages. Closes 1776
This commit is contained in:
parent
08ade29e6d
commit
5727f15cf3
12 changed files with 246 additions and 2 deletions
14
README.md
14
README.md
|
@ -19,6 +19,7 @@ following Python modules:
|
|||
- `swh.lister.phabricator`
|
||||
- `swh.lister.cran`
|
||||
- `swh.lister.cgit`
|
||||
- `swh.lister.packagist`
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
@ -221,6 +222,19 @@ cgit_lister(url='https://cgit.kde.org/',
|
|||
url_prefix='https://anongit.kde.org/')
|
||||
```
|
||||
|
||||
## lister-packagist
|
||||
|
||||
Once configured, you can execute a Packagist lister using the following instructions
|
||||
in a `python3` script:
|
||||
|
||||
```lang=python
|
||||
import logging
|
||||
from swh.lister.packagist.tasks import packagist_lister
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
packagist_lister()
|
||||
```
|
||||
|
||||
Licensing
|
||||
---------
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ from swh.core.cli import CONTEXT_SETTINGS
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
|
||||
'npm', 'phabricator', 'gnu', 'cran', 'cgit']
|
||||
'npm', 'phabricator', 'gnu', 'cran', 'cgit', 'packagist']
|
||||
|
||||
|
||||
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
|
||||
|
@ -133,6 +133,11 @@ def cli(ctx, db_url, listers, drop_tables):
|
|||
url_prefix='http://git.savannah.gnu.org/git/',
|
||||
override_config=override_conf)
|
||||
|
||||
elif lister == 'packagist':
|
||||
from .packagist.models import ModelBase
|
||||
from .packagist.lister import PackagistLister
|
||||
_lister = PackagistLister(override_config=override_conf)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
'Invalid lister %s: only supported listers are %s' %
|
||||
|
|
|
@ -13,6 +13,7 @@ def celery_includes():
|
|||
'swh.lister.gitlab.tasks',
|
||||
'swh.lister.gnu.tasks',
|
||||
'swh.lister.npm.tasks',
|
||||
'swh.lister.pypi.tasks',
|
||||
'swh.lister.packagist.tasks',
|
||||
'swh.lister.phabricator.tasks',
|
||||
'swh.lister.pypi.tasks',
|
||||
]
|
||||
|
|
0
swh/lister/packagist/__init__.py
Normal file
0
swh/lister/packagist/__init__.py
Normal file
84
swh/lister/packagist/lister.py
Normal file
84
swh/lister/packagist/lister.py
Normal file
|
@ -0,0 +1,84 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import random
|
||||
import json
|
||||
from .models import PackagistModel
|
||||
|
||||
from swh.scheduler import utils
|
||||
from swh.lister.core.simple_lister import SimpleLister
|
||||
from swh.lister.core.lister_transports import ListerOnePageApiTransport
|
||||
|
||||
|
||||
class PackagistLister(ListerOnePageApiTransport, SimpleLister):
    """List packages available in the Packagist package manager.

    The lister sends a request to the URL stored in the class
    variable `PAGE` and receives the list of all package names
    present in the Packagist package manager. It iterates over all
    the packages, constructs the metadata URL of each package from
    its name and creates a loading task.

    Task:
        Type: load-packagist
        Policy: recurring
        Args:
            <package_name>
            <package_metadata_url>

    Example:
        Type: load-packagist
        Policy: recurring
        Args:
            'hypejunction/hypegamemechanics'
            'https://repo.packagist.org/p/hypejunction/hypegamemechanics.json'

    """
    MODEL = PackagistModel
    LISTER_NAME = 'packagist'
    PAGE = 'https://packagist.org/packages/list.json'
    instance = 'packagist'

    def __init__(self, override_config=None):
        """Initialize both parent classes explicitly.

        Args:
            override_config: forwarded unchanged to `SimpleLister`;
                defaults to None.

        """
        ListerOnePageApiTransport.__init__(self)
        SimpleLister.__init__(self, override_config=override_config)

    def task_dict(self, origin_type, origin_url, **kwargs):
        """Return task format dict

        This is overridden from the lister_base as more information is
        needed for the ingestion task creation.

        Args:
            origin_type: suffix of the task type ('load-<origin_type>').
            origin_url: passed as the second task argument.
            **kwargs: only the 'name' entry is read and passed as the
                first task argument (None when it is absent).

        Returns:
            The value returned by `utils.create_task_dict` for a
            'recurring' task.

        """
        return utils.create_task_dict('load-%s' % origin_type, 'recurring',
                                      kwargs.get('name'), origin_url)

    def list_packages(self, response):
        """List the actual packagist origins from the response.

        Args:
            response: object whose `text` attribute holds a JSON document
                with a 'packageNames' entry.

        Returns:
            A new list with the package names, in random order.

        """
        response = json.loads(response.text)
        # Copy before shuffling so the decoded payload is left untouched.
        packages = list(response['packageNames'])
        # NOTE(review): presumably shuffled so that successive runs do not
        # always process packages in the same order -- confirm.
        random.shuffle(packages)
        return packages

    def get_model_from_repo(self, repo_name):
        """Transform from repository representation to model

        Args:
            repo_name: name of the package, e.g. 'vendor/package'.

        Returns:
            A dict with the keys 'uid', 'name', 'full_name', 'html_url',
            'origin_url' and 'origin_type'. The package name is used as
            uid, name and full name; both URLs point to the package
            metadata file.

        """
        url = 'https://repo.packagist.org/p/%s.json' % repo_name
        return {
            'uid': repo_name,
            'name': repo_name,
            'full_name': repo_name,
            'html_url': url,
            'origin_url': url,
            'origin_type': 'packagist',
        }

    def transport_response_simplified(self, response):
        """Transform response to list for model manipulation

        Args:
            response: iterable of package names (presumably the list
                produced by `list_packages` -- verify against the
                SimpleLister caller).

        Returns:
            A list with one model dict per package name.

        """
        return [self.get_model_from_repo(repo_name) for repo_name in response]
|
16
swh/lister/packagist/models.py
Normal file
16
swh/lister/packagist/models.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from sqlalchemy import Column, String
|
||||
|
||||
from ..core.models import ModelBase
|
||||
|
||||
|
||||
class PackagistModel(ModelBase):
    """Database representation of a Packagist repository.

    """
    __tablename__ = 'packagist_repo'

    # String primary key of the 'packagist_repo' table.
    uid = Column(String, primary_key=True)
|
17
swh/lister/packagist/tasks.py
Normal file
17
swh/lister/packagist/tasks.py
Normal file
|
@ -0,0 +1,17 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from swh.scheduler.celery_backend.config import app
|
||||
|
||||
from .lister import PackagistLister
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.PackagistListerTask')
def packagist_lister(**lister_args):
    """Instantiate a PackagistLister with the given keyword arguments
    and run it.

    """
    lister = PackagistLister(**lister_args)
    lister.run()
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.ping')
def ping():
    """Trivial task that always answers 'OK'."""
    return 'OK'
|
0
swh/lister/packagist/tests/__init__.py
Normal file
0
swh/lister/packagist/tests/__init__.py
Normal file
9
swh/lister/packagist/tests/api_response.json
Normal file
9
swh/lister/packagist/tests/api_response.json
Normal file
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"packageNames": [
|
||||
"0.0.0/composer-include-files",
|
||||
"0.0.0/laravel-env-shim",
|
||||
"0.0.1/try-make-package",
|
||||
"0099ff/dialogflowphp",
|
||||
"00f100/array_dot"
|
||||
]
|
||||
}
|
1
swh/lister/packagist/tests/conftest.py
Normal file
1
swh/lister/packagist/tests/conftest.py
Normal file
|
@ -0,0 +1 @@
|
|||
from swh.lister.core.tests.conftest import * # noqa
|
66
swh/lister/packagist/tests/test_lister.py
Normal file
66
swh/lister/packagist/tests/test_lister.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import unittest
|
||||
import requests_mock
|
||||
from unittest.mock import patch
|
||||
from swh.lister.packagist.lister import PackagistLister
|
||||
from swh.lister.core.tests.test_lister import HttpSimpleListerTester
|
||||
|
||||
|
||||
# Package names contained in the api_response.json fixture.
expected_packages = [
    '0.0.0/composer-include-files',
    '0.0.0/laravel-env-shim',
    '0.0.1/try-make-package',
    '0099ff/dialogflowphp',
    '00f100/array_dot',
]

# Model expected for the '0099ff/dialogflowphp' package.
expected_model = dict(
    uid='0099ff/dialogflowphp',
    name='0099ff/dialogflowphp',
    full_name='0099ff/dialogflowphp',
    html_url='https://repo.packagist.org/p/0099ff/dialogflowphp.json',
    origin_url='https://repo.packagist.org/p/0099ff/dialogflowphp.json',
    origin_type='packagist',
)
|
||||
|
||||
|
||||
class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase):
    """Tests for PackagistLister based on the api_response.json fixture."""
    Lister = PackagistLister
    PAGE = 'https://packagist.org/packages/list.json'
    lister_subdir = 'packagist'
    good_api_response_file = 'api_response.json'
    entries = 5

    @requests_mock.Mocker()
    def test_list_packages(self, http_mocker):
        """List packages from simple api page should retrieve all packages within

        """
        http_mocker.get(self.PAGE, text=self.mock_response)
        fl = self.get_fl()
        packages = fl.list_packages(self.get_api_response(0))

        # list_packages shuffles its result, so compare regardless of order.
        # Full equality also catches unexpected or duplicated entries, which
        # a membership-only check would let through.
        assert sorted(packages) == sorted(expected_packages)

    def test_transport_response_simplified(self):
        """Test model created by the lister

        """
        fl = self.get_fl()
        model = fl.transport_response_simplified(['0099ff/dialogflowphp'])
        # Compare the whole structure so that a key missing from the
        # produced model fails the test as well.
        assert model == [expected_model]

    def test_task_dict(self):
        """Test the task creation of lister

        """
        fl = self.get_fl()
        with patch('swh.lister.packagist.lister.utils.create_task_dict') as mock_create_tasks:  # noqa
            fl.task_dict(origin_type='packagist', origin_url='https://abc',
                         name='test_pack')
        mock_create_tasks.assert_called_once_with(
            'load-packagist', 'recurring', 'test_pack', 'https://abc')
|
31
swh/lister/packagist/tests/test_tasks.py
Normal file
31
swh/lister/packagist/tests/test_tasks.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
|
||||
def test_ping(swh_app, celery_session_worker):
    """The ping task must complete successfully and answer 'OK'."""
    task_name = 'swh.lister.packagist.tasks.ping'
    result = swh_app.send_task(task_name)
    assert result

    result.wait()
    assert result.successful()
    assert result.result == 'OK'
|
||||
|
||||
|
||||
@patch('swh.lister.packagist.tasks.PackagistLister')
def test_lister(lister, swh_app, celery_session_worker):
    """The lister task must instantiate the lister without arguments
    and call its run() method exactly once.

    """
    # The patched class returns itself when instantiated, so calls made on
    # the "instance" are recorded on the same mock object.
    lister.return_value = lister
    lister.run.return_value = None

    task_name = 'swh.lister.packagist.tasks.PackagistListerTask'
    result = swh_app.send_task(task_name)
    assert result

    result.wait()
    assert result.successful()

    lister.assert_called_once_with()
    lister.db_last_index.assert_not_called()
    lister.run.assert_called_once_with()
|
Loading…
Add table
Add a link
Reference in a new issue