Bower: List origins from registry.bower.io
This commit is contained in:
parent
5410b6e3f3
commit
ceae8c42b5
8 changed files with 269 additions and 0 deletions
1
setup.py
1
setup.py
|
@ -58,6 +58,7 @@ setup(
|
|||
lister.arch=swh.lister.arch:register
|
||||
lister.aur=swh.lister.aur:register
|
||||
lister.bitbucket=swh.lister.bitbucket:register
|
||||
lister.bower=swh.lister.bower:register
|
||||
lister.cgit=swh.lister.cgit:register
|
||||
lister.cran=swh.lister.cran:register
|
||||
lister.crates=swh.lister.crates:register
|
||||
|
|
76
swh/lister/bower/__init__.py
Normal file
76
swh/lister/bower/__init__.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
"""
|
||||
Bower lister
|
||||
============
|
||||
|
||||
The `Bower`_ lister lists origins from its package registry `registry.bower.io`_.
|
||||
|
||||
Bower is a tool to manage Javascript packages.
|
||||
|
||||
The registry provides an `http api`_ from which the lister retrieves package names
|
||||
and url.
|
||||
|
||||
As of August 2022 `registry.bower.io`_ lists 71028 package names.
|
||||
|
||||
Note that even if the project is still maintained (security fixes, no new features), it is
|
||||
recommended to not use it anymore and prefer Yarn as a replacement since 2018.
|
||||
|
||||
Origins retrieving strategy
|
||||
---------------------------
|
||||
|
||||
To get a list of all package names we call the `https://registry.bower.io/packages` endpoint.
|
||||
There is no other way for discovery (no archive index, no database dump, no dvcs repository).
|
||||
|
||||
Page listing
|
||||
------------
|
||||
|
||||
There is only one page, which lists all origin URLs.
|
||||
|
||||
Origins from page
|
||||
-----------------
|
||||
|
||||
The lister yields all origin URLs from one page. The page is a list of package names and URLs.
|
||||
Each origin URL corresponds to a Git repository URL.
|
||||
Bower is supposed to support Svn repositories too, but out of roughly 71000 URLs I have only found 35
|
||||
URLs that may not be Git repositories.
|
||||
|
||||
Running tests
|
||||
-------------
|
||||
|
||||
Activate the virtualenv and run from within swh-lister directory::
|
||||
|
||||
pytest -s -vv --log-cli-level=DEBUG swh/lister/bower/tests
|
||||
|
||||
Testing with Docker
|
||||
-------------------
|
||||
|
||||
Change directory to swh/docker then launch the docker environment::
|
||||
|
||||
docker-compose up -d
|
||||
|
||||
Then connect to the lister::
|
||||
|
||||
docker exec -it docker_swh-lister_1 bash
|
||||
|
||||
And run the lister (The output of this listing results in “oneshot” tasks in the scheduler)::
|
||||
|
||||
swh lister run -l bower
|
||||
|
||||
.. _Bower: https://bower.io
|
||||
.. _registry.bower.io: https://registry.bower.io
|
||||
.. _http api: https://registry.bower.io/packages
|
||||
"""
|
||||
|
||||
|
||||
def register():
    """Describe the Bower lister plugin.

    Returns:
        A dict with the lister class under ``"lister"`` and, under
        ``"task_modules"``, a one-element list naming this package's
        ``tasks`` module.
    """
    # Imported inside the function so the lister module is only loaded when
    # the registration hook is actually called.
    from .lister import BowerLister

    tasks_module = "%s.tasks" % __name__
    return {"lister": BowerLister, "task_modules": [tasks_module]}
|
91
swh/lister/bower/lister.py
Normal file
91
swh/lister/bower/lister.py
Normal file
|
@ -0,0 +1,91 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
import logging
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import requests
|
||||
from tenacity.before_sleep import before_sleep_log
|
||||
|
||||
from swh.lister.utils import throttling_retry
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from .. import USER_AGENT
|
||||
from ..pattern import CredentialsType, StatelessLister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Type alias for one page of results yielded by the lister's `get_pages` method.
|
||||
BowerListerPage = List[Dict[str, str]]
|
||||
|
||||
|
||||
class BowerLister(StatelessLister[BowerListerPage]):
    """List Bower (Javascript package manager) origins.

    The whole registry is fetched with a single HTTP request to ``API_URL``;
    each entry of the returned JSON list becomes one listed origin.
    """

    LISTER_NAME = "bower"
    # NOTE(review): the listed URLs are documented as Git repository URLs;
    # confirm that a loader is registered for the "bower" visit type.
    VISIT_TYPE = "bower"
    INSTANCE = "bower"

    API_URL = "https://registry.bower.io/packages"

    # Seconds to wait for the registry (connection and each read) before
    # giving up, so an unresponsive server cannot hang the lister forever.
    REQUEST_TIMEOUT = 60

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
    ):
        """Set up the lister and the HTTP session used to query the registry.

        Args:
            scheduler: scheduler interface forwarded to the base lister.
            credentials: optional credentials forwarded to the base lister.
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=self.API_URL,
        )
        # One shared session so every request carries the same headers.
        self.session = requests.Session()
        self.session.headers.update(
            {
                "Accept": "application/json",
                "User-Agent": USER_AGENT,
            }
        )

    @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
    def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
        """Send a GET request and return the response.

        Retrying is delegated to the ``throttling_retry`` decorator, which is
        configured to log at WARNING level before each sleep.

        Args:
            url: address to fetch.
            params: query-string parameters sent with the request.

        Returns:
            The response, once ``raise_for_status`` has accepted its status.

        Raises:
            requests.HTTPError: when the response carries an HTTP error status.
            requests.Timeout: when the server does not answer within
                ``REQUEST_TIMEOUT`` seconds.
        """
        logger.info("Fetching URL %s with params %s", url, params)

        response = self.session.get(
            url,
            params=params,
            # Without a timeout, requests would wait indefinitely.
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            # Record the payload before raise_for_status() possibly raises.
            logger.warning(
                "Unexpected HTTP status code %s on %s: %s",
                response.status_code,
                response.url,
                response.content,
            )
        response.raise_for_status()

        return response

    def get_pages(self) -> Iterator[BowerListerPage]:
        """Yield the single page of results served by the registry.

        It uses the api endpoint provided by `https://registry.bower.io/packages`
        to get a list of package names, each with an origin url that
        corresponds to a Git repository.

        There is only one page, which lists all origin urls.
        """
        response = self.page_request(url=self.url, params={})
        yield response.json()

    def get_origins_from_page(self, page: BowerListerPage) -> Iterator[ListedOrigin]:
        """Yield one ListedOrigin for each entry of the given page.

        Args:
            page: list of registry entries; the ``"url"`` key of each entry
                is used as the origin url.
        """
        # The lister id is required to build the origins below.
        assert self.lister_obj.id is not None

        for entry in page:
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=entry["url"],
                # No last-update value is set for these origins.
                last_update=None,
            )
|
19
swh/lister/bower/tasks.py
Normal file
19
swh/lister/bower/tasks.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from celery import shared_task
|
||||
|
||||
from swh.lister.bower.lister import BowerLister
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".BowerListerTask")
def list_bower(**lister_args):
    """Celery task running the Bower lister.

    Args:
        **lister_args: keyword arguments forwarded to
            ``BowerLister.from_configfile``.

    Returns:
        The result of the lister run, converted with its ``dict()`` method.
    """
    bower_lister = BowerLister.from_configfile(**lister_args)
    stats = bower_lister.run()
    return stats.dict()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".ping")
def _ping():
    """Trivial task that always answers with the string "OK"."""
    reply = "OK"
    return reply
|
0
swh/lister/bower/tests/__init__.py
Normal file
0
swh/lister/bower/tests/__init__.py
Normal file
14
swh/lister/bower/tests/data/https_registry.bower.io/packages
Normal file
14
swh/lister/bower/tests/data/https_registry.bower.io/packages
Normal file
|
@ -0,0 +1,14 @@
|
|||
[
|
||||
{
|
||||
"name": "font-awesome",
|
||||
"url": "https://github.com/FortAwesome/Font-Awesome.git"
|
||||
},
|
||||
{
|
||||
"name": "redux",
|
||||
"url": "https://github.com/reactjs/redux.git"
|
||||
},
|
||||
{
|
||||
"name": "vue",
|
||||
"url": "https://github.com/vuejs/vue.git"
|
||||
}
|
||||
]
|
37
swh/lister/bower/tests/test_lister.py
Normal file
37
swh/lister/bower/tests/test_lister.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
from swh.lister.bower.lister import BowerLister
|
||||
|
||||
# Packages the test expects the lister to record, as (name, url) pairs
# expanded into dicts with "name" and "url" keys.
expected_origins = [
    {"name": package_name, "url": package_url}
    for package_name, package_url in (
        ("font-awesome", "https://github.com/FortAwesome/Font-Awesome.git"),
        ("redux", "https://github.com/reactjs/redux.git"),
        ("vue", "https://github.com/vuejs/vue.git"),
    )
]
|
||||
|
||||
|
||||
def test_bower_lister(datadir, requests_mock_datadir, swh_scheduler):
    """Run the lister and compare the recorded origins with the expected ones."""
    bower_lister = BowerLister(scheduler=swh_scheduler)
    stats = bower_lister.run()

    # A single page holding three packages is expected.
    assert stats.pages == 1
    assert stats.origins == 3

    listed = swh_scheduler.get_listed_origins(bower_lister.lister_obj.id).results
    assert len(listed) == len(expected_origins)

    # Compare as sets of (visit type, url) pairs, so ordering is irrelevant.
    recorded = {(origin.visit_type, origin.url) for origin in listed}
    wanted = {("bower", package["url"]) for package in expected_origins}
    assert recorded == wanted
|
31
swh/lister/bower/tests/test_tasks.py
Normal file
31
swh/lister/bower/tests/test_tasks.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
||||
|
||||
def test_bower_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the ping task and check that it succeeds with "OK"."""
    async_result = swh_scheduler_celery_app.send_task("swh.lister.bower.tasks.ping")
    assert async_result

    # Block until the task has finished before inspecting its outcome.
    async_result.wait()

    assert async_result.successful()
    assert async_result.result == "OK"
|
||||
|
||||
|
||||
def test_bower_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """Send the lister task with BowerLister mocked and check its result."""
    fake_stats = ListerStats(pages=42, origins=42)

    # Replace BowerLister in the tasks module: from_configfile() hands back
    # the mock itself and run() returns the canned statistics.
    mocked_lister = mocker.patch("swh.lister.bower.tasks.BowerLister")
    mocked_lister.from_configfile.return_value = mocked_lister
    mocked_lister.run.return_value = fake_stats

    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.bower.tasks.BowerListerTask"
    )
    assert async_result
    async_result.wait()
    assert async_result.successful()
    assert async_result.result == fake_stats.dict()

    # The task must have built the lister and run it, both without arguments.
    mocked_lister.from_configfile.assert_called_once_with()
    mocked_lister.run.assert_called_once_with()
|
Loading…
Add table
Add a link
Reference in a new issue