D lang lister
Add a dlang module that retrieves origins from an HTTP API endpoint. Each origin is a Git-based project URL on github.com, gitlab.com or bitbucket.com.
This commit is contained in:
parent
3ab856288c
commit
398a3d3a9d
8 changed files with 948 additions and 0 deletions
1
setup.py
1
setup.py
|
@ -65,6 +65,7 @@ setup(
|
|||
lister.cran=swh.lister.cran:register
|
||||
lister.crates=swh.lister.crates:register
|
||||
lister.debian=swh.lister.debian:register
|
||||
lister.dlang=swh.lister.dlang:register
|
||||
lister.fedora=swh.lister.fedora:register
|
||||
lister.gitea=swh.lister.gitea:register
|
||||
lister.github=swh.lister.github:register
|
||||
|
|
75
swh/lister/dlang/__init__.py
Normal file
75
swh/lister/dlang/__init__.py
Normal file
|
@ -0,0 +1,75 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
"""
|
||||
Dlang lister
|
||||
=============
|
||||
|
||||
D is a general-purpose programming language with static typing, systems-level access,
|
||||
and C-like syntax.
|
||||
|
||||
The `Dlang`_ lister lists origins from its package manager registry `DUB`_.
|
||||
|
||||
The registry provides an `http api endpoint`_ that helps in getting the packages index
|
||||
with name, url, versions and dates.
|
||||
|
||||
As of July 2023, `DUB`_ lists 2364 package names.
|
||||
|
||||
Origins retrieving strategy
|
||||
---------------------------
|
||||
|
||||
To build a list of origins we make a GET request to an `http api endpoint`_ that returns
|
||||
a JSON array of objects.
|
||||
The origin url for each package is constructed with the information of corresponding
|
||||
`repository` entry which represents Git based projects hosted on Github, GitLab or
|
||||
Bitbucket.
|
||||
|
||||
Page listing
|
||||
------------
|
||||
|
||||
There is only one page, listing all origin URLs.
|
||||
|
||||
Origins from page
|
||||
-----------------
|
||||
|
||||
The lister is stateless and yields all origin URLs from one page. It is a list of package
|
||||
URLs with last update information.
|
||||
|
||||
Running tests
|
||||
-------------
|
||||
|
||||
Activate the virtualenv and run from within swh-lister directory::
|
||||
|
||||
pytest -s -vv --log-cli-level=DEBUG swh/lister/dlang/tests
|
||||
|
||||
Testing with Docker
|
||||
-------------------
|
||||
|
||||
Change directory to swh/docker then launch the docker environment::
|
||||
|
||||
docker compose up -d
|
||||
|
||||
Then schedule a dlang listing task::
|
||||
|
||||
docker compose exec swh-scheduler swh scheduler task add -p oneshot list-dlang
|
||||
|
||||
You can follow lister execution by displaying logs of swh-lister service::
|
||||
|
||||
docker compose logs -f swh-lister
|
||||
|
||||
.. _Dlang: https://dlang.org/
|
||||
.. _DUB: https://code.dlang.org/
|
||||
.. _http api endpoint: https://code.dlang.org/api/packages/dump
|
||||
"""
|
||||
|
||||
|
||||
def register():
    """Return the plugin description for the D lang lister.

    The lister class is imported inside the function so that importing this
    package does not import the lister module itself.

    Returns:
        A dict with the lister class under ``"lister"`` and the dotted names
        of the task modules under ``"task_modules"``.
    """
    from .lister import DlangLister

    task_module = f"{__name__}.tasks"
    return {"lister": DlangLister, "task_modules": [task_module]}
|
93
swh/lister/dlang/lister.py
Normal file
93
swh/lister/dlang/lister.py
Normal file
|
@ -0,0 +1,93 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
import iso8601
|
||||
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from ..pattern import CredentialsType, StatelessLister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Aliasing the page results returned by `get_pages` method from the lister.
|
||||
DlangListerPage = List[Dict[str, Any]]
|
||||
|
||||
|
||||
class DlangLister(StatelessLister[DlangListerPage]):
    """List D lang origins.

    Origins are built from the single JSON document served at
    ``PACKAGES_DUMP_URL``; each entry's ``repository`` object is turned into
    a Git origin url on one of the hosts declared in ``KINDS``.
    """

    LISTER_NAME = "dlang"
    VISIT_TYPE = "git"  # D lang origins url are Git repositories
    INSTANCE = "dlang"

    BASE_URL = "https://code.dlang.org"
    PACKAGES_DUMP_URL = BASE_URL + "/api/packages/dump"

    # Maps the ``kind`` value of a ``repository`` entry to the base url of the
    # hosting forge.
    # NOTE(review): Bitbucket's canonical host looks like bitbucket.org rather
    # than bitbucket.com -- confirm before changing, as it alters origin urls.
    KINDS = {
        "github": "https://github.com",
        "gitlab": "https://gitlab.com",
        "bitbucket": "https://bitbucket.com",
    }

    KIND_URL_PATTERN = "{url}/{owner}/{project}"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
    ):
        """Set up the lister against the packages dump endpoint.

        All arguments are forwarded unchanged to the base lister; the instance
        name and the url are fixed to ``INSTANCE`` and ``PACKAGES_DUMP_URL``.
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=self.PACKAGES_DUMP_URL,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )
        # Ask explicitly for JSON on every request made with this session.
        self.session.headers.update({"Accept": "application/json"})

    def get_pages(self) -> Iterator[DlangListerPage]:
        """Yield the single 'page' listing every package.

        A GET request is made on ``self.url`` (given to the base constructor
        as ``PACKAGES_DUMP_URL``, i.e.
        ``https://code.dlang.org/api/packages/dump``) and the decoded JSON
        body is yielded as is.

        There is only one page that lists all origins urls.
        """
        response = self.http_request(self.url)
        yield response.json()

    def get_origins_from_page(self, page: DlangListerPage) -> Iterator[ListedOrigin]:
        """Iterate on all entries of a page and yield ListedOrigin instances.

        Entries whose ``repository`` ``kind`` is not one of ``KINDS`` are
        logged as errors and skipped.
        """
        assert self.lister_obj.id is not None

        for entry in page:
            repo: Dict[str, Any] = entry["repository"]
            kind: str = repo["kind"]

            if kind not in self.KINDS:
                # Use the module logger (not the root logger) and let the
                # logging framework do the formatting lazily.
                logger.error("Can not build a repository url with %r", repo)
                continue

            repo_url = self.KIND_URL_PATTERN.format(
                url=self.KINDS[kind], owner=repo["owner"], project=repo["project"]
            )

            last_update = iso8601.parse_date(entry["stats"]["updatedAt"])

            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=repo_url,
                last_update=last_update,
            )
|
19
swh/lister/dlang/tasks.py
Normal file
19
swh/lister/dlang/tasks.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from celery import shared_task
|
||||
|
||||
from swh.lister.dlang.lister import DlangLister
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".DlangListerTask")
def list_dlang(**lister_args):
    """Lister task for D lang packages registry.

    Builds a ``DlangLister`` from the configuration file, forwarding
    ``lister_args`` to it, runs it and returns the run result as a dict.
    """
    lister = DlangLister.from_configfile(**lister_args)
    stats = lister.run()
    return stats.dict()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".ping")
def _ping():
    """Trivial task that always answers ``"OK"``."""
    reply = "OK"
    return reply
|
0
swh/lister/dlang/tests/__init__.py
Normal file
0
swh/lister/dlang/tests/__init__.py
Normal file
File diff suppressed because one or more lines are too long
41
swh/lister/dlang/tests/test_lister.py
Normal file
41
swh/lister/dlang/tests/test_lister.py
Normal file
|
@ -0,0 +1,41 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
import iso8601
|
||||
|
||||
from swh.lister.dlang.lister import DlangLister
|
||||
|
||||
expected_origins = [
|
||||
{
|
||||
"url": "https://github.com/katyukha/TheProcess",
|
||||
"last_update": "2023-07-12T14:42:46.231Z",
|
||||
},
|
||||
{
|
||||
"url": "https://gitlab.com/AntonMeep/silly",
|
||||
"last_update": "2023-07-12T01:32:31.235Z",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
def test_dlang_lister(datadir, requests_mock_datadir, swh_scheduler):
    """Run the lister once and compare the recorded origins with the expected ones."""
    lister = DlangLister(scheduler=swh_scheduler)
    stats = lister.run()

    assert stats.pages == 1
    assert stats.origins == 1 + 1

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(listed) == len(expected_origins)

    # Compare as sets of (visit type, url, last update) so order is irrelevant.
    actual = {(origin.visit_type, origin.url, origin.last_update) for origin in listed}
    wanted = {
        ("git", item["url"], iso8601.parse_date(item["last_update"]))
        for item in expected_origins
    }
    assert actual == wanted
|
31
swh/lister/dlang/tests/test_tasks.py
Normal file
31
swh/lister/dlang/tests/test_tasks.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
||||
|
||||
def test_dlang_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the ping task through the Celery app fixture and expect ``"OK"`` back."""
    async_result = swh_scheduler_celery_app.send_task("swh.lister.dlang.tasks.ping")
    assert async_result

    async_result.wait()
    assert async_result.successful()
    assert async_result.result == "OK"
|
||||
|
||||
|
||||
def test_dlang_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """Check the lister task builds the lister from config, runs it and returns its stats."""
    expected_stats = ListerStats(pages=42, origins=42)

    # Replace DlangLister as seen by the tasks module with a mock whose
    # from_configfile() returns the mock itself.
    mocked_lister = mocker.patch("swh.lister.dlang.tasks.DlangLister")
    mocked_lister.from_configfile.return_value = mocked_lister
    mocked_lister.run.return_value = expected_stats

    async_result = swh_scheduler_celery_app.send_task(
        "swh.lister.dlang.tasks.DlangListerTask"
    )
    assert async_result

    async_result.wait()
    assert async_result.successful()
    assert async_result.result == expected_stats.dict()

    mocked_lister.from_configfile.assert_called_once_with()
    mocked_lister.run.assert_called_once_with()
|
Loading…
Add table
Add a link
Reference in a new issue