Add Julia Lister for listing Julia Packages
This module introduces the Julia lister. It retrieves Julia package origins from the Julia General Registry, a Git repository made of per-package directories containing TOML definition files.
This commit is contained in:
parent
7b932f46b5
commit
f8cfa05f3f
11 changed files with 440 additions and 0 deletions
|
@ -5,3 +5,4 @@ requests_mock
|
|||
types-click
|
||||
types-pyyaml
|
||||
types-requests
|
||||
types-toml
|
||||
|
|
|
@ -12,3 +12,4 @@ rpy2
|
|||
setuptools
|
||||
tenacity >= 6.2
|
||||
testing.postgresql
|
||||
toml
|
||||
|
|
1
setup.py
1
setup.py
|
@ -76,6 +76,7 @@ setup(
|
|||
lister.gogs=swh.lister.gogs:register
|
||||
lister.hackage=swh.lister.hackage:register
|
||||
lister.hex=swh.lister.hex:register
|
||||
lister.julia=swh.lister.julia:register
|
||||
lister.launchpad=swh.lister.launchpad:register
|
||||
lister.nixguix=swh.lister.nixguix:register
|
||||
lister.npm=swh.lister.npm:register
|
||||
|
|
83
swh/lister/julia/__init__.py
Normal file
83
swh/lister/julia/__init__.py
Normal file
|
@ -0,0 +1,83 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
"""
|
||||
Julia lister
|
||||
=============
|
||||
|
||||
`Julia`_ is a dynamic language for scientific computing applications. It comes with
|
||||
an ecosystem of packages managed with its internal package manager `Pkg`_.
|
||||
|
||||
A list of all officially registered packages can be found in the `Julia General Registry`_
|
||||
on GitHub, but it's easier to search for packages using the `JuliaHub`_ and
|
||||
`Julia Packages`_ sites.
|
||||
|
||||
The `Julia`_ lister lists origins from a Git repository, the `Julia General registry`_.
|
||||
The main `Registry.toml`_ file lists available Julia packages. Each directory
|
||||
matches a package name and has TOML files to describe the package and its versions.
|
||||
|
||||
Julia origins are Git repositories hosted on Github. Each repository must provide its
|
||||
packaged releases using the Github release system.
|
||||
|
||||
As of July 2023 `Julia General registry`_ lists 9714 package names.
|
||||
|
||||
Origins retrieving strategy
|
||||
---------------------------
|
||||
|
||||
To build a list of origins we clone the `Julia General registry`_ Git repository, then
|
||||
read the `Registry.toml`_ file to get the path to packages directories.
|
||||
Each directory has a `Package.toml` file from which we get the Git repository url for
|
||||
a package.
|
||||
|
||||
Page listing
|
||||
------------
|
||||
|
||||
There is only one page, listing all origin urls.
|
||||
|
||||
Origins from page
|
||||
-----------------
|
||||
|
||||
The lister is stateless and yields all origin urls from one page.
|
||||
Each url corresponds to the Git url of the package repository.
|
||||
|
||||
Running tests
|
||||
-------------
|
||||
|
||||
Activate the virtualenv and run from within swh-lister directory::
|
||||
|
||||
pytest -s -vv --log-cli-level=DEBUG swh/lister/julia/tests
|
||||
|
||||
Testing with Docker
|
||||
-------------------
|
||||
|
||||
Change directory to swh/docker then launch the docker environment::
|
||||
|
||||
docker compose up -d
|
||||
|
||||
Then schedule a julia listing task::
|
||||
|
||||
docker compose exec swh-scheduler swh scheduler task add -p oneshot list-julia
|
||||
|
||||
You can follow lister execution by displaying logs of swh-lister service::
|
||||
|
||||
docker compose logs -f swh-lister
|
||||
|
||||
.. _Julia: https://julialang.org/
|
||||
.. _Pkg: https://docs.julialang.org/en/v1/stdlib/Pkg/
|
||||
.. _Julia General registry: https://github.com/JuliaRegistries/General
|
||||
.. _JuliaHub: https://juliahub.com/
|
||||
.. _Julia Packages: https://julialang.org/packages/
|
||||
.. _Registry.toml: https://github.com/JuliaRegistries/General/blob/master/Registry.toml
|
||||
""" # noqa: B950
|
||||
|
||||
|
||||
def register():
    """Describe the Julia lister plugin.

    Returns:
        A dict with the lister class under ``"lister"`` and, under
        ``"task_modules"``, a one-element list naming this package's ``tasks``
        module.
    """
    # Imported lazily so that importing the package itself stays cheap.
    from .lister import JuliaLister

    tasks_module = f"{__name__}.tasks"
    return {"lister": JuliaLister, "task_modules": [tasks_module]}
|
90
swh/lister/julia/lister.py
Normal file
90
swh/lister/julia/lister.py
Normal file
|
@ -0,0 +1,90 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, List, Optional, Tuple
|
||||
|
||||
from dulwich import porcelain
|
||||
import toml
|
||||
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from ..pattern import CredentialsType, StatelessLister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Aliasing the page results returned by `get_pages` method from the lister.
|
||||
JuliaListerPage = List[Tuple[str, Any]]
|
||||
|
||||
|
||||
class JuliaLister(StatelessLister[JuliaListerPage]):
    """List Julia packages origins.

    The lister keeps a clone of the Julia General Registry Git repository on
    disk, reads its ``Registry.toml`` index, and yields one Git origin per
    registered package, using the ``repo`` entry of each package's
    ``Package.toml`` file.
    """

    LISTER_NAME = "julia"
    VISIT_TYPE = "git"  # Julia origins url are Git repositories
    INSTANCE = "julia"

    REPO_URL = (
        "https://github.com/JuliaRegistries/General.git"  # Julia General Registry
    )
    # On-disk location of the registry clone and of its index file.
    # NOTE(review): this is a fixed path under /tmp, so every lister instance
    # in the same environment works on the same clone -- confirm this is
    # intended.
    REPO_PATH = Path("/tmp/General")
    REGISTRY_PATH = REPO_PATH / "Registry.toml"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
        url: Optional[str] = None,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
    ):
        """Initialize the lister.

        Args:
            scheduler: forwarded unchanged to the parent constructor
            credentials: forwarded unchanged to the parent constructor
            url: url of the registry Git repository; falls back to
                :attr:`REPO_URL` when not provided (or empty)
            max_origins_per_page: forwarded unchanged to the parent constructor
            max_pages: forwarded unchanged to the parent constructor
            enable_origins: forwarded unchanged to the parent constructor
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=url or self.REPO_URL,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )

    def get_registry_repository(self) -> None:
        """Get Julia General Registry Git repository up to date on disk.

        When :attr:`REPO_PATH` already exists, pull from ``self.url`` into it;
        otherwise clone ``self.url`` to :attr:`REPO_PATH`.
        """
        if self.REPO_PATH.exists():
            porcelain.pull(self.REPO_PATH, remote_location=self.url)
        else:
            porcelain.clone(source=self.url, target=self.REPO_PATH)

    def get_pages(self) -> Iterator[JuliaListerPage]:
        """Yield the single page listing all registered packages.

        The registry Git repository is cloned or updated on disk, then the
        ``[packages]`` table of its ``Registry.toml`` file is read. The page is
        the list of ``(key, value)`` pairs of that table; each value is expected
        to provide the ``path`` of the package directory within the repository.

        Raises:
            FileNotFoundError: if the registry index file is missing from the
                repository checkout.
        """
        self.get_registry_repository()
        # Explicit check rather than ``assert``: assertions are stripped when
        # Python runs with -O.
        if not self.REGISTRY_PATH.exists():
            raise FileNotFoundError(
                f"Julia registry index not found: {self.REGISTRY_PATH}"
            )
        registry = toml.load(self.REGISTRY_PATH)
        # Materialize the items view so the page matches JuliaListerPage
        # (a list of tuples).
        yield list(registry["packages"].items())

    def get_origins_from_page(self, page: JuliaListerPage) -> Iterator[ListedOrigin]:
        """Yield one ListedOrigin per package entry of ``page``.

        For each entry, the ``Package.toml`` file found in the package
        directory is loaded and its ``repo`` value is used as the origin url.

        Raises:
            FileNotFoundError: if the registry repository is missing from disk.
        """
        assert self.lister_obj.id is not None
        if not self.REPO_PATH.exists():
            raise FileNotFoundError(
                f"Julia registry repository not found: {self.REPO_PATH}"
            )

        # Only the entry value is needed; the key is not used here.
        for _key, info in page:
            package_info_path = self.REPO_PATH / info["path"] / "Package.toml"
            package_info = toml.load(package_info_path)
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=package_info["repo"],
                last_update=None,
            )
|
19
swh/lister/julia/tasks.py
Normal file
19
swh/lister/julia/tasks.py
Normal file
|
@ -0,0 +1,19 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from celery import shared_task
|
||||
|
||||
from swh.lister.julia.lister import JuliaLister
|
||||
|
||||
|
||||
@shared_task(name=f"{__name__}.JuliaListerTask")
def list_julia(**lister_args):
    """Lister task for Julia General Registry.

    Builds a JuliaLister from the configuration file, passing ``lister_args``
    along, runs it and returns the resulting statistics as a dict.
    """
    lister = JuliaLister.from_configfile(**lister_args)
    stats = lister.run()
    return stats.dict()
|
||||
|
||||
|
||||
@shared_task(name=f"{__name__}.ping")
def _ping():
    """Trivial task that always answers ``"OK"``."""
    return "OK"
|
30
swh/lister/julia/tests/__init__.py
Normal file
30
swh/lister/julia/tests/__init__.py
Normal file
|
@ -0,0 +1,30 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import os
|
||||
from pathlib import PosixPath
|
||||
import subprocess
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
def prepare_repository_from_archive(
    archive_path: str,
    filename: Optional[str] = None,
    tmp_path: Union[PosixPath, str] = "/tmp",
) -> str:
    """Extract the archive at ``archive_path`` into ``tmp_path``.

    The existence of the archive is not checked beforehand.

    Args:
        archive_path: path of the tar archive to extract
        filename: last path component of the returned url; defaults to the
            base name of ``archive_path``
        tmp_path: directory the archive is extracted into

    Returns:
        A ``file://`` url which can be used as origin url.
    """
    destination = tmp_path if isinstance(tmp_path, str) else str(tmp_path)

    # Unpack the folder/repositories/dump so that it can be ingested.
    subprocess.check_output(["tar", "xf", archive_path, "-C", destination])

    # Build the origin url (or some derivative form).
    repo_name = filename or os.path.basename(archive_path)
    return f"file://{destination}/{repo_name}"
|
Binary file not shown.
148
swh/lister/julia/tests/data/fake_julia_registry_repository.sh
Normal file
148
swh/lister/julia/tests/data/fake_julia_registry_repository.sh
Normal file
|
@ -0,0 +1,148 @@
|
|||
#!/usr/bin/env bash

# Script to generate fake-julia-registry-repository.tar.gz
# Creates a git repository like https://github.com/JuliaRegistries/General.git
# for tests purposes
#
# The archive is written to the directory the script is run from. A scratch
# "tmp_dir" directory is created there and removed once the archive is built.

set -euo pipefail

# files and directories for Julia registry repository
mkdir -p tmp_dir/General/
cd tmp_dir/General/

# Registry index; package entries are appended to the [packages] table below.
cat > Registry.toml <<'EOF'
name = "General"
uuid = "23338594-aafe-5451-b93e-139f81909106"
repo = "https://github.com/JuliaRegistries/General.git"

description = """
Official general Julia package registry where people can
register any package they want without too much debate about
naming and without enforced standards on documentation or
testing. We nevertheless encourage documentation, testing and
some amount of consideration when choosing package names.
"""

[packages]
EOF

# Init as a git repository
git init
git add .
git commit -m "Init fake Julia registry repository for tests purpose"

# First package: Fable
mkdir -p F/Fable

cat > F/Fable/Package.toml <<'EOF'
name = "Fable"
uuid = "a3ea4736-0a3b-4c29-ac8a-20364318a635"
repo = "https://github.com/leios/Fable.jl.git"

EOF

cat > F/Fable/Versions.toml <<'EOF'
["0.0.1"]
git-tree-sha1 = "d98ef9a5309f0ec8caaf34bf4cefaf1f1ca525e8"

["0.0.2"]
git-tree-sha1 = "65301af3ab06b04cf8a52cd43b06222bab5249c2"

EOF

echo 'a3ea4736-0a3b-4c29-ac8a-20364318a635 = { name = "Fable", path = "F/Fable" }' >> Registry.toml

git add .
git commit -m "New package: Fable v0.0.2"

# Second package: Oscar
mkdir -p O/Oscar

cat > O/Oscar/Package.toml <<'EOF'
name = "Oscar"
uuid = "f1435218-dba5-11e9-1e4d-f1a5fab5fc13"
repo = "https://github.com/oscar-system/Oscar.jl.git"

EOF

cat > O/Oscar/Versions.toml <<'EOF'
["0.2.0"]
git-tree-sha1 = "cda489ed50fbd625d245655ce6e5858c3c21ce12"

["0.3.0"]
git-tree-sha1 = "d62e911d06affb6450a0d059c3432df284a8e3c1"

["0.4.0"]
git-tree-sha1 = "91a9c623da588d5fcfc1f0ce0b3d57a0e35c65d2"

["0.5.0"]
git-tree-sha1 = "5d595e843a71df04da0e8027c4773a158be0c4f4"

["0.5.1"]
git-tree-sha1 = "501602b8c0efc9b4fc6a68d0cb53b9103f736313"

["0.5.2"]
git-tree-sha1 = "aa42d7bc3282e72b1b5c41d518661634cc454de0"

["0.6.0"]
git-tree-sha1 = "a3ca062f1e9ab1728de6af6812c1a09bb527e5ce"

["0.7.0"]
git-tree-sha1 = "185ce4c7b082bf3530940af4954642292da25ff9"

["0.7.1"]
git-tree-sha1 = "26815d2504820400189b2ba822bea2b4c81555d9"

["0.8.0"]
git-tree-sha1 = "25c9620ab9ee15e72b1fea5a903de51088185a7e"

["0.8.1"]
git-tree-sha1 = "53a5c754fbf891bc279040cfb9a2b85c03489f38"

["0.8.2"]
git-tree-sha1 = "cd7595c13e95d810bfd2dd3a96558fb8fd545470"

["0.9.0"]
git-tree-sha1 = "738574ad4cb14da838e3fa5a2bae0c84cca324ed"

["0.10.0"]
git-tree-sha1 = "79e850c5e047754e985c8e0a4220d6f7b1715999"

["0.10.1"]
git-tree-sha1 = "45a146665c899f358c5d24a1551fee8e710285a1"

["0.10.2"]
git-tree-sha1 = "0b127546fd5068de5d161c9ace299cbeb5b8c8b3"

["0.11.0"]
git-tree-sha1 = "001842c060d17eecae8070f8ba8e8163f760722f"

["0.11.1"]
git-tree-sha1 = "3309b97c9327617cd063cc1de5850dc13aad6007"

["0.11.2"]
git-tree-sha1 = "9c2873412042edb336c5347ffa7a9daf29264da8"

["0.11.3"]
git-tree-sha1 = "0c452a18943144989213e2042766371d49505b22"

["0.12.0"]
git-tree-sha1 = "7618e3ba2e9b2ea43ad5d2c809e726a8a9e6e7b1"

["0.12.1"]
git-tree-sha1 = "59619a31c56c9e61b5dabdbd339e30c227c5d13d"

EOF

echo 'f1435218-dba5-11e9-1e4d-f1a5fab5fc13 = { name = "Oscar", path = "O/Oscar" }' >> Registry.toml

git add .
git commit -m "New package: Oscar v0.12.1"

# Save some space
# -f: do not abort (set -e) when git did not install any sample hook
rm -f .git/hooks/*.sample

# Archive
cd ../
tar -czf fake-julia-registry-repository.tar.gz General
mv fake-julia-registry-repository.tar.gz ../

# Clean up tmp_dir
cd ../
rm -rf tmp_dir
|
36
swh/lister/julia/tests/test_lister.py
Normal file
36
swh/lister/julia/tests/test_lister.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from swh.lister.julia.lister import JuliaLister
|
||||
from swh.lister.julia.tests import prepare_repository_from_archive
|
||||
|
||||
expected_origins = [
    "https://github.com/leios/Fable.jl.git",
    "https://github.com/oscar-system/Oscar.jl.git",
]


def test_julia_lister(datadir, tmp_path, swh_scheduler):
    """Run the lister against the fake registry archive and check the origins
    recorded in the scheduler."""
    archive_path = Path(datadir, "fake-julia-registry-repository.tar.gz")
    repo_url = prepare_repository_from_archive(archive_path, "General", tmp_path)

    lister = JuliaLister(url=repo_url, scheduler=swh_scheduler)
    # Make the lister work on the repository extracted under tmp_path.
    lister.REPO_PATH = Path(tmp_path, "General")
    lister.REGISTRY_PATH = lister.REPO_PATH / "Registry.toml"

    stats = lister.run()
    assert stats.origins == 1 + 1

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(listed) == len(expected_origins)

    actual = {(origin.visit_type, origin.url, origin.last_update) for origin in listed}
    wanted = {("git", url, None) for url in expected_origins}
    assert actual == wanted
|
31
swh/lister/julia/tests/test_tasks.py
Normal file
31
swh/lister/julia/tests/test_tasks.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
||||
|
||||
def test_julia_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The ping task of the Julia lister completes and answers "OK"."""
    task_name = "swh.lister.julia.tasks.ping"
    res = swh_scheduler_celery_app.send_task(task_name)
    assert res

    res.wait()
    assert res.successful()
    assert res.result == "OK"
|
||||
|
||||
|
||||
def test_julia_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """The lister task builds a lister from the config file, runs it once and
    returns its stats as a dict."""
    stats = ListerStats(pages=42, origins=42)

    # setup the mocked JuliaLister
    lister = mocker.patch("swh.lister.julia.tasks.JuliaLister")
    lister.from_configfile.return_value = lister
    lister.run.return_value = stats

    task_name = "swh.lister.julia.tasks.JuliaListerTask"
    res = swh_scheduler_celery_app.send_task(task_name)
    assert res

    res.wait()
    assert res.successful()
    assert res.result == stats.dict()

    lister.from_configfile.assert_called_once_with()
    lister.run.assert_called_once_with()
|
Loading…
Add table
Add a link
Reference in a new issue