pagure: Implement lister for pagure forges

Pagure is a git-centered forge, written in Python and built on pygit2.

Its REST API makes it easy to list all projects hosted on an
instance, so the lister implementation is quite simple.

Related to swh/meta#5043.
This commit is contained in:
Antoine Lambert 2023-06-23 09:02:49 +00:00
parent ad6644a663
commit c81c473a83
10 changed files with 376 additions and 0 deletions

View file

@ -80,6 +80,7 @@ setup(
lister.nuget=swh.lister.nuget:register
lister.opam=swh.lister.opam:register
lister.packagist=swh.lister.packagist:register
lister.pagure=swh.lister.pagure:register
lister.phabricator=swh.lister.phabricator:register
lister.pubdev=swh.lister.pubdev:register
lister.puppet=swh.lister.puppet:register

View file

@ -0,0 +1,12 @@
# Copyright (C) 2023 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def register():
    """Return the registration data of the Pagure lister.

    The returned mapping holds the lister class under ``"lister"`` and the
    names of the modules declaring its tasks under ``"task_modules"``.
    """
    # Imported lazily so that importing the package alone does not load the
    # lister implementation.
    from .lister import PagureLister

    task_modules = ["%s.tasks" % __name__]
    return {"lister": PagureLister, "task_modules": task_modules}

View file

@ -0,0 +1,73 @@
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from datetime import datetime, timezone
import logging
from typing import Any, Dict, Iterator, List, Optional
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from ..pattern import CredentialsType, StatelessLister
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Type of one page of results: the list of project entries (decoded JSON
# objects) taken from a single response of the projects listing endpoint.
ProjectsPage = List[Dict[str, Any]]
class PagureLister(StatelessLister[ProjectsPage]):
    """List git origins hosted on a Pagure forge.

    Projects are enumerated through the paginated projects endpoint of the
    forge REST API, and each of them is reported as a ``git`` origin.
    """

    LISTER_NAME = "pagure"

    # Path of the projects listing endpoint, appended to the forge base URL.
    API_PROJECTS_ENDPOINT = "/api/0/projects"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: Optional[str] = None,
        instance: Optional[str] = None,
        credentials: CredentialsType = None,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
        per_page: int = 100,
    ):
        """Set up the lister for a given Pagure instance.

        Args:
            scheduler: scheduler interface, forwarded to the base lister
            url: base URL of the forge; trailing slashes are stripped
            instance: name of the forge instance, forwarded to the base lister
                (presumably used to derive the URL when ``url`` is not given;
                confirm against the base class)
            credentials: forwarded to the base lister
            max_origins_per_page: forwarded to the base lister
            max_pages: forwarded to the base lister
            enable_origins: forwarded to the base lister
            per_page: number of projects requested per API page
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            url=url.rstrip("/") if url else None,
            instance=instance,
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )
        self.per_page = per_page
        self.session.headers.update({"Accept": "application/json"})
        # From now on self.url designates the projects listing endpoint.
        self.url = f"{self.url}{self.API_PROJECTS_ENDPOINT}"

    def get_pages(self) -> Iterator[ProjectsPage]:
        """Yield the projects of each API page, following the "next" links
        of the pagination data until there is none left."""
        url_projects = self.url
        while url_projects:
            # Only add the page size to URLs that do not carry it already,
            # i.e. the initial one: "next" links come with their own
            # per_page query parameter.
            params = (
                {"per_page": self.per_page} if "per_page" not in url_projects else None
            )
            response = self.http_request(url_projects, params=params).json()
            yield response["projects"]
            url_projects = response["pagination"]["next"]

    def get_origins_from_page(self, projects: ProjectsPage) -> Iterator[ListedOrigin]:
        """Turn each project of a page into a ``git`` :class:`ListedOrigin`,
        using its ``full_url`` as origin URL and its ``date_modified`` as last
        update date."""
        assert self.lister_obj.id is not None

        for project in projects:
            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                url=project["full_url"],
                visit_type="git",
                # date_modified is an epoch timestamp: convert it straight to
                # an aware UTC datetime. Building a naive local datetime and
                # stamping it as UTC afterwards would shift the date by the
                # UTC offset of the host.
                last_update=datetime.fromtimestamp(
                    int(project["date_modified"]), tz=timezone.utc
                ),
            )

View file

@ -0,0 +1,20 @@
# Copyright (C) 2023 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Dict
from celery import shared_task
from .lister import PagureLister
@shared_task(name=__name__ + ".PagureListerTask")
def list_pagure(**lister_args) -> Dict[str, int]:
    """Celery task running a Pagure listing.

    The keyword arguments are handed over to ``PagureLister.from_configfile``
    and the statistics of the lister run are returned as a dict.
    """
    stats = PagureLister.from_configfile(**lister_args).run()
    return stats.dict()
@shared_task(name=__name__ + ".ping")
def _ping() -> str:
    """Trivial task that always answers ``"OK"``."""
    reply = "OK"
    return reply

View file

View file

@ -0,0 +1,101 @@
{
"args": {
"fork": null,
"namespace": null,
"owner": null,
"page": 1,
"pattern": null,
"per_page": 2,
"short": false,
"tags": [],
"username": null
},
"pagination": {
"first": "https://pagure.io/api/0/projects?per_page=2&page=1",
"last": "https://pagure.io/api/0/projects?per_page=2&page=2",
"next": "https://pagure.io/api/0/projects?per_page=2&page=2",
"page": 1,
"pages": 4921,
"per_page": 2,
"prev": null
},
"projects": [
{
"access_groups": {
"admin": [],
"collaborator": [],
"commit": [],
"ticket": []
},
"access_users": {
"admin": [],
"collaborator": [],
"commit": [],
"owner": [
"jg-dev"
],
"ticket": []
},
"close_status": [],
"custom_keys": [],
"date_created": "1642633824",
"date_modified": "1642633824",
"description": "Testing notes and files for ticket 10291",
"full_url": "https://pagure.io/10291-testing",
"fullname": "10291-testing",
"id": 11286,
"milestones": {},
"name": "10291-testing",
"namespace": null,
"parent": null,
"priorities": {},
"tags": [],
"url_path": "10291-testing",
"user": {
"full_url": "https://pagure.io/user/jg-dev",
"fullname": "Jason Giddings",
"name": "jg-dev",
"url_path": "user/jg-dev"
}
},
{
"access_groups": {
"admin": [],
"collaborator": [],
"commit": [],
"ticket": []
},
"access_users": {
"admin": [],
"collaborator": [],
"commit": [],
"owner": [
"ankursinha"
],
"ticket": []
},
"close_status": [],
"custom_keys": [],
"date_created": "1568047513",
"date_modified": "1568047513",
"description": "NeuroFedora presentation for the OSB workshop",
"full_url": "https://pagure.io/neuro-sig/20190909-OSB-workshop-presentation",
"fullname": "neuro-sig/20190909-OSB-workshop-presentation",
"id": 6715,
"milestones": {},
"name": "20190909-OSB-workshop-presentation",
"namespace": "neuro-sig",
"parent": null,
"priorities": {},
"tags": [],
"url_path": "neuro-sig/20190909-OSB-workshop-presentation",
"user": {
"full_url": "https://pagure.io/user/ankursinha",
"fullname": "Ankur Sinha",
"name": "ankursinha",
"url_path": "user/ankursinha"
}
}
],
"total_projects": 9842
}

View file

@ -0,0 +1,103 @@
{
"args": {
"fork": null,
"namespace": null,
"owner": null,
"page": 2,
"pattern": null,
"per_page": 2,
"short": false,
"tags": [],
"username": null
},
"pagination": {
"first": "https://pagure.io/api/0/projects?per_page=2&page=1",
"last": "https://pagure.io/api/0/projects?per_page=2&page=2",
"next": null,
"page": 2,
"pages": 4921,
"per_page": 2,
"prev": "https://pagure.io/api/0/projects?per_page=2&page=1"
},
"projects": [
{
"access_groups": {
"admin": [],
"collaborator": [],
"commit": [],
"ticket": []
},
"access_users": {
"admin": [],
"collaborator": [],
"commit": [],
"owner": [
"ankursinha"
],
"ticket": []
},
"close_status": [],
"custom_keys": [],
"date_created": "1568047513",
"date_modified": "1568047513",
"description": "NeuroFedora presentation for the OSB workshop",
"full_url": "https://pagure.io/neuro-sig/20190909-OSB-workshop-presentation",
"fullname": "neuro-sig/20190909-OSB-workshop-presentation",
"id": 6715,
"milestones": {},
"name": "20190909-OSB-workshop-presentation",
"namespace": "neuro-sig",
"parent": null,
"priorities": {},
"tags": [],
"url_path": "neuro-sig/20190909-OSB-workshop-presentation",
"user": {
"full_url": "https://pagure.io/user/ankursinha",
"fullname": "Ankur Sinha",
"name": "ankursinha",
"url_path": "user/ankursinha"
}
},
{
"access_groups": {
"admin": [
"neuro-sig"
],
"collaborator": [],
"commit": [],
"ticket": []
},
"access_users": {
"admin": [],
"collaborator": [],
"commit": [],
"owner": [
"ankursinha"
],
"ticket": []
},
"close_status": [],
"custom_keys": [],
"date_created": "1564047918",
"date_modified": "1565348955",
"description": "Presentation slides for NeuroFedora talk at Flock",
"full_url": "https://pagure.io/neuro-sig/2019-flock-neurofedora",
"fullname": "neuro-sig/2019-flock-neurofedora",
"id": 6523,
"milestones": {},
"name": "2019-flock-neurofedora",
"namespace": "neuro-sig",
"parent": null,
"priorities": {},
"tags": [],
"url_path": "neuro-sig/2019-flock-neurofedora",
"user": {
"full_url": "https://pagure.io/user/ankursinha",
"fullname": "Ankur Sinha",
"name": "ankursinha",
"url_path": "user/ankursinha"
}
}
],
"total_projects": 9842
}

View file

@ -0,0 +1,34 @@
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
from swh.lister.pagure.lister import PagureLister
# Distinct origin URLs the lister is expected to record in the scheduler.
expected_origins = {
"https://pagure.io/10291-testing",
"https://pagure.io/neuro-sig/20190909-OSB-workshop-presentation",
"https://pagure.io/neuro-sig/2019-flock-neurofedora",
}
@pytest.mark.parametrize(
    "params", [{"url": "https://pagure.io"}, {"instance": "pagure.io"}]
)
def test_pagure_lister(requests_mock_datadir, swh_scheduler, params):
    """Run the lister, configured either by URL or by instance name, and
    check its statistics and the origins recorded in the scheduler."""
    lister = PagureLister(scheduler=swh_scheduler, per_page=2, **params)

    stats = lister.run()
    assert stats.pages == 2
    assert stats.origins == 3

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(listed) == len(expected_origins)

    for listed_origin in listed:
        assert listed_origin.visit_type == "git"
        assert listed_origin.url in expected_origins
        assert listed_origin.last_update is not None

View file

@ -0,0 +1,31 @@
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.pattern import ListerStats
def test_pagure_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the ping task and check that it succeeds with ``"OK"``."""
    task_result = swh_scheduler_celery_app.send_task("swh.lister.pagure.tasks.ping")
    assert task_result

    task_result.wait()
    assert task_result.successful()
    assert task_result.result == "OK"
def test_pagure_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """Send the listing task with a mocked PagureLister and check that the
    task returns the statistics of the lister run."""
    # Set up the mocked PagureLister: from_configfile() hands back the mock
    # itself, whose run() reports canned statistics.
    expected_stats = ListerStats(pages=42, origins=42)
    lister_mock = mocker.patch("swh.lister.pagure.tasks.PagureLister")
    lister_mock.from_configfile.return_value = lister_mock
    lister_mock.run.return_value = expected_stats

    task_result = swh_scheduler_celery_app.send_task(
        "swh.lister.pagure.tasks.PagureListerTask"
    )
    assert task_result
    task_result.wait()
    assert task_result.successful()
    assert task_result.result == expected_stats.dict()

    # The task was sent without arguments, so none reach the lister.
    lister_mock.from_configfile.assert_called_once_with()
    lister_mock.run.assert_called_once_with()

View file

@ -41,6 +41,7 @@ lister_args = {
"fedora": {
"url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
},
"pagure": {"instance": "pagure.io"},
}