feat: Add Hex.pm lister
This commit is contained in:
parent
5d0f35aa69
commit
a452995d95
12 changed files with 887 additions and 1 deletions
|
@ -27,6 +27,7 @@ following Python modules:
|
|||
- `swh.lister.tuleap`
|
||||
- `swh.lister.gogs`
|
||||
- `swh.lister.fedora`
|
||||
- `swh.lister.hex`
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
|
1
setup.py
1
setup.py
|
@ -87,6 +87,7 @@ setup(
|
|||
lister.maven=swh.lister.maven:register
|
||||
lister.gogs=swh.lister.gogs:register
|
||||
lister.fedora=swh.lister.fedora:register
|
||||
lister.hex=swh.lister.hex:register
|
||||
""",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
|
|
13
swh/lister/hex/__init__.py
Normal file
13
swh/lister/hex/__init__.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
def register():
    """Describe the Hex lister plugin.

    This is the callable referenced by the ``lister.hex`` entry point in
    ``setup.py``.

    Returns:
        A mapping with the lister class under ``"lister"`` and the names of
        the modules holding its tasks under ``"task_modules"``.
    """
    # The lister class is imported inside the function, not at module level.
    from .lister import HexLister

    task_modules = [f"{__name__}.tasks"]
    return dict(lister=HexLister, task_modules=task_modules)
|
130
swh/lister/hex/lister.py
Normal file
130
swh/lister/hex/lister.py
Normal file
|
@ -0,0 +1,130 @@
|
|||
# Copyright (C) 2021-2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
import logging
|
||||
from typing import Any, Dict, Iterator, List
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import iso8601
|
||||
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from ..pattern import CredentialsType, Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
HexListerPage = List[Dict[str, Any]]
|
||||
|
||||
|
||||
def get_tar_url(pkg_name: str, release_version: str):
    """Return the repo.hex.pm tarball URL for one release of a package.

    Args:
        pkg_name: name of the package
        release_version: version string of the release

    Returns:
        ``https://repo.hex.pm/tarballs/<pkg_name>-<release_version>.tar``
    """
    tar_url = f"https://repo.hex.pm/tarballs/{pkg_name}-{release_version}.tar"
    return tar_url
|
||||
|
||||
|
||||
@dataclass
class HexListerState:
    """The HexLister instance state. This is used for incremental listing."""

    #: Id of the page an incremental pass starts from; it is bumped by one
    #: each time a page ending with a greater package name is committed.
    last_page_id: int = 1
    #: Name of the last package of the most recently committed page.
    last_pkg_name: str = ""
|
||||
|
||||
|
||||
class HexLister(Lister[HexListerState, HexListerPage]):
    """List origins from the "Hex" forge.

    Pages of packages are requested from ``HEX_API_URL`` + ``PACKAGES_PATH``
    sorted by name. The page id and last package name kept in the state let a
    later run resume where the previous one stopped.
    """

    LISTER_NAME = "hex"
    VISIT_TYPE = "hex"

    HEX_API_URL = "https://hex.pm/api/"
    PACKAGES_PATH = "packages/"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        instance: str = "hex",
        credentials: CredentialsType = None,
    ):
        """Set up the lister against the Hex API and ask it for JSON answers.

        Args:
            scheduler: scheduler handed over to the base lister
            instance: name of the listed instance, ``"hex"`` by default
            credentials: credentials handed over to the base lister
        """
        super().__init__(
            url=self.HEX_API_URL,
            instance=instance,
            credentials=credentials,
            scheduler=scheduler,
        )
        # TODO: Add authentication support

        json_headers = {"Accept": "application/json"}
        self.session.headers.update(json_headers)

    def state_from_dict(self, d: Dict[str, Any]) -> HexListerState:
        """Rebuild a state object from its dictionary form."""
        state = HexListerState(**d)
        return state

    def state_to_dict(self, state: HexListerState) -> Dict[str, Any]:
        """Turn a state object into a plain dictionary."""
        as_dict = asdict(state)
        return as_dict

    def get_pages(self) -> Iterator[HexListerPage]:
        """Yield the decoded JSON body of each packages page, in page order.

        Listing starts at the page id found in the state (page 1 when there
        is none). Every fetched body is yielded, the first empty one
        included; iteration stops right after that empty body.
        """
        resume_from = self.state.last_page_id
        page_id = 1 if resume_from is None else resume_from

        packages_url = urljoin(self.url, self.PACKAGES_PATH)

        while True:
            # sort=name is actually the default
            query = {"page": page_id, "sort": "name"}
            packages = self.http_request(packages_url, params=query).json()

            yield packages  # Consider stopping before yielding?

            if len(packages) == 0:
                break  # Consider stopping if number of items < 100?

            page_id += 1

    def get_origins_from_page(self, page: HexListerPage) -> Iterator[ListedOrigin]:
        """Convert a page of HexLister repositories into a list of ListedOrigins

        Each package becomes one origin whose URL is the package ``html_url``
        and whose ``extra_loader_arguments`` carry, per release URL, the
        package name, the release version and the tarball URL.
        """
        assert self.lister_obj.id is not None

        for pkg in page:
            origin_url = pkg["html_url"]
            last_update = iso8601.parse_date(pkg["updated_at"])

            # Keyed by the API URL of each release.
            releases = {}
            for release in pkg["releases"]:
                release_url = release["url"]
                releases[release_url] = {
                    "package": pkg["name"],
                    "version": release["version"],
                    "tar_url": get_tar_url(pkg["name"], release["version"]),
                }

            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=self.VISIT_TYPE,
                url=origin_url,
                last_update=last_update,
                extra_loader_arguments={"releases": releases},
            )

    def commit_page(self, page: HexListerPage) -> None:
        """Record progress in the state once a page has been processed.

        Empty pages are ignored. Otherwise the state only moves forward when
        the last package name of the page is greater than the stored one.
        """
        if len(page) == 0:
            return

        page_last_name = page[-1]["name"]

        # incoming page should have alphabetically greater
        # last package name than the one stored in the state
        if page_last_name <= self.state.last_pkg_name:
            return

        self.state.last_pkg_name = page_last_name
        self.state.last_page_id += 1

    def finalize(self) -> None:
        """Flag the lister as updated when this run went past the stored page id."""
        stored_state = self.get_state_from_scheduler()

        moved_forward = self.state.last_page_id > stored_state.last_page_id
        if moved_forward:
            self.updated = True
|
23
swh/lister/hex/tasks.py
Normal file
23
swh/lister/hex/tasks.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Copyright (C) 2022 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from typing import Dict, Optional
|
||||
|
||||
from celery import shared_task
|
||||
|
||||
from .lister import HexLister
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".FullHexRelister")
def list_hex_full(
    instance: Optional[str] = None,
) -> Dict[str, int]:
    """Full update of a Hex.pm instance

    Args:
        instance: instance name forwarded to ``HexLister.from_configfile``

    Returns:
        The run statistics converted with ``.dict()``
    """
    hex_lister = HexLister.from_configfile(instance=instance)
    stats = hex_lister.run()
    return stats.dict()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".ping")
def _ping() -> str:
    """Trivial task that always answers ``"OK"``."""
    answer = "OK"
    return answer
|
0
swh/lister/hex/tests/__init__.py
Normal file
0
swh/lister/hex/tests/__init__.py
Normal file
190
swh/lister/hex/tests/data/https_hex.pm/page1.json
Normal file
190
swh/lister/hex/tests/data/https_hex.pm/page1.json
Normal file
|
@ -0,0 +1,190 @@
|
|||
[
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_aadya = hex 0.1.0",
|
||||
"mix.exs": "{:aadya, \"~> 0.1.0\"}",
|
||||
"rebar.config": "{aadya, \"0.1.0\"}"
|
||||
},
|
||||
"docs_html_url": "https://hexdocs.pm/aadya/",
|
||||
"downloads": {
|
||||
"all": 4199,
|
||||
"recent": 2
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/aadya",
|
||||
"inserted_at": "2018-03-12T02:13:42.826404Z",
|
||||
"latest_stable_version": "0.1.0",
|
||||
"latest_version": "0.1.0",
|
||||
"meta": {
|
||||
"description": "CoAP framework",
|
||||
"licenses": [
|
||||
"GNU Lesser General Public License v3.0"
|
||||
],
|
||||
"links": {
|
||||
"GitHub": "https://gitlab.com/ahamtech/coap/aadya.git"
|
||||
},
|
||||
"maintainers": [
|
||||
"Anwesh Reddy",
|
||||
"Mahesh Reddy",
|
||||
"Malreddy Ankanna"
|
||||
]
|
||||
},
|
||||
"name": "aadya",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/aadya/releases/0.1.0",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2018-03-12T02:19:58.150334Z",
|
||||
"url": "https://hex.pm/api/packages/aadya"
|
||||
},
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_active_job = hex 0.1.1",
|
||||
"mix.exs": "{:active_job, \"~> 0.1.1\"}",
|
||||
"rebar.config": "{active_job, \"0.1.1\"}"
|
||||
},
|
||||
"docs_html_url": null,
|
||||
"downloads": {
|
||||
"all": 575,
|
||||
"recent": 8
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/active_job",
|
||||
"inserted_at": "2022-05-04T05:07:26.204862Z",
|
||||
"latest_stable_version": "0.1.1",
|
||||
"latest_version": "0.1.1",
|
||||
"meta": {
|
||||
"description": "Declare job workers that can be run by a variety of queuing backends. This plugin is a port of the Rails ActiveJob gem",
|
||||
"licenses": [
|
||||
"MIT"
|
||||
],
|
||||
"links": {
|
||||
"GitHub": "https://github.com/chaskiq/ex-rails/active_job"
|
||||
},
|
||||
"maintainers": []
|
||||
},
|
||||
"name": "active_job",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": false,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/active_job/releases/0.1.1",
|
||||
"version": "0.1.1"
|
||||
},
|
||||
{
|
||||
"has_docs": false,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/active_job/releases/0.1.0",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2022-06-17T07:01:32.486546Z",
|
||||
"url": "https://hex.pm/api/packages/active_job"
|
||||
},
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_active_jorb = hex 0.1.2",
|
||||
"mix.exs": "{:active_jorb, \"~> 0.1.2\"}",
|
||||
"rebar.config": "{active_jorb, \"0.1.2\"}"
|
||||
},
|
||||
"docs_html_url": "https://hexdocs.pm/active_jorb/",
|
||||
"downloads": {
|
||||
"all": 7148,
|
||||
"recent": 10
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/active_jorb",
|
||||
"inserted_at": "2018-04-10T17:35:34.698754Z",
|
||||
"latest_stable_version": "0.1.2",
|
||||
"latest_version": "0.1.2",
|
||||
"meta": {
|
||||
"description": "A library to enqueue jobs with your Active Job job processor. You may want\nthis when strangling your Rails project.",
|
||||
"licenses": [
|
||||
"MIT"
|
||||
],
|
||||
"links": {
|
||||
"Github": "https://github.com/PrecisionNutrition/active_jorb"
|
||||
},
|
||||
"maintainers": [
|
||||
"James Herdman"
|
||||
]
|
||||
},
|
||||
"name": "active_jorb",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/active_jorb/releases/0.1.2",
|
||||
"version": "0.1.2"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/active_jorb/releases/0.1.1",
|
||||
"version": "0.1.1"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/active_jorb/releases/0.1.0",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2018-04-24T17:42:25.744971Z",
|
||||
"url": "https://hex.pm/api/packages/active_jorb"
|
||||
},
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_acx = hex 0.0.2",
|
||||
"mix.exs": "{:acx, \"~> 0.0.2\"}",
|
||||
"rebar.config": "{acx, \"0.0.2\"}"
|
||||
},
|
||||
"docs_html_url": "https://hexdocs.pm/acx/",
|
||||
"downloads": {
|
||||
"all": 4790,
|
||||
"recent": 8
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/acx",
|
||||
"inserted_at": "2018-01-22T06:52:21.027352Z",
|
||||
"latest_stable_version": "0.0.2",
|
||||
"latest_version": "0.0.2",
|
||||
"meta": {
|
||||
"description": "A Elixir wrap for API of Acx.io exchange.",
|
||||
"licenses": [
|
||||
"MIT"
|
||||
],
|
||||
"links": {
|
||||
"Github": "https://github.com/2pd/acx-elixir"
|
||||
},
|
||||
"maintainers": [
|
||||
"Liang Shi"
|
||||
]
|
||||
},
|
||||
"name": "acx",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/acx/releases/0.0.2",
|
||||
"version": "0.0.2"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/acx/releases/0.0.1",
|
||||
"version": "0.0.1"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2018-01-30T04:56:03.053561Z",
|
||||
"url": "https://hex.pm/api/packages/acx"
|
||||
}
|
||||
]
|
223
swh/lister/hex/tests/data/https_hex.pm/page2.json
Normal file
223
swh/lister/hex/tests/data/https_hex.pm/page2.json
Normal file
|
@ -0,0 +1,223 @@
|
|||
[
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_adam7 = hex 0.4.0",
|
||||
"mix.exs": "{:adam7, \"~> 0.4.0\"}",
|
||||
"rebar.config": "{adam7, \"0.4.0\"}"
|
||||
},
|
||||
"docs_html_url": null,
|
||||
"downloads": {
|
||||
"all": 12746,
|
||||
"recent": 27,
|
||||
"week": 10
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/adam7",
|
||||
"inserted_at": "2015-10-10T05:09:04.399996Z",
|
||||
"latest_stable_version": "0.4.0",
|
||||
"latest_version": "0.4.0",
|
||||
"meta": {
|
||||
"description": "Adam7 interlacing library for Elixir.\nPrimarily used for interlacing and de-interlacing image data for PNGs.",
|
||||
"licenses": [
|
||||
"MIT"
|
||||
],
|
||||
"links": {
|
||||
"github": "https://github.com/SenecaSystems/imagineer"
|
||||
},
|
||||
"maintainers": [
|
||||
"Chris Maddox"
|
||||
]
|
||||
},
|
||||
"name": "adam7",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": false,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/adam7/releases/0.4.0",
|
||||
"version": "0.4.0"
|
||||
},
|
||||
{
|
||||
"has_docs": false,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/adam7/releases/0.3.0",
|
||||
"version": "0.3.0"
|
||||
},
|
||||
{
|
||||
"has_docs": false,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/adam7/releases/0.2.0",
|
||||
"version": "0.2.0"
|
||||
},
|
||||
{
|
||||
"has_docs": false,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/adam7/releases/0.1.1",
|
||||
"version": "0.1.1"
|
||||
},
|
||||
{
|
||||
"has_docs": false,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/adam7/releases/0.1.0",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2015-10-10T05:09:04.400005Z",
|
||||
"url": "https://hex.pm/api/packages/adam7"
|
||||
},
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_addressBook = hex 0.1.1",
|
||||
"mix.exs": "{:addressBook, \"~> 0.1.1\"}",
|
||||
"rebar.config": "{addressBook, \"0.1.1\"}"
|
||||
},
|
||||
"docs_html_url": "https://hexdocs.pm/addressBook/",
|
||||
"downloads": {
|
||||
"all": 4871,
|
||||
"recent": 8,
|
||||
"week": 4
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/addressBook",
|
||||
"inserted_at": "2017-06-05T19:59:12.978909Z",
|
||||
"latest_stable_version": "0.1.1",
|
||||
"latest_version": "0.1.1",
|
||||
"meta": {
|
||||
"description": "Simple package for managing address book.",
|
||||
"licenses": [
|
||||
"Apache 2.0"
|
||||
],
|
||||
"links": {
|
||||
"GitHub": "https://github.com/maxiwoj/AddressBook"
|
||||
},
|
||||
"maintainers": [
|
||||
"Maksymilian Wojczuk"
|
||||
]
|
||||
},
|
||||
"name": "addressBook",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/addressBook/releases/0.1.1",
|
||||
"version": "0.1.1"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/addressBook/releases/0.1.0",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2017-06-05T21:06:42.788652Z",
|
||||
"url": "https://hex.pm/api/packages/addressBook"
|
||||
},
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_address_us = hex 0.4.1",
|
||||
"mix.exs": "{:address_us, \"~> 0.4.1\"}",
|
||||
"rebar.config": "{address_us, \"0.4.1\"}"
|
||||
},
|
||||
"docs_html_url": "https://hexdocs.pm/address_us/",
|
||||
"downloads": {
|
||||
"all": 55337,
|
||||
"day": 2,
|
||||
"recent": 7105,
|
||||
"week": 194
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/address_us",
|
||||
"inserted_at": "2014-10-10T20:24:11.000000Z",
|
||||
"latest_stable_version": "0.4.1",
|
||||
"latest_version": "0.4.1",
|
||||
"meta": {
|
||||
"description": "Library for parsing US Addresses into their individual parts.",
|
||||
"licenses": [
|
||||
"Apache 2.0"
|
||||
],
|
||||
"links": {
|
||||
"Docs": "https://smashedtoatoms.github.io/address_us",
|
||||
"GitHub": "https://github.com/smashedtoatoms/address_us"
|
||||
},
|
||||
"maintainers": []
|
||||
},
|
||||
"name": "address_us",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/address_us/releases/0.4.1",
|
||||
"version": "0.4.1"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/address_us/releases/0.4.0",
|
||||
"version": "0.4.0"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/address_us/releases/0.2.1",
|
||||
"version": "0.2.1"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/address_us/releases/0.1.1",
|
||||
"version": "0.1.1"
|
||||
},
|
||||
{
|
||||
"has_docs": false,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/address_us/releases/0.1.0",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2020-12-11T05:07:11.118292Z",
|
||||
"url": "https://hex.pm/api/packages/address_us"
|
||||
},
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_alchemy_vm = hex 0.8.1",
|
||||
"mix.exs": "{:alchemy_vm, \"~> 0.8.1\"}",
|
||||
"rebar.config": "{alchemy_vm, \"0.8.1\"}"
|
||||
},
|
||||
"docs_html_url": "https://hexdocs.pm/alchemy_vm/",
|
||||
"downloads": {
|
||||
"all": 2368,
|
||||
"recent": 3,
|
||||
"week": 2
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/alchemy_vm",
|
||||
"inserted_at": "2019-03-27T00:32:40.709924Z",
|
||||
"latest_stable_version": "0.8.1",
|
||||
"latest_version": "0.8.1",
|
||||
"meta": {
|
||||
"description": "A WebAssembly Virtual Machine",
|
||||
"licenses": [
|
||||
"MIT"
|
||||
],
|
||||
"links": {
|
||||
"Elixium Network Website": "https://www.elixiumnetwork.org",
|
||||
"GitHub": "https://github.com/ElixiumNetwork/AlchemyVM"
|
||||
},
|
||||
"maintainers": []
|
||||
},
|
||||
"name": "alchemy_vm",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/alchemy_vm/releases/0.8.1",
|
||||
"version": "0.8.1"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2019-03-27T00:32:47.822901Z",
|
||||
"url": "https://hex.pm/api/packages/alchemy_vm"
|
||||
}
|
||||
]
|
108
swh/lister/hex/tests/data/https_hex.pm/page3.json
Normal file
108
swh/lister/hex/tests/data/https_hex.pm/page3.json
Normal file
|
@ -0,0 +1,108 @@
|
|||
[
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_quagga_def = hex 0.4.0",
|
||||
"mix.exs": "{:quagga_def, \"~> 0.4.0\"}",
|
||||
"rebar.config": "{quagga_def, \"0.4.0\"}"
|
||||
},
|
||||
"docs_html_url": "https://hexdocs.pm/quagga_def/",
|
||||
"downloads": {
|
||||
"all": 106,
|
||||
"day": 12,
|
||||
"recent": 106,
|
||||
"week": 22
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/quagga_def",
|
||||
"inserted_at": "2022-10-12T07:03:48.666872Z",
|
||||
"latest_stable_version": "0.4.0",
|
||||
"latest_version": "0.4.0",
|
||||
"meta": {
|
||||
"description": "Quagga bamboo clump convention definitions and functions",
|
||||
"licenses": [
|
||||
"MIT"
|
||||
],
|
||||
"links": {
|
||||
"GitHub": "https://github.com/mwmiller/quagga_def"
|
||||
},
|
||||
"maintainers": []
|
||||
},
|
||||
"name": "quagga_def",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/quagga_def/releases/0.4.0",
|
||||
"version": "0.4.0"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/quagga_def/releases/0.3.0",
|
||||
"version": "0.3.0"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/quagga_def/releases/0.2.0",
|
||||
"version": "0.2.0"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/quagga_def/releases/0.1.0",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2022-11-29T11:41:15.862303Z",
|
||||
"url": "https://hex.pm/api/packages/quagga_def"
|
||||
},
|
||||
{
|
||||
"configs": {
|
||||
"erlang.mk": "dep_logger_dev = hex 0.1.1",
|
||||
"mix.exs": "{:logger_dev, \"~> 0.1.1\"}",
|
||||
"rebar.config": "{logger_dev, \"0.1.1\"}"
|
||||
},
|
||||
"docs_html_url": "https://hexdocs.pm/logger_dev/",
|
||||
"downloads": {
|
||||
"all": 188,
|
||||
"day": 4,
|
||||
"recent": 188,
|
||||
"week": 48
|
||||
},
|
||||
"html_url": "https://hex.pm/packages/logger_dev",
|
||||
"inserted_at": "2022-09-08T21:37:20.359224Z",
|
||||
"latest_stable_version": "0.1.1",
|
||||
"latest_version": "0.1.1",
|
||||
"meta": {
|
||||
"description": "A more readable formatter for Logger.Backends.Console",
|
||||
"licenses": [
|
||||
"MIT"
|
||||
],
|
||||
"links": {
|
||||
"GitHub": "https://github.com/protestContest/logger_dev"
|
||||
},
|
||||
"maintainers": []
|
||||
},
|
||||
"name": "logger_dev",
|
||||
"releases": [
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/logger_dev/releases/0.1.1",
|
||||
"version": "0.1.1"
|
||||
},
|
||||
{
|
||||
"has_docs": true,
|
||||
"inserted_at": null,
|
||||
"url": "https://hex.pm/api/packages/logger_dev/releases/0.1.0",
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"repository": "hexpm",
|
||||
"retirements": {},
|
||||
"updated_at": "2022-09-09T21:00:14.993273Z",
|
||||
"url": "https://hex.pm/api/packages/logger_dev"
|
||||
}
|
||||
]
|
141
swh/lister/hex/tests/test_lister.py
Normal file
141
swh/lister/hex/tests/test_lister.py
Normal file
|
@ -0,0 +1,141 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
from swh.lister.hex.lister import HexLister, ListedOrigin
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
|
||||
|
||||
@pytest.fixture
def hexpm_page(datadir):
    """Fixture providing a loader for the canned Hex API pages.

    The returned callable takes a page id, reads
    ``<datadir>/https_hex.pm/page<page_id>.json`` and returns a pair: the
    ``html_url`` of every package of the page, and the decoded page itself.
    """

    def get_page(page_id: int):
        # FIXME: Update the test data to match ?sort=name
        page_file = Path(datadir, "https_hex.pm", f"page{page_id}.json")
        packages = json.loads(page_file.read_text())
        html_urls = [package["html_url"] for package in packages]
        return html_urls, packages

    return get_page
|
||||
|
||||
|
||||
def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
    """Asserts that the two collections have the same origin URLs.

    Both sides are compared as sets, so order and duplicates do not matter.
    """
    expected_urls = set(lister_urls)
    recorded_urls = set()
    for origin in scheduler_origins:
        recorded_urls.add(origin.url)
    assert expected_urls == recorded_urls
|
||||
|
||||
|
||||
def test_full_lister_hex(
    swh_scheduler: SchedulerInterface,
    requests_mock,
    hexpm_page,
):
    """
    Simulate a full listing of packages for hex (erlang package manager)
    """
    # Pages 1 to 3 come from the canned data, page 4 is the empty end marker.
    expected_urls = []
    for page_id in (1, 2, 3):
        origin_urls, page_json = hexpm_page(page_id)
        expected_urls += origin_urls
        requests_mock.get(
            f"https://hex.pm/api/packages/?page={page_id}", json=page_json
        )
    requests_mock.get("https://hex.pm/api/packages/?page=4", json=[])

    lister = HexLister(swh_scheduler)

    stats = lister.run()
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    lister_state = lister.get_state_from_scheduler()

    assert stats.pages == 4
    assert stats.origins == 10  # 4 + 4 + 2 + 0

    check_listed_origins(expected_urls, scheduler_origins)

    assert lister_state.last_page_id == 4
    assert lister_state.last_pkg_name == "logger_dev"
    assert lister.updated
|
||||
|
||||
|
||||
def test_gogs_incremental_lister(
    swh_scheduler,
    requests_mock,
    hexpm_page,
):
    """Run the Hex lister three times in a row to exercise incremental listing.

    The first run sees two pages, the second run sees a third page appear and
    the third run finds nothing new.

    NOTE(review): the "gogs" in the name looks like a leftover; everything
    here targets HexLister. The name is kept so that existing test selections
    keep working.
    """
    lister = HexLister(swh_scheduler)

    # First run: P1 and P2 return 4 origins each and P3 returns 0
    p1_origin_urls, p1_json = hexpm_page(1)
    p2_origin_urls, p2_json = hexpm_page(2)

    requests_mock.get("https://hex.pm/api/packages/?page=1", json=p1_json)
    requests_mock.get("https://hex.pm/api/packages/?page=2", json=p2_json)
    requests_mock.get("https://hex.pm/api/packages/?page=3", json=[])

    stats = lister.run()

    assert stats.pages == 3
    assert stats.origins == 8

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    lister_state = lister.get_state_from_scheduler()
    assert lister_state.last_page_id == 3
    assert lister.state.last_pkg_name == "alchemy_vm"
    assert lister.updated

    check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins)

    lister.updated = False  # Reset the flag

    # Second run: P3 isn't empty anymore
    p3_origin_urls, p3_json = hexpm_page(3)

    requests_mock.get("https://hex.pm/api/packages/?page=3", json=p3_json)
    requests_mock.get(
        "https://hex.pm/api/packages/?page=4", json=[]
    )  # TODO: Try with 40x/50x here?

    stats = lister.run()

    assert stats.pages == 2
    assert stats.origins == 2

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    lister_state = lister.get_state_from_scheduler()
    assert (
        lister_state.last_page_id == 4
    )  # TODO: Shouldn't this be 3 given that P4 is empty?
    assert lister.state.last_pkg_name == "logger_dev"
    assert lister.updated

    check_listed_origins(
        p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
    )

    lister.updated = False  # Reset the flag

    # Third run: No new origins
    # The lister resumes from the stored page id (4), which is still empty,
    # hence a single page and no origin.

    stats = lister.run()

    assert stats.pages == 1
    assert (
        stats.origins == 0
    )  # FIXME: inconsistent with Gogs lister. Either of them could be wrong

    # Read the scheduler again so the final check covers what is recorded
    # after this run, not the list fetched after the second run.
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    lister_state = lister.get_state_from_scheduler()
    assert (
        lister_state.last_page_id == 4
    )  # TODO: Shouldn't this be 3 given that P4 is empty?
    assert lister.state.last_pkg_name == "logger_dev"
    assert lister.updated is False  # No new origins so state isn't updated

    check_listed_origins(
        p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
    )
|
56
swh/lister/hex/tests/test_tasks.py
Normal file
56
swh/lister/hex/tests/test_tasks.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
||||
|
||||
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The ping task of the hex lister completes and answers "OK"."""
    ping_result = swh_scheduler_celery_app.send_task("swh.lister.hex.tasks.ping")
    assert ping_result

    ping_result.wait()
    assert ping_result.successful()
    assert ping_result.result == "OK"
|
||||
|
||||
|
||||
@patch("swh.lister.hex.tasks.HexLister")
def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Without arguments, the full listing task builds the lister with
    ``instance=None`` and runs it once."""
    # The patched class returns itself from from_configfile, so the same mock
    # records both the construction and the run.
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    kwargs = {}
    res = swh_scheduler_celery_app.send_task(
        "swh.lister.hex.tasks.FullHexRelister",
        kwargs=kwargs,
    )
    assert res
    res.wait()
    assert res.successful()

    # The task fills in its default for the argument that was not sent.
    actual_kwargs = {**kwargs, "instance": None}

    lister.from_configfile.assert_called_once_with(**actual_kwargs)
    lister.run.assert_called_once_with()
|
||||
|
||||
|
||||
@patch("swh.lister.hex.tasks.HexLister")
def test_full_listing_params(
    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
    """An explicit ``instance`` sent to the full listing task reaches
    ``from_configfile`` unchanged."""
    # The patched class returns itself from from_configfile, so the same mock
    # records both the construction and the run.
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    kwargs = {"instance": "hex.pm"}
    res = swh_scheduler_celery_app.send_task(
        "swh.lister.hex.tasks.FullHexRelister",
        kwargs=kwargs,
    )
    assert res
    res.wait()
    assert res.successful()

    lister.from_configfile.assert_called_once_with(**kwargs)
    lister.run.assert_called_once_with()
|
|
@ -40,7 +40,7 @@ lister_args = {
|
|||
"origin_upstream": "https://git.savannah.gnu.org/cgit/guix.git/",
|
||||
},
|
||||
"fedora": {
|
||||
"url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases//",
|
||||
"url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
|
||||
},
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue