packagist: Reimplement lister using new Lister API
The previous implementation was generating tasks for a non-implemented Packagist loader. The new implementation extracts the source repository URL, VCS type and last update date for each package referenced by Packagist and sends that information to the scheduler. Package metadata are retrieved using Packagist API endpoints whose responses are served from static files, which are guaranteed to be efficient on the Packagist side (no dynamic queries). Furthermore, subsequent listings will send the "If-Modified-Since" HTTP header to only retrieve package metadata updated since the previous listing operation, in order to save bandwidth and return only origins which might have newly released versions. Closes T2991
This commit is contained in:
parent
82ab96ad06
commit
ff05191b7d
12 changed files with 842 additions and 220 deletions
|
@ -1,14 +1,12 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# Copyright (C) 2019-2021 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
def register():
|
||||
from .lister import PackagistLister
|
||||
from .models import PackagistModel
|
||||
|
||||
return {
|
||||
"models": [PackagistModel],
|
||||
"lister": PackagistLister,
|
||||
"task_modules": ["%s.tasks" % __name__],
|
||||
}
|
||||
|
|
|
@ -1,102 +1,182 @@
|
|||
# Copyright (C) 2019 The Software Heritage developers
|
||||
# Copyright (C) 2019-2021 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
import logging
|
||||
import random
|
||||
from typing import Any, Dict, List, Mapping
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
from swh.lister.core.lister_transports import ListerOnePageApiTransport
|
||||
from swh.lister.core.simple_lister import SimpleLister
|
||||
from swh.scheduler import utils
|
||||
import iso8601
|
||||
import requests
|
||||
|
||||
from .models import PackagistModel
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from .. import USER_AGENT
|
||||
from ..pattern import CredentialsType, Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PackagistPageType = List[str]
|
||||
|
||||
def compute_package_url(repo_name: str) -> str:
|
||||
"""Compute packgist package url from repo name.
|
||||
|
||||
@dataclass
|
||||
class PackagistListerState:
|
||||
"""State of Packagist lister"""
|
||||
|
||||
last_listing_date: Optional[datetime] = None
|
||||
"""Last date when packagist lister was executed"""
|
||||
|
||||
|
||||
class PackagistLister(Lister[PackagistListerState, PackagistPageType]):
|
||||
"""
|
||||
return "https://repo.packagist.org/p/%s.json" % repo_name
|
||||
List all Packagist projects and send associated origins to scheduler.
|
||||
|
||||
The lister queries the Packagist API, whose documentation can be found at
|
||||
https://packagist.org/apidoc.
|
||||
|
||||
class PackagistLister(ListerOnePageApiTransport, SimpleLister):
|
||||
"""List packages available in the Packagist package manager.
|
||||
|
||||
The lister sends the request to the url present in the class
|
||||
variable `PAGE`, to receive a list of all the package names
|
||||
present in the Packagist package manager. Iterates over all the
|
||||
packages and constructs the metadata url of the package from
|
||||
the name of the package and creates a loading task::
|
||||
|
||||
Task:
|
||||
Type: load-packagist
|
||||
Policy: recurring
|
||||
Args:
|
||||
<package_name>
|
||||
<package_metadata_url>
|
||||
|
||||
Example::
|
||||
|
||||
Task:
|
||||
Type: load-packagist
|
||||
Policy: recurring
|
||||
Args:
|
||||
'hypejunction/hypegamemechanics'
|
||||
'https://repo.packagist.org/p/hypejunction/hypegamemechanics.json'
|
||||
|
||||
For each package, its metadata are retrieved using Packagist API endpoints
|
||||
whose responses are served from static files, which are guaranteed to be
|
||||
efficient on the Packagist side (no dynamic queries).
|
||||
Furthermore, subsequent listing will send the "If-Modified-Since" HTTP
|
||||
header to only retrieve packages metadata updated since the previous listing
|
||||
operation in order to save bandwidth and return only origins which might have
|
||||
new released versions.
|
||||
"""
|
||||
|
||||
MODEL = PackagistModel
|
||||
LISTER_NAME = "packagist"
|
||||
PAGE = "https://packagist.org/packages/list.json"
|
||||
instance = "packagist"
|
||||
LISTER_NAME = "Packagist"
|
||||
PACKAGIST_PACKAGES_LIST_URL = "https://packagist.org/packages/list.json"
|
||||
PACKAGIST_REPO_BASE_URL = "https://repo.packagist.org/p"
|
||||
|
||||
def __init__(self, override_config=None):
|
||||
ListerOnePageApiTransport.__init__(self)
|
||||
SimpleLister.__init__(self, override_config=override_config)
|
||||
|
||||
def task_dict(
|
||||
self, origin_type: str, origin_url: str, **kwargs: Mapping[str, str]
|
||||
) -> Dict[str, Any]:
|
||||
"""Return task format dict
|
||||
|
||||
This is overridden from the lister_base as more information is
|
||||
needed for the ingestion task creation.
|
||||
|
||||
"""
|
||||
return utils.create_task_dict(
|
||||
"load-%s" % origin_type,
|
||||
kwargs.get("policy", "recurring"),
|
||||
kwargs.get("name"),
|
||||
origin_url,
|
||||
retries_left=3,
|
||||
def __init__(
|
||||
self, scheduler: SchedulerInterface, credentials: CredentialsType = None,
|
||||
):
|
||||
super().__init__(
|
||||
scheduler=scheduler,
|
||||
url=self.PACKAGIST_PACKAGES_LIST_URL,
|
||||
instance="packagist",
|
||||
credentials=credentials,
|
||||
)
|
||||
|
||||
def list_packages(self, response: Any) -> List[str]:
|
||||
"""List the actual packagist origins from the response.
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
{"Accept": "application/json", "User-Agent": USER_AGENT}
|
||||
)
|
||||
self.listing_date = datetime.now().astimezone(tz=timezone.utc)
|
||||
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> PackagistListerState:
    """Build the lister state from its dict representation.

    The ``last_listing_date`` entry, when present and not None, is parsed
    with ``iso8601.parse_date`` before the fields are passed to
    ``PackagistListerState``.

    Args:
        d: dict holding the state fields; it is not modified

    Returns:
        the ``PackagistListerState`` built from those fields
    """
    # Work on a shallow copy: overwriting the entry in the caller's dict would
    # leave a datetime behind, and a second call on the same dict would then
    # hand that datetime to iso8601.parse_date instead of a string.
    state_fields = dict(d)
    last_listing_date = state_fields.get("last_listing_date")
    if last_listing_date is not None:
        state_fields["last_listing_date"] = iso8601.parse_date(last_listing_date)
    return PackagistListerState(**state_fields)
|
||||
|
||||
def state_to_dict(self, state: PackagistListerState) -> Dict[str, Any]:
    """Serialize the lister state to a dict.

    Args:
        state: lister state to serialize

    Returns:
        a dict whose single ``last_listing_date`` key holds the ISO 8601
        string of ``state.last_listing_date``, or None when that date is unset
    """
    listing_date = state.last_listing_date
    serialized: Dict[str, Optional[str]] = {
        "last_listing_date": (
            None if listing_date is None else listing_date.isoformat()
        )
    }
    return serialized
|
||||
|
||||
def api_request(self, url: str) -> Any:
    """Fetch ``url`` with the lister HTTP session and return the decoded JSON.

    A warning is logged for any status code other than 200 or 304, then
    ``raise_for_status`` is called on the response so HTTP errors propagate
    to the caller.

    Args:
        url: URL to query

    Returns:
        the decoded JSON body when the status code is 200, an empty dict
        otherwise (notably for 304, whose response has no body)
    """
    logger.debug("Fetching URL %s", url)

    reply = self.session.get(url)
    status = reply.status_code

    if status != 200 and status != 304:
        logger.warning(
            "Unexpected HTTP status code %s on %s: %s",
            status,
            reply.url,
            reply.content,
        )

    reply.raise_for_status()

    if status == 200:
        return reply.json()
    # response is empty when status code is 304
    return {}
|
||||
|
||||
def get_pages(self) -> Iterator[PackagistPageType]:
|
||||
"""
|
||||
response = json.loads(response.text)
|
||||
packages = [name for name in response["packageNames"]]
|
||||
logger.debug("Number of packages: %s", len(packages))
|
||||
random.shuffle(packages)
|
||||
return packages
|
||||
|
||||
def get_model_from_repo(self, repo_name: str) -> Mapping[str, str]:
|
||||
"""Transform from repository representation to model
|
||||
|
||||
Yield a single page listing all Packagist projects.
|
||||
"""
|
||||
url = compute_package_url(repo_name)
|
||||
return {
|
||||
"uid": repo_name,
|
||||
"name": repo_name,
|
||||
"full_name": repo_name,
|
||||
"html_url": url,
|
||||
"origin_url": url,
|
||||
"origin_type": "packagist",
|
||||
}
|
||||
yield self.api_request(self.PACKAGIST_PACKAGES_LIST_URL)["packageNames"]
|
||||
|
||||
def get_origins_from_page(self, page: PackagistPageType) -> Iterator[ListedOrigin]:
    """
    Iterate on all Packagist projects and yield ListedOrigin instances.

    For each package name in ``page``, the package metadata are fetched from
    ``PACKAGIST_REPO_BASE_URL`` and the source repository URL, VCS type and
    most recent release date are extracted from the package versions.

    A package is skipped when:

    - its metadata request fails with an HTTP error,
    - the response holds no metadata for it (e.g. empty response on HTTP 304),
    - none of its versions declares both a source URL and a source type,
    - its source URL was already yielded for another package,
    - it is a Mercurial repository hosted on Bitbucket.

    Args:
        page: list of Packagist package names

    Yields:
        one ListedOrigin per distinct source repository URL
    """
    assert self.lister_obj.id is not None

    # save some bandwidth by only getting packages metadata updated since
    # last listing
    if self.state.last_listing_date is not None:
        if_modified_since = self.state.last_listing_date.strftime(
            "%a, %d %b %Y %H:%M:%S GMT"
        )
        self.session.headers["If-Modified-Since"] = if_modified_since

    # to ensure origins will not be listed multiple times
    origin_urls = set()

    for package_name in page:
        try:
            metadata = self.api_request(
                f"{self.PACKAGIST_REPO_BASE_URL}/{package_name}.json"
            )
        except requests.exceptions.HTTPError:
            # error when getting package metadata (usually 404 when a
            # package has been removed), skip it and process next package
            continue

        # empty when package metadata were not updated since last listing
        packages = metadata.get("packages") or {}
        if package_name not in packages:
            # missing package metadata in response
            continue
        versions_info = packages[package_name].values()

        origin_url = None
        visit_type = None
        last_update = None

        # extract origin url for package, vcs type and latest release date
        for version_info in versions_info:
            # "source" can be missing, null or an empty dict
            source = version_info.get("source") or {}
            version_url = source.get("url")
            # can be git, hg or svn
            version_type = source.get("type")
            if not version_url or not version_type:
                # Do not let a version without usable source info overwrite
                # the url / type found in another version, otherwise an
                # origin with an empty URL could be yielded.
                continue
            origin_url = version_url
            visit_type = version_type
            dist_time_str = version_info.get("time")
            if not dist_time_str:
                continue
            dist_time = iso8601.parse_date(dist_time_str)
            if last_update is None or dist_time > last_update:
                last_update = dist_time

        # skip package with already seen origin url or with missing required info
        if not visit_type or not origin_url or origin_url in origin_urls:
            continue

        # bitbucket closed its mercurial hosting service, those origins can not be
        # loaded into the archive anymore
        if visit_type == "hg" and origin_url.startswith("https://bitbucket.org/"):
            continue

        origin_urls.add(origin_url)

        logger.debug(
            "Found package %s last updated on %s", package_name, last_update
        )

        yield ListedOrigin(
            lister_id=self.lister_obj.id,
            url=origin_url,
            visit_type=visit_type,
            last_update=last_update,
        )
|
||||
|
||||
def finalize(self) -> None:
    """Store this run's listing date in the lister state and flag it as updated."""
    listing_date = self.listing_date
    self.state.last_listing_date = listing_date
    self.updated = True
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from sqlalchemy import Column, String
|
||||
|
||||
from ..core.models import ModelBase
|
||||
|
||||
|
||||
class PackagistModel(ModelBase):
|
||||
"""a Packagist repository representation
|
||||
|
||||
"""
|
||||
|
||||
__tablename__ = "packagist_repo"
|
||||
|
||||
uid = Column(String, primary_key=True)
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# Copyright (C) 2019-2021 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
@ -10,7 +10,7 @@ from .lister import PackagistLister
|
|||
@shared_task(name=__name__ + ".PackagistListerTask")
|
||||
def list_packagist(**lister_args):
|
||||
"List the packagist (php) registry"
|
||||
PackagistLister(**lister_args).run()
|
||||
return PackagistLister.from_configfile(**lister_args).run().dict()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".ping")
|
||||
|
|
|
@ -1,26 +0,0 @@
|
|||
# Copyright (C) 2019-2020 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def lister_under_test():
|
||||
return "packagist"
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def lister_packagist(swh_lister):
|
||||
# Amend the scheduler with an unknown yet load-packagist task type
|
||||
swh_lister.scheduler.create_task_type(
|
||||
{
|
||||
"type": "load-packagist",
|
||||
"description": "Load packagist origin",
|
||||
"backend_name": "swh.loader.package.tasks.LoaderPackagist",
|
||||
"default_interval": "1 day",
|
||||
}
|
||||
)
|
||||
|
||||
return swh_lister
|
78
swh/lister/packagist/tests/data/den1n_contextmenu.json
Normal file
78
swh/lister/packagist/tests/data/den1n_contextmenu.json
Normal file
|
@ -0,0 +1,78 @@
|
|||
{
|
||||
"packages": {
|
||||
"den1n/contextmenu": {
|
||||
"dev-default": {
|
||||
"name": "den1n/contextmenu",
|
||||
"description": "Context menu custom element.",
|
||||
"keywords": [
|
||||
"javascript",
|
||||
"JS",
|
||||
"contextmenu",
|
||||
"den1n"
|
||||
],
|
||||
"homepage": "https://bitbucket.org/den1n/contextmenu",
|
||||
"version": "dev-default",
|
||||
"version_normalized": "9999999-dev",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [{
|
||||
"name": "Dmitry Kadochnikov",
|
||||
"email": "iqmass@gmail.com"
|
||||
}],
|
||||
"source": {
|
||||
"type": "hg",
|
||||
"url": "https://bitbucket.org/den1n/contextmenu",
|
||||
"reference": "c207786b3dcf90fc7796a99dcb9e5fdb860ef2ba"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://bitbucket.org/den1n/contextmenu/get/c207786b3dcf90fc7796a99dcb9e5fdb860ef2ba.zip",
|
||||
"reference": "c207786b3dcf90fc7796a99dcb9e5fdb860ef2ba",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2019-08-27T10:42:55+00:00",
|
||||
"default-branch": true,
|
||||
"require": {
|
||||
"den1n/xelement": "^1.0"
|
||||
},
|
||||
"uid": 4101245
|
||||
},
|
||||
"v1.0.0": {
|
||||
"name": "den1n/contextmenu",
|
||||
"description": "Simple DOM JS context menu.",
|
||||
"keywords": [
|
||||
"javascript",
|
||||
"JS",
|
||||
"contextmenu",
|
||||
"den1n"
|
||||
],
|
||||
"homepage": "https://bitbucket.org/den1n/contextmenu",
|
||||
"version": "v1.0.0",
|
||||
"version_normalized": "1.0.0.0",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [{
|
||||
"name": "Dmitry Kadochnikov",
|
||||
"email": "iqmass@gmail.com"
|
||||
}],
|
||||
"source": {
|
||||
"type": "hg",
|
||||
"url": "https://bitbucket.org/den1n/contextmenu",
|
||||
"reference": "278e30a199d1f0e1a8789a4b798814722bd11065"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://bitbucket.org/den1n/contextmenu/get/278e30a199d1f0e1a8789a4b798814722bd11065.zip",
|
||||
"reference": "278e30a199d1f0e1a8789a4b798814722bd11065",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2018-03-07T10:08:41+00:00",
|
||||
"uid": 1968017
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,9 +0,0 @@
|
|||
{
|
||||
"packageNames": [
|
||||
"0.0.0/composer-include-files",
|
||||
"0.0.0/laravel-env-shim",
|
||||
"0.0.1/try-make-package",
|
||||
"0099ff/dialogflowphp",
|
||||
"00f100/array_dot"
|
||||
]
|
||||
}
|
83
swh/lister/packagist/tests/data/ljjackson_linnworks.json
Normal file
83
swh/lister/packagist/tests/data/ljjackson_linnworks.json
Normal file
|
@ -0,0 +1,83 @@
|
|||
{
|
||||
"packages": {
|
||||
"ljjackson/linnworks": {
|
||||
"0.1": {
|
||||
"name": "ljjackson/linnworks",
|
||||
"description": "A PHP API Integration of Linnworks.",
|
||||
"keywords": [],
|
||||
"homepage": "https://github.com/ljjackson",
|
||||
"version": "0.1",
|
||||
"version_normalized": "0.1.0.0",
|
||||
"license": [],
|
||||
"authors": [{
|
||||
"name": "Liam Jackson",
|
||||
"homepage": "https://github.com/ljjackson",
|
||||
"role": "Developer"
|
||||
}],
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/ljjackson/linnworks.git",
|
||||
"reference": "b2d16490823a8a9012a83b80cdcd6a129cfc5dea"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/ljjackson/linnworks/zipball/b2d16490823a8a9012a83b80cdcd6a129cfc5dea",
|
||||
"reference": "b2d16490823a8a9012a83b80cdcd6a129cfc5dea",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2018-10-22T19:52:25+00:00",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"LJJackson\\Linnworks\\": "src/"
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
"php": "^7.0",
|
||||
"guzzlehttp/guzzle": "^6.3",
|
||||
"ext-json": "*"
|
||||
},
|
||||
"uid": 2535139
|
||||
},
|
||||
"dev-master": {
|
||||
"name": "ljjackson/linnworks",
|
||||
"description": "A PHP API Integration of Linnworks.",
|
||||
"keywords": [],
|
||||
"homepage": "https://github.com/ljjackson",
|
||||
"version": "dev-master",
|
||||
"version_normalized": "9999999-dev",
|
||||
"license": [],
|
||||
"authors": [{
|
||||
"name": "Liam Jackson",
|
||||
"homepage": "https://github.com/ljjackson",
|
||||
"role": "Developer"
|
||||
}],
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/ljjackson/linnworks.git",
|
||||
"reference": "7c6b1209dc3bafad4284b130bda8450f3478ea26"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/ljjackson/linnworks/zipball/7c6b1209dc3bafad4284b130bda8450f3478ea26",
|
||||
"reference": "7c6b1209dc3bafad4284b130bda8450f3478ea26",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2018-11-01T21:45:50+00:00",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"LJJackson\\Linnworks\\": "src/"
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
"guzzlehttp/guzzle": "^6.3",
|
||||
"ext-json": "*",
|
||||
"php": "^7.1.3",
|
||||
"nesbot/carbon": "*"
|
||||
},
|
||||
"uid": 2517334
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
239
swh/lister/packagist/tests/data/lky_wx_article.json
Normal file
239
swh/lister/packagist/tests/data/lky_wx_article.json
Normal file
|
@ -0,0 +1,239 @@
|
|||
{
|
||||
"packages": {
|
||||
"lky/wx_article": {
|
||||
"1.0": {
|
||||
"name": "lky/wx_article",
|
||||
"description": "wx article editor",
|
||||
"keywords": [
|
||||
"laravel",
|
||||
"WxGzhArticle"
|
||||
],
|
||||
"homepage": "https://github.com/lky/wxgzharticle",
|
||||
"version": "1.0",
|
||||
"version_normalized": "1.0.0.0",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [{
|
||||
"name": "lky",
|
||||
"email": "2747865797@qq.com",
|
||||
"homepage": "http://lky.kim"
|
||||
}],
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/gitlky/wx_article.git",
|
||||
"reference": "bd1826f17a42a1d3da44c4562af3be370687466b"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/gitlky/wx_article/zipball/bd1826f17a42a1d3da44c4562af3be370687466b",
|
||||
"reference": "bd1826f17a42a1d3da44c4562af3be370687466b",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2018-08-28T06:51:46+00:00",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"lky\\WxGzhArticle\\": "src/"
|
||||
}
|
||||
},
|
||||
"extra": {
|
||||
"laravel": {
|
||||
"providers": [
|
||||
"lky\\WxGzhArticle\\WxGzhArticleServiceProvider"
|
||||
],
|
||||
"aliases": {
|
||||
"WxGzhArticle": "lky\\WxGzhArticle\\Facades\\WxGzhArticle"
|
||||
}
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
"illuminate/support": "~5",
|
||||
"ixudra/curl": "6.*",
|
||||
"guzzlehttp/guzzle": "6.*",
|
||||
"laravel/framework": "5.2.*",
|
||||
"php": ">=5.6.4"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~6.0",
|
||||
"orchestra/testbench": "~3.0"
|
||||
},
|
||||
"uid": 2493149
|
||||
},
|
||||
"dev-master": {
|
||||
"name": "lky/wx_article",
|
||||
"description": "wx article editor",
|
||||
"keywords": [
|
||||
"laravel",
|
||||
"WxGzhArticle"
|
||||
],
|
||||
"homepage": "https://github.com/lky/wx_article",
|
||||
"version": "dev-master",
|
||||
"version_normalized": "9999999-dev",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [{
|
||||
"name": "lky",
|
||||
"email": "2747865797@qq.com",
|
||||
"homepage": "http://lky.kim"
|
||||
}],
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/gitlky/wx_article.git",
|
||||
"reference": "9ef7cddfe1a9715cee52acc7a97d4f51d0f6e2be"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/gitlky/wx_article/zipball/9ef7cddfe1a9715cee52acc7a97d4f51d0f6e2be",
|
||||
"reference": "9ef7cddfe1a9715cee52acc7a97d4f51d0f6e2be",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2018-08-30T07:37:09+00:00",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"lky\\WxGzhArticle\\": "src/"
|
||||
}
|
||||
},
|
||||
"extra": {
|
||||
"laravel": {
|
||||
"providers": [
|
||||
"lky\\WxGzhArticle\\WxGzhArticleServiceProvider"
|
||||
],
|
||||
"aliases": {
|
||||
"WxGzhArticle": "lky\\WxGzhArticle\\Facades\\WxGzhArticle"
|
||||
}
|
||||
}
|
||||
},
|
||||
"default-branch": true,
|
||||
"require": {
|
||||
"ixudra/curl": "6.*",
|
||||
"guzzlehttp/guzzle": "6.*",
|
||||
"laravel/framework": ">=5.2.0",
|
||||
"php": ">=5.6.4"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~6.0",
|
||||
"orchestra/testbench": "~3.0"
|
||||
},
|
||||
"uid": 4096807
|
||||
},
|
||||
"v1.2": {
|
||||
"name": "lky/wx_article",
|
||||
"description": "wx article editor",
|
||||
"keywords": [
|
||||
"laravel",
|
||||
"WxGzhArticle"
|
||||
],
|
||||
"homepage": "https://github.com/lky/wx_article",
|
||||
"version": "v1.2",
|
||||
"version_normalized": "1.2.0.0",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [{
|
||||
"name": "lky",
|
||||
"email": "2747865797@qq.com",
|
||||
"homepage": "http://lky.kim"
|
||||
}],
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/gitlky/wx_article.git",
|
||||
"reference": "d332d20b8d848018c7e6a43e7fe47a78cdb926b7"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/gitlky/wx_article/zipball/d332d20b8d848018c7e6a43e7fe47a78cdb926b7",
|
||||
"reference": "d332d20b8d848018c7e6a43e7fe47a78cdb926b7",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"lky\\WxGzhArticle\\": "src/"
|
||||
}
|
||||
},
|
||||
"extra": {
|
||||
"laravel": {
|
||||
"providers": [
|
||||
"lky\\WxGzhArticle\\WxGzhArticleServiceProvider"
|
||||
],
|
||||
"aliases": {
|
||||
"WxGzhArticle": "lky\\WxGzhArticle\\Facades\\WxGzhArticle"
|
||||
}
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
"ixudra/curl": "6.*",
|
||||
"guzzlehttp/guzzle": "6.*",
|
||||
"laravel/framework": ">=5.2.0",
|
||||
"php": ">=5.6.4"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~6.0",
|
||||
"orchestra/testbench": "~3.0"
|
||||
},
|
||||
"uid": 2493150
|
||||
},
|
||||
"v1.6": {
|
||||
"name": "lky/wx_article",
|
||||
"description": "wx article editor",
|
||||
"keywords": [
|
||||
"laravel",
|
||||
"WxGzhArticle"
|
||||
],
|
||||
"homepage": "https://github.com/lky/wx_article",
|
||||
"version": "v1.6",
|
||||
"version_normalized": "1.6.0.0",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [{
|
||||
"name": "lky",
|
||||
"email": "2747865797@qq.com",
|
||||
"homepage": "http://lky.kim"
|
||||
}],
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/gitlky/wx_article.git",
|
||||
"reference": "d332d20b8d848018c7e6a43e7fe47a78cdb926b7"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/gitlky/wx_article/zipball/d332d20b8d848018c7e6a43e7fe47a78cdb926b7",
|
||||
"reference": "d332d20b8d848018c7e6a43e7fe47a78cdb926b7",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2018-08-29T08:26:06+00:00",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"lky\\WxGzhArticle\\": "src/"
|
||||
}
|
||||
},
|
||||
"extra": {
|
||||
"laravel": {
|
||||
"providers": [
|
||||
"lky\\WxGzhArticle\\WxGzhArticleServiceProvider"
|
||||
],
|
||||
"aliases": {
|
||||
"WxGzhArticle": "lky\\WxGzhArticle\\Facades\\WxGzhArticle"
|
||||
}
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
"ixudra/curl": "6.*",
|
||||
"guzzlehttp/guzzle": "6.*",
|
||||
"laravel/framework": ">=5.2.0",
|
||||
"php": ">=5.6.4"
|
||||
},
|
||||
"require-dev": {
|
||||
"phpunit/phpunit": "~6.0",
|
||||
"orchestra/testbench": "~3.0"
|
||||
},
|
||||
"uid": 2427550
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
141
swh/lister/packagist/tests/data/spryker-eco_computop-api.json
Normal file
141
swh/lister/packagist/tests/data/spryker-eco_computop-api.json
Normal file
|
@ -0,0 +1,141 @@
|
|||
{
|
||||
"packages": {
|
||||
"spryker-eco/computop-api": {
|
||||
"1.0.0": {
|
||||
"name": "spryker-eco/computop-api",
|
||||
"description": "Computop API Module",
|
||||
"keywords": [],
|
||||
"homepage": "",
|
||||
"version": "1.0.0",
|
||||
"version_normalized": "1.0.0.0",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [],
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/spryker-eco/computop-api.git",
|
||||
"reference": "d75dc7d2c80bd93e65081b26433ee559d2c92f0a"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/spryker-eco/computop-api/zipball/d75dc7d2c80bd93e65081b26433ee559d2c92f0a",
|
||||
"reference": "d75dc7d2c80bd93e65081b26433ee559d2c92f0a",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2018-08-31T11:51:23+00:00",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"SprykerEco\\": "src/SprykerEco/"
|
||||
}
|
||||
},
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "1.0.x-dev"
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
"php": ">=7.1",
|
||||
"spryker/kernel": "^3.0.0",
|
||||
"spryker/transfer": "^3.0.0",
|
||||
"spryker/util-text": "^1.0.0",
|
||||
"spryker/guzzle": "^2.2.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"spryker/code-sniffer": "dev-master"
|
||||
},
|
||||
"uid": 2432548
|
||||
},
|
||||
"dev-dev": {
|
||||
"name": "spryker-eco/computop-api",
|
||||
"description": "Computop API Module",
|
||||
"keywords": [],
|
||||
"homepage": "",
|
||||
"version": "dev-dev",
|
||||
"version_normalized": "dev-dev",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [],
|
||||
"source": {},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/spryker-eco/computop-api/zipball/7a695d1e412132296546d072364f410186572790",
|
||||
"reference": "7a695d1e412132296546d072364f410186572790",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2018-08-31T11:38:22+00:00",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"SprykerEco\\": "src/SprykerEco/"
|
||||
}
|
||||
},
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "1.0.x-dev"
|
||||
}
|
||||
},
|
||||
"require": {
|
||||
"php": ">=7.1",
|
||||
"spryker/kernel": "^3.0.0",
|
||||
"spryker/transfer": "^3.0.0",
|
||||
"spryker/util-text": "^1.0.0",
|
||||
"spryker/guzzle": "^2.2.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"spryker/code-sniffer": "dev-master"
|
||||
},
|
||||
"uid": 2209824
|
||||
},
|
||||
"dev-master": {
|
||||
"name": "spryker-eco/computop-api",
|
||||
"description": "ComputopApi module",
|
||||
"keywords": [],
|
||||
"homepage": "",
|
||||
"version": "dev-master",
|
||||
"version_normalized": "9999999-dev",
|
||||
"license": [
|
||||
"MIT"
|
||||
],
|
||||
"authors": [],
|
||||
"source": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/spryker-eco/computop-api.git",
|
||||
"reference": "7ac81d5db52c0639bc06a61a35d7738a964fde88"
|
||||
},
|
||||
"dist": {
|
||||
"type": "zip",
|
||||
"url": "https://api.github.com/repos/spryker-eco/computop-api/zipball/7ac81d5db52c0639bc06a61a35d7738a964fde88",
|
||||
"reference": "7ac81d5db52c0639bc06a61a35d7738a964fde88",
|
||||
"shasum": ""
|
||||
},
|
||||
"type": "library",
|
||||
"time": "2020-06-22T15:50:29+00:00",
|
||||
"autoload": {
|
||||
"psr-4": {
|
||||
"SprykerEco\\": "src/SprykerEco/"
|
||||
}
|
||||
},
|
||||
"extra": {
|
||||
"branch-alias": {
|
||||
"dev-master": "1.0.x-dev"
|
||||
}
|
||||
},
|
||||
"default-branch": true,
|
||||
"require": {
|
||||
"php": ">=7.1",
|
||||
"spryker/kernel": "^3.0.0",
|
||||
"spryker/transfer": "^3.0.0",
|
||||
"spryker/util-text": "^1.0.0",
|
||||
"spryker/guzzle": "^2.2.0"
|
||||
},
|
||||
"require-dev": {
|
||||
"spryker/code-sniffer": "dev-master"
|
||||
},
|
||||
"uid": 4006827
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,104 +1,159 @@
|
|||
# Copyright (C) 2019 The Software Heritage developers
|
||||
# Copyright (C) 2019-2021 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import requests_mock
|
||||
import iso8601
|
||||
|
||||
from swh.lister.core.tests.test_lister import HttpSimpleListerTester
|
||||
from swh.lister.packagist.lister import PackagistLister, compute_package_url
|
||||
from swh.lister.packagist.lister import PackagistLister
|
||||
|
||||
expected_packages = [
|
||||
"0.0.0/composer-include-files",
|
||||
"0.0.0/laravel-env-shim",
|
||||
"0.0.1/try-make-package",
|
||||
"0099ff/dialogflowphp",
|
||||
"00f100/array_dot",
|
||||
]
|
||||
|
||||
expected_model = {
|
||||
"uid": "0099ff/dialogflowphp",
|
||||
"name": "0099ff/dialogflowphp",
|
||||
"full_name": "0099ff/dialogflowphp",
|
||||
"html_url": "https://repo.packagist.org/p/0099ff/dialogflowphp.json",
|
||||
"origin_url": "https://repo.packagist.org/p/0099ff/dialogflowphp.json",
|
||||
"origin_type": "packagist",
|
||||
_packages_list = {
|
||||
"packageNames": [
|
||||
"ljjackson/linnworks",
|
||||
"lky/wx_article",
|
||||
"spryker-eco/computop-api",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
class PackagistListerTester(HttpSimpleListerTester, unittest.TestCase):
|
||||
Lister = PackagistLister
|
||||
PAGE = "https://packagist.org/packages/list.json"
|
||||
lister_subdir = "packagist"
|
||||
good_api_response_file = "data/https_packagist.org/packages_list.json"
|
||||
entries = 5
|
||||
def _package_metadata(datadir, package_name):
    """Load the JSON metadata fixture of a Packagist package.

    The fixture is looked up in ``datadir`` under the package name with every
    "/" replaced by "_" and a ".json" suffix appended
    (e.g. "ljjackson/linnworks" -> "ljjackson_linnworks.json").

    Args:
        datadir: directory holding the test data files
        package_name: Packagist package name

    Returns:
        the decoded JSON content of the fixture
    """
    fixture_path = Path(datadir, f"{package_name.replace('/', '_')}.json")
    # JSON files are UTF-8: do not depend on the platform default encoding
    return json.loads(fixture_path.read_text(encoding="utf-8"))
|
||||
|
||||
@requests_mock.Mocker()
|
||||
def test_list_packages(self, http_mocker):
|
||||
"""List packages from simple api page should retrieve all packages within
|
||||
|
||||
"""
|
||||
http_mocker.get(self.PAGE, text=self.mock_response)
|
||||
fl = self.get_fl()
|
||||
packages = fl.list_packages(self.get_api_response(0))
|
||||
def _package_origin_info(package_name, package_metadata):
    """Compute the origin info expected for a package from its metadata.

    Args:
        package_name: Packagist package name
        package_metadata: decoded package metadata, with the versions stored
            under ``package_metadata["packages"][package_name]``

    Returns:
        a ``(origin_url, visit_type, last_update)`` tuple: source URL and
        source type of the last version iterated, and the most recent "time"
        value found across all versions (None when no version has one)
    """
    url = kind = newest = None
    versions = package_metadata["packages"][package_name]
    for info in versions.values():
        source = info["source"]
        url, kind = source.get("url"), source.get("type")
        if "time" not in info:
            continue
        released = iso8601.parse_date(info["time"])
        if newest is None or released > newest:
            newest = released
    return url, kind, newest
|
||||
|
||||
for package in expected_packages:
|
||||
assert package in packages
|
||||
|
||||
def test_transport_response_simplified(self):
|
||||
"""Test model created by the lister
|
||||
def _request_without_if_modified_since(request):
    """Tell whether ``request`` has no ``If-Modified-Since`` header.

    Used as ``additional_matcher`` when registering mocked responses, so
    that a response is only served to unconditional requests.

    Args:
        request: request object exposing a ``headers`` mapping

    Returns:
        ``True`` when the header is absent, ``False`` otherwise.
    """
    return request.headers.get("If-Modified-Since") is None
|
||||
|
||||
"""
|
||||
fl = self.get_fl()
|
||||
model = fl.transport_response_simplified(["0099ff/dialogflowphp"])
|
||||
assert len(model) == 1
|
||||
for key, values in model[0].items():
|
||||
assert values == expected_model[key]
|
||||
|
||||
@patch("swh.lister.packagist.lister.utils.create_task_dict")
|
||||
def test_task_dict(self, mock_create_tasks):
|
||||
"""Test the task creation of lister
|
||||
def _request_with_if_modified_since(request):
    """Tell whether ``request`` carries an ``If-Modified-Since`` header.

    Used as ``additional_matcher`` when registering mocked responses, so
    that a response (a 304 status in this file) is only served to
    conditional requests.

    Args:
        request: request object exposing a ``headers`` mapping

    Returns:
        ``True`` when the header is present, ``False`` otherwise.
    """
    return request.headers.get("If-Modified-Since") is not None
|
||||
|
||||
"""
|
||||
fl = self.get_fl()
|
||||
fl.task_dict(
|
||||
origin_type="packagist", origin_url="https://abc", name="test_pack"
|
||||
|
||||
def test_packagist_lister(swh_scheduler, requests_mock, datadir):
|
||||
# first listing, should return one origin per package
|
||||
lister = PackagistLister(scheduler=swh_scheduler)
|
||||
requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list)
|
||||
packages_metadata = {}
|
||||
for package_name in _packages_list["packageNames"]:
|
||||
metadata = _package_metadata(datadir, package_name)
|
||||
packages_metadata[package_name] = metadata
|
||||
requests_mock.get(
|
||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json",
|
||||
json=metadata,
|
||||
additional_matcher=_request_without_if_modified_since,
|
||||
)
|
||||
mock_create_tasks.assert_called_once_with(
|
||||
"load-packagist", "recurring", "test_pack", "https://abc", retries_left=3
|
||||
stats = lister.run()
|
||||
|
||||
assert stats.pages == 1
|
||||
assert stats.origins == len(_packages_list["packageNames"])
|
||||
assert lister.updated
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
|
||||
for package_name, package_metadata in packages_metadata.items():
|
||||
origin_url, visit_type, last_update = _package_origin_info(
|
||||
package_name, package_metadata
|
||||
)
|
||||
filtered_origins = [o for o in scheduler_origins if o.url == origin_url]
|
||||
assert filtered_origins
|
||||
assert filtered_origins[0].visit_type == visit_type
|
||||
assert filtered_origins[0].last_update == last_update
|
||||
|
||||
# second listing, should return 0 origins as no package metadata
|
||||
# has been updated since first listing
|
||||
lister = PackagistLister(scheduler=swh_scheduler)
|
||||
for package_name in _packages_list["packageNames"]:
|
||||
requests_mock.get(
|
||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json",
|
||||
additional_matcher=_request_with_if_modified_since,
|
||||
status_code=304,
|
||||
)
|
||||
|
||||
assert lister.get_state_from_scheduler().last_listing_date is not None
|
||||
|
||||
def test_compute_package_url():
|
||||
expected_url = "https://repo.packagist.org/p/hello.json"
|
||||
actual_url = compute_package_url("hello")
|
||||
assert actual_url == expected_url
|
||||
stats = lister.run()
|
||||
|
||||
assert stats.pages == 1
|
||||
assert stats.origins == 0
|
||||
assert lister.updated
|
||||
|
||||
|
||||
def test_packagist_lister(lister_packagist, requests_mock_datadir):
|
||||
lister_packagist.run()
|
||||
def test_packagist_lister_missing_metadata(swh_scheduler, requests_mock, datadir):
|
||||
lister = PackagistLister(scheduler=swh_scheduler)
|
||||
requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list)
|
||||
for package_name in _packages_list["packageNames"]:
|
||||
requests_mock.get(
|
||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json",
|
||||
additional_matcher=_request_without_if_modified_since,
|
||||
status_code=404,
|
||||
)
|
||||
|
||||
r = lister_packagist.scheduler.search_tasks(task_type="load-packagist")
|
||||
assert len(r) == 5
|
||||
stats = lister.run()
|
||||
|
||||
for row in r:
|
||||
assert row["type"] == "load-packagist"
|
||||
# arguments check
|
||||
args = row["arguments"]["args"]
|
||||
assert len(args) == 2
|
||||
assert stats.pages == 1
|
||||
assert stats.origins == 0
|
||||
|
||||
package = args[0]
|
||||
url = args[1]
|
||||
|
||||
expected_url = compute_package_url(package)
|
||||
assert url == expected_url
|
||||
def test_packagist_lister_empty_metadata(swh_scheduler, requests_mock, datadir):
|
||||
lister = PackagistLister(scheduler=swh_scheduler)
|
||||
requests_mock.get(lister.PACKAGIST_PACKAGES_LIST_URL, json=_packages_list)
|
||||
for package_name in _packages_list["packageNames"]:
|
||||
requests_mock.get(
|
||||
f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json",
|
||||
additional_matcher=_request_without_if_modified_since,
|
||||
json={"packages": {}},
|
||||
)
|
||||
|
||||
# kwargs
|
||||
kwargs = row["arguments"]["kwargs"]
|
||||
assert kwargs == {}
|
||||
stats = lister.run()
|
||||
|
||||
assert row["policy"] == "recurring"
|
||||
assert row["priority"] is None
|
||||
assert stats.pages == 1
|
||||
assert stats.origins == 0
|
||||
|
||||
|
||||
def test_packagist_lister_package_with_bitbucket_hg_origin(
    swh_scheduler, requests_mock, datadir
):
    """Listing the single package ``den1n/contextmenu`` yields no origin.

    The packages list is mocked to contain only that package, whose metadata
    is served from the fixture found in ``datadir``.

    NOTE(review): going by the test name, the package source is presumably a
    Mercurial repository hosted on Bitbucket that the lister filters out --
    confirm against the lister implementation and the fixture content.
    """
    package_name = "den1n/contextmenu"
    lister = PackagistLister(scheduler=swh_scheduler)
    requests_mock.get(
        lister.PACKAGIST_PACKAGES_LIST_URL, json={"packageNames": [package_name]}
    )
    requests_mock.get(
        f"{lister.PACKAGIST_REPO_BASE_URL}/{package_name}.json",
        additional_matcher=_request_without_if_modified_since,
        json=_package_metadata(datadir, package_name),
    )

    stats = lister.run()

    # one page of packages was processed but no origin was sent to the
    # scheduler
    assert stats.pages == 1
    assert stats.origins == 0
|
||||
|
||||
|
||||
def test_lister_from_configfile(swh_scheduler_config, mocker):
    """A lister built with ``from_configfile`` gets a scheduler and credentials.

    ``swh.lister.pattern.load_from_envvar`` is patched so that the
    configuration comes from the test fixtures rather than from a file.
    """
    load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar")
    load_from_envvar.return_value = {
        "scheduler": {"cls": "local", **swh_scheduler_config},
        "credentials": {},
    }
    lister = PackagistLister.from_configfile()
    assert lister.scheduler is not None
    assert lister.credentials is not None
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# Copyright (C) 2019-2020 the Software Heritage developers
|
||||
# Copyright (C) 2019-2021 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from unittest.mock import patch
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
||||
|
||||
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
||||
|
@ -13,11 +13,11 @@ def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
|||
assert res.result == "OK"
|
||||
|
||||
|
||||
@patch("swh.lister.packagist.tasks.PackagistLister")
|
||||
def test_lister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
||||
# setup the mocked PackagistLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
def test_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
|
||||
lister = mocker.patch("swh.lister.packagist.tasks.PackagistLister")
|
||||
lister.from_configfile.return_value = lister
|
||||
stats = ListerStats(pages=1, origins=286500)
|
||||
lister.run.return_value = stats
|
||||
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.packagist.tasks.PackagistListerTask"
|
||||
|
@ -25,7 +25,7 @@ def test_lister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
|||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
assert res.result == stats.dict()
|
||||
|
||||
lister.assert_called_once_with()
|
||||
lister.db_last_index.assert_not_called()
|
||||
lister.from_configfile.assert_called_once_with()
|
||||
lister.run.assert_called_once_with()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue