Reimplement Gitea lister using new Lister API

The lister is stateless and has full listing capability.
It can request the Gitea API using HTTP token authentication.
Rate limiting was not encountered in practice but is handled generically.
Add support for retrieving each repository's last update date through the API.
This commit is contained in:
tenma 2021-01-21 15:27:42 +01:00
parent 7892077a89
commit c780ad4b44
10 changed files with 677 additions and 615 deletions

View file

@ -5,10 +5,9 @@
def register():
from .lister import GiteaLister
from .models import GiteaModel
return {
"models": [GiteaModel],
"models": [],
"lister": GiteaLister,
"task_modules": ["%s.tasks" % __name__],
}

View file

@ -1,89 +1,135 @@
# Copyright (C) 2018-2020 The Software Heritage developers
# Copyright (C) 2018-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
from typing import Any, Dict, List, MutableMapping, Optional, Tuple
import logging
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import urljoin
from requests import Response
import iso8601
import requests
from tenacity.before_sleep import before_sleep_log
from urllib3.util import parse_url
from ..core.page_by_page_lister import PageByPageHttpLister
from .models import GiteaModel
from swh.lister.utils import throttling_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from .. import USER_AGENT
from ..pattern import CredentialsType, StatelessLister
logger = logging.getLogger(__name__)
RepoListPage = List[Dict[str, Any]]
class GiteaLister(PageByPageHttpLister):
# Template path expecting an integer that represents the page id
PATH_TEMPLATE = "repos/search?page=%d&sort=id"
DEFAULT_URL = "https://try.gitea.io/api/v1/"
MODEL = GiteaModel
class GiteaLister(StatelessLister[RepoListPage]):
"""List origins from Gitea.
Gitea API documentation: https://try.gitea.io/api/swagger
The API does pagination and provides navigation URLs through the 'Link' header.
The default value for page size is the maximum value observed on the instances
accessible at https://try.gitea.io/api/v1/ and https://codeberg.org/api/v1/."""
LISTER_NAME = "gitea"
REPO_LIST_PATH = "repos/search"
def __init__(
self, url=None, instance=None, override_config=None, order="asc", limit=3
self,
scheduler: SchedulerInterface,
url: str,
instance: Optional[str] = None,
api_token: Optional[str] = None,
page_size: int = 50,
credentials: CredentialsType = None,
):
super().__init__(url=url, override_config=override_config)
if instance is None:
instance = parse_url(self.url).host
self.instance = instance
self.PATH_TEMPLATE = "%s&order=%s&limit=%s" % (
self.PATH_TEMPLATE,
order,
limit,
instance = parse_url(url).host
super().__init__(
scheduler=scheduler, credentials=credentials, url=url, instance=instance,
)
def get_model_from_repo(self, repo: Dict[str, Any]) -> Dict[str, Any]:
return {
"instance": self.instance,
"uid": f'{self.instance}/{repo["id"]}',
"name": repo["name"],
"full_name": repo["full_name"],
"html_url": repo["html_url"],
"origin_url": repo["clone_url"],
"origin_type": "git",
self.query_params = {
"sort": "id",
"order": "asc",
"limit": page_size,
"page": 1,
}
def get_next_target_from_response(self, response: Response) -> Optional[int]:
"""Determine the next page identifier.
"""
if "next" in response.links:
next_url = response.links["next"]["url"]
return self.get_page_from_url(next_url)
return None
def get_page_from_url(self, url: str) -> int:
page_re = re.compile(r"^.*/search\?.*page=(\d+)")
return int(page_re.match(url).group(1)) # type: ignore
def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]:
repos = response.json()["data"]
return [self.get_model_from_repo(repo) for repo in repos]
def get_pages_information(
self,
) -> Tuple[Optional[int], Optional[int], Optional[int]]:
"""Determine pages information.
"""
response = self.transport_head(identifier=1) # type: ignore
if not response.ok:
raise ValueError(
"Problem during information fetch: %s" % response.status_code
)
h = response.headers
return (
self._get_int(h, "x-total-count"),
int(self.get_page_from_url(response.links["last"]["url"])),
self._get_int(h, "x-per-page"),
self.session = requests.Session()
self.session.headers.update(
{"Accept": "application/json", "User-Agent": USER_AGENT,}
)
def _get_int(self, headers: MutableMapping[str, Any], key: str) -> Optional[int]:
_val = headers.get(key)
if _val:
return int(_val)
return None
if api_token is None and len(self.credentials) > 0:
logger.warning(
"Gitea lister support only API token authentication "
" as of now. Will use the first password as token."
)
api_token = self.credentials[0]["password"]
def run(self, min_bound=1, max_bound=None, check_existence=False):
return super().run(min_bound, max_bound, check_existence)
if api_token:
self.session.headers["Authorization"] = "Token %s" % api_token
# NOTE(review): throttling_retry presumably retries rate-limited requests;
# before_sleep_log emits a WARNING record before each retry sleep -- confirm
# the retry policy against swh.lister.utils.
@throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
    """Fetch one page of the repository listing.

    Sends a GET request for ``url`` with ``params`` through the lister's
    shared session.

    Args:
        url: URL to request.
        params: query parameters forwarded to the GET request.

    Returns:
        The HTTP response, provided ``raise_for_status`` did not raise.

    Raises:
        requests.HTTPError: raised by ``raise_for_status`` on an error status.
    """
    logger.info("Fetching URL %s with params %s", url, params)

    reply = self.session.get(url, params=params)
    if reply.status_code == 200:
        return reply

    # Anything other than a plain 200 is logged together with its body
    # before deciding whether it is an actual error.
    logger.warning(
        "Unexpected HTTP status code %s on %s: %s",
        reply.status_code,
        reply.url,
        reply.content,
    )
    reply.raise_for_status()
    return reply
@classmethod
def results_simplified(cls, body: Dict[str, RepoListPage]) -> RepoListPage:
    """Reduce a decoded API response to the fields the lister uses.

    Args:
        body: decoded JSON response; its ``"data"`` entry holds the
            repository records.

    Returns:
        One dict per repository, restricted to the ``id``, ``clone_url`` and
        ``updated_at`` keys.

    Raises:
        KeyError: if ``"data"`` or one of the kept keys is missing.
    """
    kept_keys = ["id", "clone_url", "updated_at"]
    simplified = []
    for record in body["data"]:
        simplified.append({key: record[key] for key in kept_keys})
    return simplified
def get_pages(self) -> Iterator[RepoListPage]:
    """Yield the simplified content of each listing page, in order.

    The first request targets the repository search path with the lister's
    query parameters. Each following request uses the URL advertised under
    the ``next`` relation of the previous response's links, with no extra
    parameters. Iteration stops once a response advertises no ``next`` link.

    Raises:
        AssertionError: if a response carries no links at all.
    """
    # urljoin needs a base ending with a slash and a path without a leading
    # slash for the path to be appended to the base.
    target: str = urljoin(self.url, self.REPO_LIST_PATH)
    params = self.query_params

    while True:
        response = self.page_request(target, params)
        yield self.results_simplified(response.json())

        assert len(response.links) > 0, "API changed: no Link header found"
        if "next" not in response.links:
            # last page
            return

        # Follow-up requests pass no parameters of their own; the "next" URL
        # is requested as given.
        target = response.links["next"]["url"]
        params = {}
def get_origins_from_page(self, page: RepoListPage) -> Iterator[ListedOrigin]:
    """Yield one ``ListedOrigin`` per Gitea repository found in ``page``.

    Each origin is a ``git`` visit on the repository's ``clone_url``; its
    ``last_update`` is the repository's ``updated_at`` value parsed as an
    ISO 8601 date.

    Args:
        page: simplified repository records, each holding at least the
            ``clone_url`` and ``updated_at`` keys.
    """
    assert self.lister_obj.id is not None

    for entry in page:
        updated = iso8601.parse_date(entry["updated_at"])
        origin = ListedOrigin(
            lister_id=self.lister_obj.id,
            url=entry["clone_url"],
            visit_type="git",
            last_update=updated,
        )
        yield origin

View file

@ -1,18 +0,0 @@
# Copyright (C) 2020 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, String
from ..core.models import ModelBase
class GiteaModel(ModelBase):
"""a Gitea repository from a gitea instance
"""
__tablename__ = "gitea_repo"
uid = Column(String, primary_key=True)
instance = Column(String, index=True)

View file

@ -2,51 +2,27 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import random
from typing import Dict, Optional
from celery import group, shared_task
from celery import shared_task
from .. import utils
from .lister import GiteaLister
NBPAGES = 10
@shared_task(name=__name__ + ".IncrementalGiteaLister")
def list_gitea_incremental(**lister_args):
"""Incremental update of a Gitea instance"""
lister_args["order"] = "desc"
lister = GiteaLister(**lister_args)
total_pages = lister.get_pages_information()[1]
# stopping as soon as existing origins for that instance are detected
return lister.run(min_bound=1, max_bound=total_pages, check_existence=True)
@shared_task(name=__name__ + ".RangeGiteaLister")
def _range_gitea_lister(start, end, **lister_args):
lister = GiteaLister(**lister_args)
return lister.run(min_bound=start, max_bound=end)
@shared_task(name=__name__ + ".FullGiteaRelister", bind=True)
def list_gitea_full(self, **lister_args):
@shared_task(name=__name__ + ".FullGiteaRelister")
def list_gitea_full(
url: str,
instance: Optional[str] = None,
api_token: Optional[str] = None,
page_size: Optional[int] = None,
) -> Dict[str, int]:
"""Full update of a Gitea instance"""
lister = GiteaLister(**lister_args)
_, total_pages, _ = lister.get_pages_information()
ranges = list(utils.split_range(total_pages, NBPAGES))
random.shuffle(ranges)
promise = group(
_range_gitea_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges
)()
self.log.debug("%s OK (spawned %s subtasks)" % (self.name, len(ranges)))
try:
promise.save()
except (NotImplementedError, AttributeError):
self.log.info("Unable to call save_group with current result backend.")
# FIXME: what to do in terms of return here?
return promise.id
lister = GiteaLister.from_configfile(
url=url, instance=instance, api_token=api_token, page_size=page_size
)
return lister.run().dict()
@shared_task(name=__name__ + ".ping")
def _ping():
def _ping() -> str:
return "OK"

View file

@ -1,4 +0,0 @@
{
"ok": true,
"data": []
}

View file

@ -1,182 +0,0 @@
{
"ok": true,
"data": [
{
"id": 5017,
"owner": {
"id": 1609,
"login": "JonasFranzDEV",
"full_name": "",
"email": "info@jonasfranz.software",
"avatar_url": "https://try.gitea.io/user/avatar/JonasFranzDEV/-1",
"language": "de-DE",
"is_admin": false,
"last_login": "2019-10-19T10:58:29Z",
"created": "2017-06-25T17:43:19Z",
"username": "JonasFranzDEV"
},
"name": "drone-gitea-release",
"full_name": "JonasFranzDEV/drone-gitea-release",
"description": "",
"empty": false,
"private": false,
"fork": false,
"template": false,
"parent": null,
"mirror": false,
"size": 380,
"html_url": "https://try.gitea.io/JonasFranzDEV/drone-gitea-release",
"ssh_url": "git@try.gitea.io:JonasFranzDEV/drone-gitea-release.git",
"clone_url": "https://try.gitea.io/JonasFranzDEV/drone-gitea-release.git",
"original_url": "",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 1,
"open_pr_counter": 0,
"release_counter": 2,
"default_branch": "master",
"archived": false,
"created_at": "2018-03-30T19:34:44Z",
"updated_at": "2018-05-29T20:09:40Z",
"permissions": {
"admin": false,
"push": false,
"pull": true
},
"has_issues": true,
"internal_tracker": {
"enable_time_tracker": true,
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true
},
"has_wiki": true,
"has_pull_requests": true,
"ignore_whitespace_conflicts": false,
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"avatar_url": ""
},
{
"id": 5018,
"owner": {
"id": 4495,
"login": "nick.korsakov",
"full_name": "",
"email": "nick@korsakov.email",
"avatar_url": "https://try.gitea.io/user/avatar/nick.korsakov/-1",
"language": "ru-RU",
"is_admin": false,
"last_login": "2020-02-15T10:29:10Z",
"created": "2018-03-31T15:00:07Z",
"username": "nick.korsakov"
},
"name": "one",
"full_name": "nick.korsakov/one",
"description": "",
"empty": true,
"private": false,
"fork": false,
"template": false,
"parent": null,
"mirror": false,
"size": 0,
"html_url": "https://try.gitea.io/nick.korsakov/one",
"ssh_url": "git@try.gitea.io:nick.korsakov/one.git",
"clone_url": "https://try.gitea.io/nick.korsakov/one.git",
"original_url": "",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 0,
"open_pr_counter": 0,
"release_counter": 0,
"default_branch": "master",
"archived": false,
"created_at": "2018-03-31T15:00:33Z",
"updated_at": "2018-03-31T15:00:33Z",
"permissions": {
"admin": false,
"push": false,
"pull": true
},
"has_issues": true,
"internal_tracker": {
"enable_time_tracker": true,
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true
},
"has_wiki": true,
"has_pull_requests": true,
"ignore_whitespace_conflicts": false,
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"avatar_url": ""
},
{
"id": 5030,
"owner": {
"id": 1623,
"login": "xingshijun",
"full_name": "",
"email": "934302794@qq.com",
"avatar_url": "https://try.gitea.io/user/avatar/xingshijun/-1",
"language": "zh-CN",
"is_admin": false,
"last_login": "2019-06-15T12:28:43Z",
"created": "2017-06-28T02:19:23Z",
"username": "xingshijun"
},
"name": "lfzl",
"full_name": "xingshijun/lfzl",
"description": "",
"empty": false,
"private": false,
"fork": false,
"template": false,
"parent": null,
"mirror": false,
"size": 10990,
"html_url": "https://try.gitea.io/xingshijun/lfzl",
"ssh_url": "git@try.gitea.io:xingshijun/lfzl.git",
"clone_url": "https://try.gitea.io/xingshijun/lfzl.git",
"original_url": "",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 0,
"open_pr_counter": 0,
"release_counter": 0,
"default_branch": "master",
"archived": false,
"created_at": "2018-04-02T08:34:08Z",
"updated_at": "2019-11-21T10:23:36Z",
"permissions": {
"admin": false,
"push": false,
"pull": true
},
"has_issues": true,
"internal_tracker": {
"enable_time_tracker": true,
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true
},
"has_wiki": true,
"has_pull_requests": true,
"ignore_whitespace_conflicts": false,
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"avatar_url": ""
}
]
}

View file

@ -1,182 +1,195 @@
{
"ok": true,
"data": [
{
"id": 5017,
"owner": {
"id": 1609,
"login": "JonasFranzDEV",
"full_name": "",
"email": "info@jonasfranz.software",
"avatar_url": "https://try.gitea.io/user/avatar/JonasFranzDEV/-1",
"language": "de-DE",
"is_admin": false,
"last_login": "2019-10-19T10:58:29Z",
"created": "2017-06-25T17:43:19Z",
"username": "JonasFranzDEV"
},
"name": "drone-gitea-release",
"full_name": "JonasFranzDEV/drone-gitea-release",
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"archived": false,
"avatar_url": "",
"clone_url": "https://try.gitea.io/JonasFranzDEV/drone-gitea-release.git",
"created_at": "2018-03-30T19:34:44Z",
"default_branch": "master",
"description": "",
"empty": false,
"private": false,
"fork": false,
"template": false,
"parent": null,
"mirror": false,
"size": 380,
"forks_count": 1,
"full_name": "JonasFranzDEV/drone-gitea-release",
"has_issues": true,
"has_projects": false,
"has_pull_requests": true,
"has_wiki": true,
"html_url": "https://try.gitea.io/JonasFranzDEV/drone-gitea-release",
"ssh_url": "git@try.gitea.io:JonasFranzDEV/drone-gitea-release.git",
"clone_url": "https://try.gitea.io/JonasFranzDEV/drone-gitea-release.git",
"original_url": "",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"id": 5017,
"ignore_whitespace_conflicts": false,
"internal": false,
"internal_tracker": {
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true,
"enable_time_tracker": true
},
"mirror": false,
"mirror_interval": "",
"name": "drone-gitea-release",
"open_issues_count": 1,
"open_pr_counter": 0,
"release_counter": 2,
"default_branch": "master",
"archived": false,
"created_at": "2018-03-30T19:34:44Z",
"updated_at": "2018-05-29T20:09:40Z",
"permissions": {
"admin": false,
"push": false,
"pull": true
},
"has_issues": true,
"internal_tracker": {
"enable_time_tracker": true,
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true
},
"has_wiki": true,
"has_pull_requests": true,
"ignore_whitespace_conflicts": false,
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"avatar_url": ""
},
{
"id": 5018,
"owner": {
"id": 4495,
"login": "nick.korsakov",
"full_name": "",
"email": "nick@korsakov.email",
"avatar_url": "https://try.gitea.io/user/avatar/nick.korsakov/-1",
"language": "ru-RU",
"is_admin": false,
"last_login": "2020-02-15T10:29:10Z",
"created": "2018-03-31T15:00:07Z",
"username": "nick.korsakov"
},
"name": "one",
"full_name": "nick.korsakov/one",
"description": "",
"empty": true,
"private": false,
"fork": false,
"template": false,
"parent": null,
"mirror": false,
"size": 0,
"html_url": "https://try.gitea.io/nick.korsakov/one",
"ssh_url": "git@try.gitea.io:nick.korsakov/one.git",
"clone_url": "https://try.gitea.io/nick.korsakov/one.git",
"original_url": "",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"open_issues_count": 0,
"open_pr_counter": 0,
"release_counter": 0,
"default_branch": "master",
"archived": false,
"created_at": "2018-03-31T15:00:33Z",
"updated_at": "2018-03-31T15:00:33Z",
"owner": {
"avatar_url": "https://try.gitea.io/user/avatar/JonasFranzDEV/-1",
"created": "2017-06-25T17:43:19Z",
"email": "info@jonasfranz.software",
"full_name": "",
"id": 1609,
"is_admin": false,
"language": "",
"last_login": "0001-01-01T00:00:00Z",
"login": "JonasFranzDEV",
"username": "JonasFranzDEV"
},
"parent": null,
"permissions": {
"admin": false,
"push": false,
"pull": true
"pull": true,
"push": false
},
"has_issues": true,
"internal_tracker": {
"enable_time_tracker": true,
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true
},
"has_wiki": true,
"has_pull_requests": true,
"ignore_whitespace_conflicts": false,
"private": false,
"release_counter": 2,
"size": 380,
"ssh_url": "git@try.gitea.io:JonasFranzDEV/drone-gitea-release.git",
"stars_count": 0,
"template": false,
"updated_at": "2018-05-29T20:09:40Z",
"watchers_count": 1,
"website": ""
},
{
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"avatar_url": ""
},
{
"id": 5030,
"owner": {
"id": 1623,
"login": "xingshijun",
"full_name": "",
"email": "934302794@qq.com",
"avatar_url": "https://try.gitea.io/user/avatar/xingshijun/-1",
"language": "zh-CN",
"is_admin": false,
"last_login": "2019-06-15T12:28:43Z",
"created": "2017-06-28T02:19:23Z",
"username": "xingshijun"
},
"name": "lfzl",
"full_name": "xingshijun/lfzl",
"archived": false,
"avatar_url": "",
"clone_url": "https://try.gitea.io/xingshijun/lfzl.git",
"created_at": "2018-04-02T08:34:08Z",
"default_branch": "master",
"description": "",
"empty": false,
"private": false,
"fork": false,
"template": false,
"parent": null,
"mirror": false,
"size": 10990,
"html_url": "https://try.gitea.io/xingshijun/lfzl",
"ssh_url": "git@try.gitea.io:xingshijun/lfzl.git",
"clone_url": "https://try.gitea.io/xingshijun/lfzl.git",
"original_url": "",
"website": "",
"stars_count": 0,
"forks_count": 0,
"watchers_count": 1,
"full_name": "xingshijun/lfzl",
"has_issues": true,
"has_projects": false,
"has_pull_requests": true,
"has_wiki": true,
"html_url": "https://try.gitea.io/xingshijun/lfzl",
"id": 5030,
"ignore_whitespace_conflicts": false,
"internal": false,
"internal_tracker": {
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true,
"enable_time_tracker": true
},
"mirror": false,
"mirror_interval": "",
"name": "lfzl",
"open_issues_count": 0,
"open_pr_counter": 0,
"release_counter": 0,
"default_branch": "master",
"archived": false,
"created_at": "2018-04-02T08:34:08Z",
"updated_at": "2019-11-21T10:23:36Z",
"original_url": "",
"owner": {
"avatar_url": "https://try.gitea.io/user/avatar/xingshijun/-1",
"created": "2017-06-28T02:19:23Z",
"email": "934302794@qq.com",
"full_name": "",
"id": 1623,
"is_admin": false,
"language": "",
"last_login": "0001-01-01T00:00:00Z",
"login": "xingshijun",
"username": "xingshijun"
},
"parent": null,
"permissions": {
"admin": false,
"push": false,
"pull": true
"pull": true,
"push": false
},
"has_issues": true,
"internal_tracker": {
"enable_time_tracker": true,
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true
},
"has_wiki": true,
"has_pull_requests": true,
"ignore_whitespace_conflicts": false,
"private": false,
"release_counter": 0,
"size": 10997,
"ssh_url": "git@try.gitea.io:xingshijun/lfzl.git",
"stars_count": 0,
"template": false,
"updated_at": "2020-04-16T08:39:18Z",
"watchers_count": 1,
"website": ""
},
{
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"avatar_url": ""
"archived": false,
"avatar_url": "",
"clone_url": "https://try.gitea.io/ulm0/negroni.git",
"created_at": "2018-04-02T17:30:26Z",
"default_branch": "master",
"description": "Idiomatic HTTP Middleware for Golang",
"empty": false,
"fork": false,
"forks_count": 1,
"full_name": "ulm0/negroni",
"has_issues": true,
"has_projects": false,
"has_pull_requests": true,
"has_wiki": true,
"html_url": "https://try.gitea.io/ulm0/negroni",
"id": 5034,
"ignore_whitespace_conflicts": false,
"internal": false,
"internal_tracker": {
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true,
"enable_time_tracker": true
},
"mirror": true,
"mirror_interval": "8h0m0s",
"name": "negroni",
"open_issues_count": 0,
"open_pr_counter": 0,
"original_url": "",
"owner": {
"avatar_url": "https://try.gitea.io/user/avatar/ulm0/-1",
"created": "2017-07-09T18:58:34Z",
"email": "ulm0@innersea.xyz",
"full_name": "Mauricio Ugaz",
"id": 1706,
"is_admin": false,
"language": "",
"last_login": "0001-01-01T00:00:00Z",
"login": "ulm0",
"username": "ulm0"
},
"parent": null,
"permissions": {
"admin": false,
"pull": true,
"push": false
},
"private": false,
"release_counter": 7,
"size": 17739,
"ssh_url": "git@try.gitea.io:ulm0/negroni.git",
"stars_count": 0,
"template": false,
"updated_at": "2020-11-14T17:50:56Z",
"watchers_count": 1,
"website": ""
}
]
],
"ok": true,
"links": {
"next": "https://try.gitea.io/api/v1/repos/search?limit=3&order=asc&page=2&sort=id",
"last": "https://try.gitea.io/api/v1/repos/search?limit=3&order=asc&page=2282&sort=id"
}
}

View file

@ -0,0 +1,252 @@
{
"data": [
{
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"archived": false,
"avatar_url": "",
"clone_url": "https://try.gitea.io/ulm0/mux.git",
"created_at": "2018-04-02T17:35:13Z",
"default_branch": "master",
"description": "A powerful URL router and dispatcher for golang.",
"empty": false,
"fork": false,
"forks_count": 1,
"full_name": "ulm0/mux",
"has_issues": true,
"has_projects": false,
"has_pull_requests": true,
"has_wiki": true,
"html_url": "https://try.gitea.io/ulm0/mux",
"id": 5035,
"ignore_whitespace_conflicts": false,
"internal": false,
"internal_tracker": {
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true,
"enable_time_tracker": true
},
"mirror": true,
"mirror_interval": "8h0m0s",
"name": "mux",
"open_issues_count": 0,
"open_pr_counter": 0,
"original_url": "",
"owner": {
"avatar_url": "https://try.gitea.io/user/avatar/ulm0/-1",
"created": "2017-07-09T18:58:34Z",
"email": "ulm0@innersea.xyz",
"full_name": "Mauricio Ugaz",
"id": 1706,
"is_admin": false,
"language": "",
"last_login": "0001-01-01T00:00:00Z",
"login": "ulm0",
"username": "ulm0"
},
"parent": null,
"permissions": {
"admin": false,
"pull": true,
"push": false
},
"private": false,
"release_counter": 14,
"size": 2512,
"ssh_url": "git@try.gitea.io:ulm0/mux.git",
"stars_count": 0,
"template": false,
"updated_at": "2020-09-12T19:20:56Z",
"watchers_count": 1,
"website": "http://www.gorillatoolkit.org/pkg/mux"
},
{
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"archived": false,
"avatar_url": "",
"clone_url": "https://try.gitea.io/ligh0721/negroni.git",
"created_at": "2018-04-03T10:41:41Z",
"default_branch": "master",
"description": "Idiomatic HTTP Middleware for Golang",
"empty": false,
"fork": true,
"forks_count": 0,
"full_name": "ligh0721/negroni",
"has_issues": true,
"has_projects": false,
"has_pull_requests": true,
"has_wiki": true,
"html_url": "https://try.gitea.io/ligh0721/negroni",
"id": 5045,
"ignore_whitespace_conflicts": false,
"internal": false,
"internal_tracker": {
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true,
"enable_time_tracker": true
},
"mirror": false,
"mirror_interval": "",
"name": "negroni",
"open_issues_count": 0,
"open_pr_counter": 0,
"original_url": "",
"owner": {
"avatar_url": "https://try.gitea.io/user/avatar/ligh0721/-1",
"created": "2018-04-03T10:37:01Z",
"email": "lightning_0721@163.com",
"full_name": "",
"id": 4534,
"is_admin": false,
"language": "",
"last_login": "0001-01-01T00:00:00Z",
"login": "ligh0721",
"username": "ligh0721"
},
"parent": {
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"archived": false,
"avatar_url": "",
"clone_url": "https://try.gitea.io/ulm0/negroni.git",
"created_at": "2018-04-02T17:30:26Z",
"default_branch": "master",
"description": "Idiomatic HTTP Middleware for Golang",
"empty": false,
"fork": false,
"forks_count": 1,
"full_name": "ulm0/negroni",
"has_issues": true,
"has_projects": false,
"has_pull_requests": true,
"has_wiki": true,
"html_url": "https://try.gitea.io/ulm0/negroni",
"id": 5034,
"ignore_whitespace_conflicts": false,
"internal": false,
"internal_tracker": {
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true,
"enable_time_tracker": true
},
"mirror": true,
"mirror_interval": "8h0m0s",
"name": "negroni",
"open_issues_count": 0,
"open_pr_counter": 0,
"original_url": "",
"owner": {
"avatar_url": "https://try.gitea.io/user/avatar/ulm0/-1",
"created": "2017-07-09T18:58:34Z",
"email": "ulm0@innersea.xyz",
"full_name": "Mauricio Ugaz",
"id": 1706,
"is_admin": false,
"language": "",
"last_login": "0001-01-01T00:00:00Z",
"login": "ulm0",
"username": "ulm0"
},
"parent": null,
"permissions": {
"admin": false,
"pull": true,
"push": false
},
"private": false,
"release_counter": 7,
"size": 17739,
"ssh_url": "git@try.gitea.io:ulm0/negroni.git",
"stars_count": 0,
"template": false,
"updated_at": "2020-11-14T17:50:56Z",
"watchers_count": 1,
"website": ""
},
"permissions": {
"admin": false,
"pull": true,
"push": false
},
"private": false,
"release_counter": 3,
"size": 344,
"ssh_url": "git@try.gitea.io:ligh0721/negroni.git",
"stars_count": 0,
"template": false,
"updated_at": "2018-04-03T10:41:41Z",
"watchers_count": 1,
"website": ""
},
{
"allow_merge_commits": false,
"allow_rebase": false,
"allow_rebase_explicit": true,
"allow_squash_merge": false,
"archived": false,
"avatar_url": "",
"clone_url": "https://try.gitea.io/user12312341324124/Tiny.git",
"created_at": "2018-04-03T13:08:29Z",
"default_branch": "master",
"description": "",
"empty": false,
"fork": false,
"forks_count": 1,
"full_name": "user12312341324124/Tiny",
"has_issues": true,
"has_projects": false,
"has_pull_requests": true,
"has_wiki": true,
"html_url": "https://try.gitea.io/user12312341324124/Tiny",
"id": 5046,
"ignore_whitespace_conflicts": false,
"internal": false,
"internal_tracker": {
"allow_only_contributors_to_track_time": true,
"enable_issue_dependencies": true,
"enable_time_tracker": true
},
"mirror": false,
"mirror_interval": "",
"name": "Tiny",
"open_issues_count": 1,
"open_pr_counter": 0,
"original_url": "",
"owner": {
"avatar_url": "https://try.gitea.io/user/avatar/user12312341324124/-1",
"created": "2018-04-03T13:07:45Z",
"email": "z333676@mvrht.net",
"full_name": "",
"id": 4536,
"is_admin": false,
"language": "",
"last_login": "0001-01-01T00:00:00Z",
"login": "user12312341324124",
"username": "user12312341324124"
},
"parent": null,
"permissions": {
"admin": false,
"pull": true,
"push": false
},
"private": false,
"release_counter": 0,
"size": 110,
"ssh_url": "git@try.gitea.io:user12312341324124/Tiny.git",
"stars_count": 0,
"template": false,
"updated_at": "2018-04-03T13:08:29Z",
"watchers_count": 1,
"website": ""
}
],
"ok": true
}

View file

@ -3,56 +3,122 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import re
import unittest
import json
from pathlib import Path
from pprint import pprint
from typing import Dict, List, Tuple
from swh.lister.core.tests.test_lister import HttpListerTesterBase
from swh.lister.gitea.lister import GiteaLister
import pytest
import requests
logger = logging.getLogger(__name__)
from swh.lister.gitea.lister import GiteaLister, RepoListPage
from swh.scheduler.model import ListedOrigin
TRYGITEA_BASEURL = "https://try.gitea.io/api/v1/"
TRYGITEA_P1_URL = TRYGITEA_BASEURL + "repos/search?sort=id&order=asc&limit=3&page=1"
TRYGITEA_P2_URL = TRYGITEA_BASEURL + "repos/search?sort=id&order=asc&limit=3&page=2"
class GiteaListerTester(HttpListerTesterBase, unittest.TestCase):
Lister = GiteaLister
test_re = re.compile(r"^.*/projects.*page=(\d+).*")
lister_subdir = "gitea"
good_api_response_file = "data/https_try.gitea.io/api_response.json"
bad_api_response_file = "data/https_try.gitea.io/api_empty_response.json"
first_index = 1
last_index = 2
entries_per_page = 3
convert_type = int
def response_headers(self, request):
headers = {}
if self.request_index(request) == self.first_index:
headers.update(
{
"Link": "<https://try.gitea.io/api/v1\
/repos/search?&page=%s&sort=id>;"
' rel="next"' % self.last_index
}
)
return headers
@pytest.fixture
def trygitea_p1(datadir) -> Tuple[str, Dict[str, str], RepoListPage, List[str]]:
    """Provide the recorded first page of the try.gitea.io repository search.

    Returns a tuple of: the raw response text, the response headers (a
    ``Link`` header naming page 2 as both ``next`` and ``last``), the
    simplified page result, and the clone URLs of the listed repositories.
    """
    page_file = Path(datadir) / "https_try.gitea.io" / "repos_page1"
    text = page_file.read_text()

    link_header = '<{p2}>; rel="next",<{p2}>; rel="last"'.format(p2=TRYGITEA_P2_URL)
    headers = {"Link": link_header}

    page_result = GiteaLister.results_simplified(json.loads(text))
    origin_urls = [repo["clone_url"] for repo in page_result]
    return text, headers, page_result, origin_urls
def test_lister_gitea(lister_gitea, requests_mock_datadir):
lister_gitea.run()
r = lister_gitea.scheduler.search_tasks(task_type="load-git")
assert len(r) == 3
@pytest.fixture
def trygitea_p2(datadir) -> Tuple[str, Dict[str, str], RepoListPage, List[str]]:
    """Provide the recorded second page of the try.gitea.io repository search.

    Returns a tuple of: the raw response text, the response headers (a
    ``Link`` header naming page 1 as both ``prev`` and ``first``, with no
    ``next`` relation), the simplified page result, and the clone URLs of the
    listed repositories.
    """
    page_file = Path(datadir) / "https_try.gitea.io" / "repos_page2"
    text = page_file.read_text()

    link_header = '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=TRYGITEA_P1_URL)
    headers = {"Link": link_header}

    page_result = GiteaLister.results_simplified(json.loads(text))
    origin_urls = [repo["clone_url"] for repo in page_result]
    return text, headers, page_result, origin_urls
for row in r:
assert row["type"] == "load-git"
# arguments check
args = row["arguments"]["args"]
assert len(args) == 0
# kwargs
kwargs = row["arguments"]["kwargs"]
url = kwargs["url"]
assert url.startswith("https://try.gitea.io")
def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
"""Asserts that the two collections have the same origin URLs.
assert row["policy"] == "recurring"
assert row["priority"] is None
Does not test last_update."""
sorted_lister_urls = list(sorted(lister_urls))
sorted_scheduler_origins = list(sorted(scheduler_origins))
assert len(sorted_lister_urls) == len(sorted_scheduler_origins)
for l_url, s_origin in zip(sorted_lister_urls, sorted_scheduler_origins):
assert l_url == s_origin.url
def test_gitea_full_listing(
    swh_scheduler, requests_mock, mocker, trygitea_p1, trygitea_p2
):
    """Run a full two-page listing end to end.

    Exercises: multi-page listing, instance inference from the URL, a
    rate-limited response answered on the second attempt, token
    authentication with the token taken from the credentials, an explicit
    page size (needed to match the mocked URLs), page results, listed
    origins, and the absence of lister state.
    """
    first_text, first_headers, first_result, first_urls = trygitea_p1
    second_text, second_headers, second_result, second_urls = trygitea_p2

    requests_mock.get(TRYGITEA_P1_URL, text=first_text, headers=first_headers)
    # Page 2 first answers "too many requests", then the real content.
    page2_responses = [
        {"status_code": requests.codes.too_many_requests},
        {"text": second_text, "headers": second_headers},
    ]
    requests_mock.get(TRYGITEA_P2_URL, page2_responses)

    instance = "try.gitea.io"
    api_token = "p"
    creds = {"gitea": {instance: [{"username": "u", "password": api_token}]}}

    lister = GiteaLister(
        scheduler=swh_scheduler,
        url=TRYGITEA_BASEURL,
        page_size=3,
        credentials=creds,
    )

    assert lister.instance == instance
    assert "Authorization" in lister.session.headers
    assert lister.session.headers["Authorization"].lower() == "token %s" % api_token

    lister.get_origins_from_page = mocker.spy(lister, "get_origins_from_page")

    # end test setup

    stats = lister.run()

    # start test checks

    assert stats.pages == 2
    assert stats.origins == 6

    expected_calls = [mocker.call(first_result), mocker.call(second_result)]
    lister.get_origins_from_page.assert_has_calls(expected_calls)

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins

    assert lister.get_state_from_scheduler() is None

    check_listed_origins(first_urls + second_urls, scheduler_origins)
@pytest.mark.parametrize("http_code", [400, 500, 502])
def test_gitea_list_http_error(swh_scheduler, requests_mock, http_code):
    """Check that an HTTP error on the first page aborts the listing.

    The first page URL is mocked to answer with ``http_code``; running the
    lister must raise ``requests.HTTPError`` and leave no listed origin in
    the scheduler.
    """
    requests_mock.get(TRYGITEA_P1_URL, status_code=http_code)

    failing_lister = GiteaLister(
        scheduler=swh_scheduler,
        url=TRYGITEA_BASEURL,
        page_size=3,
    )

    with pytest.raises(requests.HTTPError):
        failing_lister.run()

    recorded = swh_scheduler.get_listed_origins(failing_lister.lister_obj.id)
    assert len(recorded.origins) == 0

View file

@ -3,13 +3,9 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from time import sleep
from unittest.mock import call, patch
from unittest.mock import patch
from celery.result import GroupResult
from swh.lister.gitea.tasks import NBPAGES
from swh.lister.utils import split_range
from swh.lister.pattern import ListerStats
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
@ -21,125 +17,43 @@ def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
# NOTE(review): this span holds two function headers (test_incremental and
# test_full_listing), two task names passed back to back to send_task, and
# two sets of assertions, all without indentation. It looks like two versions
# of the same test interleaved -- confirm which lines are current before
# relying on it.
#
# Both versions patch swh.lister.gitea.tasks.GiteaLister, send a celery task
# through swh_scheduler_celery_app, wait for it to succeed, then assert on the
# calls recorded by the mock.
@patch("swh.lister.gitea.tasks.GiteaLister")
def test_incremental(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
# setup the mocked GiteaLister
lister.return_value = lister
lister.run.return_value = None
lister.get_pages_information.return_value = (None, 10, None)
def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
# from_configfile is mocked to hand back the mock itself, and run() to
# report fixed statistics.
lister.from_configfile.return_value = lister
lister.run.return_value = ListerStats(pages=10, origins=500)
kwargs = dict(url="https://try.gitea.io/api/v1")
res = swh_scheduler_celery_app.send_task(
"swh.lister.gitea.tasks.IncrementalGiteaLister"
"swh.lister.gitea.tasks.FullGiteaRelister", kwargs=kwargs,
)
assert res
res.wait()
assert res.successful()
lister.assert_called_once_with(order="desc")
lister.db_last_index.assert_not_called()
lister.get_pages_information.assert_called_once_with()
lister.run.assert_called_once_with(min_bound=1, max_bound=10, check_existence=True)
# Only url is sent with the task; from_configfile is expected to also receive
# instance, api_token and page_size set to None.
actual_kwargs = dict(**kwargs, instance=None, api_token=None, page_size=None)
lister.from_configfile.assert_called_once_with(**actual_kwargs)
lister.run.assert_called_once_with()
@patch("swh.lister.gitea.tasks.GiteaLister")
def test_range(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the RangeGiteaLister task and check how the lister is driven.

    The task is sent with ``start=12`` and ``end=42``; the mocked lister must
    be instantiated once without arguments and run once with those values as
    ``min_bound`` and ``max_bound``, without ``db_last_index`` being called.
    """
    # The patched class returns itself when instantiated, so the assertions
    # below on `lister` cover both the class and the instance.
    lister.return_value = lister
    lister.run.return_value = None

    bounds = dict(start=12, end=42)
    task_result = swh_scheduler_celery_app.send_task(
        "swh.lister.gitea.tasks.RangeGiteaLister", kwargs=bounds
    )
    assert task_result
    task_result.wait()
    assert task_result.successful()

    lister.assert_called_once_with()
    lister.db_last_index.assert_not_called()
    lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@patch("swh.lister.gitea.tasks.GiteaLister")
def test_relister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Send the FullGiteaRelister task and check the range subtasks it spawns.

    The mocked lister reports 85 pages; the test expects one ``run`` call per
    interval yielded by ``split_range(total_pages, NBPAGES)``.
    """
    total_pages = 85

    # The patched class returns itself when instantiated, so every
    # instantiation and every run() is recorded on the same mock.
    lister.return_value = lister
    lister.run.return_value = None
    lister.get_pages_information.return_value = (None, total_pages, None)

    task_result = swh_scheduler_celery_app.send_task(
        "swh.lister.gitea.tasks.FullGiteaRelister"
    )
    assert task_result
    task_result.wait()
    assert task_result.successful()

    # The task result is the id of a GroupResult; restore it and give the
    # subtasks up to five one-second waits to complete.
    group_id = task_result.result
    assert group_id
    group = GroupResult.restore(group_id, app=swh_scheduler_celery_app)
    waits = 0
    while waits < 5 and not group.ready():
        sleep(1)
        waits += 1

    lister.assert_called_with()

    # One instantiation by the FullGiteaRelister task
    # + 9 for the RangeGiteaLister subtasks
    assert lister.call_count == 10

    lister.db_last_index.assert_not_called()
    lister.db_partition_indices.assert_not_called()
    lister.get_pages_information.assert_called_once_with()

    # lister.run should have been called once per partition interval
    recorded_runs = lister.run.call_args_list
    for min_bound, max_bound in split_range(total_pages, NBPAGES):
        assert call(min_bound=min_bound, max_bound=max_bound) in recorded_runs
# NOTE(review): this span holds two function headers (test_relister_instance
# and test_full_listing_params), two different kwargs arguments to send_task
# and two sets of assertions, all without indentation. It looks like two
# versions of the same test interleaved -- confirm which lines are current
# before relying on it.
#
# Both versions patch swh.lister.gitea.tasks.GiteaLister, send the
# FullGiteaRelister task with a custom url, wait for it to succeed, then
# assert on the calls recorded by the mock.
@patch("swh.lister.gitea.tasks.GiteaLister")
def test_relister_instance(
def test_full_listing_params(
lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
total_pages = 85
# setup the mocked GiteaLister
lister.return_value = lister
lister.run.return_value = None
lister.get_pages_information.return_value = (None, total_pages, None)
# from_configfile is mocked to hand back the mock itself, and run() to
# report fixed statistics.
lister.from_configfile.return_value = lister
lister.run.return_value = ListerStats(pages=10, origins=500)
# Every task parameter is given explicitly here.
kwargs = dict(
url="https://0xacab.org/api/v4",
instance="0xacab",
api_token="test",
page_size=50,
)
res = swh_scheduler_celery_app.send_task(
"swh.lister.gitea.tasks.FullGiteaRelister",
kwargs=dict(url="https://0xacab.org/api/v4"),
"swh.lister.gitea.tasks.FullGiteaRelister", kwargs=kwargs,
)
assert res
res.wait()
assert res.successful()
# retrieve the GroupResult for this task and wait for all the subtasks
# to complete
promise_id = res.result
assert promise_id
promise = GroupResult.restore(promise_id, app=swh_scheduler_celery_app)
# Poll up to five times, sleeping one second between checks.
for i in range(5):
if promise.ready():
break
sleep(1)
lister.assert_called_with(url="https://0xacab.org/api/v4")
# one by the FullGiteaRelister task
# + 9 for the RangeGiteaLister subtasks
assert lister.call_count == 10
lister.db_last_index.assert_not_called()
lister.db_partition_indices.assert_not_called()
lister.get_pages_information.assert_called_once_with()
# lister.run should have been called once per partition interval
for min_bound, max_bound in split_range(total_pages, NBPAGES):
assert (
call(min_bound=min_bound, max_bound=max_bound) in lister.run.call_args_list
)
# The task parameters are expected to reach from_configfile unchanged.
lister.from_configfile.assert_called_once_with(**kwargs)
lister.run.assert_called_once_with()