feat: Make the Gogs lister incremental
This commit is contained in:
parent
cee6bcb514
commit
6a53a6ad06
4 changed files with 471 additions and 77 deletions
|
@ -3,10 +3,11 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
import logging
|
||||
import random
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import parse_qs, urljoin, urlparse
|
||||
|
||||
import iso8601
|
||||
import requests
|
||||
|
@ -17,15 +18,36 @@ from swh.scheduler.interface import SchedulerInterface
|
|||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from .. import USER_AGENT
|
||||
from ..pattern import CredentialsType, StatelessLister
|
||||
from ..pattern import CredentialsType, Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Aliasing page results returned by `GogsLister.get_pages` method
|
||||
GogsListerPage = List[Dict[str, Any]]
|
||||
Repo = Dict[str, Any]
|
||||
|
||||
|
||||
class GogsLister(StatelessLister[GogsListerPage]):
|
||||
@dataclass
class GogsListerPage:
    """One page of listing results, as yielded by ``GogsLister.get_pages``."""

    # Repositories found on the page (each reduced to a plain dict).
    repos: Optional[List[Repo]] = None
    # Url of the following page; ``get_pages`` sets it to None on the last page.
    next_link: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class GogsListerState:
    """Progress markers kept by the Gogs lister between incremental passes."""

    # Last link header (could be already visited) during an incremental pass.
    last_seen_next_link: Optional[str] = None
    # Last repo id seen during an incremental pass.
    last_seen_repo_id: Optional[int] = None
|
||||
|
||||
|
||||
def _parse_page_id(url: Optional[str]) -> int:
|
||||
"""Parse the page id from a Gogs page url."""
|
||||
if url is None:
|
||||
return 0
|
||||
|
||||
return int(parse_qs(urlparse(url).query)["page"][0])
|
||||
|
||||
|
||||
class GogsLister(Lister[GogsListerState, GogsListerPage]):
|
||||
|
||||
"""List origins from the Gogs
|
||||
|
||||
|
@ -61,7 +83,6 @@ class GogsLister(StatelessLister[GogsListerPage]):
|
|||
|
||||
self.query_params = {
|
||||
"limit": page_size,
|
||||
"page": 1,
|
||||
}
|
||||
|
||||
self.api_token = api_token
|
||||
|
@ -88,6 +109,12 @@ class GogsLister(StatelessLister[GogsListerPage]):
|
|||
}
|
||||
)
|
||||
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> GogsListerState:
    """Build a ``GogsListerState`` whose fields are taken from the keys of ``d``."""
    state = GogsListerState(**d)
    return state
|
||||
|
||||
def state_to_dict(self, state: GogsListerState) -> Dict[str, Any]:
    """Turn ``state`` into a plain dict keyed by its dataclass field names."""
    as_mapping = asdict(state)
    return as_mapping
|
||||
|
||||
@throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
|
||||
def page_request(self, url, params) -> requests.Response:
|
||||
|
||||
|
@ -107,38 +134,70 @@ class GogsLister(StatelessLister[GogsListerPage]):
|
|||
return response
|
||||
|
||||
@classmethod
|
||||
def results_simplified(cls, body: Dict[str, GogsListerPage]) -> GogsListerPage:
|
||||
def extract_repos(cls, body: Dict[str, Any]) -> List[Repo]:
|
||||
fields_filter = ["id", "clone_url", "updated_at"]
|
||||
return [{k: r[k] for k in fields_filter} for r in body["data"]]
|
||||
|
||||
def get_pages(self) -> Iterator[GogsListerPage]:
|
||||
page_id = 1
|
||||
if self.state.last_seen_next_link is not None:
|
||||
page_id = _parse_page_id(self.state.last_seen_next_link)
|
||||
|
||||
# base with trailing slash, path without leading slash for urljoin
|
||||
url = urljoin(self.url, self.REPO_LIST_PATH)
|
||||
response = self.page_request(url, self.query_params)
|
||||
next_link: Optional[str] = urljoin(self.url, self.REPO_LIST_PATH)
|
||||
response = self.page_request(next_link, {**self.query_params, "page": page_id})
|
||||
|
||||
while True:
|
||||
page_results = self.results_simplified(response.json())
|
||||
|
||||
yield page_results
|
||||
while next_link is not None:
|
||||
repos = self.extract_repos(response.json())
|
||||
|
||||
assert len(response.links) > 0, "API changed: no Link header found"
|
||||
if "next" in response.links:
|
||||
url = response.links["next"]["url"]
|
||||
next_link = response.links["next"]["url"]
|
||||
else:
|
||||
break
|
||||
next_link = None # Happens for the last page
|
||||
|
||||
response = self.page_request(url, {})
|
||||
yield GogsListerPage(repos=repos, next_link=next_link)
|
||||
|
||||
if next_link is not None:
|
||||
response = self.page_request(next_link, {})
|
||||
|
||||
def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
|
||||
"""Convert a page of Gogs repositories into a list of ListedOrigins"""
|
||||
assert self.lister_obj.id is not None
|
||||
assert page.repos is not None
|
||||
|
||||
for repo in page:
|
||||
last_update = iso8601.parse_date(repo["updated_at"])
|
||||
for r in page.repos:
|
||||
last_update = iso8601.parse_date(r["updated_at"])
|
||||
|
||||
yield ListedOrigin(
|
||||
lister_id=self.lister_obj.id,
|
||||
visit_type=self.VISIT_TYPE,
|
||||
url=repo["clone_url"],
|
||||
url=r["clone_url"],
|
||||
last_update=last_update,
|
||||
)
|
||||
|
||||
def commit_page(self, page: GogsListerPage) -> None:
    """Record in ``self.state`` how far the listing got with ``page``.

    The stored next link only ever moves forward: it is replaced when the
    page's next link points to a higher page number than the stored one.
    The last seen repo id is taken from the final repo of a non-empty page.
    """
    candidate_link = page.next_link

    # _parse_page_id maps a missing link to 0, so a last page (no next link)
    # can never overwrite a link that was already stored.
    candidate_page = _parse_page_id(candidate_link)
    stored_page = _parse_page_id(self.state.last_seen_next_link)
    if candidate_page > stored_page:
        self.state.last_seen_next_link = candidate_link

    repos = page.repos
    if repos:
        self.state.last_seen_repo_id = repos[-1]["id"]
|
||||
|
||||
def finalize(self) -> None:
    """Set ``self.updated`` when this pass went beyond the scheduler's state.

    The flag is raised only if the page reached is not behind the page
    recorded by the scheduler and a repo id greater than the recorded one
    was seen.
    """
    recorded = self.get_state_from_scheduler()

    reached_page = _parse_page_id(self.state.last_seen_next_link)
    recorded_page = _parse_page_id(recorded.last_seen_next_link)
    if reached_page < recorded_page:
        return

    # A missing repo id counts as 0 on both sides of the comparison.
    newest_repo_id = self.state.last_seen_repo_id or 0
    recorded_repo_id = recorded.last_seen_repo_id or 0
    if newest_repo_id > recorded_repo_id:
        self.updated = True  # Marked updated only if it finds new repos
|
||||
|
|
168
swh/lister/gogs/tests/data/https_try.gogs.io/repos_page3
Normal file
168
swh/lister/gogs/tests/data/https_try.gogs.io/repos_page3
Normal file
|
@ -0,0 +1,168 @@
|
|||
{
|
||||
"data": [
|
||||
{
|
||||
"id": 340,
|
||||
"owner": {
|
||||
"id": 585,
|
||||
"username": "zork",
|
||||
"login": "zork",
|
||||
"full_name": "",
|
||||
"email": "f905334@trbvm.com",
|
||||
"avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
|
||||
},
|
||||
"name": "beyond-the-titanic",
|
||||
"full_name": "zork/beyond-the-titanic",
|
||||
"description": "Adventure awaits you onboard the RMS Titanic. Can you survive the sinking and make it home to San Francisco?",
|
||||
"private": false,
|
||||
"fork": false,
|
||||
"parent": null,
|
||||
"empty": false,
|
||||
"mirror": false,
|
||||
"size": 1436672,
|
||||
"html_url": "https://try.gogs.io/zork/beyond-the-titanic",
|
||||
"ssh_url": "git@try.gogs.io:zork/beyond-the-titanic.git",
|
||||
"clone_url": "https://try.gogs.io/zork/beyond-the-titanic.git",
|
||||
"website": "",
|
||||
"stars_count": 0,
|
||||
"forks_count": 1,
|
||||
"watchers_count": 1,
|
||||
"open_issues_count": 0,
|
||||
"default_branch": "master",
|
||||
"created_at": "2015-03-03T22:51:12Z",
|
||||
"updated_at": "2022-03-26T07:28:38Z"
|
||||
},
|
||||
{
|
||||
"id": 350,
|
||||
"owner": {
|
||||
"id": 599,
|
||||
"username": "perekre",
|
||||
"login": "perekre",
|
||||
"full_name": "",
|
||||
"email": "perekre@nincsmail.com",
|
||||
"avatar_url": "https://secure.gravatar.com/avatar/0e2666adf16f8a958a56141a2d94565c?d=identicon"
|
||||
},
|
||||
"name": "beyond-the-titanic",
|
||||
"full_name": "perekre/beyond-the-titanic",
|
||||
"description": "Adventure awaits you onboard the RMS Titanic. Can you survive the sinking and make it home to San Francisco?",
|
||||
"private": false,
|
||||
"fork": true,
|
||||
"parent": {
|
||||
"id": 340,
|
||||
"owner": {
|
||||
"id": 585,
|
||||
"username": "zork",
|
||||
"login": "zork",
|
||||
"full_name": "",
|
||||
"email": "f905334@trbvm.com",
|
||||
"avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
|
||||
},
|
||||
"name": "beyond-the-titanic",
|
||||
"full_name": "zork/beyond-the-titanic",
|
||||
"description": "Adventure awaits you onboard the RMS Titanic. Can you survive the sinking and make it home to San Francisco?",
|
||||
"private": false,
|
||||
"fork": false,
|
||||
"parent": null,
|
||||
"empty": false,
|
||||
"mirror": false,
|
||||
"size": 1436672,
|
||||
"html_url": "https://try.gogs.io/zork/beyond-the-titanic",
|
||||
"ssh_url": "git@try.gogs.io:zork/beyond-the-titanic.git",
|
||||
"clone_url": "https://try.gogs.io/zork/beyond-the-titanic.git",
|
||||
"website": "",
|
||||
"stars_count": 0,
|
||||
"forks_count": 1,
|
||||
"watchers_count": 1,
|
||||
"open_issues_count": 0,
|
||||
"default_branch": "master",
|
||||
"created_at": "2015-03-03T22:51:12Z",
|
||||
"updated_at": "2022-03-26T07:28:38Z",
|
||||
"permissions": {
|
||||
"admin": false,
|
||||
"push": false,
|
||||
"pull": true
|
||||
}
|
||||
},
|
||||
"empty": false,
|
||||
"mirror": false,
|
||||
"size": 1437696,
|
||||
"html_url": "https://try.gogs.io/perekre/beyond-the-titanic",
|
||||
"ssh_url": "git@try.gogs.io:perekre/beyond-the-titanic.git",
|
||||
"clone_url": "https://try.gogs.io/perekre/beyond-the-titanic.git",
|
||||
"website": "",
|
||||
"stars_count": 0,
|
||||
"forks_count": 0,
|
||||
"watchers_count": 1,
|
||||
"open_issues_count": 0,
|
||||
"default_branch": "master",
|
||||
"created_at": "2015-03-04T10:40:46Z",
|
||||
"updated_at": "2022-03-26T07:28:38Z"
|
||||
},
|
||||
{
|
||||
"id": 369,
|
||||
"owner": {
|
||||
"id": 108,
|
||||
"username": "yinheli",
|
||||
"login": "yinheli",
|
||||
"full_name": "",
|
||||
"email": "me@yinheli.com",
|
||||
"avatar_url": "https://secure.gravatar.com/avatar/dedb067ecae8155b87428ac7920dd0ae?d=identicon"
|
||||
},
|
||||
"name": "digits",
|
||||
"full_name": "yinheli/digits",
|
||||
"description": "Distantly related to the game Mastermind, you are given clues to help determine a random number combination. The object of the game is to guess the solution in as few tries as possible.",
|
||||
"private": false,
|
||||
"fork": true,
|
||||
"parent": {
|
||||
"id": 339,
|
||||
"owner": {
|
||||
"id": 585,
|
||||
"username": "zork",
|
||||
"login": "zork",
|
||||
"full_name": "",
|
||||
"email": "f905334@trbvm.com",
|
||||
"avatar_url": "https://secure.gravatar.com/avatar/ebcb8e171a1a47fde8ded46b2618f135?d=identicon"
|
||||
},
|
||||
"name": "digits",
|
||||
"full_name": "zork/digits",
|
||||
"description": "Distantly related to the game Mastermind, you are given clues to help determine a random number combination. The object of the game is to guess the solution in as few tries as possible.",
|
||||
"private": false,
|
||||
"fork": false,
|
||||
"parent": null,
|
||||
"empty": false,
|
||||
"mirror": false,
|
||||
"size": 18432,
|
||||
"html_url": "https://try.gogs.io/zork/digits",
|
||||
"ssh_url": "git@try.gogs.io:zork/digits.git",
|
||||
"clone_url": "https://try.gogs.io/zork/digits.git",
|
||||
"website": "",
|
||||
"stars_count": 0,
|
||||
"forks_count": 1,
|
||||
"watchers_count": 1,
|
||||
"open_issues_count": 0,
|
||||
"default_branch": "master",
|
||||
"created_at": "2015-03-03T22:47:56Z",
|
||||
"updated_at": "2022-03-26T07:28:38Z",
|
||||
"permissions": {
|
||||
"admin": false,
|
||||
"push": false,
|
||||
"pull": true
|
||||
}
|
||||
},
|
||||
"empty": false,
|
||||
"mirror": false,
|
||||
"size": 18432,
|
||||
"html_url": "https://try.gogs.io/yinheli/digits",
|
||||
"ssh_url": "git@try.gogs.io:yinheli/digits.git",
|
||||
"clone_url": "https://try.gogs.io/yinheli/digits.git",
|
||||
"website": "",
|
||||
"stars_count": 0,
|
||||
"forks_count": 0,
|
||||
"watchers_count": 1,
|
||||
"open_issues_count": 0,
|
||||
"default_branch": "master",
|
||||
"created_at": "2015-03-06T01:31:17Z",
|
||||
"updated_at": "2022-03-26T07:28:38Z"
|
||||
}
|
||||
],
|
||||
"ok": true
|
||||
}
|
1
swh/lister/gogs/tests/data/https_try.gogs.io/repos_page4
Normal file
1
swh/lister/gogs/tests/data/https_try.gogs.io/repos_page4
Normal file
|
@ -0,0 +1 @@
|
|||
{"data":[{"id":380,"owner":{"id":653,"username":"gdr","login":"gdr","full_name":"","email":"gdr@gdr.name","avatar_url":"https://secure.gravatar.com/avatar/237e2bf0a3687301ed4ef3c65e56c672?d=identicon"},"name":"ttrss-af_nofacebook","full_name":"gdr/ttrss-af_nofacebook","description":"Tiny Tiny RSS plugin for removing Facebook's l.php links","private":false,"fork":false,"parent":null,"empty":false,"mirror":true,"size":4096,"html_url":"https://try.gogs.io/gdr/ttrss-af_nofacebook","ssh_url":"git@try.gogs.io:gdr/ttrss-af_nofacebook.git","clone_url":"https://try.gogs.io/gdr/ttrss-af_nofacebook.git","website":"","stars_count":0,"forks_count":0,"watchers_count":1,"open_issues_count":0,"default_branch":"master","created_at":"2015-03-06T11:39:30Z","updated_at":"2022-03-26T07:28:38Z"},{"id":399,"owner":{"id":683,"username":"brejoc","login":"brejoc","full_name":"","email":"brejoc@gmail.com","avatar_url":"https://secure.gravatar.com/avatar/80674ca691e4a325d8bff1977a1d881d?d=identicon"},"name":"gosgp","full_name":"brejoc/gosgp","description":"Command line SuperGenPass password generator written in go.","private":false,"fork":false,"parent":null,"empty":false,"mirror":false,"size":35840,"html_url":"https://try.gogs.io/brejoc/gosgp","ssh_url":"git@try.gogs.io:brejoc/gosgp.git","clone_url":"https://try.gogs.io/brejoc/gosgp.git","website":"","stars_count":0,"forks_count":2,"watchers_count":1,"open_issues_count":1,"default_branch":"master","created_at":"2015-03-09T12:15:24Z","updated_at":"2022-03-26T07:28:38Z"},{"id":403,"owner":{"id":687,"username":"mirdhyn","login":"mirdhyn","full_name":"","email":"mirdhyn@gmail.com","avatar_url":"https://secure.gravatar.com/avatar/047818f3fffe0df833958ea40a25fd5c?d=identicon"},"name":"gosgp","full_name":"mirdhyn/gosgp","description":"Command line SuperGenPass password generator written in 
go.","private":false,"fork":true,"parent":{"id":399,"owner":{"id":683,"username":"brejoc","login":"brejoc","full_name":"","email":"brejoc@gmail.com","avatar_url":"https://secure.gravatar.com/avatar/80674ca691e4a325d8bff1977a1d881d?d=identicon"},"name":"gosgp","full_name":"brejoc/gosgp","description":"Command line SuperGenPass password generator written in go.","private":false,"fork":false,"parent":null,"empty":false,"mirror":false,"size":35840,"html_url":"https://try.gogs.io/brejoc/gosgp","ssh_url":"git@try.gogs.io:brejoc/gosgp.git","clone_url":"https://try.gogs.io/brejoc/gosgp.git","website":"","stars_count":0,"forks_count":2,"watchers_count":1,"open_issues_count":1,"default_branch":"master","created_at":"2015-03-09T12:15:24Z","updated_at":"2022-03-26T07:28:38Z","permissions":{"admin":false,"push":false,"pull":true}},"empty":false,"mirror":false,"size":48128,"html_url":"https://try.gogs.io/mirdhyn/gosgp","ssh_url":"git@try.gogs.io:mirdhyn/gosgp.git","clone_url":"https://try.gogs.io/mirdhyn/gosgp.git","website":"","stars_count":0,"forks_count":0,"watchers_count":1,"open_issues_count":0,"default_branch":"master","created_at":"2015-03-09T21:34:21Z","updated_at":"2022-03-26T07:28:38Z"}],"ok":true}
|
|
@ -11,46 +11,84 @@ from unittest.mock import Mock
|
|||
import pytest
|
||||
from requests import HTTPError
|
||||
|
||||
from swh.lister.gogs.lister import GogsLister
|
||||
from swh.lister.gogs.lister import GogsLister, GogsListerPage, _parse_page_id
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
TRY_GOGS_URL = "https://try.gogs.io/api/v1/"
|
||||
|
||||
|
||||
def try_gogs_page(n: int):
|
||||
return TRY_GOGS_URL + f"repos/search?page={n}&limit=3"
|
||||
return TRY_GOGS_URL + GogsLister.REPO_LIST_PATH + f"?page={n}&limit=3"
|
||||
|
||||
|
||||
P1 = try_gogs_page(1)
|
||||
P2 = try_gogs_page(2)
|
||||
P3 = try_gogs_page(3)
|
||||
P4 = try_gogs_page(4)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def trygogs_p1(datadir):
|
||||
text = Path(datadir, "https_try.gogs.io", "repos_page1").read_text()
|
||||
headers = {
|
||||
"Link": '<{p2}>; rel="next",<{p2}>; rel="last"'.format(p2=try_gogs_page(2))
|
||||
}
|
||||
page_result = GogsLister.results_simplified(json.loads(text))
|
||||
origin_urls = [r["clone_url"] for r in page_result]
|
||||
headers = {"Link": f'<{P2}>; rel="next"'}
|
||||
page_result = GogsListerPage(
|
||||
repos=GogsLister.extract_repos(json.loads(text)), next_link=P2
|
||||
)
|
||||
origin_urls = [r["clone_url"] for r in page_result.repos]
|
||||
return text, headers, page_result, origin_urls
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def trygogs_p2(datadir):
|
||||
text = Path(datadir, "https_try.gogs.io", "repos_page2").read_text()
|
||||
headers = {
|
||||
"Link": '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=try_gogs_page(1))
|
||||
}
|
||||
page_result = GogsLister.results_simplified(json.loads(text))
|
||||
origin_urls = [r["clone_url"] for r in page_result]
|
||||
headers = {"Link": f'<{P3}>; rel="next",<{P1}>; rel="prev"'}
|
||||
page_result = GogsListerPage(
|
||||
repos=GogsLister.extract_repos(json.loads(text)), next_link=P3
|
||||
)
|
||||
origin_urls = [r["clone_url"] for r in page_result.repos]
|
||||
return text, headers, page_result, origin_urls
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def trygogs_empty_page():
|
||||
def trygogs_p3(datadir):
    """Page 3 of the try.gogs.io data when a following page (P4) exists.

    Returns a ``(text, headers, page_result, origin_urls)`` tuple: the raw
    response body, the response headers, the page the lister is expected to
    yield for that response, and the clone urls of the repos on the page.
    """
    text = Path(datadir, "https_try.gogs.io", "repos_page3").read_text()
    headers = {"Link": f'<{P4}>; rel="next",<{P2}>; rel="prev"'}
    # The expected page must carry the rel="next" target of the Link header.
    page_result = GogsListerPage(
        repos=GogsLister.extract_repos(json.loads(text)), next_link=P4
    )
    origin_urls = [r["clone_url"] for r in page_result.repos]
    return text, headers, page_result, origin_urls
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def trygogs_p4(datadir):
    """Page 4 of the try.gogs.io data, served as the last page.

    Returns a ``(text, headers, page_result, origin_urls)`` tuple: the raw
    response body, the response headers, the page the lister is expected to
    yield for that response, and the clone urls of the repos on the page.
    """
    text = Path(datadir, "https_try.gogs.io", "repos_page4").read_text()
    headers = {"Link": f'<{P3}>; rel="prev"'}
    # The Link header has no rel="next" entry, so the expected page has no
    # next link either.
    page_result = GogsListerPage(
        repos=GogsLister.extract_repos(json.loads(text)), next_link=None
    )
    origin_urls = [r["clone_url"] for r in page_result.repos]
    return text, headers, page_result, origin_urls
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def trygogs_p3_last(datadir):
    """Page 3 of the try.gogs.io data, served as the last page.

    Returns a ``(text, headers, page_result, origin_urls)`` tuple; the Link
    header has no rel="next" entry and the expected page has no next link.
    """
    raw_body = Path(datadir, "https_try.gogs.io", "repos_page3").read_text()
    link_header = f'<{P2}>; rel="prev",<{P1}>; rel="first"'

    repos = GogsLister.extract_repos(json.loads(raw_body))
    expected_page = GogsListerPage(repos=repos, next_link=None)
    clone_urls = [repo["clone_url"] for repo in expected_page.repos]

    return raw_body, {"Link": link_header}, expected_page, clone_urls
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def trygogs_p3_empty():
|
||||
origins_urls = []
|
||||
page_result = {"data": [], "ok": True}
|
||||
headers = {
|
||||
"Link": '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=try_gogs_page(1))
|
||||
}
|
||||
text = json.dumps(page_result)
|
||||
body = {"data": [], "ok": True}
|
||||
headers = {"Link": f'<{P2}>; rel="prev",<{P1}>; rel="first"'}
|
||||
page_result = GogsListerPage(repos=GogsLister.extract_repos(body), next_link=None)
|
||||
text = json.dumps(body)
|
||||
return text, headers, page_result, origins_urls
|
||||
|
||||
|
||||
|
@ -69,7 +107,7 @@ def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedO
|
|||
|
||||
|
||||
def test_gogs_full_listing(
|
||||
swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_empty_page
|
||||
swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_p3_last
|
||||
):
|
||||
kwargs = dict(
|
||||
url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret"
|
||||
|
@ -80,84 +118,212 @@ def test_gogs_full_listing(
|
|||
|
||||
p1_text, p1_headers, p1_result, p1_origin_urls = trygogs_p1
|
||||
p2_text, p2_headers, p2_result, p2_origin_urls = trygogs_p2
|
||||
p3_text, p3_headers, _, _ = trygogs_empty_page
|
||||
p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3_last
|
||||
|
||||
requests_mock.get(try_gogs_page(1), text=p1_text, headers=p1_headers)
|
||||
requests_mock.get(try_gogs_page(2), text=p2_text, headers=p2_headers)
|
||||
requests_mock.get(try_gogs_page(3), text=p3_text, headers=p3_headers)
|
||||
requests_mock.get(P1, text=p1_text, headers=p1_headers)
|
||||
requests_mock.get(P2, text=p2_text, headers=p2_headers)
|
||||
requests_mock.get(P3, text=p3_text, headers=p3_headers)
|
||||
|
||||
stats = lister.run()
|
||||
|
||||
assert stats.pages == 2
|
||||
assert stats.origins == 6
|
||||
assert stats.pages == 3
|
||||
assert stats.origins == 9
|
||||
|
||||
calls = [mocker.call(p1_result), mocker.call(p2_result)]
|
||||
lister.get_origins_from_page.assert_has_calls(calls)
|
||||
calls = map(mocker.call, [p1_result, p2_result, p3_result])
|
||||
lister.get_origins_from_page.assert_has_calls(list(calls))
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
|
||||
check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins)
|
||||
check_listed_origins(
|
||||
p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
|
||||
)
|
||||
|
||||
assert lister.get_state_from_scheduler() is None
|
||||
assert (
|
||||
lister.get_state_from_scheduler().last_seen_next_link == P3
|
||||
) # P3 didn't provide any next link so it remains the last_seen_next_link
|
||||
|
||||
|
||||
def test_gogs_auth_instance(
|
||||
swh_scheduler, requests_mock, trygogs_p1, trygogs_empty_page
|
||||
swh_scheduler, requests_mock, trygogs_p1, trygogs_p2, trygogs_p3_empty
|
||||
):
|
||||
"""Covers token authentication, token from credentials,
|
||||
instance inference from URL."""
|
||||
|
||||
api_token = "secret"
|
||||
instance = "try.gogs.io"
|
||||
creds = {"gogs": {instance: [{"username": "u", "password": api_token}]}}
|
||||
instance = "try_gogs"
|
||||
|
||||
kwargs1 = dict(url=TRY_GOGS_URL, api_token=api_token, instance=instance)
|
||||
lister = GogsLister(scheduler=swh_scheduler, **kwargs1)
|
||||
# Test lister initialization without api_token or credentials:
|
||||
with pytest.raises(ValueError, match="No credentials or API token provided"):
|
||||
kwargs1 = dict(url=TRY_GOGS_URL, instance=instance)
|
||||
GogsLister(scheduler=swh_scheduler, **kwargs1)
|
||||
|
||||
# test API token
|
||||
assert "Authorization" in lister.session.headers
|
||||
# Test lister initialization using api_token:
|
||||
kwargs2 = dict(url=TRY_GOGS_URL, api_token=api_token, instance=instance)
|
||||
lister = GogsLister(scheduler=swh_scheduler, **kwargs2)
|
||||
assert lister.session.headers["Authorization"].lower() == "token %s" % api_token
|
||||
|
||||
with pytest.raises(ValueError, match="No credentials or API token provided"):
|
||||
kwargs2 = dict(url=TRY_GOGS_URL, instance=instance)
|
||||
GogsLister(scheduler=swh_scheduler, **kwargs2)
|
||||
|
||||
# Test lister initialization with credentials and run it:
|
||||
creds = {"gogs": {instance: [{"username": "u", "password": api_token}]}}
|
||||
kwargs3 = dict(url=TRY_GOGS_URL, credentials=creds, instance=instance, page_size=3)
|
||||
lister = GogsLister(scheduler=swh_scheduler, **kwargs3)
|
||||
|
||||
# test API token from credentials
|
||||
assert "Authorization" in lister.session.headers
|
||||
assert lister.session.headers["Authorization"].lower() == "token %s" % api_token
|
||||
|
||||
# test instance inference from URL
|
||||
assert lister.instance
|
||||
assert "gogs" in lister.instance
|
||||
assert lister.instance == "try_gogs"
|
||||
|
||||
# setup requests mocking
|
||||
p1_text, p1_headers, _, _ = trygogs_p1
|
||||
p2_text, p2_headers, _, _ = trygogs_empty_page
|
||||
p2_text, p2_headers, _, _ = trygogs_p2
|
||||
p3_text, p3_headers, _, _ = trygogs_p3_empty
|
||||
|
||||
base_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
|
||||
requests_mock.get(base_url, text=p1_text, headers=p1_headers)
|
||||
requests_mock.get(try_gogs_page(2), text=p2_text, headers=p2_headers)
|
||||
# now check the lister runs without error
|
||||
requests_mock.get(P1, text=p1_text, headers=p1_headers)
|
||||
requests_mock.get(P2, text=p2_text, headers=p2_headers)
|
||||
requests_mock.get(P3, text=p3_text, headers=p3_headers)
|
||||
|
||||
# lister should run without any error and extract the origins
|
||||
stats = lister.run()
|
||||
|
||||
assert stats.pages == 2
|
||||
assert stats.origins == 3
|
||||
assert stats.pages == 3
|
||||
assert stats.origins == 6
|
||||
|
||||
|
||||
@pytest.mark.parametrize("http_code", [400, 500, 502])
|
||||
def test_gogs_list_http_error(swh_scheduler, requests_mock, http_code):
|
||||
def test_gogs_list_http_error(
|
||||
swh_scheduler, requests_mock, http_code, trygogs_p1, trygogs_p3_last
|
||||
):
|
||||
"""Test handling of some HTTP errors commonly encountered"""
|
||||
|
||||
lister = GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token="secret")
|
||||
|
||||
p1_text, p1_headers, _, p1_origin_urls = trygogs_p1
|
||||
p3_text, p3_headers, _, _ = trygogs_p3_last
|
||||
|
||||
base_url = TRY_GOGS_URL + lister.REPO_LIST_PATH
|
||||
requests_mock.get(base_url, status_code=http_code)
|
||||
requests_mock.get(
|
||||
base_url,
|
||||
[
|
||||
{"text": p1_text, "headers": p1_headers, "status_code": 200},
|
||||
{"status_code": http_code},
|
||||
{"text": p3_text, "headers": p3_headers, "status_code": 200},
|
||||
],
|
||||
)
|
||||
|
||||
with pytest.raises(HTTPError):
|
||||
lister.run()
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
assert len(scheduler_origins) == 0
|
||||
check_listed_origins(
|
||||
p1_origin_urls, scheduler_origins
|
||||
) # Only the first page is listed
|
||||
|
||||
|
||||
def test_gogs_incremental_lister(
    swh_scheduler,
    requests_mock,
    mocker,
    trygogs_p1,
    trygogs_p2,
    trygogs_p3,
    trygogs_p3_last,
    trygogs_p3_empty,
    trygogs_p4,
):
    """Run the same lister four times against an instance that grows.

    Attempt 1 lists P1 and P2 (P3 is empty), attempt 2 finds P3 populated,
    attempt 3 finds nothing new, and attempt 4 discovers a new page P4.
    After each attempt the state stored in the scheduler and the ``updated``
    flag are checked.
    """
    kwargs = dict(
        url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret"
    )
    lister = GogsLister(scheduler=swh_scheduler, **kwargs)

    lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page")

    # First listing attempt: P1 and P2 return 3 origins each
    # while P3 (current last page) is empty.

    p1_text, p1_headers, p1_result, p1_origin_urls = trygogs_p1
    p2_text, p2_headers, p2_result, p2_origin_urls = trygogs_p2
    p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3_empty

    requests_mock.get(P1, text=p1_text, headers=p1_headers)
    requests_mock.get(P2, text=p2_text, headers=p2_headers)
    requests_mock.get(P3, text=p3_text, headers=p3_headers)

    attempt1_stats = lister.run()
    assert attempt1_stats.pages == 3
    assert attempt1_stats.origins == 6

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    lister_state = lister.get_state_from_scheduler()
    assert lister_state.last_seen_next_link == P3
    assert lister_state.last_seen_repo_id == p2_result.repos[-1]["id"]
    assert lister.updated

    check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins)

    lister.updated = False  # Reset the flag

    # Second listing attempt: P3 isn't empty anymore.
    # The lister should restart from last state and hence revisit P3.
    p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3_last
    requests_mock.get(P3, text=p3_text, headers=p3_headers)

    lister.session.get = mocker.spy(lister.session, "get")

    attempt2_stats = lister.run()

    assert attempt2_stats.pages == 1
    assert attempt2_stats.origins == 3

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    page_id = _parse_page_id(lister_state.last_seen_next_link)
    # Build the expected params as a copy: writing "page" into
    # lister.query_params itself would alter the lister under test.
    query_params = {**lister.query_params, "page": page_id}

    lister.session.get.assert_called_once_with(
        TRY_GOGS_URL + lister.REPO_LIST_PATH, params=query_params
    )

    # All the 9 origins (3 pages) should be passed on to the scheduler:
    check_listed_origins(
        p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins
    )
    lister_state = lister.get_state_from_scheduler()
    assert lister_state.last_seen_next_link == P3
    assert lister_state.last_seen_repo_id == p3_result.repos[-1]["id"]
    assert lister.updated

    lister.updated = False  # Reset the flag

    # Third listing attempt: No new origins
    # The lister should revisit last seen page (P3)
    attempt3_stats = lister.run()

    assert attempt3_stats.pages == 1
    assert attempt3_stats.origins == 3

    lister_state = lister.get_state_from_scheduler()
    assert lister_state.last_seen_next_link == P3
    assert lister_state.last_seen_repo_id == p3_result.repos[-1]["id"]
    assert lister.updated is False  # No new origins so state isn't updated.

    # Fourth listing attempt: Page 4 is introduced and returns 3 new origins
    # The lister should revisit last seen page (P3) as well as P4.
    p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3  # new P3 points to P4
    p4_text, p4_headers, p4_result, p4_origin_urls = trygogs_p4

    requests_mock.get(P3, text=p3_text, headers=p3_headers)
    requests_mock.get(P4, text=p4_text, headers=p4_headers)

    attempt4_stats = lister.run()

    assert attempt4_stats.pages == 2
    assert attempt4_stats.origins == 6

    lister_state = lister.get_state_from_scheduler()
    assert lister_state.last_seen_next_link == P4
    assert lister_state.last_seen_repo_id == p4_result.repos[-1]["id"]
    assert lister.updated

    # All the 12 origins (4 pages) should be passed on to the scheduler:
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    check_listed_origins(
        p1_origin_urls + p2_origin_urls + p3_origin_urls + p4_origin_urls,
        scheduler_origins,
    )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue