lister: Allow lister to build url out of the instance parameter

This pushes the rather elementary logic within the lister's scope. This will simplify
and unify cli call between lister and scheduler clis. This will also allow to reduce
erroneous operations which can happen for example in the add-forge-now.

With the following, we will only have to provide the type and the instance, then
everything will be scheduled properly.

Refs. swh/devel/swh-lister#4693
This commit is contained in:
Antoine R. Dumont (@ardumont) 2023-05-19 11:04:26 +02:00
parent 596e8c6c40
commit 19bdeefb14
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
7 changed files with 100 additions and 18 deletions

View file

@ -99,7 +99,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
def __init__(
self,
scheduler,
url: str,
url: Optional[str] = None,
name: Optional[str] = "gitlab",
instance: Optional[str] = None,
credentials: Optional[CredentialsType] = None,
@ -113,7 +113,8 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
self.LISTER_NAME = name
super().__init__(
scheduler=scheduler,
url=url.rstrip("/"),
# if url is not provided, url will be built in the `build_url` method
url=url.rstrip("/") if url else None,
instance=instance,
credentials=credentials,
max_origins_per_page=max_origins_per_page,
@ -138,6 +139,11 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
if api_token:
self.session.headers["Authorization"] = f"Bearer {api_token}"
def build_url(self, instance: str) -> str:
"""Build gitlab api url."""
prefix_url = super().build_url(instance)
return f"{prefix_url}/api/v4"
def state_from_dict(self, d: Dict[str, Any]) -> GitLabListerState:
return GitLabListerState(**d)

View file

@ -22,17 +22,30 @@ logger = logging.getLogger(__name__)
def api_url(instance: str) -> str:
return f"https://{instance}/api/v4/"
return f"https://{instance}/api/v4"
def _match_request(request, lister_name="gitlab"):
return request.headers.get("User-Agent") == USER_AGENT_TEMPLATE % lister_name
def test_lister_gitlab_fail_to_instantiate(swh_scheduler):
"""Build a lister without its url nor its instance should raise"""
# while instantiating a gitlab lister is fine with the url or the instance...
assert GitLabLister(swh_scheduler, url="https://gitlab.com/api/v4") is not None
assert GitLabLister(swh_scheduler, instance="gitlab.fr") is not None
# ... It will raise without any of those
with pytest.raises(ValueError, match="'url' or 'instance'"):
GitLabLister(swh_scheduler)
def test_lister_gitlab(datadir, swh_scheduler, requests_mock):
"""Gitlab lister supports full listing"""
instance = "gitlab.com"
lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance)
lister = GitLabLister(swh_scheduler, instance=instance)
assert lister.url == api_url(instance)
response = gitlab_page_response(datadir, instance, 1)

View file

@ -1,4 +1,4 @@
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -70,7 +70,7 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
def __init__(
self,
scheduler: SchedulerInterface,
url: str,
url: Optional[str] = None,
instance: Optional[str] = None,
api_token: Optional[str] = None,
page_size: int = 50,
@ -111,6 +111,11 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
"No authentication token set in configuration, using anonymous mode"
)
def build_url(self, instance: str) -> str:
"Build gogs url out of the instance."
prefix_url = super().build_url(instance)
return f"{prefix_url}/api/v1/"
def state_from_dict(self, d: Dict[str, Any]) -> GogsListerState:
return GogsListerState(**d)

View file

@ -1,4 +1,4 @@
# Copyright (C) 2022 The Software Heritage developers
# Copyright (C) 2022-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -99,13 +99,23 @@ def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedO
assert set(lister_urls) == {origin.url for origin in scheduler_origins}
def test_lister_gogs_fail_to_instantiate(swh_scheduler):
"""Build a lister without its url nor its instance should raise"""
# while instantiating a gogs lister is fine with the url or the instance...
assert GogsLister(swh_scheduler, url="https://try.gogs.io/api/v1") is not None
assert GogsLister(swh_scheduler, instance="try.gogs.io") is not None
# ... It will raise without any of those
with pytest.raises(ValueError, match="'url' or 'instance'"):
GogsLister(swh_scheduler)
def test_gogs_full_listing(
swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_p3_last
):
kwargs = dict(
url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret"
)
kwargs = dict(instance="try.gogs.io", page_size=3, api_token="secret")
lister = GogsLister(scheduler=swh_scheduler, **kwargs)
assert lister.url == TRY_GOGS_URL
lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page")

View file

@ -104,7 +104,7 @@ class Lister(Generic[StateType, PageType]):
def __init__(
self,
scheduler: SchedulerInterface,
url: str,
url: Optional[str] = None,
instance: Optional[str] = None,
credentials: CredentialsType = None,
max_origins_per_page: Optional[int] = None,
@ -115,11 +115,23 @@ class Lister(Generic[StateType, PageType]):
if not self.LISTER_NAME:
raise ValueError("Must set the LISTER_NAME attribute on Lister classes")
self.url = url
self.url: str
if url is not None:
# Retro-compability with lister already instantiated out of a provided url
self.url = url
elif url is None and instance is not None:
# Allow lister to be instantiated simply with their type and instance
# (as in their domain like "gitlab.com", "git.garbaye.fr", ...)
self.url = self.build_url(instance)
else:
raise ValueError(
"Either 'url' or 'instance' parameter should be provided for listing."
)
if instance is not None:
self.instance = instance
else:
self.instance = urlparse(url).netloc
self.instance = urlparse(self.url).netloc
self.scheduler = scheduler
@ -152,6 +164,25 @@ class Lister(Generic[StateType, PageType]):
self.max_origins_per_page = max_origins_per_page
self.enable_origins = enable_origins
def build_url(self, instance: str) -> str:
"""Optionally build the forge url to list. When the url is not provided in the
constructor, this method is called. This should compute the actual url to use to
list the forge.
This is particularly useful for forges which uses an api. This simplifies the
cli calls to use. They should then only provide the instance (its domain).
For example:
- gitlab: https://{instance}/api/v4
- gitea: https://{instance}/api/v1
- ...
"""
scheme = urlparse(instance).scheme
if scheme:
raise ValueError("Instance should only be a net location.")
return f"https://{instance}"
@http_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
def http_request(self, url: str, method="GET", **kwargs) -> requests.Response:
logger.debug("Fetching URL %s with params %s", url, kwargs.get("params"))

View file

@ -1,4 +1,4 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# Copyright (C) 2019-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -17,14 +17,13 @@ lister_args = {
"api_token": "bogus",
},
"gitea": {
"url": "https://try.gitea.io/api/v1/",
"instance": "try.gitea.io",
},
"tuleap": {
"url": "https://tuleap.net",
},
"gitlab": {
"url": "https://gitlab.ow2.org/api/v4",
"instance": "ow2",
"instance": "gitlab.ow2.org",
},
"opam": {"url": "https://opam.ocaml.org", "instance": "opam"},
"maven": {
@ -32,7 +31,7 @@ lister_args = {
"index_url": "http://indexes/export.fld",
},
"gogs": {
"url": "https://try.gogs.io/",
"instance": "try.gogs.io",
"api_token": "secret",
},
"nixguix": {

View file

@ -24,6 +24,24 @@ class InstantiableLister(pattern.Lister[StateType, PageType]):
return d
def test_instantiation_fail_to_instantiate(swh_scheduler):
"""Instantiation without proper url or instance will raise."""
# While instantiating with either a url or instance is fine...
InstantiableLister(scheduler=swh_scheduler, url="https://example.com")
InstantiableLister(scheduler=swh_scheduler, instance="example.com")
# ... Instantiating will fail when:
# - no instance nor url parameters are provided to the constructor
with pytest.raises(ValueError, match="'url' or 'instance"):
InstantiableLister(
scheduler=swh_scheduler,
)
# - an instance, which is not in a net location format, is provided
with pytest.raises(ValueError, match="net location"):
InstantiableLister(scheduler=swh_scheduler, instance="http://example.com")
def test_instantiation(swh_scheduler):
lister = InstantiableLister(
scheduler=swh_scheduler, url="https://example.com", instance="example.com"