From 19bdeefb147663dca5fea41bd80d5af771b50a7f Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" Date: Fri, 19 May 2023 11:04:26 +0200 Subject: [PATCH] lister: Allow lister to build url out of the instance parameter This pushes the rather elementary logic within the lister's scope. This will simplify and unify cli call between lister and scheduler clis. This will also allow to reduce erroneous operations which can happen for example in the add-forge-now. With the following, we will only have to provide the type and the instance, then everything will be scheduled properly. Refs. swh/devel/swh-lister#4693 --- swh/lister/gitlab/lister.py | 10 +++++-- swh/lister/gitlab/tests/test_lister.py | 17 ++++++++++-- swh/lister/gogs/lister.py | 9 +++++-- swh/lister/gogs/tests/test_lister.py | 18 ++++++++++--- swh/lister/pattern.py | 37 +++++++++++++++++++++++--- swh/lister/tests/test_cli.py | 9 +++---- swh/lister/tests/test_pattern.py | 18 +++++++++++++ 7 files changed, 100 insertions(+), 18 deletions(-) diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index 399b840..d2b1bd5 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -99,7 +99,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]): def __init__( self, scheduler, - url: str, + url: Optional[str] = None, name: Optional[str] = "gitlab", instance: Optional[str] = None, credentials: Optional[CredentialsType] = None, @@ -113,7 +113,8 @@ class GitLabLister(Lister[GitLabListerState, PageResult]): self.LISTER_NAME = name super().__init__( scheduler=scheduler, - url=url.rstrip("/"), + # if url is not provided, url will be built in the `build_url` method + url=url.rstrip("/") if url else None, instance=instance, credentials=credentials, max_origins_per_page=max_origins_per_page, @@ -138,6 +139,11 @@ class GitLabLister(Lister[GitLabListerState, PageResult]): if api_token: self.session.headers["Authorization"] = f"Bearer {api_token}" + def build_url(self, instance: str) -> str: + """Build gitlab api url.""" + prefix_url = super().build_url(instance) + return f"{prefix_url}/api/v4" + def state_from_dict(self, d: Dict[str, Any]) -> GitLabListerState: return GitLabListerState(**d) diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py index dc99402..f8dd504 100644 --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -22,17 +22,30 @@ logger = logging.getLogger(__name__) def api_url(instance: str) -> str: - return f"https://{instance}/api/v4/" + return f"https://{instance}/api/v4" def _match_request(request, lister_name="gitlab"): return request.headers.get("User-Agent") == USER_AGENT_TEMPLATE % lister_name +def test_lister_gitlab_fail_to_instantiate(swh_scheduler): + """Build a lister without its url nor its instance should raise""" + # while instantiating a gitlab lister is fine with the url or the instance... + assert GitLabLister(swh_scheduler, url="https://gitlab.com/api/v4") is not None + assert GitLabLister(swh_scheduler, instance="gitlab.fr") is not None + + # ... It will raise without any of those + with pytest.raises(ValueError, match="'url' or 'instance'"): + GitLabLister(swh_scheduler) + + def test_lister_gitlab(datadir, swh_scheduler, requests_mock): """Gitlab lister supports full listing""" instance = "gitlab.com" - lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance) + lister = GitLabLister(swh_scheduler, instance=instance) + + assert lister.url == api_url(instance) response = gitlab_page_response(datadir, instance, 1) diff --git a/swh/lister/gogs/lister.py b/swh/lister/gogs/lister.py index cdc5576..381e410 100644 --- a/swh/lister/gogs/lister.py +++ b/swh/lister/gogs/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022 The Software Heritage developers +# Copyright (C) 2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -70,7 +70,7 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]): def __init__( self, scheduler: SchedulerInterface, - url: str, + url: Optional[str] = None, instance: Optional[str] = None, api_token: Optional[str] = None, page_size: int = 50, @@ -111,6 +111,11 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]): "No authentication token set in configuration, using anonymous mode" ) + def build_url(self, instance: str) -> str: + "Build gogs url out of the instance." + prefix_url = super().build_url(instance) + return f"{prefix_url}/api/v1/" + def state_from_dict(self, d: Dict[str, Any]) -> GogsListerState: return GogsListerState(**d) diff --git a/swh/lister/gogs/tests/test_lister.py b/swh/lister/gogs/tests/test_lister.py index c90c2bc..cf0dd98 100644 --- a/swh/lister/gogs/tests/test_lister.py +++ b/swh/lister/gogs/tests/test_lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022 The Software Heritage developers +# Copyright (C) 2022-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -99,13 +99,23 @@ def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedO assert set(lister_urls) == {origin.url for origin in scheduler_origins} +def test_lister_gogs_fail_to_instantiate(swh_scheduler): + """Build a lister without its url nor its instance should raise""" + # while instantiating a gogs lister is fine with the url or the instance... + assert GogsLister(swh_scheduler, url="https://try.gogs.io/api/v1") is not None + assert GogsLister(swh_scheduler, instance="try.gogs.io") is not None + + # ... It will raise without any of those + with pytest.raises(ValueError, match="'url' or 'instance'"): + GogsLister(swh_scheduler) + + def test_gogs_full_listing( swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_p3_last ): - kwargs = dict( - url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret" - ) + kwargs = dict(instance="try.gogs.io", page_size=3, api_token="secret") lister = GogsLister(scheduler=swh_scheduler, **kwargs) + assert lister.url == TRY_GOGS_URL lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page") diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py index 31dc4c6..7fb8680 100644 --- a/swh/lister/pattern.py +++ b/swh/lister/pattern.py @@ -104,7 +104,7 @@ class Lister(Generic[StateType, PageType]): def __init__( self, scheduler: SchedulerInterface, - url: str, + url: Optional[str] = None, instance: Optional[str] = None, credentials: CredentialsType = None, max_origins_per_page: Optional[int] = None, @@ -115,11 +115,23 @@ class Lister(Generic[StateType, PageType]): if not self.LISTER_NAME: raise ValueError("Must set the LISTER_NAME attribute on Lister classes") - self.url = url + self.url: str + if url is not None: + # Retro-compability with lister already instantiated out of a provided url + self.url = url + elif url is None and instance is not None: + # Allow lister to be instantiated simply with their type and instance + # (as in their domain like "gitlab.com", "git.garbaye.fr", ...) + self.url = self.build_url(instance) + else: + raise ValueError( + "Either 'url' or 'instance' parameter should be provided for listing." + ) + if instance is not None: self.instance = instance else: - self.instance = urlparse(url).netloc + self.instance = urlparse(self.url).netloc self.scheduler = scheduler @@ -152,6 +164,25 @@ class Lister(Generic[StateType, PageType]): self.max_origins_per_page = max_origins_per_page self.enable_origins = enable_origins + def build_url(self, instance: str) -> str: + """Optionally build the forge url to list. When the url is not provided in the + constructor, this method is called. This should compute the actual url to use to + list the forge. + + This is particularly useful for forges which uses an api. This simplifies the + cli calls to use. They should then only provide the instance (its domain). + + For example: + - gitlab: https://{instance}/api/v4 + - gitea: https://{instance}/api/v1 + - ... + + """ + scheme = urlparse(instance).scheme + if scheme: + raise ValueError("Instance should only be a net location.") + return f"https://{instance}" + @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) def http_request(self, url: str, method="GET", **kwargs) -> requests.Response: logger.debug("Fetching URL %s with params %s", url, kwargs.get("params")) diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py index 5000545..6fe2cca 100644 --- a/swh/lister/tests/test_cli.py +++ b/swh/lister/tests/test_cli.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -17,14 +17,13 @@ lister_args = { "api_token": "bogus", }, "gitea": { - "url": "https://try.gitea.io/api/v1/", + "instance": "try.gitea.io", }, "tuleap": { "url": "https://tuleap.net", }, "gitlab": { - "url": "https://gitlab.ow2.org/api/v4", - "instance": "ow2", + "instance": "gitlab.ow2.org", }, "opam": {"url": "https://opam.ocaml.org", "instance": "opam"}, "maven": { @@ -32,7 +31,7 @@ lister_args = { "index_url": "http://indexes/export.fld", }, "gogs": { - "url": "https://try.gogs.io/", + "instance": "try.gogs.io", "api_token": "secret", }, "nixguix": { diff --git a/swh/lister/tests/test_pattern.py b/swh/lister/tests/test_pattern.py index 6dcd1d5..33dc3e7 100644 --- a/swh/lister/tests/test_pattern.py +++ b/swh/lister/tests/test_pattern.py @@ -24,6 +24,24 @@ class InstantiableLister(pattern.Lister[StateType, PageType]): return d +def test_instantiation_fail_to_instantiate(swh_scheduler): + """Instantiation without proper url or instance will raise.""" + # While instantiating with either a url or instance is fine... + InstantiableLister(scheduler=swh_scheduler, url="https://example.com") + InstantiableLister(scheduler=swh_scheduler, instance="example.com") + + # ... Instantiating will fail when: + # - no instance nor url parameters are provided to the constructor + with pytest.raises(ValueError, match="'url' or 'instance"): + InstantiableLister( + scheduler=swh_scheduler, + ) + + # - an instance, which is not in a net location format, is provided + with pytest.raises(ValueError, match="net location"): + InstantiableLister(scheduler=swh_scheduler, instance="http://example.com") + + def test_instantiation(swh_scheduler): lister = InstantiableLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com"