diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index 399b840..d2b1bd5 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -99,7 +99,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]): def __init__( self, scheduler, - url: str, + url: Optional[str] = None, name: Optional[str] = "gitlab", instance: Optional[str] = None, credentials: Optional[CredentialsType] = None, @@ -113,7 +113,8 @@ class GitLabLister(Lister[GitLabListerState, PageResult]): self.LISTER_NAME = name super().__init__( scheduler=scheduler, - url=url.rstrip("/"), + # if url is not provided, url will be built in the `build_url` method + url=url.rstrip("/") if url else None, instance=instance, credentials=credentials, max_origins_per_page=max_origins_per_page, @@ -138,6 +139,11 @@ class GitLabLister(Lister[GitLabListerState, PageResult]): if api_token: self.session.headers["Authorization"] = f"Bearer {api_token}" + def build_url(self, instance: str) -> str: + """Build gitlab api url.""" + prefix_url = super().build_url(instance) + return f"{prefix_url}/api/v4" + def state_from_dict(self, d: Dict[str, Any]) -> GitLabListerState: return GitLabListerState(**d) diff --git a/swh/lister/gitlab/tests/test_lister.py b/swh/lister/gitlab/tests/test_lister.py index dc99402..f8dd504 100644 --- a/swh/lister/gitlab/tests/test_lister.py +++ b/swh/lister/gitlab/tests/test_lister.py @@ -22,17 +22,30 @@ logger = logging.getLogger(__name__) def api_url(instance: str) -> str: - return f"https://{instance}/api/v4/" + return f"https://{instance}/api/v4" def _match_request(request, lister_name="gitlab"): return request.headers.get("User-Agent") == USER_AGENT_TEMPLATE % lister_name +def test_lister_gitlab_fail_to_instantiate(swh_scheduler): + """Build a lister without its url nor its instance should raise""" + # while instantiating a gitlab lister is fine with the url or the instance... + assert GitLabLister(swh_scheduler, url="https://gitlab.com/api/v4") is not None + assert GitLabLister(swh_scheduler, instance="gitlab.fr") is not None + + # ... It will raise without any of those + with pytest.raises(ValueError, match="'url' or 'instance'"): + GitLabLister(swh_scheduler) + + def test_lister_gitlab(datadir, swh_scheduler, requests_mock): """Gitlab lister supports full listing""" instance = "gitlab.com" - lister = GitLabLister(swh_scheduler, url=api_url(instance), instance=instance) + lister = GitLabLister(swh_scheduler, instance=instance) + + assert lister.url == api_url(instance) response = gitlab_page_response(datadir, instance, 1) diff --git a/swh/lister/gogs/lister.py b/swh/lister/gogs/lister.py index cdc5576..381e410 100644 --- a/swh/lister/gogs/lister.py +++ b/swh/lister/gogs/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022 The Software Heritage developers +# Copyright (C) 2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -70,7 +70,7 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]): def __init__( self, scheduler: SchedulerInterface, - url: str, + url: Optional[str] = None, instance: Optional[str] = None, api_token: Optional[str] = None, page_size: int = 50, @@ -111,6 +111,11 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]): "No authentication token set in configuration, using anonymous mode" ) + def build_url(self, instance: str) -> str: + "Build gogs url out of the instance." + prefix_url = super().build_url(instance) + return f"{prefix_url}/api/v1/" + def state_from_dict(self, d: Dict[str, Any]) -> GogsListerState: return GogsListerState(**d) diff --git a/swh/lister/gogs/tests/test_lister.py b/swh/lister/gogs/tests/test_lister.py index c90c2bc..cf0dd98 100644 --- a/swh/lister/gogs/tests/test_lister.py +++ b/swh/lister/gogs/tests/test_lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022 The Software Heritage developers +# Copyright (C) 2022-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -99,13 +99,23 @@ def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedO assert set(lister_urls) == {origin.url for origin in scheduler_origins} +def test_lister_gogs_fail_to_instantiate(swh_scheduler): + """Build a lister without its url nor its instance should raise""" + # while instantiating a gogs lister is fine with the url or the instance... + assert GogsLister(swh_scheduler, url="https://try.gogs.io/api/v1") is not None + assert GogsLister(swh_scheduler, instance="try.gogs.io") is not None + + # ... It will raise without any of those + with pytest.raises(ValueError, match="'url' or 'instance'"): + GogsLister(swh_scheduler) + + def test_gogs_full_listing( swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_p3_last ): - kwargs = dict( - url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret" - ) + kwargs = dict(instance="try.gogs.io", page_size=3, api_token="secret") lister = GogsLister(scheduler=swh_scheduler, **kwargs) + assert lister.url == TRY_GOGS_URL lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page") diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py index 31dc4c6..7fb8680 100644 --- a/swh/lister/pattern.py +++ b/swh/lister/pattern.py @@ -104,7 +104,7 @@ class Lister(Generic[StateType, PageType]): def __init__( self, scheduler: SchedulerInterface, - url: str, + url: Optional[str] = None, instance: Optional[str] = None, credentials: CredentialsType = None, max_origins_per_page: Optional[int] = None, @@ -115,11 +115,23 @@ class Lister(Generic[StateType, PageType]): if not self.LISTER_NAME: raise ValueError("Must set the LISTER_NAME attribute on Lister classes") - self.url = url + self.url: str + if url is not None: + # Retro-compability with lister already instantiated out of a provided url + self.url = url + elif url is None and instance is not None: + # Allow lister to be instantiated simply with their type and instance + # (as in their domain like "gitlab.com", "git.garbaye.fr", ...) + self.url = self.build_url(instance) + else: + raise ValueError( + "Either 'url' or 'instance' parameter should be provided for listing." + ) + if instance is not None: self.instance = instance else: - self.instance = urlparse(url).netloc + self.instance = urlparse(self.url).netloc self.scheduler = scheduler @@ -152,6 +164,25 @@ class Lister(Generic[StateType, PageType]): self.max_origins_per_page = max_origins_per_page self.enable_origins = enable_origins + def build_url(self, instance: str) -> str: + """Optionally build the forge url to list. When the url is not provided in the + constructor, this method is called. This should compute the actual url to use to + list the forge. + + This is particularly useful for forges which uses an api. This simplifies the + cli calls to use. They should then only provide the instance (its domain). + + For example: + - gitlab: https://{instance}/api/v4 + - gitea: https://{instance}/api/v1 + - ... + + """ + scheme = urlparse(instance).scheme + if scheme: + raise ValueError("Instance should only be a net location.") + return f"https://{instance}" + @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) def http_request(self, url: str, method="GET", **kwargs) -> requests.Response: logger.debug("Fetching URL %s with params %s", url, kwargs.get("params")) diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py index 5000545..6fe2cca 100644 --- a/swh/lister/tests/test_cli.py +++ b/swh/lister/tests/test_cli.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2021 The Software Heritage developers +# Copyright (C) 2019-2023 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -17,14 +17,13 @@ lister_args = { "api_token": "bogus", }, "gitea": { - "url": "https://try.gitea.io/api/v1/", + "instance": "try.gitea.io", }, "tuleap": { "url": "https://tuleap.net", }, "gitlab": { - "url": "https://gitlab.ow2.org/api/v4", - "instance": "ow2", + "instance": "gitlab.ow2.org", }, "opam": {"url": "https://opam.ocaml.org", "instance": "opam"}, "maven": { @@ -32,7 +31,7 @@ lister_args = { "index_url": "http://indexes/export.fld", }, "gogs": { - "url": "https://try.gogs.io/", + "instance": "try.gogs.io", "api_token": "secret", }, "nixguix": { diff --git a/swh/lister/tests/test_pattern.py b/swh/lister/tests/test_pattern.py index 6dcd1d5..33dc3e7 100644 --- a/swh/lister/tests/test_pattern.py +++ b/swh/lister/tests/test_pattern.py @@ -24,6 +24,24 @@ class InstantiableLister(pattern.Lister[StateType, PageType]): return d +def test_instantiation_fail_to_instantiate(swh_scheduler): + """Instantiation without proper url or instance will raise.""" + # While instantiating with either a url or instance is fine... + InstantiableLister(scheduler=swh_scheduler, url="https://example.com") + InstantiableLister(scheduler=swh_scheduler, instance="example.com") + + # ... Instantiating will fail when: + # - no instance nor url parameters are provided to the constructor + with pytest.raises(ValueError, match="'url' or 'instance"): + InstantiableLister( + scheduler=swh_scheduler, + ) + + # - an instance, which is not in a net location format, is provided + with pytest.raises(ValueError, match="net location"): + InstantiableLister(scheduler=swh_scheduler, instance="http://example.com") + + def test_instantiation(swh_scheduler): lister = InstantiableLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com"