From e7716c0122e9c4a411ed54dde821b48939eb2ebb Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" Date: Tue, 21 Sep 2021 18:18:49 +0200 Subject: [PATCH] opam: Share opam root directory even on multiple instances That avoids having multiple distinct opam root directories per opam lister instance. The current opam commands used by the lister are actually listing specifically per instance. Related to P1171 --- swh/lister/opam/lister.py | 68 ++++++++++++++++------- swh/lister/opam/tests/test_lister.py | 81 +++++++++++++++++++++++++--- 2 files changed, 123 insertions(+), 26 deletions(-) diff --git a/swh/lister/opam/lister.py b/swh/lister/opam/lister.py index 486bdc2..4ad510e 100644 --- a/swh/lister/opam/lister.py +++ b/swh/lister/opam/lister.py @@ -7,7 +7,7 @@ import io import logging import os from subprocess import PIPE, Popen, call -from typing import Iterator, Optional +from typing import Any, Dict, Iterator, Optional from swh.lister.pattern import StatelessLister from swh.scheduler.interface import SchedulerInterface @@ -53,24 +53,12 @@ class OpamLister(StatelessLister[PageType]): self.env = os.environ.copy() # Opam root folder is initialized in the :meth:`get_pages` method as no # side-effect should happen in the constructor to ease instantiation - self.opamroot = os.path.join(opam_root, self.instance) + self.opam_root = opam_root def get_pages(self) -> Iterator[PageType]: - # Initialize the opam root directory with the opam instance data to list. - call( - [ - "opam", - "init", - "--reinit", - "--bare", - "--no-setup", - "--root", - self.opamroot, - self.instance, - self.url, - ], - env=self.env, - ) + # Initialize the opam root directory + opam_init(self.opam_root, self.instance, self.url, self.env) + # Actually list opam instance data proc = Popen( [ @@ -78,10 +66,11 @@ class OpamLister(StatelessLister[PageType]): "list", "--all", "--no-switch", + "--safe", "--repos", self.instance, "--root", - self.opamroot, + self.opam_root, "--normalise", "--short", ], @@ -103,9 +92,50 @@ class OpamLister(StatelessLister[PageType]): url=url, last_update=None, extra_loader_arguments={ - "opam_root": self.opamroot, + "opam_root": self.opam_root, "opam_instance": self.instance, "opam_url": self.url, "opam_package": page, }, ) + + +def opam_init(opam_root: str, instance: str, url: str, env: Dict[str, Any]) -> None: + """Initialize an opam_root folder. + + Args: + opam_root: The opam root folder to initialize + instance: Name of the opam repository to add or initialize + url: The associated url of the opam repository to add or initialize + env: The global environment to use for the opam command. + + Returns: + None. + + """ + if not os.path.exists(opam_root) or not os.listdir(opam_root): + command = [ + "opam", + "init", + "--reinit", + "--bare", + "--no-setup", + "--root", + opam_root, + instance, + url, + ] + else: + # The repository exists and is populated, we just add another instance in the + # repository. If it's already setup, it's a noop + command = [ + "opam", + "repository", + "add", + "--root", + opam_root, + instance, + url, + ] + # Actually execute the command + call(command, env=env) diff --git a/swh/lister/opam/tests/test_lister.py b/swh/lister/opam/tests/test_lister.py index ffa281a..0079d7a 100644 --- a/swh/lister/opam/tests/test_lister.py +++ b/swh/lister/opam/tests/test_lister.py @@ -4,12 +4,13 @@ # See top-level LICENSE file for more information import io +import os from tempfile import mkdtemp from unittest.mock import MagicMock import pytest -from swh.lister.opam.lister import OpamLister +from swh.lister.opam.lister import OpamLister, opam_init module_name = "swh.lister.opam.lister" @@ -29,6 +30,45 @@ def mock_opam(mocker): return mock_init, mock_open +def test_mock_init_repository_init(mock_opam, tmp_path, datadir): + """Initializing opam root directory with an instance should be ok + + """ + mock_init, mock_popen = mock_opam + + instance = "fake" + instance_url = f"file://{datadir}/{instance}" + opam_root = str(tmp_path / "test-opam") + assert not os.path.exists(opam_root) + + # This will initialize an opam directory with the instance + opam_init(opam_root, instance, instance_url, {}) + + assert mock_init.called + + +def test_mock_init_repository_update(mock_opam, tmp_path, datadir): + """Updating opam root directory with another instance should be ok + + """ + mock_init, mock_popen = mock_opam + + instance = "fake_opam_repo" + instance_url = f"file://{datadir}/{instance}" + opam_root = str(tmp_path / "test-opam") + + os.makedirs(opam_root, exist_ok=True) + with open(os.path.join(opam_root, "opam"), "w") as f: + f.write("one file to avoid empty folder") + + assert os.path.exists(opam_root) + assert os.listdir(opam_root) == ["opam"] # not empty + # This will update the repository opam with another instance + opam_init(opam_root, instance, instance_url, {}) + + assert mock_init.called + + def test_lister_opam_optional_instance(swh_scheduler): """Instance name should be optional and default to be built out of the netloc.""" netloc = "opam.ocaml.org" @@ -36,21 +76,19 @@ def test_lister_opam_optional_instance(swh_scheduler): lister = OpamLister(swh_scheduler, url=instance_url,) assert lister.instance == netloc - assert lister.opamroot.endswith(lister.instance) + assert lister.opam_root == "/tmp/opam/" def test_urls(swh_scheduler, mock_opam): mock_init, mock_popen = mock_opam - instance_url = "https://opam.ocaml.org" + tmp_folder = mkdtemp(prefix="swh_opam_lister") lister = OpamLister( - swh_scheduler, - url=instance_url, - instance="opam", - opam_root=mkdtemp(prefix="swh_opam_lister"), + swh_scheduler, url=instance_url, instance="opam", opam_root=tmp_folder, ) assert lister.instance == "opam" + assert lister.opam_root == tmp_folder # call the lister and get all listed origins urls stats = lister.run() @@ -101,3 +139,32 @@ def test_opam_binary(datadir, swh_scheduler): result_urls = [origin.url for origin in scheduler_origins] assert expected_urls == result_urls + + +def test_opam_multi_instance(datadir, swh_scheduler): + instance_url = f"file://{datadir}/fake_opam_repo" + + lister = OpamLister( + swh_scheduler, + url=instance_url, + instance="fake", + opam_root=mkdtemp(prefix="swh_opam_lister"), + ) + + stats = lister.run() + + assert stats.pages == 4 + assert stats.origins == 4 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + expected_urls = [ + f"opam+{instance_url}/packages/agrid/", + f"opam+{instance_url}/packages/calculon/", + f"opam+{instance_url}/packages/directories/", + f"opam+{instance_url}/packages/ocb/", + ] + + result_urls = [origin.url for origin in scheduler_origins] + + assert expected_urls == result_urls