opam: Share opam root directory even on multiple instances

That avoids having multiple distinct opam root directories per opam lister instance. The
current opam commands used by the lister are actually listing specifically per instance.

Related to P1171
This commit is contained in:
Antoine R. Dumont (@ardumont) 2021-09-21 18:18:49 +02:00
parent 5ab6b00408
commit e7716c0122
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
2 changed files with 123 additions and 26 deletions

View file

@ -7,7 +7,7 @@ import io
import logging
import os
from subprocess import PIPE, Popen, call
from typing import Iterator, Optional
from typing import Any, Dict, Iterator, Optional
from swh.lister.pattern import StatelessLister
from swh.scheduler.interface import SchedulerInterface
@ -53,24 +53,12 @@ class OpamLister(StatelessLister[PageType]):
self.env = os.environ.copy()
# Opam root folder is initialized in the :meth:`get_pages` method as no
# side-effect should happen in the constructor to ease instantiation
self.opamroot = os.path.join(opam_root, self.instance)
self.opam_root = opam_root
def get_pages(self) -> Iterator[PageType]:
# Initialize the opam root directory with the opam instance data to list.
call(
[
"opam",
"init",
"--reinit",
"--bare",
"--no-setup",
"--root",
self.opamroot,
self.instance,
self.url,
],
env=self.env,
)
# Initialize the opam root directory
opam_init(self.opam_root, self.instance, self.url, self.env)
# Actually list opam instance data
proc = Popen(
[
@ -78,10 +66,11 @@ class OpamLister(StatelessLister[PageType]):
"list",
"--all",
"--no-switch",
"--safe",
"--repos",
self.instance,
"--root",
self.opamroot,
self.opam_root,
"--normalise",
"--short",
],
@ -103,9 +92,50 @@ class OpamLister(StatelessLister[PageType]):
url=url,
last_update=None,
extra_loader_arguments={
"opam_root": self.opamroot,
"opam_root": self.opam_root,
"opam_instance": self.instance,
"opam_url": self.url,
"opam_package": page,
},
)
def opam_init(opam_root: str, instance: str, url: str, env: Dict[str, Any]) -> None:
"""Initialize an opam_root folder.
Args:
opam_root: The opam root folder to initialize
instance: Name of the opam repository to add or initialize
url: The associated url of the opam repository to add or initialize
env: The global environment to use for the opam command.
Returns:
None.
"""
if not os.path.exists(opam_root) or not os.listdir(opam_root):
command = [
"opam",
"init",
"--reinit",
"--bare",
"--no-setup",
"--root",
opam_root,
instance,
url,
]
else:
# The repository exists and is populated, we just add another instance in the
# repository. If it's already setup, it's a noop
command = [
"opam",
"repository",
"add",
"--root",
opam_root,
instance,
url,
]
# Actually execute the command
call(command, env=env)

View file

@ -4,12 +4,13 @@
# See top-level LICENSE file for more information
import io
import os
from tempfile import mkdtemp
from unittest.mock import MagicMock
import pytest
from swh.lister.opam.lister import OpamLister
from swh.lister.opam.lister import OpamLister, opam_init
module_name = "swh.lister.opam.lister"
@ -29,6 +30,45 @@ def mock_opam(mocker):
return mock_init, mock_open
def test_mock_init_repository_init(mock_opam, tmp_path, datadir):
"""Initializing opam root directory with an instance should be ok
"""
mock_init, mock_popen = mock_opam
instance = "fake"
instance_url = f"file://{datadir}/{instance}"
opam_root = str(tmp_path / "test-opam")
assert not os.path.exists(opam_root)
# This will initialize an opam directory with the instance
opam_init(opam_root, instance, instance_url, {})
assert mock_init.called
def test_mock_init_repository_update(mock_opam, tmp_path, datadir):
"""Updating opam root directory with another instance should be ok
"""
mock_init, mock_popen = mock_opam
instance = "fake_opam_repo"
instance_url = f"file://{datadir}/{instance}"
opam_root = str(tmp_path / "test-opam")
os.makedirs(opam_root, exist_ok=True)
with open(os.path.join(opam_root, "opam"), "w") as f:
f.write("one file to avoid empty folder")
assert os.path.exists(opam_root)
assert os.listdir(opam_root) == ["opam"] # not empty
# This will update the repository opam with another instance
opam_init(opam_root, instance, instance_url, {})
assert mock_init.called
def test_lister_opam_optional_instance(swh_scheduler):
"""Instance name should be optional and default to be built out of the netloc."""
netloc = "opam.ocaml.org"
@ -36,21 +76,19 @@ def test_lister_opam_optional_instance(swh_scheduler):
lister = OpamLister(swh_scheduler, url=instance_url,)
assert lister.instance == netloc
assert lister.opamroot.endswith(lister.instance)
assert lister.opam_root == "/tmp/opam/"
def test_urls(swh_scheduler, mock_opam):
mock_init, mock_popen = mock_opam
instance_url = "https://opam.ocaml.org"
tmp_folder = mkdtemp(prefix="swh_opam_lister")
lister = OpamLister(
swh_scheduler,
url=instance_url,
instance="opam",
opam_root=mkdtemp(prefix="swh_opam_lister"),
swh_scheduler, url=instance_url, instance="opam", opam_root=tmp_folder,
)
assert lister.instance == "opam"
assert lister.opam_root == tmp_folder
# call the lister and get all listed origins urls
stats = lister.run()
@ -101,3 +139,32 @@ def test_opam_binary(datadir, swh_scheduler):
result_urls = [origin.url for origin in scheduler_origins]
assert expected_urls == result_urls
def test_opam_multi_instance(datadir, swh_scheduler):
instance_url = f"file://{datadir}/fake_opam_repo"
lister = OpamLister(
swh_scheduler,
url=instance_url,
instance="fake",
opam_root=mkdtemp(prefix="swh_opam_lister"),
)
stats = lister.run()
assert stats.pages == 4
assert stats.origins == 4
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
expected_urls = [
f"opam+{instance_url}/packages/agrid/",
f"opam+{instance_url}/packages/calculon/",
f"opam+{instance_url}/packages/directories/",
f"opam+{instance_url}/packages/ocb/",
]
result_urls = [origin.url for origin in scheduler_origins]
assert expected_urls == result_urls