Hook up listers implemented with the new pattern to the CLI

We stop depending on the ListerBase implementation. The main hoop we're jumping
through is the config override mechanism in swh.lister.get_lister, as it's
really specifc to the ListerBase `override_config` argument, which is dropped in
pattern.Lister (in favor of explicit arguments at lister instantiation).

We implement a small shim in swh.lister.pattern.Lister to give
backwards-compatibility for the new pattern to get_lister.

This generic configuration override mechanism will probably be completely
removed when the configuration mechanism is reworked. We'll see.
This commit is contained in:
Nicolas Dandrimont 2020-07-16 11:59:08 +02:00
parent 9e083c1eea
commit 525fc0102d
3 changed files with 39 additions and 14 deletions

View file

@ -7,6 +7,8 @@ import logging
import pkg_resources
from swh.lister import pattern
logger = logging.getLogger(__name__)
@ -51,5 +53,8 @@ def get_lister(lister_name, db_url=None, **conf):
registry_entry = LISTERS[lister_name].load()()
lister_cls = registry_entry["lister"]
lister = lister_cls(override_config=conf)
return lister
if issubclass(lister_cls, pattern.Lister):
return lister_cls.from_config(**conf)
else:
# Old-style lister
return lister_cls(override_config=conf)

View file

@ -6,7 +6,7 @@
from dataclasses import dataclass
from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, TypeVar
from swh.scheduler import model
from swh.scheduler import get_scheduler, model
from swh.scheduler.interface import SchedulerInterface
@ -222,3 +222,27 @@ class Lister(Generic[StateType, PageType]):
"""
ret = self.scheduler.record_listed_origins(origins)
return len(ret)
@classmethod
def from_config(cls, scheduler: Dict[str, Any], **config: Any):
"""Instantiate a lister from a configuration dict.
This is basically a backwards-compatibility shim for the CLI.
Args:
scheduler: instantiation config for the scheduler
config: the configuration dict for the lister, with the following keys:
- credentials (optional): credentials list for the scheduler
- any other kwargs passed to the lister.
Returns:
the instantiated lister
"""
# Drop the legacy config keys which aren't used for this generation of listers.
for legacy_key in ("storage", "lister", "celery"):
config.pop(legacy_key, None)
# Instantiate the scheduler
scheduler_instance = get_scheduler(**scheduler)
return cls(scheduler=scheduler_instance, **config)

View file

@ -6,7 +6,6 @@
import pytest
from swh.lister.cli import SUPPORTED_LISTERS, get_lister
from swh.lister.core.lister_base import ListerBase
from .test_utils import init_db
@ -19,17 +18,16 @@ def test_get_lister_wrong_input():
assert "Invalid lister" in str(e.value)
def test_get_lister():
def test_get_lister(swh_scheduler_config):
"""Instantiating a supported lister should be ok
"""
db_url = init_db().url()
# exclude listers because they need special instantiation treatment unrelated to
# this test (launchpad: network mock, gnu: scheduler load task)
listers_to_instantiate = set(SUPPORTED_LISTERS) - {"launchpad", "gnu"}
for lister_name in listers_to_instantiate:
lst = get_lister(lister_name, db_url)
assert isinstance(lst, ListerBase)
for lister_name in SUPPORTED_LISTERS:
lst = get_lister(
lister_name, db_url, scheduler={"cls": "local", **swh_scheduler_config}
)
assert hasattr(lst, "run")
def test_get_lister_override():
@ -47,9 +45,7 @@ def test_get_lister_override():
# check the override ends up defined in the lister
for lister_name, url in listers.items():
lst = get_lister(
lister_name,
db_url,
**{"url": url, "priority": "high", "policy": "oneshot",},
lister_name, db_url, url=url, priority="high", policy="oneshot"
)
assert lst.url == url