plugins: add support for scheduler's task-type declaration

Add a new register-task-types cli command that creates missing task-type entries
in the scheduler according to the following rules:

- only create missing task-types (do not update them), but check that the
  backend_name field is consistent,
- each SWHTask-based task declared in a module listed in the 'task_modules'
  plugin registry field will be checked and added if needed; tasks whose name
  starts with an underscore will not be added,
- added task-type will have:
  - the 'type' field is derived from the task's function name (with underscores
    replaced with dashes),
  - the description field is the first line of that function's docstring,
  - default values as provided by the swh.lister.cli.DEFAULT_TASK_TYPE (with
    a simple pattern matching to have decent default values for full/incremental
    tasks),
  - these default values can be overloaded via the 'task_type' plugin registry
    entry.

For this, we had to rename all task names (e.g. `cran_lister` -> `list_cran`).

Comes with some tests.
This commit is contained in:
David Douard 2019-09-03 14:39:06 +02:00
parent e3c0ea9d90
commit 8d9deeb8f8
14 changed files with 233 additions and 41 deletions

View file

@ -3,12 +3,43 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import glob
import pytest
import traceback
from datetime import timedelta
import yaml
from swh.core.utils import numfile_sortkey as sortkey
from swh.scheduler import get_scheduler
from swh.scheduler.tests.conftest import DUMP_FILES
from swh.lister.core.lister_base import ListerBase
from swh.lister.cli import get_lister, SUPPORTED_LISTERS
from swh.lister.cli import lister as cli, get_lister, SUPPORTED_LISTERS
from .test_utils import init_db
from click.testing import CliRunner
@pytest.fixture
def swh_scheduler_config(request, postgresql_proc, postgresql):
    """Provide a scheduler configuration backed by the test database.

    Every file matching ``DUMP_FILES`` is read and executed through a cursor
    of the ``postgresql`` connection, in ``numfile_sortkey`` order, and the
    connection is committed before the configuration is handed out.

    Returns:
        dict: a mapping with a single ``'db'`` key holding the connection
        URL built from the host and port of ``postgresql_proc``.
    """
    db_url = 'postgresql://{user}@{host}:{port}/{dbname}'.format(
        user='postgres',
        host=postgresql_proc.host,
        port=postgresql_proc.port,
        dbname='tests',
    )

    sql_cursor = postgresql.cursor()
    for dump_path in sorted(glob.glob(DUMP_FILES), key=sortkey):
        # Read the whole dump first, then run it as a single statement batch.
        with open(dump_path) as dump_file:
            sql_statements = dump_file.read()
        sql_cursor.execute(sql_statements)
    postgresql.commit()

    return {'db': db_url}
def test_get_lister_wrong_input():
@ -64,3 +95,47 @@ def test_get_lister_override():
assert url_key not in lst.config
assert 'priority' not in lst.config
assert 'oneshot' not in lst.config
def test_task_types(swh_scheduler_config, tmp_path):
    """Check that ``register-task-types`` declares the expected task types.

    The test writes a configuration file pointing at a 'local' scheduler,
    invokes the lister cli with the ``register-task-types`` command, then
    looks each expected task type up in the scheduler and checks its
    ``type``, ``backoff_factor`` and ``default_interval`` fields.

    Args:
        swh_scheduler_config: scheduler arguments, used both in the
            generated configuration file and to instantiate the scheduler
            queried by the assertions.
        tmp_path: directory in which the configuration file is written.
    """
    db_url = init_db().url()

    configfile = tmp_path / 'config.yml'
    configfile.write_text(yaml.dump({'scheduler': {
        'cls': 'local',
        'args': swh_scheduler_config}}))

    runner = CliRunner()
    result = runner.invoke(cli, [
        '--db-url', db_url,
        '--config-file', configfile.as_posix(),
        'register-task-types'])

    # traceback.print_exception() returns None, which would leave the
    # assertion without any message; format_exception() returns the traceback
    # lines so they end up in the failure report itself.
    assert result.exit_code == 0, ''.join(
        traceback.format_exception(*result.exc_info))

    scheduler = get_scheduler(cls='local', args=swh_scheduler_config)
    all_tasks = [
        'list-bitbucket-full', 'list-bitbucket-incremental',
        'list-cran',
        'list-cgit',
        'list-debian-distribution',
        'list-gitlab-full', 'list-gitlab-incremental',
        'list-github-full', 'list-github-incremental',
        'list-gnu-full',
        'list-npm-full', 'list-npm-incremental',
        'list-phabricator-full',
        'list-packagist',
        'list-pypi',
    ]
    for task in all_tasks:
        task_type_desc = scheduler.get_task_type(task)
        assert task_type_desc
        assert task_type_desc['type'] == task
        assert task_type_desc['backoff_factor'] == 1

        if task == 'list-npm-full':
            delay = timedelta(days=7)  # overloaded in the plugin registry
        elif task.endswith('-full'):
            delay = timedelta(days=90)  # default value for 'full' lister tasks
        else:
            delay = timedelta(days=1)  # default value for other lister tasks
        assert task_type_desc['default_interval'] == delay, task