cgit.tests: Check the tasks from the scheduler

This commit is contained in:
Antoine R. Dumont (@ardumont) 2019-10-05 18:23:19 +02:00
parent 04ca318680
commit 394658e53b
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
6 changed files with 123 additions and 102 deletions

View file

@ -1,3 +1,3 @@
swh.core
swh.core >= 0.0.73
swh.storage[schemata] >= 0.0.122
swh.scheduler >= 0.0.58

View file

@ -0,0 +1,43 @@
# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import pkg_resources
logger = logging.getLogger(__name__)

# Registry of the available listers: every 'swh.workers' entry point whose
# name is of the form 'lister.<name>' is stored under the key '<name>'.
LISTERS = {
    worker.name.split('.', 1)[1]: worker
    for worker in pkg_resources.iter_entry_points('swh.workers')
    if worker.name.split('.', 1)[0] == 'lister'
}

# Names of the registered listers, in registry order.
SUPPORTED_LISTERS = [*LISTERS]
def get_lister(lister_name, db_url=None, **conf):
    """Build the lister registered under the given name.

    Args:
        lister_name (str): Lister's name, one of SUPPORTED_LISTERS
        db_url (str): When truthy, conf['lister'] is set to a 'local'
            configuration pointing at this db url
        conf (dict): Configuration dict handed to the lister class as
            its override_config

    Returns:
        The instantiated lister

    Raises:
        ValueError: if lister_name is not a key of LISTERS

    """
    if lister_name not in LISTERS:
        message = 'Invalid lister %s: only supported listers are %s' % (
            lister_name, SUPPORTED_LISTERS)
        raise ValueError(message)

    if db_url:
        conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}

    # The entry point loads to a callable; calling it yields the registry
    # entry (a dict) holding the lister class under the 'lister' key.
    register = LISTERS[lister_name].load()
    lister_cls = register()['lister']
    return lister_cls(override_config=conf)

View file

@ -47,7 +47,7 @@ class CGitLister(ListerBase):
'https://git.savannah.gnu.org/git/elisp-es.git'
"""
MODEL = CGitModel
DEFAULT_URL = 'http://git.savannah.gnu.org/cgit/'
DEFAULT_URL = 'https://git.savannah.gnu.org/cgit/'
LISTER_NAME = 'cgit'
url_prefix_present = True

View file

@ -2,85 +2,65 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from os.path import join, dirname
import re
from urllib.parse import urlparse
from unittest.mock import Mock
import requests_mock
from sqlalchemy import create_engine
def test_lister_no_page(requests_mock_datadir, swh_listers):
lister = swh_listers['cgit']
from swh.lister.cgit.lister import CGitLister
from swh.lister.tests.test_utils import init_db
assert lister.url == 'https://git.savannah.gnu.org/cgit/'
repos = list(lister.get_repos())
assert len(repos) == 977
assert repos[0] == 'https://git.savannah.gnu.org/cgit/elisp-es.git/'
# note the url below is NOT a subpath of /cgit/
assert repos[-1] == 'https://git.savannah.gnu.org/path/to/yetris.git/' # noqa
# note the url below is NOT on the same server
assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
DATADIR = join(dirname(__file__), 'data')
def test_lister_model(requests_mock_datadir, swh_listers):
    """Check the model built from the first repository the cgit lister yields."""
    cgit_lister = swh_listers['cgit']

    first_repo = next(cgit_lister.get_repos())
    expected_model = {
        'uid': 'https://git.savannah.gnu.org/cgit/elisp-es.git/',
        'name': 'elisp-es.git',
        'origin_type': 'git',
        'instance': 'git.savannah.gnu.org',
        'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
    }

    assert cgit_lister.build_model(first_repo) == expected_model
def get_response_cb(request, context):
    """Return the response text for a mocked request from a local data file.

    The file read is DATADIR/<hostname>/<filename>, where <filename> is the
    url path stripped of its first and last characters, with every '/'
    replaced by '_', followed by ',<query>' when the url has a query string.

    Args:
        request: object exposing the requested url as ``request.url``
        context: not used by this function

    Returns:
        str: content of the data file decoded as ascii, bytes that cannot
        be decoded being dropped

    """
    url = urlparse(request.url)
    # Named `hostname` so that the `dirname` imported from os.path is not
    # shadowed inside this function.
    hostname = url.hostname
    filename = url.path[1:-1].replace('/', '_')
    if url.query:
        filename += ',' + url.query
    # Use a context manager so the file handle is closed as soon as it is read.
    with open(join(DATADIR, hostname, filename), 'rb') as data_file:
        content = data_file.read()
    return content.decode('ascii', 'ignore')
def test_lister_with_pages(requests_mock_datadir, swh_listers):
    """Check the number of repositories listed for the git.tizen instance."""
    cgit_lister = swh_listers['cgit']
    cgit_lister.url = 'https://git.tizen/cgit/'

    listed = [*cgit_lister.get_repos()]

    # we should have 16 repos (listed on 3 pages)
    assert len(listed) == 16
def test_lister_no_page():
with requests_mock.Mocker() as m:
m.get(re.compile('http://git.savannah.gnu.org'), text=get_response_cb)
lister = CGitLister()
def test_lister_run(requests_mock_datadir, swh_listers):
lister = swh_listers['cgit']
lister.url = 'https://git.tizen/cgit/'
lister.run()
assert lister.url == 'http://git.savannah.gnu.org/cgit/'
r = lister.scheduler.search_tasks(task_type='load-git')
assert len(r) == 16
repos = list(lister.get_repos())
assert len(repos) == 977
for row in r:
assert row['type'] == 'load-git'
# arguments check
args = row['arguments']['args']
assert len(args) == 1
assert repos[0] == 'http://git.savannah.gnu.org/cgit/elisp-es.git/'
# note the url below is NOT a subpath of /cgit/
assert repos[-1] == 'http://git.savannah.gnu.org/path/to/yetris.git/' # noqa
# note the url below is NOT on the same server
assert repos[-2] == 'http://example.org/cgit/xstarcastle.git/'
url = args[0]
assert url.startswith('https://git.tizen')
def test_lister_model():
    """Check the model built from the first repository of a default lister."""
    expected_model = {
        'uid': 'http://git.savannah.gnu.org/cgit/elisp-es.git/',
        'name': 'elisp-es.git',
        'origin_type': 'git',
        'instance': 'git.savannah.gnu.org',
        'origin_url': 'https://git.savannah.gnu.org/git/elisp-es.git'
    }
    with requests_mock.Mocker() as mocker:
        # Every GET matching this pattern is answered by get_response_cb.
        mocker.get(re.compile('http://git.savannah.gnu.org'),
                   text=get_response_cb)
        cgit_lister = CGitLister()

        first_repo = next(cgit_lister.get_repos())

        assert cgit_lister.build_model(first_repo) == expected_model
def test_lister_with_pages():
    """Check the url and the repository count of a git.tizen lister."""
    tizen_url = 'http://git.tizen/cgit/'
    with requests_mock.Mocker() as mocker:
        # Every GET matching this pattern is answered by get_response_cb.
        mocker.get(re.compile('http://git.tizen/cgit/'), text=get_response_cb)
        cgit_lister = CGitLister(url=tizen_url)
        assert cgit_lister.url == 'http://git.tizen/cgit/'

        listed = [*cgit_lister.get_repos()]
        # we should have 16 repos (listed on 3 pages)
        assert len(listed) == 16
def test_lister_run():
    """Run a git.tizen lister against a test db with task scheduling stubbed."""
    with requests_mock.Mocker() as mocker:
        # Every GET matching this pattern is answered by get_response_cb.
        mocker.get(re.compile('http://git.tizen/cgit/'), text=get_response_cb)

        test_db = init_db()
        override = {
            'lister': {'cls': 'local', 'args': {'db': test_db.url()}},
        }
        cgit_lister = CGitLister(url='http://git.tizen/cgit/',
                                 override_config=override)

        # Create the tables declared on the lister model's metadata.
        cgit_lister.MODEL.metadata.create_all(create_engine(test_db.url()))

        # Replace schedule_missing_tasks by a stub returning None.
        cgit_lister.schedule_missing_tasks = Mock(return_value=None)

        cgit_lister.run()
# kwargs
kwargs = row['arguments']['kwargs']
assert kwargs == {}
assert row['policy'] == 'recurring'
assert row['priority'] is None

View file

@ -5,7 +5,6 @@
import os
import logging
import pkg_resources
from copy import deepcopy
from importlib import import_module
@ -15,15 +14,12 @@ from sqlalchemy import create_engine
from swh.core.cli import CONTEXT_SETTINGS
from swh.scheduler import get_scheduler
from swh.scheduler.task import SWHTask
from swh.lister import get_lister, SUPPORTED_LISTERS, LISTERS
from swh.lister.core.models import initialize
logger = logging.getLogger(__name__)
# Registry of the available listers: every 'swh.workers' entry point whose
# name is of the form 'lister.<name>' is stored under the key '<name>'.
LISTERS = {
    worker.name.split('.', 1)[1]: worker
    for worker in pkg_resources.iter_entry_points('swh.workers')
    if worker.name.split('.', 1)[0] == 'lister'
}

# Names of the registered listers, in registry order.
SUPPORTED_LISTERS = [*LISTERS]
# The key in this dict is the suffix used to match a new task type to be added.
# For example, for a task whose function name is 'list_gitlab_full', the default
@ -45,31 +41,6 @@ DEFAULT_TASK_TYPE = {
}
def get_lister(lister_name, db_url=None, **conf):
    """Build the lister registered under the given name.

    Args:
        lister_name (str): Lister's name, one of SUPPORTED_LISTERS
        db_url (str): When truthy, conf['lister'] is set to a 'local'
            configuration pointing at this db url
        conf (dict): Configuration dict handed to the lister class as
            its override_config

    Returns:
        The instantiated lister

    Raises:
        ValueError: if lister_name is not a key of LISTERS

    """
    if lister_name not in LISTERS:
        message = 'Invalid lister %s: only supported listers are %s' % (
            lister_name, SUPPORTED_LISTERS)
        raise ValueError(message)

    if db_url:
        conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}

    # The entry point loads to a callable; calling it yields the registry
    # entry (a dict) holding the lister class under the 'lister' key.
    register = LISTERS[lister_name].load()
    lister_cls = register()['lister']
    return lister_cls(override_config=conf)
@click.group(name='lister', context_settings=CONTEXT_SETTINGS)
@click.option('--config-file', '-C', default=None,
type=click.Path(exists=True, dir_okay=False,),

View file

@ -1 +1,28 @@
from swh.scheduler.tests.conftest import * # noqa
import pytest
from sqlalchemy import create_engine
from swh.lister import get_lister, SUPPORTED_LISTERS
from swh.lister.core.models import initialize
@pytest.fixture
def swh_listers(request, postgresql_proc, postgresql, swh_scheduler):
    """Provide one instance of every supported lister, keyed by lister name.

    Each lister is built with a db url made from the host and port of
    postgresql_proc (user 'postgres', database 'tests') and has its
    scheduler attribute replaced by the swh_scheduler fixture.

    Returns:
        dict: lister name -> instantiated lister

    """
    url_template = 'postgresql://{user}@{host}:{port}/{dbname}'
    db_url = url_template.format(
        user='postgres',
        host=postgresql_proc.host,
        port=postgresql_proc.port,
        dbname='tests')

    by_name = {}
    for name in SUPPORTED_LISTERS:
        instance = get_lister(name, db_url=db_url)
        instance.scheduler = swh_scheduler  # inject scheduler fixture
        by_name[name] = instance

    # Prepare schema for all listers
    engine = create_engine(db_url)
    initialize(engine, drop_tables=True)

    return by_name