Generate the GitLab instance name from the api_baseurl by default

using the host of the given url.

This allows creating a lister task by simply specifying the API base URL
and prevents 'inconsistent by default' behavior, e.g. with:

  swh-scheduler task add swh-lister-gitlab-full \
      api_baseurl=https://0xacab.org/api/v4

the created task does not use 'gitlab' as instance name (but '0xacab.org'
here).

It's still possible to explicitly specify the instance name if needed.
This commit is contained in:
David Douard 2019-01-09 16:34:53 +01:00
parent 7db281aa38
commit fb9265bb03
3 changed files with 56 additions and 5 deletions

View file

@ -4,6 +4,7 @@
import random
import time
from urllib3.util import parse_url
from ..core.page_by_page_lister import PageByPageHttpLister
from .models import GitLabModel
@ -15,10 +16,12 @@ class GitLabLister(PageByPageHttpLister):
MODEL = GitLabModel
LISTER_NAME = 'gitlab'
def __init__(self, api_baseurl=None, instance=None,
def __init__(self, api_baseurl, instance=None,
override_config=None, sort='asc', per_page=20):
super().__init__(api_baseurl=api_baseurl,
override_config=override_config)
if instance is None:
instance = parse_url(api_baseurl).host
self.instance = instance
self.PATH_TEMPLATE = '%s&sort=%s' % (self.PATH_TEMPLATE, sort)
if per_page != 20:

View file

@ -17,7 +17,7 @@ NBPAGES = 10
def new_lister(api_baseurl='https://gitlab.com/api/v4',
instance='gitlab', sort='asc', per_page=20):
instance=None, sort='asc', per_page=20):
return GitLabLister(
api_baseurl=api_baseurl, instance=instance, sort=sort,
per_page=per_page)

View file

@ -28,7 +28,7 @@ def test_incremental(lister, swh_app, celery_session_worker):
lister.assert_called_once_with(
api_baseurl='https://gitlab.com/api/v4',
instance='gitlab', sort='desc', per_page=20)
instance=None, sort='desc', per_page=20)
lister.db_last_index.assert_not_called()
lister.get_pages_information.assert_called_once_with()
lister.run.assert_called_once_with(
@ -50,7 +50,7 @@ def test_range(lister, swh_app, celery_session_worker):
lister.assert_called_once_with(
api_baseurl='https://gitlab.com/api/v4',
instance='gitlab', sort='asc', per_page=20)
instance=None, sort='asc', per_page=20)
lister.db_last_index.assert_not_called()
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
@ -83,7 +83,55 @@ def test_relister(lister, swh_app, celery_session_worker):
lister.assert_called_with(
api_baseurl='https://gitlab.com/api/v4',
instance='gitlab', sort='asc', per_page=20)
instance=None, sort='asc', per_page=20)
# one by the FullGitlabRelister task
# + 9 for the RangeGitlabLister subtasks
assert lister.call_count == 10
lister.db_last_index.assert_not_called()
lister.db_partition_indices.assert_not_called()
lister.get_pages_information.assert_called_once_with()
# lister.run should have been called once per partition interval
for i in range(8):
# XXX inconsistent behavior: max_bound is EXCLUDED here
assert (dict(min_bound=10*i, max_bound=10*i + 10),) \
in lister.run.call_args_list
assert (dict(min_bound=80, max_bound=85),) \
in lister.run.call_args_list
@patch('swh.lister.gitlab.tasks.GitLabLister')
def test_relister_instance(lister, swh_app, celery_session_worker):
# setup the mocked GitlabLister
lister.return_value = lister
lister.run.return_value = None
lister.get_pages_information.return_value = (None, 85, None)
lister.db_partition_indices.return_value = [
(i, i+9) for i in range(0, 80, 10)] + [(80, 85)]
res = swh_app.send_task(
'swh.lister.gitlab.tasks.FullGitLabRelister',
kwargs=dict(api_baseurl='https://0xacab.org/api/v4'))
assert res
res.wait()
assert res.successful()
# retrieve the GroupResult for this task and wait for all the subtasks
# to complete
promise_id = res.result
assert promise_id
promise = GroupResult.restore(promise_id, app=swh_app)
for i in range(5):
if promise.ready():
break
sleep(1)
lister.assert_called_with(
api_baseurl='https://0xacab.org/api/v4',
instance=None, sort='asc', per_page=20)
# one by the FullGitlabRelister task
# + 9 for the RangeGitlabLister subtasks