tasks: normalize the url argument name of most lister
Since all the listing tasks accepts an url as first argument (whatever the argument name is), it makes sense to use a simple common argument name for this. I've chosen 'url' instead of api_baseurl/forge_url/url. Also kill now useless `new_lister()` functions.
This commit is contained in:
parent
631b8e7668
commit
b810876ef8
19 changed files with 60 additions and 91 deletions
|
@ -23,9 +23,8 @@ class BitBucketLister(IndexingHttpLister):
|
|||
instance = 'bitbucket'
|
||||
default_min_bound = datetime.utcfromtimestamp(0)
|
||||
|
||||
def __init__(self, api_baseurl=None, override_config=None, per_page=100):
|
||||
super().__init__(
|
||||
api_baseurl=api_baseurl, override_config=override_config)
|
||||
def __init__(self, url=None, override_config=None, per_page=100):
|
||||
super().__init__(url=url, override_config=override_config)
|
||||
per_page = self.config.get('per_page', per_page)
|
||||
|
||||
self.PATH_TEMPLATE = '%s&pagelen=%s' % (
|
||||
|
|
|
@ -12,20 +12,16 @@ from .lister import BitBucketLister
|
|||
GROUP_SPLIT = 10000
|
||||
|
||||
|
||||
def new_lister(api_baseurl='https://api.bitbucket.org/2.0', per_page=100):
|
||||
return BitBucketLister(api_baseurl=api_baseurl, per_page=per_page)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.IncrementalBitBucketLister')
|
||||
def list_bitbucket_incremental(**lister_args):
|
||||
'''Incremental update of the BitBucket forge'''
|
||||
lister = new_lister(**lister_args)
|
||||
lister = BitBucketLister(**lister_args)
|
||||
lister.run(min_bound=lister.db_last_index(), max_bound=None)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.RangeBitBucketLister')
|
||||
def _range_bitbucket_lister(start, end, **lister_args):
|
||||
lister = new_lister(**lister_args)
|
||||
lister = BitBucketLister(**lister_args)
|
||||
lister.run(min_bound=start, max_bound=end)
|
||||
|
||||
|
||||
|
@ -36,7 +32,7 @@ def list_bitbucket_full(self, split=None, **lister_args):
|
|||
It's not to be called for an initial listing.
|
||||
|
||||
"""
|
||||
lister = new_lister(**lister_args)
|
||||
lister = BitBucketLister(**lister_args)
|
||||
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
|
||||
if not ranges:
|
||||
self.log.info('Nothing to list')
|
||||
|
|
|
@ -26,8 +26,7 @@ def test_incremental(lister, swh_app, celery_session_worker):
|
|||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(
|
||||
api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
|
||||
lister.assert_called_once_with()
|
||||
lister.db_last_index.assert_called_once_with()
|
||||
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
|
||||
|
||||
|
@ -45,8 +44,7 @@ def test_range(lister, swh_app, celery_session_worker):
|
|||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(
|
||||
api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
|
||||
lister.assert_called_once_with()
|
||||
lister.db_last_index.assert_not_called()
|
||||
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
|
||||
|
||||
|
@ -76,8 +74,7 @@ def test_relister(lister, swh_app, celery_session_worker):
|
|||
break
|
||||
sleep(1)
|
||||
|
||||
lister.assert_called_with(
|
||||
api_baseurl='https://api.bitbucket.org/2.0', per_page=100)
|
||||
lister.assert_called_with()
|
||||
|
||||
# one by the FullBitbucketRelister task
|
||||
# + 5 for the RangeBitbucketLister subtasks
|
||||
|
|
|
@ -63,7 +63,7 @@ def get_lister(lister_name, db_url=None, **conf):
|
|||
(lister_name, SUPPORTED_LISTERS))
|
||||
if db_url:
|
||||
conf['lister'] = {'cls': 'local', 'args': {'db': db_url}}
|
||||
# To allow api_baseurl override per lister
|
||||
|
||||
registry_entry = LISTERS[lister_name].load()()
|
||||
lister_cls = registry_entry['lister']
|
||||
lister = lister_cls(override_config=conf)
|
||||
|
|
|
@ -244,6 +244,6 @@ class IndexingLister(ListerBase):
|
|||
class IndexingHttpLister(ListerHttpTransport, IndexingLister):
|
||||
"""Convenience class for ensuring right lookup and init order
|
||||
when combining IndexingLister and ListerHttpTransport."""
|
||||
def __init__(self, api_baseurl=None, override_config=None):
|
||||
def __init__(self, url=None, override_config=None):
|
||||
IndexingLister.__init__(self, override_config=override_config)
|
||||
ListerHttpTransport.__init__(self, api_baseurl=api_baseurl)
|
||||
ListerHttpTransport.__init__(self, url=url)
|
||||
|
|
|
@ -96,7 +96,7 @@ class ListerHttpTransport(abc.ABC):
|
|||
required.
|
||||
"""
|
||||
path = self.PATH_TEMPLATE % identifier
|
||||
return self.api_baseurl + path
|
||||
return self.url + path
|
||||
|
||||
def request_params(self, identifier):
|
||||
"""Get the full parameters passed to requests given the
|
||||
|
@ -142,14 +142,14 @@ class ListerHttpTransport(abc.ABC):
|
|||
self.reset_backoff()
|
||||
return False, 0
|
||||
|
||||
def __init__(self, api_baseurl=None):
|
||||
if not api_baseurl:
|
||||
api_baseurl = self.config.get('api_baseurl')
|
||||
if not api_baseurl:
|
||||
api_baseurl = self.DEFAULT_URL
|
||||
if not api_baseurl:
|
||||
raise NameError('HTTP Lister Transport requires api_baseurl.')
|
||||
self.api_baseurl = api_baseurl # eg. 'https://api.github.com'
|
||||
def __init__(self, url=None):
|
||||
if not url:
|
||||
url = self.config.get('url')
|
||||
if not url:
|
||||
url = self.DEFAULT_URL
|
||||
if not url:
|
||||
raise NameError('HTTP Lister Transport requires an url.')
|
||||
self.url = url # eg. 'https://api.github.com'
|
||||
self.session = requests.Session()
|
||||
self.lister_version = __version__
|
||||
|
||||
|
@ -218,7 +218,7 @@ class ListerOnePageApiTransport(ListerHttpTransport):
|
|||
"parse for information")
|
||||
PATH_TEMPLATE = None # we do not use it
|
||||
|
||||
def __init__(self, api_baseurl=None):
|
||||
def __init__(self, url=None):
|
||||
self.session = requests.Session()
|
||||
self.lister_version = __version__
|
||||
|
||||
|
|
|
@ -155,6 +155,6 @@ class PageByPageHttpLister(ListerHttpTransport, PageByPageLister):
|
|||
combining PageByPageLister and ListerHttpTransport.
|
||||
|
||||
"""
|
||||
def __init__(self, api_baseurl=None, override_config=None):
|
||||
def __init__(self, url=None, override_config=None):
|
||||
PageByPageLister.__init__(self, override_config=override_config)
|
||||
ListerHttpTransport.__init__(self, api_baseurl=api_baseurl)
|
||||
ListerHttpTransport.__init__(self, url=url)
|
||||
|
|
|
@ -77,7 +77,7 @@ class HttpListerTesterBase(abc.ABC):
|
|||
|
||||
"""
|
||||
if override_config or self.fl is None:
|
||||
self.fl = self.Lister(api_baseurl='https://fakeurl',
|
||||
self.fl = self.Lister(url='https://fakeurl',
|
||||
override_config=override_config)
|
||||
self.fl.INITIAL_BACKOFF = 1
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ class DebianLister(ListerHttpTransport, ListerBase):
|
|||
instance = 'debian'
|
||||
|
||||
def __init__(self, override_config=None):
|
||||
ListerHttpTransport.__init__(self, api_baseurl="bogus")
|
||||
ListerHttpTransport.__init__(self, url="notused")
|
||||
ListerBase.__init__(self, override_config=override_config)
|
||||
|
||||
def transport_request(self, identifier):
|
||||
|
|
|
@ -12,20 +12,16 @@ from swh.lister.github.lister import GitHubLister
|
|||
GROUP_SPLIT = 10000
|
||||
|
||||
|
||||
def new_lister(api_baseurl='https://api.github.com', **kw):
|
||||
return GitHubLister(api_baseurl=api_baseurl, **kw)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.IncrementalGitHubLister')
|
||||
def list_github_incremental(**lister_args):
|
||||
'Incremental update of GitHub'
|
||||
lister = new_lister(**lister_args)
|
||||
lister = GitHubLister(**lister_args)
|
||||
lister.run(min_bound=lister.db_last_index(), max_bound=None)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.RangeGitHubLister')
|
||||
def _range_github_lister(start, end, **lister_args):
|
||||
lister = new_lister(**lister_args)
|
||||
lister = GitHubLister(**lister_args)
|
||||
lister.run(min_bound=start, max_bound=end)
|
||||
|
||||
|
||||
|
@ -36,7 +32,7 @@ def list_github_full(self, split=None, **lister_args):
|
|||
It's not to be called for an initial listing.
|
||||
|
||||
"""
|
||||
lister = new_lister(**lister_args)
|
||||
lister = GitHubLister(**lister_args)
|
||||
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
|
||||
if not ranges:
|
||||
self.log.info('Nothing to list')
|
||||
|
|
|
@ -26,7 +26,7 @@ def test_incremental(lister, swh_app, celery_session_worker):
|
|||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(api_baseurl='https://api.github.com')
|
||||
lister.assert_called_once_with()
|
||||
lister.db_last_index.assert_called_once_with()
|
||||
lister.run.assert_called_once_with(min_bound=42, max_bound=None)
|
||||
|
||||
|
@ -44,7 +44,7 @@ def test_range(lister, swh_app, celery_session_worker):
|
|||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(api_baseurl='https://api.github.com')
|
||||
lister.assert_called_once_with()
|
||||
lister.db_last_index.assert_not_called()
|
||||
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
|
||||
|
||||
|
@ -74,7 +74,7 @@ def test_relister(lister, swh_app, celery_session_worker):
|
|||
break
|
||||
sleep(1)
|
||||
|
||||
lister.assert_called_with(api_baseurl='https://api.github.com')
|
||||
lister.assert_called_with()
|
||||
|
||||
# one by the FullGitHubRelister task
|
||||
# + 5 for the RangeGitHubLister subtasks
|
||||
|
|
|
@ -16,12 +16,11 @@ class GitLabLister(PageByPageHttpLister):
|
|||
MODEL = GitLabModel
|
||||
LISTER_NAME = 'gitlab'
|
||||
|
||||
def __init__(self, api_baseurl=None, instance=None,
|
||||
def __init__(self, url=None, instance=None,
|
||||
override_config=None, sort='asc', per_page=20):
|
||||
super().__init__(api_baseurl=api_baseurl,
|
||||
override_config=override_config)
|
||||
super().__init__(url=url, override_config=override_config)
|
||||
if instance is None:
|
||||
instance = parse_url(self.api_baseurl).host
|
||||
instance = parse_url(self.url).host
|
||||
self.instance = instance
|
||||
self.PATH_TEMPLATE = '%s&sort=%s&per_page=%s' % (
|
||||
self.PATH_TEMPLATE, sort, per_page)
|
||||
|
|
|
@ -14,18 +14,11 @@ from .lister import GitLabLister
|
|||
NBPAGES = 10
|
||||
|
||||
|
||||
def new_lister(api_baseurl='https://gitlab.com/api/v4',
|
||||
instance=None, sort='asc', per_page=20):
|
||||
return GitLabLister(
|
||||
api_baseurl=api_baseurl, instance=instance, sort=sort,
|
||||
per_page=per_page)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.IncrementalGitLabLister')
|
||||
def list_gitlab_incremental(**lister_args):
|
||||
"""Incremental update of a GitLab instance"""
|
||||
lister_args['sort'] = 'desc'
|
||||
lister = new_lister(**lister_args)
|
||||
lister = GitLabLister(**lister_args)
|
||||
total_pages = lister.get_pages_information()[1]
|
||||
# stopping as soon as existing origins for that instance are detected
|
||||
lister.run(min_bound=1, max_bound=total_pages, check_existence=True)
|
||||
|
@ -33,14 +26,14 @@ def list_gitlab_incremental(**lister_args):
|
|||
|
||||
@app.task(name=__name__ + '.RangeGitLabLister')
|
||||
def _range_gitlab_lister(start, end, **lister_args):
|
||||
lister = new_lister(**lister_args)
|
||||
lister = GitLabLister(**lister_args)
|
||||
lister.run(min_bound=start, max_bound=end)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.FullGitLabRelister', bind=True)
|
||||
def list_gitlab_full(self, **lister_args):
|
||||
"""Full update of a GitLab instance"""
|
||||
lister = new_lister(**lister_args)
|
||||
lister = GitLabLister(**lister_args)
|
||||
_, total_pages, _ = lister.get_pages_information()
|
||||
ranges = list(utils.split_range(total_pages, NBPAGES))
|
||||
random.shuffle(ranges)
|
||||
|
|
|
@ -26,9 +26,7 @@ def test_incremental(lister, swh_app, celery_session_worker):
|
|||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(
|
||||
api_baseurl='https://gitlab.com/api/v4',
|
||||
instance=None, sort='desc', per_page=20)
|
||||
lister.assert_called_once_with(sort='desc')
|
||||
lister.db_last_index.assert_not_called()
|
||||
lister.get_pages_information.assert_called_once_with()
|
||||
lister.run.assert_called_once_with(
|
||||
|
@ -48,9 +46,7 @@ def test_range(lister, swh_app, celery_session_worker):
|
|||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(
|
||||
api_baseurl='https://gitlab.com/api/v4',
|
||||
instance=None, sort='asc', per_page=20)
|
||||
lister.assert_called_once_with()
|
||||
lister.db_last_index.assert_not_called()
|
||||
lister.run.assert_called_once_with(min_bound=12, max_bound=42)
|
||||
|
||||
|
@ -81,9 +77,7 @@ def test_relister(lister, swh_app, celery_session_worker):
|
|||
break
|
||||
sleep(1)
|
||||
|
||||
lister.assert_called_with(
|
||||
api_baseurl='https://gitlab.com/api/v4',
|
||||
instance=None, sort='asc', per_page=20)
|
||||
lister.assert_called_with()
|
||||
|
||||
# one by the FullGitlabRelister task
|
||||
# + 9 for the RangeGitlabLister subtasks
|
||||
|
@ -113,7 +107,7 @@ def test_relister_instance(lister, swh_app, celery_session_worker):
|
|||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.gitlab.tasks.FullGitLabRelister',
|
||||
kwargs=dict(api_baseurl='https://0xacab.org/api/v4'))
|
||||
kwargs=dict(url='https://0xacab.org/api/v4'))
|
||||
assert res
|
||||
|
||||
res.wait()
|
||||
|
@ -129,9 +123,7 @@ def test_relister_instance(lister, swh_app, celery_session_worker):
|
|||
break
|
||||
sleep(1)
|
||||
|
||||
lister.assert_called_with(
|
||||
api_baseurl='https://0xacab.org/api/v4',
|
||||
instance=None, sort='asc', per_page=20)
|
||||
lister.assert_called_with(url='https://0xacab.org/api/v4')
|
||||
|
||||
# one by the FullGitlabRelister task
|
||||
# + 9 for the RangeGitlabLister subtasks
|
||||
|
|
|
@ -16,10 +16,9 @@ class NpmListerBase(IndexingHttpLister):
|
|||
LISTER_NAME = 'npm'
|
||||
instance = 'npm'
|
||||
|
||||
def __init__(self, api_baseurl='https://replicate.npmjs.com',
|
||||
def __init__(self, url='https://replicate.npmjs.com',
|
||||
per_page=1000, override_config=None):
|
||||
super().__init__(api_baseurl=api_baseurl,
|
||||
override_config=override_config)
|
||||
super().__init__(url=url, override_config=override_config)
|
||||
self.per_page = per_page + 1
|
||||
self.PATH_TEMPLATE += '&limit=%s' % self.per_page
|
||||
|
||||
|
@ -76,7 +75,7 @@ class NpmListerBase(IndexingHttpLister):
|
|||
'https://www.npmjs.com/package/%s' % repo_name,
|
||||
# package metadata url needs to be escaped otherwise some requests
|
||||
# may fail (for instance when a package name contains '/')
|
||||
'%s/%s' % (self.api_baseurl, quote(repo_name, safe=''))
|
||||
'%s/%s' % (self.url, quote(repo_name, safe=''))
|
||||
)
|
||||
|
||||
def string_pattern_check(self, inner, lower, upper=None):
|
||||
|
|
|
@ -14,7 +14,7 @@ from swh.lister.npm.models import NpmVisitModel
|
|||
@contextmanager
|
||||
def save_registry_state(lister):
|
||||
params = {'headers': lister.request_headers()}
|
||||
registry_state = lister.session.get(lister.api_baseurl, **params)
|
||||
registry_state = lister.session.get(lister.url, **params)
|
||||
registry_state = registry_state.json()
|
||||
keys = ('doc_count', 'doc_del_count', 'update_seq', 'purge_seq',
|
||||
'disk_size', 'data_size', 'committed_update_seq',
|
||||
|
|
|
@ -20,11 +20,10 @@ class PhabricatorLister(IndexingHttpLister):
|
|||
MODEL = PhabricatorModel
|
||||
LISTER_NAME = 'phabricator'
|
||||
|
||||
def __init__(self, api_baseurl=None, instance=None, override_config=None):
|
||||
super().__init__(api_baseurl=api_baseurl,
|
||||
override_config=override_config)
|
||||
def __init__(self, url=None, instance=None, override_config=None):
|
||||
super().__init__(url=url, override_config=override_config)
|
||||
if not instance:
|
||||
instance = urllib.parse.urlparse(self.api_baseurl).hostname
|
||||
instance = urllib.parse.urlparse(self.url).hostname
|
||||
self.instance = instance
|
||||
|
||||
@property
|
||||
|
|
|
@ -33,9 +33,8 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
|
|||
]}}
|
||||
override_config = dict(credentials=credentials,
|
||||
**(override_config or {}))
|
||||
self.fl = self.Lister(
|
||||
api_baseurl='https://fakeurl', instance='fake',
|
||||
override_config=override_config)
|
||||
self.fl = self.Lister(url='https://fakeurl', instance='fake',
|
||||
override_config=override_config)
|
||||
self.fl.INITIAL_BACKOFF = 1
|
||||
|
||||
self.fl.reset_backoff()
|
||||
|
|
|
@ -67,34 +67,34 @@ def test_get_lister_override():
|
|||
db_url = init_db().url()
|
||||
|
||||
listers = {
|
||||
'gitlab': ('api_baseurl', 'https://gitlab.uni/api/v4/'),
|
||||
'phabricator': (
|
||||
'api_baseurl',
|
||||
'https://somewhere.org/api/diffusion.repository.search'),
|
||||
'gitlab': 'https://other.gitlab.uni/api/v4/',
|
||||
'phabricator': 'https://somewhere.org/api/diffusion.repository.search',
|
||||
'cgit': 'https://some.where/cgit',
|
||||
}
|
||||
|
||||
# check the override ends up defined in the lister
|
||||
for lister_name, (url_key, url_value) in listers.items():
|
||||
for lister_name, url in listers.items():
|
||||
lst = get_lister(
|
||||
lister_name, db_url, **{
|
||||
url_key: url_value,
|
||||
'url': url,
|
||||
'priority': 'high',
|
||||
'policy': 'oneshot',
|
||||
})
|
||||
|
||||
assert getattr(lst, url_key) == url_value
|
||||
assert lst.url == url
|
||||
assert lst.config['priority'] == 'high'
|
||||
assert lst.config['policy'] == 'oneshot'
|
||||
|
||||
# check the default urls are used and not the override (since it's not
|
||||
# passed)
|
||||
for lister_name, (url_key, url_value) in listers.items():
|
||||
for lister_name, url in listers.items():
|
||||
lst = get_lister(lister_name, db_url)
|
||||
|
||||
# no override so this does not end up in lister's configuration
|
||||
assert url_key not in lst.config
|
||||
assert 'url' not in lst.config
|
||||
assert 'priority' not in lst.config
|
||||
assert 'oneshot' not in lst.config
|
||||
assert lst.url == lst.DEFAULT_URL
|
||||
|
||||
|
||||
def test_task_types(swh_scheduler_config, tmp_path):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue