From 4c4aa0ead2c127dbaaef478048d8c08ed27f9cc1 Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" Date: Wed, 11 Jul 2018 15:10:36 +0200 Subject: [PATCH] swh.lister: Make LISTER_NAME a class attribute swh.lister.gitlab: make the 'instance' a constructor parameter --- swh/lister/bitbucket/lister.py | 1 + swh/lister/bitbucket/tasks.py | 3 +-- swh/lister/cli.py | 9 +++----- swh/lister/core/indexing_lister.py | 6 ++---- swh/lister/core/lister_base.py | 12 +++++------ swh/lister/core/paging_lister.py | 6 ++---- swh/lister/core/tests/test_lister.py | 4 ++-- swh/lister/debian/lister.py | 4 ++-- swh/lister/github/lister.py | 1 + swh/lister/github/tasks.py | 3 +-- swh/lister/gitlab/lister.py | 32 +++++++++++----------------- swh/lister/gitlab/tasks.py | 7 +++--- 12 files changed, 36 insertions(+), 52 deletions(-) diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py index 7bc2c1a..6885c8e 100644 --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -11,6 +11,7 @@ from swh.lister.core.indexing_lister import SWHIndexingHttpLister class BitBucketLister(SWHIndexingHttpLister): PATH_TEMPLATE = '/repositories?after=%s' MODEL = BitBucketModel + LISTER_NAME = 'bitbucket.com' def get_model_from_repo(self, repo): return { diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py index 74eab7b..e480994 100644 --- a/swh/lister/bitbucket/tasks.py +++ b/swh/lister/bitbucket/tasks.py @@ -11,8 +11,7 @@ from .lister import BitBucketLister class BitBucketListerTask(ListerTaskBase): def new_lister(self): - return BitBucketLister(lister_name='bitbucket.com', - api_baseurl='https://api.bitbucket.org/2.0') + return BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0') class IncrementalBitBucketLister(BitBucketListerTask, diff --git a/swh/lister/cli.py b/swh/lister/cli.py index 86b4bd8..1f0d6f1 100644 --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -29,21 +29,18 @@ def cli(db_url, lister, create_tables, drop_tables): from .github import models from .github.lister import GitHubLister - _lister = GitHubLister(lister_name='github.com', - api_baseurl='https://api.github.com', + _lister = GitHubLister(api_baseurl='https://api.github.com', override_config=override_conf) elif lister == 'bitbucket': from .bitbucket import models from .bitbucket.lister import BitBucketLister - _lister = BitBucketLister(lister_name='bitbucket.com', - api_baseurl='https://api.bitbucket.org/2.0', + _lister = BitBucketLister(api_baseurl='https://api.bitbucket.org/2.0', override_config=override_conf) elif lister == 'gitlab': from .gitlab import models from .gitlab.lister import GitLabLister - _lister = GitLabLister(lister_name='gitlab.com', - api_baseurl='https://gitlab.com/api/v4/', + _lister = GitLabLister(api_baseurl='https://gitlab.com/api/v4/', override_config=override_conf) else: raise ValueError('Only supported listers are %s' % supported_listers) diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py index 8166900..d086382 100644 --- a/swh/lister/core/indexing_lister.py +++ b/swh/lister/core/indexing_lister.py @@ -205,8 +205,6 @@ class SWHIndexingLister(SWHListerBase): class SWHIndexingHttpLister(SWHListerHttpTransport, SWHIndexingLister): """Convenience class for ensuring right lookup and init order when combining SWHIndexingLister and SWHListerHttpTransport.""" - def __init__(self, lister_name=None, api_baseurl=None, - override_config=None): + def __init__(self, api_baseurl=None, override_config=None): SWHListerHttpTransport.__init__(self, api_baseurl=api_baseurl) - SWHIndexingLister.__init__(self, lister_name=lister_name, - override_config=override_config) + SWHIndexingLister.__init__(self, override_config=override_config) diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py index ecda6b6..4fa2b28 100644 --- a/swh/lister/core/lister_base.py +++ b/swh/lister/core/lister_base.py @@ -64,6 +64,7 @@ class SWHListerBase(abc.ABC, config.SWHConfig): MODEL = AbstractAttribute('Subclass type (not instance)' ' of swh.lister.core.models.ModelBase' ' customized for a specific service.') + LISTER_NAME = AbstractAttribute("Lister's name") @abc.abstractmethod def transport_request(self, identifier): @@ -199,30 +200,27 @@ class SWHListerBase(abc.ABC, config.SWHConfig): @property def CONFIG_BASE_FILENAME(self): # noqa: N802 - return 'lister-%s' % self.lister_name + return 'lister-%s' % self.LISTER_NAME @property def ADDITIONAL_CONFIG(self): # noqa: N802 return { 'lister_db_url': - ('str', 'postgresql:///lister-%s' % self.lister_name), + ('str', 'postgresql:///lister-%s' % self.LISTER_NAME), 'credentials': ('list[dict]', []), 'cache_responses': ('bool', False), 'cache_dir': - ('str', '~/.cache/swh/lister/%s' % self.lister_name), + ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME), } INITIAL_BACKOFF = 10 MAX_RETRIES = 7 CONN_SLEEP = 10 - def __init__(self, lister_name=None, override_config=None): + def __init__(self, override_config=None): self.backoff = self.INITIAL_BACKOFF - if lister_name is None: - raise NameError("Every lister must be assigned a lister_name.") - self.lister_name = lister_name # 'github?', 'bitbucket?', 'foo.com?' self.config = self.parse_config_file( base_filename=self.CONFIG_BASE_FILENAME, additional_configs=[self.ADDITIONAL_CONFIG] diff --git a/swh/lister/core/paging_lister.py b/swh/lister/core/paging_lister.py index 34457b2..c4044f0 100644 --- a/swh/lister/core/paging_lister.py +++ b/swh/lister/core/paging_lister.py @@ -131,8 +131,6 @@ class SWHPagingHttpLister(SWHListerHttpTransport, SWHPagingLister): combining SWHPagingLister and SWHListerHttpTransport. """ - def __init__(self, lister_name=None, api_baseurl=None, - override_config=None): + def __init__(self, api_baseurl=None, override_config=None): SWHListerHttpTransport.__init__(self, api_baseurl=api_baseurl) - SWHPagingLister.__init__(self, lister_name=lister_name, - override_config=override_config) + SWHPagingLister.__init__(self, override_config=override_config) diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py index ebe9d40..dd9a750 100644 --- a/swh/lister/core/tests/test_lister.py +++ b/swh/lister/core/tests/test_lister.py @@ -39,6 +39,7 @@ class HttpListerTesterBase(abc.ABC): first_index = AbstractAttribute('First index in good_api_response') last_index = AbstractAttribute('Last index in good_api_response') entries_per_page = AbstractAttribute('Number of results in good response') + LISTER_NAME = 'fake-lister' # May need to override this if the headers are used for something def response_headers(self, request): @@ -109,8 +110,7 @@ class HttpListerTesterBase(abc.ABC): with patch( 'swh.scheduler.backend.SchedulerBackend.reconnect', noop ): - self.fl = self.Lister(lister_name='fakelister', - api_baseurl='https://fakeurl', + self.fl = self.Lister(api_baseurl='https://fakeurl', override_config=override_config) self.fl.INITIAL_BACKOFF = 1 diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py index 913971b..d23e693 100644 --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -31,11 +31,11 @@ decompressors = { class DebianLister(SWHListerHttpTransport, SWHListerBase): MODEL = Package PATH_TEMPLATE = None + LISTER_NAME = 'debian' def __init__(self, override_config=None): SWHListerHttpTransport.__init__(self, api_baseurl="bogus") - SWHListerBase.__init__(self, lister_name='debian', - override_config=override_config) + SWHListerBase.__init__(self, override_config=override_config) def transport_request(self, identifier): """Subvert SWHListerHttpTransport.transport_request, to try several diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py index 30d4290..f841f60 100644 --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -13,6 +13,7 @@ class GitHubLister(SWHIndexingHttpLister): PATH_TEMPLATE = '/repositories?since=%d' MODEL = GitHubModel API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)') + LISTER_NAME = 'github.com' def get_model_from_repo(self, repo): return { diff --git a/swh/lister/github/tasks.py b/swh/lister/github/tasks.py index f4c5b36..ba04e8c 100644 --- a/swh/lister/github/tasks.py +++ b/swh/lister/github/tasks.py @@ -11,8 +11,7 @@ from .lister import GitHubLister class GitHubListerTask(ListerTaskBase): def new_lister(self): - return GitHubLister(lister_name='github.com', - api_baseurl='https://api.github.com') + return GitHubLister(api_baseurl='https://api.github.com') class IncrementalGitHubLister(GitHubListerTask, IndexingDiscoveryListerTask): diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py index 78f7736..1cbfc7a 100644 --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -15,14 +15,14 @@ class GitLabLister(SWHPagingHttpLister): PATH_TEMPLATE = '/projects?page=%d&order_by=id&sort=asc&simple=true' API_URL_INDEX_RE = re.compile(r'^.*/projects.*page=(\d+).*') MODEL = GitLabModel + LISTER_NAME = 'gitlab' - @property - def CONFIG_BASE_FILENAME(self): - """One gitlab lister for all instances. We discriminate between the - origin on a per instance basis in the table. - - """ - return 'lister-gitlab' + def __init__(self, api_baseurl=None, instance=None, + override_config=None, sort='asc'): + super().__init__(api_baseurl=api_baseurl, + override_config=override_config) + self.instance = instance + self.PATH_TEMPLATE = '%s&sort=%s' % (self.PATH_TEMPLATE, sort) @property def ADDITIONAL_CONFIG(self): @@ -32,16 +32,10 @@ class GitLabLister(SWHPagingHttpLister): cf. request_params method below """ - return { - 'lister_db_url': - ('str', 'postgresql:///lister-gitlab'), - 'credentials': # credentials is a dict - ('dict', {}), - 'cache_responses': - ('bool', False), - 'cache_dir': - ('str', '~/.cache/swh/lister/%s' % self.lister_name), - } + default_config = super().ADDITIONAL_CONFIG + # 'credentials' is a dict of (instance, {username, password}) dict + default_config['credentials'] = ('dict', {}) + return default_config def request_params(self, identifier): """Get the full parameters passed to requests given the @@ -67,7 +61,7 @@ class GitLabLister(SWHPagingHttpLister): # Retrieve the credentials per instance creds = self.config['credentials'] if creds: - creds_lister = creds[self.lister_name] + creds_lister = creds[self.instance] auth = random.choice(creds_lister) if creds else None if auth: params['auth'] = (auth['username'], auth['password']) @@ -75,7 +69,7 @@ class GitLabLister(SWHPagingHttpLister): def get_model_from_repo(self, repo): return { - 'instance': self.lister_name, + 'instance': self.instance, 'uid': repo['id'], 'indexable': repo['id'], 'name': repo['name'], diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py index 63bc179..ba6290e 100644 --- a/swh/lister/gitlab/tasks.py +++ b/swh/lister/gitlab/tasks.py @@ -11,10 +11,9 @@ from .lister import GitLabLister class GitLabListerTask(ListerTaskBase): - def new_lister(self, lister_name='gitlab.com', - api_baseurl='https://gitlab.com/api/v4'): - return GitLabLister( - lister_name=lister_name, api_baseurl=api_baseurl) + def new_lister(self, api_baseurl='https://gitlab.com/api/v4', + instance='gitlab.com'): + return GitLabLister(api_baseurl=api_baseurl, instance=instance) class RangeGitLabLister(GitLabListerTask, RangeListerTask):