swh.lister.phabricator
Add a lister of all hosted repositories on a Phabricator instance Closes T808
This commit is contained in:
parent
4efb2ce62b
commit
fedfd73c8e
14 changed files with 2894 additions and 1 deletions
|
@ -9,7 +9,8 @@ import click
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', 'npm']
|
||||
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
|
||||
'npm', 'phabricator']
|
||||
|
||||
|
||||
@click.command()
|
||||
|
@ -96,6 +97,14 @@ def cli(db_url, listers, drop_tables):
|
|||
NpmVisitModel.metadata.drop_all(_lister.db_engine)
|
||||
NpmVisitModel.metadata.create_all(_lister.db_engine)
|
||||
|
||||
elif lister == 'phabricator':
|
||||
from .phabricator.models import IndexingModelBase as ModelBase
|
||||
from .phabricator.lister import PhabricatorLister
|
||||
_lister = PhabricatorLister(
|
||||
forge_url='https://forge.softwareheritage.org',
|
||||
api_token='',
|
||||
override_config=override_conf)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
'Invalid lister %s: only supported listers are %s' %
|
||||
|
|
|
@ -11,4 +11,5 @@ def celery_includes():
|
|||
'swh.lister.gitlab.tasks',
|
||||
'swh.lister.npm.tasks',
|
||||
'swh.lister.pypi.tasks',
|
||||
'swh.lister.phabricator.tasks',
|
||||
]
|
||||
|
|
0
swh/lister/phabricator/__init__.py
Normal file
0
swh/lister/phabricator/__init__.py
Normal file
138
swh/lister/phabricator/lister.py
Normal file
138
swh/lister/phabricator/lister.py
Normal file
|
@ -0,0 +1,138 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
from swh.lister.core.indexing_lister import SWHIndexingHttpLister
|
||||
from swh.lister.phabricator.models import PhabricatorModel
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
class PhabricatorLister(SWHIndexingHttpLister):
    """Lister for the repositories hosted on a Phabricator instance.

    Repositories are fetched through the ``diffusion.repository.search``
    API endpoint, ordered from oldest to newest, and paginated with the
    ``after`` cursor returned by each response.
    """
    PATH_TEMPLATE = '&order=oldest&attachments[uris]=1&after=%s'
    MODEL = PhabricatorModel
    LISTER_NAME = 'phabricator'

    def __init__(self, forge_url, api_token, override_config=None):
        """
        Args:
            forge_url (str): Base url of the Phabricator instance, a single
                trailing slash is removed
            api_token (str): Token sent as the ``api.token`` query parameter
            override_config (dict): Optional configuration forwarded to the
                parent lister
        """
        if forge_url.endswith("/"):
            forge_url = forge_url[:-1]
        self.forge_url = forge_url
        api_endpoint = ('api/diffusion.repository.'
                        'search?api.token=%s') % api_token
        api_baseurl = '%s/%s' % (forge_url, api_endpoint)
        super().__init__(api_baseurl=api_baseurl,
                         override_config=override_config)

    def request_headers(self):
        """
        (Override) Set requests headers to send when querying the
        Phabricator API
        """
        return {'User-Agent': 'Software Heritage phabricator lister',
                'Accept': 'application/json'}

    def get_model_from_repo(self, repo):
        """
        Convert a repository entry of the API response into a dict of
        model fields.

        Returns:
            dict: The model fields, or None when the repository exposes no
            http(s) url (see get_repo_url)
        """
        url = get_repo_url(repo['attachments']['uris']['uris'])
        if url is None:
            return None
        return {
            'uid': self.forge_url + str(repo['id']),
            'indexable': repo['id'],
            'name': repo['fields']['shortName'],
            'full_name': repo['fields']['name'],
            'html_url': url,
            'origin_url': url,
            'description': None,
            'origin_type': repo['fields']['vcs']
        }

    def get_next_target_from_response(self, response):
        """
        Extract the pagination cursor of the next page from an API response.

        Returns:
            The value of the ``after`` cursor, or None when there is no
            further page
        """
        after = response.json()['result']['cursor']['after']
        # A JSON null is decoded as None by the json parser; the literal
        # string 'null' is still handled as "no next page" for backward
        # compatibility with the previous check.
        if after is None or after == 'null':
            return None
        return after

    def transport_response_simplified(self, response):
        """
        Convert an API response into a list of model dicts, one entry per
        repository (None for repositories without a usable url).

        Raises:
            ValueError: if the API reported an error (no ``result``)
        """
        repos = response.json()
        if repos['result'] is None:
            raise ValueError(
                'Problem during information fetch: %s' % repos['error_code'])
        repos = repos['result']['data']
        return [self.get_model_from_repo(repo) for repo in repos]

    def filter_before_inject(self, models_list):
        """
        (Overrides) SWHIndexingLister.filter_before_inject
        Drops the repositories for which no model could be built (None
        entries), then delegates the remaining filtering to the parent
        class.
        """
        models_list = [m for m in models_list if m is not None]
        return super().filter_before_inject(models_list)

    def _bootstrap_repositories_listing(self):
        """
        Method called when no min_bound value has been provided
        to the lister. Its purpose is to:

            1. get the first repository data hosted on the Phabricator
               instance

            2. inject them into the lister database

            3. return the first repository index to start the listing
               after that value

        Returns:
            int: The first repository index, or None if the instance does
            not host any repository
        """
        params = '&order=oldest&limit=1'
        response = self.safely_issue_request(params)
        # Also validates the response: raises ValueError on an API error.
        models_list = self.transport_response_simplified(response)
        repos = response.json()['result']['data']
        if not repos:
            # Nothing hosted on the instance: there is no first index.
            return None
        # Read the index from the raw API entry rather than from the model:
        # the model is None when the first repository has no http(s) url,
        # and that case must not abort the whole listing.
        self.max_index = repos[0]['id']
        models_list = self.filter_before_inject(models_list)
        injected = self.inject_repo_data_into_db(models_list)
        self.create_missing_origins_and_tasks(models_list, injected)
        return self.max_index

    def run(self, min_bound=None, max_bound=None):
        """
        (Override) Run the lister on the specified Phabricator instance

        Args:
            min_bound (int): Optional repository index to start the listing
                after it
            max_bound (int): Optional repository index to stop the listing
                after it
        """
        # initial call to the lister, we need to bootstrap it in that case
        if min_bound is None:
            min_bound = self._bootstrap_repositories_listing()
            if min_bound is None:
                # the instance hosts no repository, nothing to list
                return
        super().run(min_bound, max_bound)
|
||||
|
||||
|
||||
def get_repo_url(attachments):
    """
    Return url for a hosted repository from its uris attachments according
    to the following priority lists:
    * protocol: https > http
    * identifier: shortname > callsign > id

    URIs without a builtin protocol are classified from the scheme of their
    effective url and are only used when no builtin URI is available for
    that protocol.

    Args:
        attachments (list): The ``uris`` attachment entries of a repository
            as returned by the Phabricator API

    Returns:
        str: The selected url, or None when the repository has no http(s)
        url
    """
    processed_urls = defaultdict(dict)
    for uri in attachments:
        fields = uri['fields']
        protocol = fields['builtin']['protocol']
        identifier = fields['builtin']['identifier']
        url = fields['uri']['effective']
        if not url:
            # No effective url to offer: skip the entry so it can neither
            # fail the scheme check below nor shadow a usable url.
            continue
        if protocol in ('http', 'https'):
            processed_urls[protocol][identifier] = url
        elif protocol is None:
            # Match the full scheme separator so that strings merely
            # starting with "http" are not mistaken for http(s) urls.
            for scheme in ('https', 'http'):
                if url.startswith(scheme + '://'):
                    processed_urls[scheme]['undefined'] = url
                    break
    for protocol in ('https', 'http'):
        # .get() avoids creating empty entries in the defaultdict
        urls = processed_urls.get(protocol, {})
        for identifier in ('shortname', 'callsign', 'id', 'undefined'):
            if identifier in urls:
                return urls[identifier]
    return None
|
15
swh/lister/phabricator/models.py
Normal file
15
swh/lister/phabricator/models.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from sqlalchemy import Column, String, Integer
|
||||
|
||||
from swh.lister.core.models import IndexingModelBase
|
||||
|
||||
|
||||
class PhabricatorModel(IndexingModelBase):
    """a Phabricator repository"""
    # Name of the database table holding the listed repositories.
    __tablename__ = 'phabricator_repos'

    # Primary key; the lister fills it with the forge url concatenated
    # with the repository id.
    uid = Column(String, primary_key=True)
    # Indexed integer column; the lister fills it with the repository id
    # reported by the Phabricator API.
    indexable = Column(Integer, index=True)
|
28
swh/lister/phabricator/tasks.py
Normal file
28
swh/lister/phabricator/tasks.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from swh.scheduler.celery_backend.config import app
|
||||
from swh.lister.phabricator.lister import PhabricatorLister
|
||||
|
||||
|
||||
def new_lister(
        forge_url='https://forge.softwareheritage.org', api_token='', **kw):
    """Instantiate a PhabricatorLister.

    Args:
        forge_url (str): Url of the Phabricator instance to list
        api_token (str): Token used to query the Phabricator API
        **kw: Extra keyword arguments forwarded to the lister constructor

    Returns:
        PhabricatorLister: the configured lister
    """
    lister_kwargs = dict(kw, forge_url=forge_url, api_token=api_token)
    return PhabricatorLister(**lister_kwargs)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.IncrementalPhabricatorLister')
def incremental_phabricator_lister(**lister_args):
    """Celery task resuming the listing after the last index stored in
    the lister database.

    Args:
        **lister_args: Keyword arguments forwarded to new_lister
    """
    phab_lister = new_lister(**lister_args)
    resume_after = phab_lister.db_last_index()
    phab_lister.run(min_bound=resume_after)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.FullPhabricatorLister')
def full_phabricator_lister(**lister_args):
    """Celery task running the lister without any bound.

    Args:
        **lister_args: Keyword arguments forwarded to new_lister
    """
    new_lister(**lister_args).run()
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.ping')
def ping():
    """Trivial task returning the constant string 'OK'."""
    return 'OK'
|
0
swh/lister/phabricator/tests/__init__.py
Normal file
0
swh/lister/phabricator/tests/__init__.py
Normal file
8
swh/lister/phabricator/tests/api_empty_response.json
Normal file
8
swh/lister/phabricator/tests/api_empty_response.json
Normal file
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"result": {
|
||||
"data": [],
|
||||
"cursor": {
|
||||
"after": null
|
||||
}
|
||||
}
|
||||
}
|
2538
swh/lister/phabricator/tests/api_response.json
Normal file
2538
swh/lister/phabricator/tests/api_response.json
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,60 @@
|
|||
{
|
||||
"id": 8,
|
||||
"type": "REPO",
|
||||
"phid": "PHID-REPO-ge2icigfu5ijk2whqfbl",
|
||||
"fields": {
|
||||
"name": "Blender Libraries",
|
||||
"vcs": "svn",
|
||||
"callsign": "BL",
|
||||
"shortName": null,
|
||||
"status": "active",
|
||||
"isImporting": false,
|
||||
"almanacServicePHID": null,
|
||||
"spacePHID": null,
|
||||
"dateCreated": 1385564674,
|
||||
"dateModified": 1468574079,
|
||||
"policy": {
|
||||
"view": "public",
|
||||
"edit": "admin",
|
||||
"diffusion.push": "PHID-PROJ-hclk7tvd6pmpjmqastjl"
|
||||
}
|
||||
},
|
||||
"attachments": {
|
||||
"uris": {
|
||||
"uris": [
|
||||
{
|
||||
"id": "70",
|
||||
"type": "RURI",
|
||||
"phid": "PHID-RURI-h7zdbkud6why4xrb2s2e",
|
||||
"fields": {
|
||||
"repositoryPHID": "PHID-REPO-ge2icigfu5ijk2whqfbl",
|
||||
"uri": {
|
||||
"raw": "https://svn.blender.org/svnroot/bf-blender/",
|
||||
"display": "https://svn.blender.org/svnroot/bf-blender/",
|
||||
"effective": "https://svn.blender.org/svnroot/bf-blender/",
|
||||
"normalized": "svn.blender.org/svnroot/bf-blender"
|
||||
},
|
||||
"io": {
|
||||
"raw": "observe",
|
||||
"default": "none",
|
||||
"effective": "observe"
|
||||
},
|
||||
"display": {
|
||||
"raw": "always",
|
||||
"default": "never",
|
||||
"effective": "always"
|
||||
},
|
||||
"credentialPHID": null,
|
||||
"disabled": false,
|
||||
"builtin": {
|
||||
"protocol": null,
|
||||
"identifier": null
|
||||
},
|
||||
"dateCreated": "1467894515",
|
||||
"dateModified": "1468574079"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
1
swh/lister/phabricator/tests/conftest.py
Normal file
1
swh/lister/phabricator/tests/conftest.py
Normal file
|
@ -0,0 +1 @@
|
|||
from swh.lister.core.tests.conftest import * # noqa
|
53
swh/lister/phabricator/tests/test_lister.py
Normal file
53
swh/lister/phabricator/tests/test_lister.py
Normal file
|
@ -0,0 +1,53 @@
|
|||
# Copyright (C) 2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import re
|
||||
import json
|
||||
import unittest
|
||||
from swh.lister.core.tests.test_lister import HttpListerTester
|
||||
from swh.lister.phabricator.lister import PhabricatorLister
|
||||
from swh.lister.phabricator.lister import get_repo_url
|
||||
|
||||
|
||||
class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
    """Run the generic HTTP lister test suite against PhabricatorLister and
    check the repository url selection of get_repo_url.
    """
    Lister = PhabricatorLister
    test_re = re.compile(r'\&after=([^?&]+)')
    lister_subdir = 'phabricator'
    good_api_response_file = 'api_response.json'
    good_api_response_undefined_protocol = 'api_response_undefined_'\
                                           'protocol.json'
    bad_api_response_file = 'api_empty_response.json'
    first_index = 1
    last_index = 12
    entries_per_page = 10

    def get_fl(self, override_config=None):
        """(Override) Retrieve an instance of fake lister (fl).

        """
        if override_config or self.fl is None:
            self.fl = self.Lister(forge_url='https://fakeurl', api_token='a-1',
                                  override_config=override_config)
            self.fl.INITIAL_BACKOFF = 1

        self.fl.reset_backoff()
        return self.fl

    def test_get_repo_url(self):
        """get_repo_url returns the expected url for every fixture repo."""
        # Fixture paths are relative to the current working directory, as
        # before; the files are now closed deterministically.
        with open('swh/lister/%s/tests/%s' % (self.lister_subdir,
                  self.good_api_response_file)) as f:
            api_response = json.load(f)
        repos = api_response['result']['data']
        for repo in repos:
            self.assertEqual(
                'https://forge.softwareheritage.org/source/%s.git' %
                (repo['fields']['shortName']),
                get_repo_url(repo['attachments']['uris']['uris']))

        # Repository whose only URI has no builtin protocol/identifier
        with open('swh/lister/%s/tests/%s' %
                  (self.lister_subdir,
                   self.good_api_response_undefined_protocol)) as f:
            repo = json.load(f)
        self.assertEqual(
            'https://svn.blender.org/svnroot/bf-blender/',
            get_repo_url(repo['attachments']['uris']['uris']))
|
29
swh/lister/phabricator/tests/test_tasks.py
Normal file
29
swh/lister/phabricator/tests/test_tasks.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
from unittest.mock import patch
|
||||
|
||||
|
||||
def test_ping(swh_app, celery_session_worker):
    """The ping task can be sent, completes and returns 'OK'."""
    # swh_app / celery_session_worker are not defined in this module;
    # presumably pytest fixtures provided by the test configuration.
    res = swh_app.send_task(
        'swh.lister.phabricator.tasks.ping')
    assert res
    # block until the task has been executed
    res.wait()
    assert res.successful()
    assert res.result == 'OK'
|
||||
|
||||
|
||||
@patch('swh.lister.phabricator.tasks.PhabricatorLister')
def test_incremental(lister, swh_app, celery_session_worker):
    """The incremental task builds a lister with the default arguments and
    runs it from the last index stored in the database."""
    # setup the mocked PhabricatorLister
    # (calling the mocked class returns the mock itself, so the instance
    # method calls are recorded on the same object)
    lister.return_value = lister
    lister.db_last_index.return_value = 42
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.phabricator.tasks.IncrementalPhabricatorLister')
    assert res
    # block until the task has been executed
    res.wait()
    assert res.successful()

    # the lister was built with the defaults of new_lister ...
    lister.assert_called_once_with(
        api_token='', forge_url='https://forge.softwareheritage.org')
    # ... and run from the value returned by db_last_index
    lister.db_last_index.assert_called_once_with()
    lister.run.assert_called_once_with(min_bound=42)
|
Loading…
Add table
Add a link
Reference in a new issue