swh.lister.phabricator

Add a lister of all hosted repositories on a Phabricator instance
Closes T808
This commit is contained in:
archit 2019-05-15 19:50:30 +05:30 committed by Archit Agrawal
parent 4efb2ce62b
commit fedfd73c8e
14 changed files with 2894 additions and 1 deletions

View file

@ -9,7 +9,8 @@ import click
logger = logging.getLogger(__name__)
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', 'npm']
SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi',
'npm', 'phabricator']
@click.command()
@ -96,6 +97,14 @@ def cli(db_url, listers, drop_tables):
NpmVisitModel.metadata.drop_all(_lister.db_engine)
NpmVisitModel.metadata.create_all(_lister.db_engine)
elif lister == 'phabricator':
from .phabricator.models import IndexingModelBase as ModelBase
from .phabricator.lister import PhabricatorLister
_lister = PhabricatorLister(
forge_url='https://forge.softwareheritage.org',
api_token='',
override_config=override_conf)
else:
raise ValueError(
'Invalid lister %s: only supported listers are %s' %

View file

@ -11,4 +11,5 @@ def celery_includes():
'swh.lister.gitlab.tasks',
'swh.lister.npm.tasks',
'swh.lister.pypi.tasks',
'swh.lister.phabricator.tasks',
]

View file

View file

@ -0,0 +1,138 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.core.indexing_lister import SWHIndexingHttpLister
from swh.lister.phabricator.models import PhabricatorModel
from collections import defaultdict
class PhabricatorLister(SWHIndexingHttpLister):
    """Lister of the repositories hosted on a Phabricator instance.

    Repositories are queried through the ``diffusion.repository.search``
    API method, oldest first, with their ``uris`` attachments. The
    repository ``id`` is used as the indexable value and the ``after``
    cursor of a response gives the next index to request.
    """
    # Query string suffix appended to the API base url; '%s' receives the
    # index after which repositories must be listed.
    PATH_TEMPLATE = '&order=oldest&attachments[uris]=1&after=%s'
    MODEL = PhabricatorModel
    LISTER_NAME = 'phabricator'

    def __init__(self, forge_url, api_token, override_config=None):
        """
        Args:
            forge_url (str): base url of the Phabricator instance, a single
                trailing slash is removed
            api_token (str): value sent as the ``api.token`` query parameter
            override_config (dict): forwarded as is to the parent class
        """
        if forge_url.endswith("/"):
            forge_url = forge_url[:-1]
        self.forge_url = forge_url
        api_endpoint = ('api/diffusion.repository.'
                        'search?api.token=%s') % api_token
        api_baseurl = '%s/%s' % (forge_url, api_endpoint)
        super().__init__(api_baseurl=api_baseurl,
                         override_config=override_config)

    def request_headers(self):
        """
        (Override) Set requests headers to send when querying the
        Phabricator API
        """
        return {'User-Agent': 'Software Heritage phabricator lister',
                'Accept': 'application/json'}

    def get_model_from_repo(self, repo):
        """
        Convert a repository entry of the API response into a model dict.

        Args:
            repo (dict): one entry of the ``data`` list of an API response

        Returns:
            dict: the model values, or None when no http(s) url could be
            selected for the repository
        """
        url = get_repo_url(repo['attachments']['uris']['uris'])
        if url is None:
            return None
        return {
            # NOTE(review): forge url (without trailing slash) and id are
            # concatenated without any separator; kept as is because the
            # value is the primary key of already stored rows.
            'uid': self.forge_url + str(repo['id']),
            'indexable': repo['id'],
            'name': repo['fields']['shortName'],
            'full_name': repo['fields']['name'],
            'html_url': url,
            'origin_url': url,
            'description': None,
            'origin_type': repo['fields']['vcs']
        }

    def get_next_target_from_response(self, response):
        """
        Extract the index of the next page from the response cursor.

        Returns:
            the ``after`` cursor value, or None when there is no next page
        """
        after = response.json()['result']['cursor']['after']
        # A JSON null is decoded as None by the json parser; the literal
        # 'null' string is still mapped to None as it previously was.
        if after is None or after == 'null':
            return None
        return after

    def transport_response_simplified(self, response):
        """
        Turn an API response into a list of models.

        Returns:
            list: one item per listed repository, None for those without
            any usable url

        Raises:
            ValueError: when the ``result`` of the response is null
        """
        repos = response.json()
        if repos['result'] is None:
            raise ValueError(
                'Problem during information fetch: %s' % repos['error_code'])
        repos = repos['result']['data']
        return [self.get_model_from_repo(repo) for repo in repos]

    def filter_before_inject(self, models_list):
        """
        (Overrides) SWHIndexingLister.filter_before_inject
        Bounds query results by this Lister's set max_index.
        """
        # drop the repositories for which no model could be built
        models_list = [m for m in models_list if m is not None]
        return super().filter_before_inject(models_list)

    def _bootstrap_repositories_listing(self):
        """
        Method called when no min_bound value has been provided
        to the lister. Its purpose is to:

            1. get the first repository data hosted on the Phabricator
               instance

            2. inject them into the lister database

            3. return the first repository index to start the listing
               after that value

        Returns:
            int: The first repository index, or None when the instance
            does not host any repository
        """
        params = '&order=oldest&limit=1'
        response = self.safely_issue_request(params)
        models_list = self.transport_response_simplified(response)
        # Read the index from the raw API data and not from the models:
        # the model of a repository without http(s) url is None and the
        # data list is empty when nothing is hosted on the instance.
        repos = response.json()['result']['data']
        if not repos:
            return None
        self.max_index = repos[0]['id']
        models_list = self.filter_before_inject(models_list)
        if models_list:
            injected = self.inject_repo_data_into_db(models_list)
            self.create_missing_origins_and_tasks(models_list, injected)
        return self.max_index

    def run(self, min_bound=None, max_bound=None):
        """
        (Override) Run the lister on the specified Phabricator instance

        Args:
            min_bound (int): Optional repository index to start the listing
                after it
            max_bound (int): Optional repository index to stop the listing
                after it
        """
        # initial call to the lister, we need to bootstrap it in that case
        if min_bound is None:
            min_bound = self._bootstrap_repositories_listing()
            if min_bound is None:
                # no repository hosted on the instance, nothing to list
                return
        super().run(min_bound, max_bound)
def get_repo_url(attachments):
    """
    Select the url of a hosted repository among its uris attachments.

    Candidate urls are ranked by protocol first (https before http) and
    then by builtin identifier (shortname, callsign, id). Uris without
    builtin protocol are considered last, provided that their effective
    url starts with 'https' or 'http'.

    Args:
        attachments (list): uri entries, each holding a 'fields' dict with
            'builtin' ('protocol', 'identifier') and 'uri' ('effective')

    Returns:
        str: the selected url, or None when no http(s) url is available
    """
    schemes = ('https', 'http')
    # one {identifier: url} mapping per scheme, in priority order
    candidates = {scheme: {} for scheme in schemes}

    for entry in attachments:
        fields = entry['fields']
        builtin_protocol = fields['builtin']['protocol']
        effective_url = fields['uri']['effective']
        builtin_identifier = fields['builtin']['identifier']

        if builtin_protocol in ('http', 'https'):
            candidates[builtin_protocol][builtin_identifier] = effective_url
            continue
        if builtin_protocol is not None:
            # any other builtin protocol is ignored
            continue

        # no builtin protocol: deduce it from the url prefix, testing
        # 'https' first as it also starts with 'http'
        guessed = next(
            (scheme for scheme in schemes
             if effective_url.startswith(scheme)),
            None)
        if guessed is not None:
            candidates[guessed]['undefined'] = effective_url

    for scheme in schemes:
        urls_by_identifier = candidates[scheme]
        for identifier in ('shortname', 'callsign', 'id', 'undefined'):
            if identifier in urls_by_identifier:
                return urls_by_identifier[identifier]
    return None

View file

@ -0,0 +1,15 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, String, Integer
from swh.lister.core.models import IndexingModelBase
class PhabricatorModel(IndexingModelBase):
    """Database model of a repository listed from a Phabricator instance."""

    __tablename__ = 'phabricator_repos'

    # string identifier of the repository, primary key of the table
    uid = Column(
        String,
        primary_key=True,
    )
    # integer value the listing is indexed on, stored in an indexed column
    indexable = Column(
        Integer,
        index=True,
    )

View file

@ -0,0 +1,28 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.scheduler.celery_backend.config import app
from swh.lister.phabricator.lister import PhabricatorLister
def new_lister(
        forge_url='https://forge.softwareheritage.org', api_token='', **kw):
    """Instantiate a PhabricatorLister.

    Args:
        forge_url (str): url of the Phabricator instance to list
        api_token (str): token used to query the instance API
        **kw: extra keyword arguments passed to the lister constructor

    Returns:
        PhabricatorLister: the new lister instance
    """
    lister_kwargs = dict(kw, forge_url=forge_url, api_token=api_token)
    return PhabricatorLister(**lister_kwargs)
@app.task(name=__name__ + '.IncrementalPhabricatorLister')
def incremental_phabricator_lister(**lister_args):
    """Task listing the repositories located after the last index
    recorded in the lister database.

    Args:
        **lister_args: keyword arguments passed to new_lister
    """
    phabricator_lister = new_lister(**lister_args)
    last_index = phabricator_lister.db_last_index()
    phabricator_lister.run(min_bound=last_index)
@app.task(name=__name__ + '.FullPhabricatorLister')
def full_phabricator_lister(**lister_args):
    """Task running the lister without any bound.

    Args:
        **lister_args: keyword arguments passed to new_lister
    """
    new_lister(**lister_args).run()
@app.task(name=__name__ + '.ping')
def ping():
    """Task returning a constant 'OK' string."""
    status = 'OK'
    return status

View file

View file

@ -0,0 +1,8 @@
{
"result": {
"data": [],
"cursor": {
"after": null
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,60 @@
{
"id": 8,
"type": "REPO",
"phid": "PHID-REPO-ge2icigfu5ijk2whqfbl",
"fields": {
"name": "Blender Libraries",
"vcs": "svn",
"callsign": "BL",
"shortName": null,
"status": "active",
"isImporting": false,
"almanacServicePHID": null,
"spacePHID": null,
"dateCreated": 1385564674,
"dateModified": 1468574079,
"policy": {
"view": "public",
"edit": "admin",
"diffusion.push": "PHID-PROJ-hclk7tvd6pmpjmqastjl"
}
},
"attachments": {
"uris": {
"uris": [
{
"id": "70",
"type": "RURI",
"phid": "PHID-RURI-h7zdbkud6why4xrb2s2e",
"fields": {
"repositoryPHID": "PHID-REPO-ge2icigfu5ijk2whqfbl",
"uri": {
"raw": "https://svn.blender.org/svnroot/bf-blender/",
"display": "https://svn.blender.org/svnroot/bf-blender/",
"effective": "https://svn.blender.org/svnroot/bf-blender/",
"normalized": "svn.blender.org/svnroot/bf-blender"
},
"io": {
"raw": "observe",
"default": "none",
"effective": "observe"
},
"display": {
"raw": "always",
"default": "never",
"effective": "always"
},
"credentialPHID": null,
"disabled": false,
"builtin": {
"protocol": null,
"identifier": null
},
"dateCreated": "1467894515",
"dateModified": "1468574079"
}
}
]
}
}
}

View file

@ -0,0 +1 @@
from swh.lister.core.tests.conftest import * # noqa

View file

@ -0,0 +1,53 @@
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
import json
import unittest
from swh.lister.core.tests.test_lister import HttpListerTester
from swh.lister.phabricator.lister import PhabricatorLister
from swh.lister.phabricator.lister import get_repo_url
class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
    """Tests of PhabricatorLister, built on top of HttpListerTester."""
    Lister = PhabricatorLister
    # captures the index value sent in the 'after' query parameter
    test_re = re.compile(r'\&after=([^?&]+)')
    lister_subdir = 'phabricator'
    good_api_response_file = 'api_response.json'
    good_api_response_undefined_protocol = 'api_response_undefined_'\
                                           'protocol.json'
    bad_api_response_file = 'api_empty_response.json'
    first_index = 1
    last_index = 12
    entries_per_page = 10

    def get_fl(self, override_config=None):
        """(Override) Retrieve an instance of fake lister (fl).

        """
        if override_config or self.fl is None:
            self.fl = self.Lister(forge_url='https://fakeurl', api_token='a-1',
                                  override_config=override_config)
            self.fl.INITIAL_BACKOFF = 1

        self.fl.reset_backoff()
        return self.fl

    def _load_json_fixture(self, filename):
        """Parse a JSON file of the lister tests directory.

        The file is opened in a ``with`` block so that it gets closed once
        parsed. The path is relative to the current working directory.
        """
        path = 'swh/lister/%s/tests/%s' % (self.lister_subdir, filename)
        with open(path) as f:
            return json.load(f)

    def test_get_repo_url(self):
        """Check the url selected for repositories with builtin uris and
        for a repository whose uri has no builtin protocol."""
        api_response = self._load_json_fixture(self.good_api_response_file)
        repos = api_response['result']['data']
        for repo in repos:
            self.assertEqual(
                'https://forge.softwareheritage.org/source/%s.git' %
                (repo['fields']['shortName']),
                get_repo_url(repo['attachments']['uris']['uris']))

        repo = self._load_json_fixture(
            self.good_api_response_undefined_protocol)
        self.assertEqual(
            'https://svn.blender.org/svnroot/bf-blender/',
            get_repo_url(repo['attachments']['uris']['uris']))

View file

@ -0,0 +1,29 @@
from unittest.mock import patch
def test_ping(swh_app, celery_session_worker):
    """The ping task of the phabricator lister must answer 'OK'."""
    task_name = 'swh.lister.phabricator.tasks.ping'
    async_result = swh_app.send_task(task_name)
    assert async_result

    # block until the task has been processed
    async_result.wait()
    assert async_result.successful()
    assert async_result.result == 'OK'
@patch('swh.lister.phabricator.tasks.PhabricatorLister')
def test_incremental(lister, swh_app, celery_session_worker):
    """The incremental task must build a lister with the default arguments
    and run it from the last index found in the database."""
    # setup the mocked PhabricatorLister: instantiating the mocked class
    # gives back the mock itself, so calls can be checked on a single object
    lister.return_value = lister
    lister.run.return_value = None
    lister.db_last_index.return_value = 42

    task_name = 'swh.lister.phabricator.tasks.IncrementalPhabricatorLister'
    async_result = swh_app.send_task(task_name)
    assert async_result
    async_result.wait()
    assert async_result.successful()

    lister.assert_called_once_with(
        api_token='', forge_url='https://forge.softwareheritage.org')
    lister.db_last_index.assert_called_once_with()
    lister.run.assert_called_once_with(min_bound=42)