Added LaunchpadLister

Summary:
Related to T1734

From abandoned D2799

Reviewers: ardumont

Reviewed By: ardumont

Differential Revision: https://forge.softwareheritage.org/D2974
This commit is contained in:
Léni Gauffier 2020-04-12 00:22:20 +02:00
parent 29325c1d94
commit 58ef08b083
16 changed files with 528 additions and 6 deletions

View file

@ -1,4 +1,5 @@
Archit Agrawal
Avi Kelman (fiendish)
Léni Gauffier
Yann Gautier
Sushant Sushant

View file

@ -7,3 +7,4 @@ xmltodict
iso8601
beautifulsoup4
pytz
launchpadlib

View file

@ -65,6 +65,7 @@ setup(
lister.packagist=swh.lister.packagist:register
lister.phabricator=swh.lister.phabricator:register
lister.pypi=swh.lister.pypi:register
lister.launchpad=swh.lister.launchpad:register
""",
classifiers=[
"Programming Language :: Python :: 3",

View file

@ -13,29 +13,32 @@ from sqlalchemy import create_engine
from swh.lister import get_lister, SUPPORTED_LISTERS
from swh.lister.core.models import initialize
logger = logging.getLogger(__name__)
@pytest.fixture
def swh_listers(request, postgresql_proc, postgresql, swh_scheduler):
def lister_db_url(postgresql_proc, postgresql):
    """Build the connection URL of the PostgreSQL database used by lister tests.

    Args:
        postgresql_proc: object exposing the ``host`` and ``port`` of the
            PostgreSQL test server.
        postgresql: not used in the body; presumably requested only so the
            database fixture is set up first (to be confirmed).

    Returns:
        The ``postgresql://`` URL, for user ``postgres`` and database ``tests``.
    """
    connection = {
        "user": "postgres",
        "host": postgresql_proc.host,
        "port": postgresql_proc.port,
        "dbname": "tests",
    }
    url = "postgresql://{user}@{host}:{port}/{dbname}".format(**connection)
    logger.debug("lister db_url: %s", url)
    return url
@pytest.fixture
def swh_listers(request, lister_db_url, swh_scheduler):
listers = {}
# Prepare schema for all listers
for lister_name in SUPPORTED_LISTERS:
lister = get_lister(lister_name, db_url=db_url)
lister = get_lister(lister_name, db_url=lister_db_url)
lister.scheduler = swh_scheduler # inject scheduler fixture
listers[lister_name] = lister
initialize(create_engine(db_url), drop_tables=True)
initialize(create_engine(lister_db_url), drop_tables=True)
# Add the load-archive-files expected by some listers (gnu, cran, ...)
swh_scheduler.create_task_type(

View file

@ -0,0 +1,14 @@
# Copyright (C) 2020 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
def register():
    """Describe the Launchpad lister plugin.

    The model and lister classes are imported inside the function body.

    Returns:
        A dict with three keys: ``"models"`` (list holding the model class),
        ``"lister"`` (the lister class) and ``"task_modules"`` (list holding
        the dotted name of this package's ``tasks`` module).
    """
    from .models import LaunchpadModel
    from .lister import LaunchpadLister

    tasks_module = "%s.tasks" % __name__
    description = {
        "models": [LaunchpadModel],
        "lister": LaunchpadLister,
        "task_modules": [tasks_module],
    }
    return description

View file

@ -0,0 +1,130 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Optional, Dict, List, Union, Tuple, Any
from swh.lister.core.lister_base import ListerBase
from .models import LaunchpadModel
from itertools import count
from launchpadlib.launchpad import Launchpad # type: ignore
from lazr.restfulclient.resource import Collection, Entry # type: ignore
from datetime import datetime, timedelta
from sqlalchemy import func
class LaunchpadLister(ListerBase):
    """Lister for the git repositories exposed by Launchpad.

    Repositories are requested through an anonymous launchpadlib session,
    ordered "most neglected first" and filtered by a ``modified_since_date``
    threshold.  Each round ingests the entries currently held by the returned
    collection, then moves the threshold forward to shortly before the newest
    ``date_last_modified`` of that batch and asks again, until a batch comes
    back empty.
    """

    MODEL = LaunchpadModel
    LISTER_NAME = "launchpad"
    instance = "launchpad"
    launchpad: Launchpad
    # Number of ingested batches between two intermediate database commits.
    flush_packet_db = 20

    def __init__(self, override_config=None):
        """Initialise the base lister and open an anonymous Launchpad session.

        Args:
            override_config: forwarded unchanged to ``ListerBase.__init__``.
        """
        super().__init__(override_config=override_config)
        self.launchpad = Launchpad.login_anonymously(
            "softwareheritage", "production", version="devel"
        )

    def get_model_from_repo(self, repo: Entry) -> Dict[str, Union[str, datetime]]:
        """Convert one Launchpad repository entry into a model dict.

        Args:
            repo: entry exposing ``unique_name``, ``name``, ``git_https_url``,
                ``web_link`` and ``date_last_modified``.

        Returns:
            Dict of the values stored and scheduled for that repository; the
            origin type is always ``"git"``.
        """
        return {
            "uid": repo.unique_name,
            "name": repo.name,
            "full_name": repo.name,
            "origin_url": repo.git_https_url,
            "html_url": repo.web_link,
            "origin_type": "git",
            "date_last_modified": repo.date_last_modified,
        }

    def lib_response_simplified(
        self, response: Collection
    ) -> List[Dict[str, Union[str, datetime]]]:
        """Convert the current batch of a collection into model dicts.

        Only the first ``len(response.entries)`` items of ``response`` are
        read, i.e. the slice is bounded by the entries the collection
        currently holds.

        Args:
            response: collection returned by ``get_git_repos``.

        Returns:
            One model dict per entry of the batch, in collection order.
        """
        batch = response[: len(response.entries)]
        return [self.get_model_from_repo(repo) for repo in batch]

    def get_git_repos(self, threshold: Optional[datetime]) -> Collection:
        """Query Launchpad for git repositories, most neglected first.

        Args:
            threshold: value passed as ``modified_since_date``; ``None`` is
                passed through as is.

        Returns:
            The collection returned by launchpadlib.
        """
        return self.launchpad.git_repositories.getRepositories(
            order_by="most neglected first", modified_since_date=threshold
        )

    def db_last_threshold(self) -> Optional[datetime]:
        """Return the greatest ``date_last_modified`` stored in the lister db.

        Returns:
            The maximum value found, or ``None`` when the query yields no row
            or the aggregate itself is ``None``.
        """
        row = self.db_session.query(func.max(self.MODEL.date_last_modified)).first()
        return row[0] if row else None

    def ingest_data_lp(
        self, identifier: Optional[datetime], checks: bool = False
    ) -> Tuple[Collection, dict]:
        """Fetch one batch of repositories and ingest it.

        The sequence is: request Launchpad, simplify the response into model
        dicts, filter them, optionally run additional checks, inject the
        result into the local db and schedule the missing loader tasks.

        Args:
            identifier: ``modified_since_date`` threshold used for the request.
            checks: when true, ``do_additional_checks`` is applied to the
                filtered models before injection.

        Returns:
            A ``(response, injected)`` pair: the raw Launchpad collection and
            what ``inject_repo_data_into_db`` returned, or an empty dict when
            nothing was left to inject.
        """
        response = self.get_git_repos(identifier)
        models_list = self.lib_response_simplified(response)
        models_list = self.filter_before_inject(models_list)
        if checks:
            models_list = self.do_additional_checks(models_list)
        if not models_list:
            return response, {}
        # inject into local db
        injected = self.inject_repo_data_into_db(models_list)
        # queue workers
        self.schedule_missing_tasks(models_list, injected)
        return response, injected

    def run(self, max_bound: Optional[datetime] = None) -> Dict[str, Any]:
        """Main entry point: list repositories batch after batch.

        Batches are ingested sequentially with ``ingest_data_lp`` until one
        comes back empty.  The database session is committed and renewed every
        ``flush_packet_db`` batches and once more at the end.

        Args:
            max_bound: optional date to start at; it is used as the initial
                ``modified_since_date`` threshold (``None`` means no
                threshold).

        Returns:
            ``{"status": "eventful"}`` when at least one non-empty batch was
            processed, ``{"status": "uneventful"}`` otherwise.
        """
        status = "uneventful"

        def ingest_git_repos():
            """Ingest batches, yielding the 1-based number of each one."""
            threshold = max_bound
            for i in count(1):
                response, injected_repos = self.ingest_data_lp(threshold)
                if not response and not injected_repos:
                    return

                # batch is empty
                entries = response.entries
                if not entries:
                    return

                first: datetime = response[0].date_last_modified
                last: datetime = response[len(entries) - 1].date_last_modified

                # Restart slightly before the newest entry of this batch.
                # NOTE(review): the 15 second overlap presumably protects
                # against missing repositories with close timestamps.
                next_date = last - timedelta(seconds=15)
                if next_date <= first:
                    # The batch spans 15 seconds or less: fall back to the
                    # midpoint so the threshold does not move before ``first``.
                    next_date = last - (last - first) / 2

                threshold = next_date
                yield i

        for i in ingest_git_repos():
            # Any processed batch makes the run eventful, whether or not an
            # intermediate flush happens on this iteration.
            status = "eventful"
            if i % self.flush_packet_db == 0:
                self.db_session.commit()
                self.db_session = self.mk_session()

        self.db_session.commit()
        self.db_session = self.mk_session()
        return {"status": status}

View file

@ -0,0 +1,16 @@
# Copyright (C) 2017-2020 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, String, Date
from swh.lister.core.models import ModelBase
class LaunchpadModel(ModelBase):
    """A Launchpad repository, stored in the ``launchpad_repo`` table."""

    __tablename__ = "launchpad_repo"

    # Primary key of the row.
    uid = Column(String, primary_key=True)
    # Indexed last-modification marker.
    # NOTE(review): declared as ``Date``; if callers store full datetimes the
    # time of day is presumably dropped - confirm this is intended.
    date_last_modified = Column(Date, index=True)

View file

@ -0,0 +1,35 @@
# Copyright (C) 2017-2020 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
# import random
from celery import shared_task
from .lister import LaunchpadLister
@shared_task(name=__name__ + ".IncrementalLaunchpadLister")
def launchpad_lister_incremental(threshold, **lister_args):
    """Run the Launchpad lister starting from ``threshold``.

    Args:
        threshold: passed to ``LaunchpadLister.run`` as ``max_bound``.
        **lister_args: keyword arguments for the ``LaunchpadLister``
            constructor.

    Returns:
        The value returned by ``LaunchpadLister.run``.
    """
    return LaunchpadLister(**lister_args).run(max_bound=threshold)
@shared_task(name=__name__ + ".FullLaunchpadLister", bind=True)
def list_launchpad_full(self, **lister_args):
    """List Launchpad in full, i.e. without any starting threshold.

    Args:
        **lister_args: keyword arguments forwarded to the incremental task.

    Returns:
        The value returned by ``launchpad_lister_incremental``.
    """
    message = "%s OK, spawned full task" % (self.name)
    self.log.debug(message)
    return launchpad_lister_incremental(threshold=None, **lister_args)
@shared_task(name=__name__ + ".NewLaunchpadLister", bind=True)
def list_launchpad_new(self, **lister_args):
    """List Launchpad starting from the last date recorded in the lister db.

    A first lister instance is built only to read ``db_last_threshold()``;
    the listing itself is delegated to the incremental task.

    Args:
        **lister_args: keyword arguments for the ``LaunchpadLister``
            constructor, also forwarded to the incremental task.

    Returns:
        The value returned by ``launchpad_lister_incremental``.
    """
    last_seen = LaunchpadLister(**lister_args).db_last_threshold()
    message = "%s OK, spawned new task" % (self.name)
    self.log.debug(message)
    return launchpad_lister_incremental(threshold=last_seen, **lister_args)

View file

View file

@ -0,0 +1,46 @@
from swh.lister.core.tests.conftest import * # noqa
from datetime import datetime
import json
import os
from unittest.mock import patch
import pytest
from swh.lister import get_lister
from swh.lister.core.models import initialize
from sqlalchemy.engine import create_engine
@pytest.fixture
def lister_launchpad(datadir, lister_db_url, swh_scheduler):
    """Provide a launchpad lister whose Launchpad client is mocked.

    ``getRepositories`` successively returns the content of
    ``response0.json``, ``response1.json`` and ``response2.json`` read from
    ``datadir``.
    """

    class Repo:
        """Attribute bag built from one JSON record."""

        def __init__(self, record: dict):
            for field, raw in record.items():
                if field == "date_last_modified":
                    # Dates are stored as ISO strings in the JSON files.
                    value = datetime.fromisoformat(raw)
                else:
                    value = raw
                setattr(self, field, value)

    class Collection:
        """Stand-in for a collection: ``entries`` plus indexing and slicing."""

        entries = []

        def __init__(self, records):
            self.entries = [Repo(record) for record in records]

        def __getitem__(self, key):
            return self.entries[key]

    def mock_lp_response(page) -> Collection:
        """Load ``response<page>.json`` from ``datadir`` as a Collection."""
        path = os.path.join(datadir, f"response{page}.json")
        with open(path, "r", encoding="utf-8") as stream:
            return Collection(json.load(stream))

    # The anonymous login is patched while the lister is being built.
    with patch("launchpadlib.launchpad.Launchpad.login_anonymously"):
        lister = get_lister("launchpad", db_url=lister_db_url)

    lister.scheduler = swh_scheduler  # inject scheduler fixture

    get_repositories = lister.launchpad.git_repositories.getRepositories
    get_repositories.side_effect = [mock_lp_response(i) for i in range(3)]

    engine = create_engine(lister_db_url)
    initialize(engine, drop_tables=True)

    return lister

View file

@ -0,0 +1,107 @@
[
{
"unique_name":"~ubuntu-kernel/ubuntu/+source/linux-meta/+git/raring",
"name":"raring",
"git_https_url":"https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux-meta/+git/raring",
"web_link":"https://code.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux-meta/+git/raring",
"date_last_modified":"2015-05-18T16:05:23.706734+00:00"
},
{
"unique_name":"~ubuntu-kernel/ubuntu/+source/linux-signed/+git/raring",
"name":"raring",
"git_https_url":"https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux-signed/+git/raring",
"web_link":"https://code.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux-signed/+git/raring",
"date_last_modified":"2015-05-18T16:05:25.200936+00:00"
},
{
"unique_name":"~ubuntu-kernel/ubuntu/+source/linux/+git/quantal",
"name":"quantal",
"git_https_url":"https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/quantal",
"web_link":"https://code.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/quantal",
"date_last_modified":"2015-05-18T16:58:59.809000+00:00"
},
{
"unique_name":"~ubuntu-kernel/ubuntu/+source/linux-meta/+git/quantal",
"name":"quantal",
"git_https_url":"https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux-meta/+git/quantal",
"web_link":"https://code.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux-meta/+git/quantal",
"date_last_modified":"2015-05-18T17:04:31.267631+00:00"
},
{
"unique_name":"~ubuntu-kernel/ubuntu/+source/linux-signed/+git/quantal",
"name":"quantal",
"git_https_url":"https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux-signed/+git/quantal",
"web_link":"https://code.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux-signed/+git/quantal",
"date_last_modified":"2015-05-18T17:04:32.689598+00:00"
},
{
"unique_name":"~ki7mt/jtsdk/+git/jtsdk",
"name":"jtsdk",
"git_https_url":"https://git.launchpad.net/jtsdk",
"web_link":"https://code.launchpad.net/~ki7mt/jtsdk/+git/jtsdk",
"date_last_modified":"2015-05-18T18:45:38.235509+00:00"
},
{
"unique_name":"~ki7mt/flsdk/+git/flsdk",
"name":"flsdk",
"git_https_url":"https://git.launchpad.net/flsdk",
"web_link":"https://code.launchpad.net/~ki7mt/flsdk/+git/flsdk",
"date_last_modified":"2015-05-18T20:37:26.560139+00:00"
},
{
"unique_name":"~martinatkins/alamatic/+git/alamatic",
"name":"alamatic",
"git_https_url":"https://git.launchpad.net/alamatic",
"web_link":"https://code.launchpad.net/~martinatkins/alamatic/+git/alamatic",
"date_last_modified":"2015-05-19T05:07:03.315855+00:00"
},
{
"unique_name":"~registry/zope-cmfcore/+git/zope-cmfcore",
"name":"zope-cmfcore",
"git_https_url":"https://git.launchpad.net/zope-cmfcore",
"web_link":"https://code.launchpad.net/~registry/zope-cmfcore/+git/zope-cmfcore",
"date_last_modified":"2015-05-22T21:00:16.480059+00:00"
},
{
"unique_name":"~registry/zope-cmfdefault/+git/zope-cmfdefault",
"name":"zope-cmfdefault",
"git_https_url":"https://git.launchpad.net/zope-cmfdefault",
"web_link":"https://code.launchpad.net/~registry/zope-cmfdefault/+git/zope-cmfdefault",
"date_last_modified":"2015-05-22T22:42:01.691765+00:00"
},
{
"unique_name":"~registry/zope-cmftopic/+git/zope-cmftopic",
"name":"zope-cmftopic",
"git_https_url":"https://git.launchpad.net/zope-cmftopic",
"web_link":"https://code.launchpad.net/~registry/zope-cmftopic/+git/zope-cmftopic",
"date_last_modified":"2015-05-22T22:59:46.944212+00:00"
},
{
"unique_name":"~registry/zope-cmfcalendar/+git/zope-cmfcalendar",
"name":"zope-cmfcalendar",
"git_https_url":"https://git.launchpad.net/zope-cmfcalendar",
"web_link":"https://code.launchpad.net/~registry/zope-cmfcalendar/+git/zope-cmfcalendar",
"date_last_modified":"2015-05-22T23:17:21.644526+00:00"
},
{
"unique_name":"~registry/zope-cmfuid/+git/zope-cmfuid",
"name":"zope-cmfuid",
"git_https_url":"https://git.launchpad.net/zope-cmfuid",
"web_link":"https://code.launchpad.net/~registry/zope-cmfuid/+git/zope-cmfuid",
"date_last_modified":"2015-05-22T23:33:56.253122+00:00"
},
{
"unique_name":"~registry/zope-cmf-buildout/+git/zope-cmf-buildout",
"name":"zope-cmf-buildout",
"git_https_url":"https://git.launchpad.net/zope-cmf-buildout",
"web_link":"https://code.launchpad.net/~registry/zope-cmf-buildout/+git/zope-cmf-buildout",
"date_last_modified":"2015-05-26T01:01:50.249139+00:00"
},
{
"unique_name":"~pspmteam/libertine/+git/libertine",
"name":"libertine",
"git_https_url":"https://git.launchpad.net/libertine",
"web_link":"https://code.launchpad.net/~pspmteam/libertine/+git/libertine",
"date_last_modified":"2015-05-27T13:45:40.133292+00:00"
}
]

View file

@ -0,0 +1,107 @@
[
{
"unique_name":"~bafu/+git/hwestack-helper",
"name":"hwestack-helper",
"git_https_url":"https://git.launchpad.net/~bafu/+git/hwestack-helper",
"web_link":"https://code.launchpad.net/~bafu/+git/hwestack-helper",
"date_last_modified":"2015-05-28T10:11:39.102130+00:00"
},
{
"unique_name":"~cryptsetup-tpm-team/cryptsetup-tpm/+git/cryptsetup-tpm",
"name":"cryptsetup-tpm",
"git_https_url":"https://git.launchpad.net/cryptsetup-tpm",
"web_link":"https://code.launchpad.net/~cryptsetup-tpm-team/cryptsetup-tpm/+git/cryptsetup-tpm",
"date_last_modified":"2015-05-29T08:04:46.949494+00:00"
},
{
"unique_name":"~libertine-team/libertine/+git/libertine",
"name":"libertine",
"git_https_url":"https://git.launchpad.net/~libertine-team/libertine/+git/libertine",
"web_link":"https://code.launchpad.net/~libertine-team/libertine/+git/libertine",
"date_last_modified":"2015-06-02T13:51:52.869335+00:00"
},
{
"unique_name":"~ursinha/rosetta2wiki/+git/rosetta2wiki",
"name":"rosetta2wiki",
"git_https_url":"https://git.launchpad.net/rosetta2wiki",
"web_link":"https://code.launchpad.net/~ursinha/rosetta2wiki/+git/rosetta2wiki",
"date_last_modified":"2015-06-03T14:26:56.098899+00:00"
},
{
"unique_name":"~rvb/+git/test-git-repo2",
"name":"test-git-repo2",
"git_https_url":"https://git.launchpad.net/~rvb/+git/test-git-repo2",
"web_link":"https://code.launchpad.net/~rvb/+git/test-git-repo2",
"date_last_modified":"2015-06-04T09:45:10.316365+00:00"
},
{
"unique_name":"~maas-maintainers/+git/test-git-repo",
"name":"test-git-repo",
"git_https_url":"https://git.launchpad.net/~maas-maintainers/+git/test-git-repo",
"web_link":"https://code.launchpad.net/~maas-maintainers/+git/test-git-repo",
"date_last_modified":"2015-06-04T09:52:15.858834+00:00"
},
{
"unique_name":"~zyga/+git/hwcert-tools",
"name":"hwcert-tools",
"git_https_url":"https://git.launchpad.net/~zyga/+git/hwcert-tools",
"web_link":"https://code.launchpad.net/~zyga/+git/hwcert-tools",
"date_last_modified":"2015-06-05T09:19:34.219427+00:00"
},
{
"unique_name":"~kaxing/+git/2fa",
"name":"2fa",
"git_https_url":"https://git.launchpad.net/~kaxing/+git/2fa",
"web_link":"https://code.launchpad.net/~kaxing/+git/2fa",
"date_last_modified":"2015-06-05T10:42:56.025561+00:00"
},
{
"unique_name":"~profzoom/+git/frobby",
"name":"frobby",
"git_https_url":"https://git.launchpad.net/~profzoom/+git/frobby",
"web_link":"https://code.launchpad.net/~profzoom/+git/frobby",
"date_last_modified":"2015-06-06T03:55:15.411463+00:00"
},
{
"unique_name":"~dpniel/+git/test-git",
"name":"test-git",
"git_https_url":"https://git.launchpad.net/~dpniel/+git/test-git",
"web_link":"https://code.launchpad.net/~dpniel/+git/test-git",
"date_last_modified":"2015-06-08T06:29:11.354988+00:00"
},
{
"unique_name":"~corey.bryant/+git/python-cinderclient",
"name":"python-cinderclient",
"git_https_url":"https://git.launchpad.net/~corey.bryant/+git/python-cinderclient",
"web_link":"https://code.launchpad.net/~corey.bryant/+git/python-cinderclient",
"date_last_modified":"2015-06-08T17:58:17.282686+00:00"
},
{
"unique_name":"~corey.bryant/+git/python-glanceclient",
"name":"python-glanceclient",
"git_https_url":"https://git.launchpad.net/~corey.bryant/+git/python-glanceclient",
"web_link":"https://code.launchpad.net/~corey.bryant/+git/python-glanceclient",
"date_last_modified":"2015-06-09T15:46:32.870529+00:00"
},
{
"unique_name":"~zyga/hwcert-tools/+git/hwcert-tools",
"name":"hwcert-tools",
"git_https_url":"https://git.launchpad.net/~zyga/hwcert-tools/+git/hwcert-tools",
"web_link":"https://code.launchpad.net/~zyga/hwcert-tools/+git/hwcert-tools",
"date_last_modified":"2015-06-09T17:15:25.299211+00:00"
},
{
"unique_name":"~ubuntu-system-image/ubuntu-system-image/+git/documentation",
"name":"documentation",
"git_https_url":"https://git.launchpad.net/~ubuntu-system-image/ubuntu-system-image/+git/documentation",
"web_link":"https://code.launchpad.net/~ubuntu-system-image/ubuntu-system-image/+git/documentation",
"date_last_modified":"2015-06-09T17:33:52.254130+00:00"
},
{
"unique_name":"~corey.bryant/+git/python-novaclient",
"name":"python-novaclient",
"git_https_url":"https://git.launchpad.net/~corey.bryant/+git/python-novaclient",
"web_link":"https://code.launchpad.net/~corey.bryant/+git/python-novaclient",
"date_last_modified":"2015-06-09T19:50:55.238308+00:00"
}
]

View file

@ -0,0 +1 @@
[]

View file

@ -0,0 +1,27 @@
def test_launchpad_lister(lister_launchpad, datadir):
    """Run the mocked lister and check the ``load-git`` tasks it scheduled."""
    lister_launchpad.run()

    get_repositories = lister_launchpad.launchpad.git_repositories.getRepositories
    assert len(get_repositories.mock_calls) == 3

    tasks = lister_launchpad.scheduler.search_tasks(task_type="load-git")
    assert len(tasks) == 30

    for task in tasks:
        assert task["type"] == "load-git"

        arguments = task["arguments"]
        # positional arguments: none expected
        assert len(arguments["args"]) == 0

        # keyword arguments: only the origin url
        kwargs = arguments["kwargs"]
        assert set(kwargs.keys()) == {"url"}
        assert kwargs["url"].startswith("https://git.launchpad.net")

        assert task["policy"] == "recurring"
        assert task["priority"] is None
        assert task["retries_left"] == 0

View file

@ -0,0 +1,33 @@
from unittest.mock import patch
@patch("swh.lister.launchpad.tasks.LaunchpadLister")
def test_new(lister, swh_app, celery_session_worker):
    """The "new" task builds two listers, reads the threshold once, runs once."""
    # the mocked class returns itself, so instance calls land on the same mock
    lister.return_value = lister
    lister.run.return_value = None

    task_name = "swh.lister.launchpad.tasks.NewLaunchpadLister"
    async_result = swh_app.send_task(task_name)
    assert async_result
    async_result.wait()
    assert async_result.successful()

    assert lister.call_count == 2
    lister.db_last_threshold.assert_called_once()
    lister.run.assert_called_once()
@patch("swh.lister.launchpad.tasks.LaunchpadLister")
def test_full(lister, swh_app, celery_session_worker):
    """The "full" task builds one lister and runs it with no threshold."""
    # the mocked class returns itself, so instance calls land on the same mock
    lister.return_value = lister
    lister.run.return_value = None

    task_name = "swh.lister.launchpad.tasks.FullLaunchpadLister"
    async_result = swh_app.send_task(task_name)
    assert async_result
    async_result.wait()
    assert async_result.successful()

    lister.assert_called_once()
    lister.db_last_threshold.assert_not_called()
    lister.run.assert_called_once_with(max_bound=None)

View file

@ -47,7 +47,7 @@ def test_get_lister_override():
lst = get_lister(
lister_name,
db_url,
**{"url": url, "priority": "high", "policy": "oneshot",}
**{"url": url, "priority": "high", "policy": "oneshot",},
)
assert lst.url == url