launchpad: Allow bzr origins listing

Related to T3945
This commit is contained in:
Antoine R. Dumont (@ardumont) 2022-02-16 17:56:13 +01:00
parent 31b4429ced
commit 262f9369c8
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
3 changed files with 275 additions and 59 deletions

View file

@ -1,4 +1,4 @@
# Copyright (C) 2020-2021 The Software Heritage developers
# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -6,7 +6,7 @@
from dataclasses import dataclass
from datetime import datetime
import logging
from typing import Any, Dict, Iterator, Optional
from typing import Any, Dict, Iterator, Optional, Tuple
import iso8601
from launchpadlib.launchpad import Launchpad
@ -19,20 +19,28 @@ from ..pattern import CredentialsType, Lister
logger = logging.getLogger(__name__)
LaunchpadPageType = Iterator[Collection]
VcsType = str
LaunchpadPageType = Tuple[VcsType, Collection]
@dataclass
class LaunchpadListerState:
    """Cursor of the Launchpad lister, kept between listings.

    One modification date is tracked per supported VCS type.
    """

    # Modification date of the last updated git repository since last listing
    git_date_last_modified: Optional[datetime] = None
    # Modification date of the last updated bzr repository since last listing
    bzr_date_last_modified: Optional[datetime] = None
def origin(vcs_type: str, repo: Any) -> str:
    """Return the origin url to use for ``repo``.

    A ``"git"`` repository is identified by its ``git_https_url`` attribute;
    for any other ``vcs_type`` the ``web_link`` attribute is used instead.
    """
    if vcs_type == "git":
        return repo.git_https_url
    return repo.web_link
class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
"""
List git repositories from Launchpad.
List repositories from Launchpad (git or bzr).
Args:
scheduler: instance of SchedulerInterface
@ -56,36 +64,53 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
credentials=credentials,
)
self.incremental = incremental
self.date_last_modified = None
self.date_last_modified: Dict[str, Optional[datetime]] = {
"git": None,
"bzr": None,
}
def state_from_dict(self, d: Dict[str, Any]) -> LaunchpadListerState:
    """Build the lister state out of its serialized dict form.

    Args:
        d: serialized state; the ``git_date_last_modified`` and
            ``bzr_date_last_modified`` entries, when set, are ISO 8601
            strings. A legacy ``date_last_modified`` entry is also accepted.

    Returns:
        The state with every date entry that is set parsed through
        ``iso8601.parse_date``.
    """
    # Work on a copy so the caller's dict is left untouched
    state = dict(d)

    # States serialized before bzr support carried a single
    # "date_last_modified" entry (the lister only listed git repositories
    # then). That name is no longer a field of LaunchpadListerState, so
    # forwarding it would make the constructor raise a TypeError: carry its
    # value over to the git cursor instead, unless one is already set.
    legacy_date = state.pop("date_last_modified", None)
    if legacy_date is not None and state.get("git_date_last_modified") is None:
        state["git_date_last_modified"] = legacy_date

    for vcs_type in ("git", "bzr"):
        key = f"{vcs_type}_date_last_modified"
        serialized_date = state.get(key)
        if serialized_date is not None:
            state[key] = iso8601.parse_date(serialized_date)

    return LaunchpadListerState(**state)
def state_to_dict(self, state: LaunchpadListerState) -> Dict[str, Any]:
    """Serialize the lister state into a plain dict.

    Args:
        state: the lister state to serialize.

    Returns:
        A dict holding one ``<vcs_type>_date_last_modified`` entry for each of
        "git" and "bzr": the date in ISO 8601 form (``isoformat()``), or None
        when the matching attribute is unset or missing.
    """
    serialized: Dict[str, Optional[str]] = {}
    for vcs_type in ("git", "bzr"):
        key = f"{vcs_type}_date_last_modified"
        # A single lookup with a default covers both the "attribute missing"
        # and the "attribute is None" cases
        last_modified = getattr(state, key, None)
        serialized[key] = (
            None if last_modified is None else last_modified.isoformat()
        )
    return serialized
def get_pages(self) -> Iterator[LaunchpadPageType]:
    """
    Yield one ``(vcs_type, repositories)`` page per supported VCS type, git
    first then bzr.

    Launchpad is accessed through an anonymous login and each listing is
    requested with the "most neglected first" ordering. In incremental mode,
    listings are restricted to what was modified since the dates saved in the
    lister state.
    """
    client = Launchpad.login_anonymously(
        "softwareheritage", "production", version="devel"
    )

    if self.incremental:
        # Resume from the cursors recorded in the lister state
        state = self.state
        self.date_last_modified = {
            "git": state.git_date_last_modified,
            "bzr": state.bzr_date_last_modified,
        }

    listing_functions = {
        "git": client.git_repositories.getRepositories,
        "bzr": client.branches.getBranches,
    }
    for vcs_type, list_repositories in listing_functions.items():
        yield vcs_type, list_repositories(
            order_by="most neglected first",
            modified_since_date=self.date_last_modified[vcs_type],
        )
def get_origins_from_page(self, page: LaunchpadPageType) -> Iterator[ListedOrigin]:
"""
@ -93,40 +118,65 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
"""
assert self.lister_obj.id is not None
prev_origin_url = None
prev_origin_url: Dict[str, Optional[str]] = {"git": None, "bzr": None}
for repo in page:
vcs_type, repos = page
origin_url = repo.git_https_url
assert vcs_type in {"git", "bzr"}
for repo in repos:
origin_url = origin(vcs_type, repo)
# filter out origins with invalid URL or origin previously listed
# (last modified repository will be listed twice by launchpadlib)
if not origin_url.startswith("https://") or origin_url == prev_origin_url:
if (
not origin_url.startswith("https://")
or origin_url == prev_origin_url[vcs_type]
):
continue
last_update = repo.date_last_modified
self.date_last_modified = last_update
self.date_last_modified[vcs_type] = last_update
logger.debug("Found origin %s last updated on %s", origin_url, last_update)
logger.debug(
"Found origin %s with type %s last updated on %s",
origin_url,
vcs_type,
last_update,
)
prev_origin_url = origin_url
prev_origin_url[vcs_type] = origin_url
yield ListedOrigin(
lister_id=self.lister_obj.id,
visit_type="git",
visit_type=vcs_type,
url=origin_url,
last_update=last_update,
)
def finalize(self) -> None:
    """Save the newest modification dates seen and flag the lister as updated.

    Nothing happens when no date was recorded for any VCS type. Otherwise the
    lister is flagged as updated and, in incremental mode only, each VCS
    cursor of the state is moved forward when the date recorded during this
    listing is more recent than the saved one (or when none was saved yet).
    """
    vcs_types = ("git", "bzr")

    # No date recorded for any vcs type: leave state and `updated` untouched
    if all(self.date_last_modified[vcs_type] is None for vcs_type in vcs_types):
        return

    if self.incremental:
        for vcs_type in vcs_types:
            listed_date = self.date_last_modified[vcs_type]
            if listed_date is None:
                # Nothing listed for this vcs type, keep its saved cursor
                continue
            attribute_name = f"{vcs_type}_date_last_modified"
            saved_date = getattr(self.state, attribute_name)
            # Cursors only ever move forward
            if saved_date is None or listed_date > saved_date:
                setattr(self.state, attribute_name, listed_date)

    self.updated = True

View file

@ -0,0 +1,126 @@
[
{
"self_link": "https://api.launchpad.net/1.0/fourbar",
"web_link": "https://launchpad.net/fourbar",
"resource_type_link": "https://api.launchpad.net/1.0/#project",
"official_answers": true,
"official_blueprints": true,
"official_codehosting": true,
"official_bugs": true,
"information_type": "Public",
"active": true,
"bug_reporting_guidelines": null,
"bug_reported_acknowledgement": null,
"official_bug_tags": [],
"recipes_collection_link": "https://api.launchpad.net/1.0/fourbar/recipes",
"active_milestones_collection_link": "https://api.launchpad.net/1.0/fourbar/active_milestones",
"all_milestones_collection_link": "https://api.launchpad.net/1.0/fourbar/all_milestones",
"bug_supervisor_link": null,
"qualifies_for_free_hosting": true,
"reviewer_whiteboard": "tag:launchpad.net:2008:redacted",
"is_permitted": "tag:launchpad.net:2008:redacted",
"project_reviewed": "tag:launchpad.net:2008:redacted",
"license_approved": "tag:launchpad.net:2008:redacted",
"private": false,
"display_name": "fourBar",
"icon_link": "https://api.launchpad.net/1.0/fourbar/icon",
"logo_link": "https://api.launchpad.net/1.0/fourbar/logo",
"name": "fourbar",
"owner_link": "https://api.launchpad.net/1.0/~sorivenul",
"project_group_link": null,
"title": "fourBar",
"registrant_link": "https://api.launchpad.net/1.0/~sorivenul",
"driver_link": null,
"summary": "fourBar is a minimal application launcher for POSIX systems. It launches four commonly used applications (terminal, file browser, editor, and web browser by default). It is written in Python/Tkinter. Documentation on simple customization is included. ",
"description": "If you wish to help with the development of fourBar, download a branch, test, report bugs and propose features. There is still work to be done.",
"date_created": "2008-11-03T07:03:00.872230+00:00",
"homepage_url": null,
"wiki_url": null,
"screenshots_url": null,
"download_url": "http://downloads.sourceforge.net/fourbar/fourbar-1.0.0.tar.gz?modtime=1224102066&big_mirror=0",
"programming_language": "Python",
"sourceforge_project": "fourBar",
"freshmeat_project": null,
"brand_link": "https://api.launchpad.net/1.0/fourbar/brand",
"private_bugs": false,
"licenses": [
"GNU GPL v3"
],
"license_info": null,
"bug_tracker_link": null,
"date_next_suggest_packaging": null,
"series_collection_link": "https://api.launchpad.net/1.0/fourbar/series",
"development_focus_link": "https://api.launchpad.net/1.0/fourbar/trunk",
"releases_collection_link": "https://api.launchpad.net/1.0/fourbar/releases",
"translation_focus_link": null,
"commercial_subscription_link": null,
"commercial_subscription_is_due": false,
"remote_product": "242408&1119369",
"security_contact": null,
"vcs": "Bazaar",
"http_etag": "\"e3685b989bd2609f9a84bd2d90bef380c6f3c92b-13a47c4e8b4688c8fc042bf7eede3a2f4c14a9d6\"",
"date_last_modified":"2016-05-19T16:05:23.706734+00:00"
},
{
"self_link": "https://api.launchpad.net/1.0/gekkoware",
"web_link": "https://launchpad.net/gekkoware",
"resource_type_link": "https://api.launchpad.net/1.0/#project",
"official_answers": false,
"official_blueprints": false,
"official_codehosting": false,
"official_bugs": false,
"information_type": "Public",
"active": true,
"bug_reporting_guidelines": null,
"bug_reported_acknowledgement": null,
"official_bug_tags": [],
"recipes_collection_link": "https://api.launchpad.net/1.0/gekkoware/recipes",
"active_milestones_collection_link": "https://api.launchpad.net/1.0/gekkoware/active_milestones",
"all_milestones_collection_link": "https://api.launchpad.net/1.0/gekkoware/all_milestones",
"bug_supervisor_link": null,
"qualifies_for_free_hosting": true,
"reviewer_whiteboard": "tag:launchpad.net:2008:redacted",
"is_permitted": "tag:launchpad.net:2008:redacted",
"project_reviewed": "tag:launchpad.net:2008:redacted",
"license_approved": "tag:launchpad.net:2008:redacted",
"private": false,
"display_name": "gekkoware",
"icon_link": "https://api.launchpad.net/1.0/gekkoware/icon",
"logo_link": "https://api.launchpad.net/1.0/gekkoware/logo",
"name": "gekkoware",
"owner_link": "https://api.launchpad.net/1.0/~compermisos",
"project_group_link": null,
"title": "gekkoware",
"registrant_link": "https://api.launchpad.net/1.0/~compermisos",
"driver_link": null,
"summary": "A port of gekko to ubuntu",
"description": null,
"date_created": "2007-10-21T03:02:22.186775+00:00",
"homepage_url": "http://gekkoware.org",
"wiki_url": null,
"screenshots_url": null,
"download_url": null,
"programming_language": "php",
"sourceforge_project": "gekkoware",
"freshmeat_project": null,
"brand_link": "https://api.launchpad.net/1.0/gekkoware/brand",
"private_bugs": false,
"licenses": [
"GNU GPL v2"
],
"license_info": null,
"bug_tracker_link": null,
"date_next_suggest_packaging": null,
"series_collection_link": "https://api.launchpad.net/1.0/gekkoware/series",
"development_focus_link": "https://api.launchpad.net/1.0/gekkoware/trunk",
"releases_collection_link": "https://api.launchpad.net/1.0/gekkoware/releases",
"translation_focus_link": null,
"commercial_subscription_link": null,
"commercial_subscription_is_due": false,
"remote_product": "117004&676653",
"security_contact": null,
"vcs": "Bazaar",
"http_etag": "\"b9802efcebb5afdd87c8ee10f8473040340bcead-159127be59c12e7cbb161eee4cae2ade72353c0d\"",
"date_last_modified":"2017-03-15T16:03:22.706432+00:00"
}
]

View file

@ -1,4 +1,4 @@
# Copyright (C) 2020-2021 The Software Heritage developers
# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -10,7 +10,7 @@ from typing import List
import pytest
from ..lister import LaunchpadLister
from ..lister import LaunchpadLister, origin
class _Repo:
@ -49,25 +49,35 @@ def launchpad_response2(datadir):
return _launchpad_response(datadir, "launchpad_response2.json")
def _mock_getRepositories(mocker, launchpad_response):
@pytest.fixture
def launchpad_bzr_response(datadir):
    """Sample bzr listing built from ``launchpad_bzr_response.json`` in ``datadir``."""
    fixture_filename = "launchpad_bzr_response.json"
    return _launchpad_response(datadir, fixture_filename)
def _mock_launchpad(mocker, launchpad_response, launchpad_bzr_response=None):
    """Patch the ``Launchpad`` name of the lister module with canned listings.

    ``git_repositories.getRepositories`` is made to return
    ``launchpad_response`` and ``branches.getBranches`` to return
    ``launchpad_bzr_response`` (an empty list when it is None).

    Returns:
        The ``(getRepositories, getBranches)`` mocks, so that callers can
        check how they were called.
    """
    launchpad_mock = mocker.patch("swh.lister.launchpad.lister.Launchpad")
    # The anonymous login hands back the very same mock
    launchpad_mock.login_anonymously.return_value = launchpad_mock

    get_repositories_mock = launchpad_mock.git_repositories.getRepositories
    get_repositories_mock.return_value = launchpad_response

    if launchpad_bzr_response is None:
        bzr_listing = []
    else:
        bzr_listing = launchpad_bzr_response
    get_branches_mock = launchpad_mock.branches.getBranches
    get_branches_mock.return_value = bzr_listing

    return get_repositories_mock, get_branches_mock
def _check_listed_origins(scheduler_origins, launchpad_response, vcs_type="git"):
    """Check each repository of ``launchpad_response`` got listed exactly once.

    The listed origin matching a repository url must carry the repository
    modification date as ``last_update`` and ``vcs_type`` as ``visit_type``.
    """
    for repo in launchpad_response:
        expected_url = origin(vcs_type, repo)
        matching = [
            listed for listed in scheduler_origins if listed.url == expected_url
        ]

        assert len(matching) == 1
        listed_origin = matching[0]
        assert listed_origin.last_update == repo.date_last_modified
        assert listed_origin.visit_type == vcs_type
def test_lister_from_configfile(swh_scheduler_config, mocker):
@ -81,65 +91,95 @@ def test_lister_from_configfile(swh_scheduler_config, mocker):
assert lister.credentials is not None
def test_launchpad_full_lister(
    swh_scheduler, mocker, launchpad_response1, launchpad_bzr_response
):
    """A full (non incremental) listing records every git and bzr origin."""
    mock_getRepositories, mock_getBranches = _mock_launchpad(
        mocker, launchpad_response1, launchpad_bzr_response
    )
    expected_origin_count = len(launchpad_response1) + len(launchpad_bzr_response)

    lister = LaunchpadLister(scheduler=swh_scheduler)
    stats = lister.run()

    assert not lister.incremental
    assert lister.updated
    assert stats.pages == 1 + 1, "Expects 1 page for git origins, another for bzr ones"
    assert stats.origins == expected_origin_count

    # A full listing starts from scratch for both vcs types
    for listing_mock in (mock_getRepositories, mock_getBranches):
        listing_mock.assert_called_once_with(
            order_by="most neglected first", modified_since_date=None
        )

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(scheduler_origins) == expected_origin_count

    _check_listed_origins(scheduler_origins, launchpad_response1)
    _check_listed_origins(scheduler_origins, launchpad_bzr_response, vcs_type="bzr")
def test_launchpad_incremental_lister(
    swh_scheduler,
    mocker,
    launchpad_response1,
    launchpad_response2,
    launchpad_bzr_response,
):
    """Two incremental listings in a row: the second resumes from the state
    saved by the first one, for git as well as for bzr."""
    # First run: nothing saved yet, so both listings start from scratch
    mock_getRepositories, mock_getBranches = _mock_launchpad(
        mocker, launchpad_response1, launchpad_bzr_response
    )
    lister = LaunchpadLister(scheduler=swh_scheduler, incremental=True)
    stats = lister.run()

    assert lister.incremental
    assert lister.updated
    assert stats.pages == 1 + 1, "Expects 1 page for git origins, another for bzr ones"
    len_first_runs = len(launchpad_response1) + len(launchpad_bzr_response)
    assert stats.origins == len_first_runs

    mock_getRepositories.assert_called_once_with(
        order_by="most neglected first", modified_since_date=None
    )
    mock_getBranches.assert_called_once_with(
        order_by="most neglected first", modified_since_date=None
    )

    lister_state = lister.get_state_from_scheduler()
    assert (
        lister_state.git_date_last_modified
        == launchpad_response1[-1].date_last_modified
    )
    assert (
        lister_state.bzr_date_last_modified
        == launchpad_bzr_response[-1].date_last_modified
    )

    # Second run: new git repositories only, the bzr listing comes back empty
    mock_getRepositories, mock_getBranches = _mock_launchpad(
        mocker, launchpad_response2
    )
    lister = LaunchpadLister(scheduler=swh_scheduler, incremental=True)
    stats = lister.run()

    assert lister.incremental
    assert lister.updated
    assert stats.pages == 2, "Empty bzr response still accounts for 1 page"
    assert stats.origins == len(launchpad_response2)

    mock_getRepositories.assert_called_once_with(
        order_by="most neglected first",
        modified_since_date=lister_state.git_date_last_modified,
    )
    # The bzr listing must resume from its own saved cursor as well
    mock_getBranches.assert_called_once_with(
        order_by="most neglected first",
        modified_since_date=lister_state.bzr_date_last_modified,
    )

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(scheduler_origins) == len_first_runs + len(launchpad_response2)

    _check_listed_origins(scheduler_origins, launchpad_response1)
    _check_listed_origins(scheduler_origins, launchpad_bzr_response, vcs_type="bzr")
    _check_listed_origins(scheduler_origins, launchpad_response2)
@ -147,12 +187,12 @@ def test_launchpad_lister_invalid_url_filtering(
swh_scheduler, mocker,
):
invalid_origin = [_Repo({"git_https_url": "tag:launchpad.net:2008:redacted",})]
_mock_getRepositories(mocker, invalid_origin)
_mock_launchpad(mocker, invalid_origin)
lister = LaunchpadLister(scheduler=swh_scheduler)
stats = lister.run()
assert not lister.updated
assert stats.pages == 1
assert stats.pages == 1 + 1, "Empty pages are still accounted for (1 git, 1 bzr)"
assert stats.origins == 0
@ -166,10 +206,10 @@ def test_launchpad_lister_duplicated_origin(
}
)
origins = [origin, origin]
_mock_getRepositories(mocker, origins)
_mock_launchpad(mocker, origins)
lister = LaunchpadLister(scheduler=swh_scheduler)
stats = lister.run()
assert lister.updated
assert stats.pages == 1
assert stats.pages == 1 + 1, "Empty bzr page is still accounted for (1 git, 1 bzr)"
assert stats.origins == 1