From 46f5a50099f926bdec44a4086848d1d55d204f49 Mon Sep 17 00:00:00 2001 From: Antoine Lambert Date: Thu, 28 Jan 2021 19:09:02 +0100 Subject: [PATCH] launchpad: Prevent error due to origin listed twice launchpadlib can list the last modified repository twice, so ensure that a single ListedOrigin model is yielded in that special case. Related to T3003#57551 --- swh/lister/launchpad/lister.py | 9 +++++++-- swh/lister/launchpad/tests/test_lister.py | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/swh/lister/launchpad/lister.py b/swh/lister/launchpad/lister.py index 3a5004f..4c66471 100644 --- a/swh/lister/launchpad/lister.py +++ b/swh/lister/launchpad/lister.py @@ -93,12 +93,15 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): """ assert self.lister_obj.id is not None + prev_origin_url = None + for repo in page: origin_url = repo.git_https_url - # filter out origins with invalid URL - if not origin_url.startswith("https://"): + # filter out origins with invalid URL or origin previously listed + # (last modified repository will be listed twice by launchpadlib) + if not origin_url.startswith("https://") or origin_url == prev_origin_url: continue last_update = repo.date_last_modified @@ -107,6 +110,8 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]): logger.debug("Found origin %s last updated on %s", origin_url, last_update) + prev_origin_url = origin_url + yield ListedOrigin( lister_id=self.lister_obj.id, visit_type="git", diff --git a/swh/lister/launchpad/tests/test_lister.py b/swh/lister/launchpad/tests/test_lister.py index d36f026..836fcec 100644 --- a/swh/lister/launchpad/tests/test_lister.py +++ b/swh/lister/launchpad/tests/test_lister.py @@ -154,3 +154,22 @@ def test_launchpad_lister_invalid_url_filtering( assert not lister.updated assert stats.pages == 1 assert stats.origins == 0 + + +def test_launchpad_lister_duplicated_origin( + swh_scheduler, mocker, +): + origin = _Repo( + { + 
"git_https_url": "https://git.launchpad.net/test", + "date_last_modified": "2021-01-14 21:05:31.231406+00:00", + } + ) + origins = [origin, origin] + _mock_getRepositories(mocker, origins) + lister = LaunchpadLister(scheduler=swh_scheduler) + stats = lister.run() + + assert lister.updated + assert stats.pages == 1 + assert stats.origins == 1