launchpad: Prevent error due to origin listed twice

launchpadlib can list the last modified repository twice, so make sure that
only a single ListedOrigin model is yielded in that special case.

Related to T3003#57551
This commit is contained in:
Antoine Lambert 2021-01-28 19:09:02 +01:00
parent 130ad7d73e
commit 46f5a50099
2 changed files with 26 additions and 2 deletions

View file

@ -93,12 +93,15 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
"""
assert self.lister_obj.id is not None
prev_origin_url = None
for repo in page:
origin_url = repo.git_https_url
# filter out origins with invalid URL
if not origin_url.startswith("https://"):
# filter out origins with invalid URL or origin previously listed
# (last modified repository will be listed twice by launchpadlib)
if not origin_url.startswith("https://") or origin_url == prev_origin_url:
continue
last_update = repo.date_last_modified
@ -107,6 +110,8 @@ class LaunchpadLister(Lister[LaunchpadListerState, LaunchpadPageType]):
logger.debug("Found origin %s last updated on %s", origin_url, last_update)
prev_origin_url = origin_url
yield ListedOrigin(
lister_id=self.lister_obj.id,
visit_type="git",

View file

@ -154,3 +154,22 @@ def test_launchpad_lister_invalid_url_filtering(
assert not lister.updated
assert stats.pages == 1
assert stats.origins == 0
def test_launchpad_lister_duplicated_origin(
    swh_scheduler, mocker,
):
    """Check that a repository returned twice in a page yields one origin."""
    duplicated_repo = _Repo(
        {
            "git_https_url": "https://git.launchpad.net/test",
            "date_last_modified": "2021-01-14 21:05:31.231406+00:00",
        }
    )
    # Simulate launchpadlib listing the last modified repository twice.
    listed_repos = [duplicated_repo, duplicated_repo]
    _mock_getRepositories(mocker, listed_repos)

    lister = LaunchpadLister(scheduler=swh_scheduler)
    run_stats = lister.run()

    # The lister state was updated, and the duplicate was dropped so a
    # single origin is recorded for the single page.
    assert lister.updated
    assert run_stats.pages == 1
    assert run_stats.origins == 1