sourceforge: Fix incremental listing since CVS origin URLs modification

Commit 6a7479553e modified the origin URLs for CVS projects
hosted on SourceForge, but it also broke incremental listing
because of an assertion that is no longer valid. Fix that issue.
This commit is contained in:
Antoine Lambert 2022-03-11 11:42:53 +01:00
parent 2568ecc7c2
commit fd03941c5f
2 changed files with 28 additions and 7 deletions

View file

@ -177,17 +177,25 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
bzr_url_match = re.compile(
r"http://(?P<project>[^/]+).bzr.sourceforge.net/bzrroot/([^/]+)"
)
cvs_url_match = re.compile(
r"rsync://a.cvs.sourceforge.net/cvsroot/(?P<project>.+)/([^/]+)"
)
for origin in stream:
url = origin.url
match = url_match.match(url)
if match is None:
# Should be a bzr special endpoint
match = bzr_url_match.match(url)
assert match is not None
matches = match.groupdict()
# Could be a bzr or cvs special endpoint
bzr_match = bzr_url_match.match(url)
cvs_match = cvs_url_match.match(url)
matches = None
if bzr_match is not None:
matches = bzr_match.groupdict()
elif cvs_match is not None:
matches = cvs_match.groupdict()
assert matches
project = matches["project"]
namespace = "p" # no special namespacing for bzr projects
namespace = "p" # no special namespacing for bzr and cvs projects
else:
matches = match.groupdict()
namespace = matches["namespace"]

View file

@ -266,6 +266,18 @@ def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, m
url="http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo",
last_update=iso8601.parse_date("2021-01-27"),
),
ListedOrigin(
lister_id=lister.lister_obj.id,
visit_type="cvs",
url="rsync://a.cvs.sourceforge.net/cvsroot/aaron/aaron",
last_update=iso8601.parse_date("2013-03-07"),
),
ListedOrigin(
lister_id=lister.lister_obj.id,
visit_type="cvs",
url="rsync://a.cvs.sourceforge.net/cvsroot/aaron/www",
last_update=iso8601.parse_date("2013-03-07"),
),
]
swh_scheduler.record_listed_origins(faked_listed_origins)
@ -289,9 +301,10 @@ def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, m
lister.state = faked_state
stats = lister.run()
# - mramm (3 repos), # changed
assert stats.pages == 2
assert stats.origins == 5
assert stats.pages == 1
assert stats.origins == 3
expected_state = {
"subsitemap_last_modified": {
"https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18",