sourceforge: fix support for listing bzr origins
Bazaar support was removed a long time ago and predates a lot of the new mechanisms in place in the API. Unfortunately, it looks like a lot of the URLs are offline now, but there are still a few projects that can be listed, and supporting them is pretty low-effort.
This commit is contained in:
parent
b7524bbae0
commit
31b4429ced
4 changed files with 100 additions and 10 deletions
|
@ -84,6 +84,9 @@ PROJECT_API_URL_FORMAT = "https://sourceforge.net/rest/{namespace}/{project}"
|
|||
|
||||
# Predictable URL for cloning (in the broad sense) a VCS registered for the project.
|
||||
#
|
||||
# Warning: does not apply to bzr repos, and Mercurial repos are http only; see use of this
|
||||
# constant below.
|
||||
#
|
||||
# `vcs`: VCS type, one of `VCS_NAMES`
|
||||
# `namespace`: Project namespace. Very often `p`, but can be something else like
|
||||
# `adobe`.
|
||||
|
@ -170,13 +173,24 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
|||
url_match = re.compile(
|
||||
r".*\.code\.sf\.net/(?P<namespace>[^/]+)/(?P<project>.+)/.*"
|
||||
)
|
||||
bzr_url_match = re.compile(
|
||||
r"http://(?P<project>[^/]+).bzr.sourceforge.net/bzrroot/([^/]+)"
|
||||
)
|
||||
|
||||
for origin in stream:
|
||||
url = origin.url
|
||||
match = url_match.match(url)
|
||||
assert match is not None
|
||||
matches = match.groupdict()
|
||||
namespace = matches["namespace"]
|
||||
project = matches["project"]
|
||||
if match is None:
|
||||
# Should be a bzr special endpoint
|
||||
match = bzr_url_match.match(url)
|
||||
assert match is not None
|
||||
matches = match.groupdict()
|
||||
project = matches["project"]
|
||||
namespace = "p" # no special namespacing for bzr projects
|
||||
else:
|
||||
matches = match.groupdict()
|
||||
namespace = matches["namespace"]
|
||||
project = matches["project"]
|
||||
# "Last modified" dates are the same across all VCS (tools, even)
|
||||
# within a project or subproject. An assertion here would be overkill.
|
||||
last_modified = origin.last_update
|
||||
|
@ -356,6 +370,11 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
|
|||
# SourceForge does not yet support anonymous HTTPS cloning for Mercurial
|
||||
# See https://sourceforge.net/p/forge/feature-requests/727/
|
||||
url = url.replace("https://", "http://")
|
||||
if tool_name == VcsNames.BAZAAR.value:
|
||||
# SourceForge has removed support for bzr and only keeps legacy projects
|
||||
# around at a separate (also not https) URL. Bzr projects are very rare
|
||||
# and a lot of them are 404 now.
|
||||
url = f"http://{project}.bzr.sourceforge.net/bzrroot/{project}"
|
||||
entry = SourceForgeListerEntry(
|
||||
vcs=VcsNames(tool_name), url=url, last_modified=last_modified
|
||||
)
|
||||
|
|
53
swh/lister/sourceforge/tests/data/bzr-repo.json
Normal file
53
swh/lister/sourceforge/tests/data/bzr-repo.json
Normal file
|
@ -0,0 +1,53 @@
|
|||
{
|
||||
"shortname": "bzr-repo",
|
||||
"name": "Bazaar repo",
|
||||
"_id": "4bf3fc291be1ce2f10000052",
|
||||
"url": "https://sourceforge.net/p/bzr-repo/",
|
||||
"private": false,
|
||||
"short_description": "This is an example bzr project",
|
||||
"creation_date": "2009-10-10",
|
||||
"summary": "",
|
||||
"external_homepage": "",
|
||||
"video_url": "",
|
||||
"socialnetworks": [],
|
||||
"status": "active",
|
||||
"moved_to_url": "",
|
||||
"preferred_support_tool": "",
|
||||
"preferred_support_url": "",
|
||||
"developers": [
|
||||
{
|
||||
"username": "Alphare",
|
||||
"name": "Raphaël Gomès",
|
||||
"url": "https://sourceforge.net/u/alphare/"
|
||||
}
|
||||
],
|
||||
"tools": [
|
||||
{
|
||||
"name": "bzr",
|
||||
"mount_point": "bzr",
|
||||
"url": "/p/bzr-repo/bazaar/",
|
||||
"icons": {
|
||||
"24": "images/code_24.png",
|
||||
"32": "images/code_32.png",
|
||||
"48": "images/code_48.png"
|
||||
},
|
||||
"installable": true,
|
||||
"tool_label": "Bazaar",
|
||||
"mount_label": "Bazaar"
|
||||
}
|
||||
],
|
||||
"labels": [],
|
||||
"categories": {
|
||||
"audience": [],
|
||||
"developmentstatus": [],
|
||||
"environment": [],
|
||||
"language": [],
|
||||
"license": [],
|
||||
"translation": [],
|
||||
"os": [],
|
||||
"database": [],
|
||||
"topic": []
|
||||
},
|
||||
"icon_url": null,
|
||||
"screenshots": []
|
||||
}
|
|
@ -40,4 +40,9 @@
|
|||
<lastmod>2019-05-02</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://sourceforge.net/p/bzr-repo/</loc>
|
||||
<lastmod>2021-01-27</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
</urlset>
|
||||
|
|
|
@ -29,6 +29,7 @@ TEST_PROJECTS = {
|
|||
"adobexmp": "adobe",
|
||||
"backapps": "p",
|
||||
"backapps/website": "p",
|
||||
"bzr-repo": "p",
|
||||
"mojunk": "p",
|
||||
"mramm": "p",
|
||||
"os3dmodels": "p",
|
||||
|
@ -79,6 +80,7 @@ def _check_listed_origins(lister, swh_scheduler):
|
|||
"https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
|
||||
"https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
|
||||
"http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"),
|
||||
"http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo": ("bzr", "2021-01-27"),
|
||||
}
|
||||
|
||||
|
||||
|
@ -119,9 +121,10 @@ def test_sourceforge_lister_full(swh_scheduler, requests_mock, datadir):
|
|||
# - mojunk (3 repos),
|
||||
# - backapps/website (1 repo),
|
||||
# - random-mercurial (1 repo).
|
||||
# - bzr-repo (1 repo).
|
||||
# adobe and backapps itself have no repos.
|
||||
assert stats.pages == 5
|
||||
assert stats.origins == 10
|
||||
assert stats.pages == 6
|
||||
assert stats.origins == 11
|
||||
expected_state = {
|
||||
"subsitemap_last_modified": {
|
||||
"https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18",
|
||||
|
@ -239,6 +242,12 @@ def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, m
|
|||
url="http://hg.code.sf.net/p/random-mercurial/hg",
|
||||
last_update=iso8601.parse_date("2019-05-02"),
|
||||
),
|
||||
ListedOrigin(
|
||||
lister_id=lister.lister_obj.id,
|
||||
visit_type="bzr",
|
||||
url="http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo",
|
||||
last_update=iso8601.parse_date("2021-01-27"),
|
||||
),
|
||||
]
|
||||
swh_scheduler.record_listed_origins(faked_listed_origins)
|
||||
|
||||
|
@ -319,9 +328,10 @@ def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir)
|
|||
# - mojunk (3 repos),
|
||||
# - backapps/website (1 repo),
|
||||
# - random-mercurial (1 repo).
|
||||
# - bzr-repo (1 repo).
|
||||
# adobe and backapps itself have no repos.
|
||||
assert stats.pages == 5
|
||||
assert stats.origins == 10
|
||||
assert stats.pages == 6
|
||||
assert stats.origins == 11
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
assert {o.url: o.visit_type for o in scheduler_origins} == {
|
||||
|
@ -335,6 +345,7 @@ def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir)
|
|||
"https://git.code.sf.net/p/mojunk/git2": "git",
|
||||
"https://svn.code.sf.net/p/mojunk/svn": "svn",
|
||||
"http://hg.code.sf.net/p/random-mercurial/hg": "hg",
|
||||
"http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo": "bzr",
|
||||
}
|
||||
|
||||
# Test `time.sleep` is called with exponential retries
|
||||
|
@ -402,10 +413,11 @@ def test_sourceforge_lister_project_error(
|
|||
# - mojunk (3 repos),
|
||||
# - backapps/website (1 repo),
|
||||
# - random-mercurial (1 repo).
|
||||
# - bzr-repo (1 repo).
|
||||
# adobe and backapps itself have no repos.
|
||||
# Did *not* list mramm
|
||||
assert stats.pages == 4
|
||||
assert stats.origins == 7
|
||||
assert stats.pages == 5
|
||||
assert stats.origins == 8
|
||||
|
||||
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
|
||||
res = {o.url: (o.visit_type, str(o.last_update.date())) for o in scheduler_origins}
|
||||
|
@ -418,4 +430,5 @@ def test_sourceforge_lister_project_error(
|
|||
"https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
|
||||
"https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
|
||||
"http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"),
|
||||
"http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo": ("bzr", "2021-01-27"),
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue