sourceforge: Fix listing of bzr projects

Fix the SourceForge origin URL for bzr projects:
http://project.bzr.sourceforge.net/bzrroot/project
redirects to http://project.bzr.sourceforge.net/bzr/project.

Handle bzr projects with multiple branches: one listed origin
must be created per branch.

Discard bzr projects that no longer exist from the listing.
This commit is contained in:
Antoine Lambert 2022-04-21 15:08:33 +02:00
parent 63a744559f
commit 2fa9f0abd2
10 changed files with 1005 additions and 71 deletions

View file

@ -22,6 +22,9 @@ ignore_missing_imports = True
[mypy-lazr.*]
ignore_missing_imports = True
[mypy-lxml.*]
ignore_missing_imports = True
[mypy-pkg_resources.*]
ignore_missing_imports = True

View file

@ -6,3 +6,4 @@ beautifulsoup4
launchpadlib
tenacity >= 6.2
xmltodict
lxml

View file

@ -1,7 +1,8 @@
# Copyright (C) 2021 The Software Heritage developers
# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from dataclasses import dataclass, field
import datetime
from enum import Enum
@ -12,6 +13,7 @@ from xml.etree import ElementTree
from bs4 import BeautifulSoup
import iso8601
import lxml
import requests
from tenacity.before_sleep import before_sleep_log
@ -172,7 +174,7 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
r".*\.code\.sf\.net/(?P<namespace>[^/]+)/(?P<project>.+)/.*"
)
bzr_url_match = re.compile(
r"http://(?P<project>[^/]+).bzr.sourceforge.net/bzrroot/([^/]+)"
r"http://(?P<project>[^/]+).bzr.sourceforge.net/bzr/([^/]+)"
)
cvs_url_match = re.compile(
r"rsync://a.cvs.sourceforge.net/cvsroot/(?P<project>.+)/([^/]+)"
@ -410,7 +412,37 @@ class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
# SourceForge has removed support for bzr and only keeps legacy projects
# around at a separate (also not https) URL. Bzr projects are very rare
# and a lot of them are 404 now.
url = f"http://{project}.bzr.sourceforge.net/bzrroot/{project}"
url = f"http://{project}.bzr.sourceforge.net/bzr/{project}"
try:
response = self.page_request(url, params={})
if "To get this branch, use:" not in response.text:
# If a bzr project has multiple branches, we need to extract their
# names from the repository landing page and create one listed origin
# per branch
parser = lxml.etree.HTMLParser()
tree = lxml.etree.fromstring(response.text, parser)
# Get all tds with class 'autcell'
tds = tree.xpath(".//td[contains(@class, 'autcell')]")
for td in tds:
branch = td.findtext("a")
# If the td's parent contains <img alt="Branch"/> and
# it has non-empty text:
if td.xpath("..//img[@alt='Branch']") and branch:
hits.append(
SourceForgeListerEntry(
vcs=VcsNames(tool_name),
url=f"{url}/{branch}",
last_modified=last_modified,
)
)
continue
except requests.HTTPError:
logger.warning(
"Bazaar repository page could not be fetched, skipping project '%s'",
project,
)
continue
entry = SourceForgeListerEntry(
vcs=VcsNames(tool_name), url=url, last_modified=last_modified
)

View file

@ -1,53 +0,0 @@
{
"shortname": "bzr-repo",
"name": "Bazaar repo",
"_id": "4bf3fc291be1ce2f10000052",
"url": "https://sourceforge.net/p/bzr-repo/",
"private": false,
"short_description": "This is an example bzr project",
"creation_date": "2009-10-10",
"summary": "",
"external_homepage": "",
"video_url": "",
"socialnetworks": [],
"status": "active",
"moved_to_url": "",
"preferred_support_tool": "",
"preferred_support_url": "",
"developers": [
{
"username": "Alphare",
"name": "Raphaël Gomès",
"url": "https://sourceforge.net/u/alphare/"
}
],
"tools": [
{
"name": "bzr",
"mount_point": "bzr",
"url": "/p/bzr-repo/bazaar/",
"icons": {
"24": "images/code_24.png",
"32": "images/code_32.png",
"48": "images/code_48.png"
},
"installable": true,
"tool_label": "Bazaar",
"mount_label": "Bazaar"
}
],
"labels": [],
"categories": {
"audience": [],
"developmentstatus": [],
"environment": [],
"language": [],
"license": [],
"translation": [],
"os": [],
"database": [],
"topic": []
},
"icon_url": null,
"screenshots": []
}

View file

@ -0,0 +1,106 @@
<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>SourceForge: Browsing /ocaml-lpd</title>
<link href="/bzr/static/css/global.css" rel="stylesheet" />
<meta content="text/html; charset=utf-8" http-equiv="content-type" />
<meta content="The world's largest development and download repository of Open Source code and applications" name="description" />
<meta content="Open Source, Development, Developers, Projects, Downloads, OSTG, VA Software, SF.net, SourceForge" name="keywords" />
<link href="/bzr/static/css/sf.css" rel="stylesheet" />
</head>
<body>
<div id="doc3" class="yui-t6 login">
<div id="hd">
<div class="yui-gf">
<div class="yui-u first">
<h1>
<a href="https://sourceforge.net/" title="">
SourceForge.net
</a>
</h1>
<ul class="jump">
<li>
<a href="#content">Jump to main content</a>
</li>
</ul>
</div>
<div class="yui-u">
<a href="https://sourceforge.net/support" title="Get help and support on SourceForge.net">Help
</a>
</div>
</div>
</div>
</div>
<div id="loggerheadCont">
<h1>
Browsing
<span class="breadcrumb">
<a href="/bzr/">(root)</a><span>/</span><a href="/bzr/ocaml-lpd">ocaml-lpd</a>
</span>
</h1>
<div>
<table id="logentries">
<tr class="logheader">
<th colspan="2" class="summarycell">Filename</th>
<th class="datecell">Latest Rev</th>
<th class="datecell">Last Changed</th>
</tr>
<tr class="blueRow0">
<td class="icocell">
<a href="../"><img src="/bzr/static/images/ico_folder_up.gif" /></a>
</td>
<td colspan="3" class="summcell">
<a href="../">..</a>
</td>
</tr>
<tr class="blueRow0">
<td class="icocell">
<a href="backup.bzr.~1~/">
<img src="/bzr/static/images/ico_folder.gif" alt="Folder" />
</a>
</td>
<td class="autcell">
<a href="backup.bzr.~1~/">backup.bzr.~1~</a></td>
<td class="date"></td>
<td class="date"></td>
</tr>
<tr class="blueRow1">
<td class="icocell">
<a href="trunk/files">
<img src="/bzr/static/images/ico_branch.gif" alt="Branch" />
</a>
</td>
<td class="autcell">
<a href="trunk/files">trunk</a></td>
<td class="date">
<a href="trunk/revision/13" title="Show revision 13">13</a>
</td>
<td class="date">2011-04-17 22:02:29</td>
</tr>
</table>
</div>
</div>
<hr />
<div id="ft">
<div class="yui-g divider">
<div class="yui-u first copyright">
©Copyright 2017 -
Slashdot Media. All Rights Reserved
</div>
<div class="yui-u">
<a href="https://sourceforge.net/support">Help</a>
</div>
</div>
</div>
</body>
</html>

View file

@ -0,0 +1,201 @@
{
"shortname": "ocaml-lpd",
"name": "Lpd OCaml library",
"_id": "50c63c70e88f3d0bf07d4c6d",
"url": "https://sourceforge.net/p/ocaml-lpd/",
"private": false,
"short_description": "OCaml Lpd is a Line Printer Daemon (LPD) server library written in OCaml. This project moved to OCamlForge https://forge.ocamlcore.org/projects/lpd/",
"creation_date": "2005-02-23",
"summary": "",
"external_homepage": "http://lpd.forge.ocamlcore.org/",
"video_url": "",
"socialnetworks": [],
"status": "moved",
"moved_to_url": "https://forge.ocamlcore.org/projects/lpd/",
"preferred_support_tool": "",
"preferred_support_url": "",
"developers": [
{
"username": "chris_77",
"name": "ChriS",
"url": "https://sourceforge.net/u/chris_77/"
}
],
"tools": [
{
"name": "files-sf",
"mount_point": "files",
"url": "/p/ocaml-lpd/files/",
"icons": {
"24": "images/downloads_24.png",
"32": "images/downloads_32.png",
"48": "images/downloads_48.png"
},
"installable": false,
"tool_label": "Files",
"mount_label": "Files"
},
{
"name": "mailman",
"mount_point": "mailman",
"url": "/p/ocaml-lpd/mailman/",
"icons": {
"24": "images/forums_24.png",
"32": "images/forums_32.png",
"48": "images/forums_48.png"
},
"installable": false,
"tool_label": "Mailing Lists",
"mount_label": "Mailing Lists"
},
{
"name": "bzr",
"mount_point": "code",
"url": "/p/ocaml-lpd/code/",
"icons": {
"24": "images/code_24.png",
"32": "images/code_32.png",
"48": "images/code_48.png"
},
"installable": false,
"tool_label": "BZR",
"mount_label": "Code"
},
{
"name": "summary",
"mount_point": "summary",
"url": "/p/ocaml-lpd/summary/",
"icons": {
"24": "images/sftheme/24x24/blog_24.png",
"32": "images/sftheme/32x32/blog_32.png",
"48": "images/sftheme/48x48/blog_48.png"
},
"installable": false,
"tool_label": "Summary",
"mount_label": "Summary",
"sourceforge_group_id": 132212
},
{
"name": "wiki",
"mount_point": "wiki",
"url": "/p/ocaml-lpd/wiki/",
"icons": {
"24": "images/wiki_24.png",
"32": "images/wiki_32.png",
"48": "images/wiki_48.png"
},
"installable": true,
"tool_label": "Wiki",
"mount_label": "Wiki"
},
{
"name": "reviews",
"mount_point": "reviews",
"url": "/p/ocaml-lpd/reviews/",
"icons": {
"24": "images/sftheme/24x24/blog_24.png",
"32": "images/sftheme/32x32/blog_32.png",
"48": "images/sftheme/48x48/blog_48.png"
},
"installable": false,
"tool_label": "Reviews",
"mount_label": "Reviews"
},
{
"name": "support",
"mount_point": "support",
"url": "/p/ocaml-lpd/support/",
"icons": {
"24": "images/sftheme/24x24/blog_24.png",
"32": "images/sftheme/32x32/blog_32.png",
"48": "images/sftheme/48x48/blog_48.png"
},
"installable": false,
"tool_label": "Support",
"mount_label": "Support"
},
{
"name": "activity",
"mount_point": "activity",
"url": "/p/ocaml-lpd/activity/",
"icons": {
"24": "images/admin_24.png",
"32": "images/admin_32.png",
"48": "images/admin_48.png"
},
"installable": false,
"tool_label": "Tool",
"mount_label": "Activity"
}
],
"labels": [],
"categories": {
"audience": [
{
"id": 3,
"shortname": "developers",
"fullname": "Developers",
"fullpath": "Intended Audience :: by End-User Class :: Developers"
}
],
"developmentstatus": [
{
"id": 11,
"shortname": "production",
"fullname": "5 - Production/Stable",
"fullpath": "Development Status :: 5 - Production/Stable"
}
],
"environment": [
{
"id": 238,
"shortname": "daemon",
"fullname": "Non-interactive (Daemon)",
"fullpath": "User Interface :: Non-interactive (Daemon)"
}
],
"language": [
{
"id": 454,
"shortname": "ocaml",
"fullname": "OCaml (Objective Caml)",
"fullpath": "Programming Language :: OCaml (Objective Caml)"
}
],
"license": [
{
"id": 16,
"shortname": "lgpl",
"fullname": "GNU Library or Lesser General Public License version 2.0 (LGPLv2)",
"fullpath": "License :: OSI-Approved Open Source :: GNU Library or Lesser General Public License version 2.0 (LGPLv2)"
}
],
"translation": [
{
"id": 275,
"shortname": "english",
"fullname": "English",
"fullpath": "Translations :: English"
}
],
"os": [
{
"id": 436,
"shortname": "os_portable",
"fullname": "OS Portable (Source code to work with many OS platforms)",
"fullpath": "Operating System :: Grouping and Descriptive Categories :: OS Portable (Source code to work with many OS platforms)"
}
],
"database": [],
"topic": [
{
"id": 154,
"shortname": "printing",
"fullname": "Printing",
"fullpath": "Topic :: Printing"
}
]
},
"icon_url": null,
"screenshots": []
}

View file

@ -41,8 +41,13 @@
<changefreq>daily</changefreq>
</url>
<url>
<loc>https://sourceforge.net/p/bzr-repo/</loc>
<lastmod>2021-01-27</lastmod>
<loc>https://sourceforge.net/p/t12eksandbox/</loc>
<lastmod>2011-02-09</lastmod>
<changefreq>daily</changefreq>
</url>
<url>
<loc>https://sourceforge.net/p/ocaml-lpd/</loc>
<lastmod>2011-04-17</lastmod>
<changefreq>daily</changefreq>
</url>
</urlset>

View file

@ -0,0 +1,274 @@
<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
<meta content="Loggerhead/1.18.1 Python/2.7.5 Bazaar/2.5.1 Paste/1.7.5.1 PasteDeploy/1.5.0 SimpleTAL/4.3 simplejson/3.10.0" name="generator" />
<title>/t12eksandbox : changes</title>
<meta content="text/html; charset=utf-8" http-equiv="content-type" />
<meta content="The world's largest development and download repository of Open Source code and applications" name="description" />
<meta content="Open Source, Development, Developers, Projects, Downloads, OSTG, VA Software, SF.net, SourceForge" name="keywords" />
<link href="/bzr/static/css/sf.css" rel="stylesheet" />
</head>
<body>
<div id="doc3" class="yui-t6 login">
<div id="hd">
<div class="yui-gf">
<div class="yui-u first">
<h1>
<a href="https://sourceforge.net/" title="">
SourceForge.net
</a>
</h1>
<ul class="jump">
<li>
<a href="#content">Jump to main content</a>
</li>
</ul>
</div>
<div class="yui-u">
<a href="https://sourceforge.net/support" title="Get help and support on SourceForge.net">Help
</a>
</div>
</div>
</div>
</div>
<div id="finderBox">
<form action="/bzr/t12eksandbox/changes?start_revid=ctsai%40users.sourceforge.net-20110209191348-zkzbkuypzq1vncx9">
<label>search:</label>
<input autocomplete="off" onblur="hide_search();" type="search" name="q" id="q" />
</form>
<div>
<a href="/bzr/t12eksandbox/atom" title="RSS feed for /t12eksandbox">
<img src="/bzr/static/images/ico_rss.gif" alt="RSS" class="rssfeed" />
</a>
</div>
</div>
<ul id="menuTabs">
<li><a href="/bzr/t12eksandbox/changes" title="Changes" id="on">Changes</a></li>
<li><a href="/bzr/t12eksandbox/files" title="Files">Files</a></li>
</ul>
<div id="loggerheadCont">
<div id="search_terms"></div>
<h1>
<span class="breadcrumb">
<a href="/bzr/">(root)</a><span>/</span><a href="/bzr/t12eksandbox">t12eksandbox</a>
</span>
: changes
from revision
<span>4</span>
</h1>
<div>
<div id="branch-info">
To get this branch, use: <br />
<code>bzr branch
http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox</code>
</div>
<p class="fr revnolink">From Revision <a>4</a>
</p>
<p class="expand show_if_js" id="expand_all"><a href="#">
<img src="/bzr/static/images/treeCollapsed.png" alt="expand all" /> expand all</a>
</p>
<p style="display:none;" class="expand" id="collapse_all"><a href="#">
<img src="/bzr/static/images/treeExpanded.png" alt="collapse all" /> collapse all</a>
</p>
<table id="logentries">
<tr class="logheader">
<td class="revisionnumber">Rev</td>
<td class="expandcell show_if_js"> </td>
<td class="summarycell">Summary</td>
<td class="authorcell">Authors</td>
<td class="datecell">Date</td>
<td class="diffcell">Diff</td>
<td class="downloadcell">Files</td>
</tr>
<a name="entry-4" />
<tr class="blueRow0 revision_log" id="log-0">
<td class="revnro revnolink"><a title="Show revision 4" href="/bzr/t12eksandbox/revision/4">4</a>
</td>
<td class="expcell show_if_js">
<div class="expand_revisioninfo">
<a href="#">
<img src="/bzr/static/images/treeCollapsed.png" class="expand_icon" />
</a>
</div>
</td>
<td class="summcell">
<div class="short_description">
<a title="Show revision 4" href="/bzr/t12eksandbox/revision/4" class="link">Commit!</a>
</div>
<div style="display: none" class="long_description">
<a title="Show revision 4" href="/bzr/t12eksandbox/revision/4" class="link">Commit!<br/></a>
<div class="loading">
<img src="/bzr/static/images/spinner.gif" />
</div>
</div>
</td>
<td class="autcell">ctsai at sourceforge</td>
<td class="date">
<span title="2011-02-09 19:13:48">2011-02-09</span>
</td>
<td class="diffr"><a title="Show diff at revision 4" href="/bzr/t12eksandbox/revision/4">
<img src="/bzr/static/images/ico_diff.gif" alt="Diff" /></a></td>
<td class="downr"><a href="/bzr/t12eksandbox/files/4" title="Files at revision 4">
<img src="/bzr/static/images/ico_file.gif" alt="Files" /></a>
</td>
</tr>
<a name="entry-3" />
<tr class="blueRow1 revision_log" id="log-1">
<td class="revnro revnolink"><a title="Show revision 3" href="/bzr/t12eksandbox/revision/3">3</a>
</td>
<td class="expcell show_if_js">
<div class="expand_revisioninfo">
<a href="#">
<img src="/bzr/static/images/treeCollapsed.png" class="expand_icon" />
</a>
</div>
</td>
<td class="summcell">
<div class="short_description">
<a title="Show revision 3" href="/bzr/t12eksandbox/revision/3" class="link">fdsa</a>
</div>
<div style="display: none" class="long_description">
<a title="Show revision 3" href="/bzr/t12eksandbox/revision/3" class="link">fdsa<br/></a>
<div class="loading">
<img src="/bzr/static/images/spinner.gif" />
</div>
</div>
</td>
<td class="autcell">ctsai at sourceforge</td>
<td class="date">
<span title="2010-02-03 17:12:10">2010-02-03</span>
</td>
<td class="diffr"><a title="Show diff at revision 3" href="/bzr/t12eksandbox/revision/3">
<img src="/bzr/static/images/ico_diff.gif" alt="Diff" /></a></td>
<td class="downr"><a href="/bzr/t12eksandbox/files/3" title="Files at revision 3">
<img src="/bzr/static/images/ico_file.gif" alt="Files" /></a>
</td>
</tr>
<a name="entry-2" />
<tr class="blueRow0 revision_log" id="log-2">
<td class="revnro revnolink"><a title="Show revision 2" href="/bzr/t12eksandbox/revision/2">2</a>
</td>
<td class="expcell show_if_js">
<div class="expand_revisioninfo">
<a href="#">
<img src="/bzr/static/images/treeCollapsed.png" class="expand_icon" />
</a>
</div>
</td>
<td class="summcell">
<div class="short_description">
<a title="Show revision 2" href="/bzr/t12eksandbox/revision/2" class="link">fdsa</a>
</div>
<div style="display: none" class="long_description">
<a title="Show revision 2" href="/bzr/t12eksandbox/revision/2" class="link">fdsa<br/></a>
<div class="loading">
<img src="/bzr/static/images/spinner.gif" />
</div>
</div>
</td>
<td class="autcell">ctsai at sourceforge</td>
<td class="date">
<span title="2009-10-12 15:01:55">2009-10-12</span>
</td>
<td class="diffr"><a title="Show diff at revision 2" href="/bzr/t12eksandbox/revision/2">
<img src="/bzr/static/images/ico_diff.gif" alt="Diff" /></a></td>
<td class="downr"><a href="/bzr/t12eksandbox/files/2" title="Files at revision 2">
<img src="/bzr/static/images/ico_file.gif" alt="Files" /></a>
</td>
</tr>
<a name="entry-1" />
<tr class="blueRow1 revision_log" id="log-3">
<td class="revnro revnolink"><a title="Show revision 1" href="/bzr/t12eksandbox/revision/1">1</a>
</td>
<td class="expcell show_if_js">
<div class="expand_revisioninfo">
<a href="#">
<img src="/bzr/static/images/treeCollapsed.png" class="expand_icon" />
</a>
</div>
</td>
<td class="summcell">
<div class="short_description">
<a title="Show revision 1" href="/bzr/t12eksandbox/revision/1" class="link">Commit to test</a>
</div>
<div style="display: none" class="long_description">
<a title="Show revision 1" href="/bzr/t12eksandbox/revision/1" class="link">Commit to test<br/></a>
<div class="loading">
<img src="/bzr/static/images/spinner.gif" />
</div>
</div>
</td>
<td class="autcell">ctsai at sourceforge</td>
<td class="date">
<span title="2009-10-12 14:42:27">2009-10-12</span>
</td>
<td class="diffr"><a title="Show diff at revision 1" href="/bzr/t12eksandbox/revision/1">
<img src="/bzr/static/images/ico_diff.gif" alt="Diff" /></a></td>
<td class="downr"><a href="/bzr/t12eksandbox/files/1" title="Files at revision 1">
<img src="/bzr/static/images/ico_file.gif" alt="Files" /></a>
</td>
</tr>
</table>
</div>
<p class="fl">Loggerhead 1.18.1 is a web-based interface for <a href="http://bazaar-vcs.org/">Bazaar</a> branches</p>
</div>
<br />
<hr />
<div id="ft">
<div class="yui-g divider">
<div class="yui-u first copyright">
©Copyright 2017 -
Slashdot Media. All Rights Reserved
</div>
<div class="yui-u">
<a href="https://sourceforge.net/support">Help</a>
</div>
</div>
</div>
</body>
</html>

View file

@ -0,0 +1,292 @@
{
"shortname": "t12eksandbox",
"name": "t12ek sandbox",
"_id": "5304cd2634309d109fc1dec5",
"url": "https://sourceforge.net/p/t12eksandbox/",
"private": false,
"short_description": "Sandboxes are for playing in... Note: this is an SF.net staff's test project. Don't expect to find real files here. Update test!\r\nLine 2!\r\nupdate 2012-06-05",
"creation_date": "2009-07-14",
"summary": "",
"external_homepage": "http://t12eksandbox.sourceforge.net",
"video_url": "",
"socialnetworks": [],
"status": "active",
"moved_to_url": "",
"preferred_support_tool": "_url",
"preferred_support_url": "http://sourceforge.net/tracker/?func=add&group_id=269579&atid=1146768",
"developers": [
{
"username": "sillygoose",
"name": "sillygoose",
"url": "https://sourceforge.net/u/sillygoose/"
},
{
"username": "thimsmith",
"name": "Tim Siegel",
"url": "https://sourceforge.net/u/thimsmith/"
}
],
"tools": [
{
"name": "reviews",
"mount_point": "reviews",
"url": "/p/t12eksandbox/reviews/",
"icons": {
"24": "images/sftheme/24x24/blog_24.png",
"32": "images/sftheme/32x32/blog_32.png",
"48": "images/sftheme/48x48/blog_48.png"
},
"installable": false,
"tool_label": "Reviews",
"mount_label": "Reviews"
},
{
"name": "summary",
"mount_point": "summary",
"url": "/p/t12eksandbox/summary/",
"icons": {
"24": "images/sftheme/24x24/blog_24.png",
"32": "images/sftheme/32x32/blog_32.png",
"48": "images/sftheme/48x48/blog_48.png"
},
"installable": false,
"tool_label": "Summary",
"mount_label": "Summary",
"sourceforge_group_id": 269579
},
{
"name": "mailman",
"mount_point": "mailman",
"url": "/p/t12eksandbox/mailman/",
"icons": {
"24": "images/forums_24.png",
"32": "images/forums_32.png",
"48": "images/forums_48.png"
},
"installable": false,
"tool_label": "Mailing Lists",
"mount_label": "Mailing Lists"
},
{
"name": "support",
"mount_point": "support",
"url": "/p/t12eksandbox/support/",
"icons": {
"24": "images/sftheme/24x24/blog_24.png",
"32": "images/sftheme/32x32/blog_32.png",
"48": "images/sftheme/48x48/blog_48.png"
},
"installable": false,
"tool_label": "Support",
"mount_label": "Support"
},
{
"name": "files-sf",
"mount_point": "files",
"url": "/p/t12eksandbox/files/",
"icons": {
"24": "images/downloads_24.png",
"32": "images/downloads_32.png",
"48": "images/downloads_48.png"
},
"installable": false,
"tool_label": "Files",
"mount_label": "Files"
},
{
"name": "wiki",
"mount_point": "wiki",
"url": "/p/t12eksandbox/wiki/",
"icons": {
"24": "images/wiki_24.png",
"32": "images/wiki_32.png",
"48": "images/wiki_48.png"
},
"installable": true,
"tool_label": "Wiki",
"mount_label": "Wiki"
},
{
"name": "blog",
"mount_point": "news",
"url": "/p/t12eksandbox/news/",
"icons": {
"24": "images/blog_24.png",
"32": "images/blog_32.png",
"48": "images/blog_48.png"
},
"installable": true,
"tool_label": "Blog",
"mount_label": "News"
},
{
"name": "bzr",
"mount_point": "bazaar",
"url": "/p/t12eksandbox/bazaar/",
"icons": {
"24": "images/code_24.png",
"32": "images/code_32.png",
"48": "images/code_48.png"
},
"installable": false,
"tool_label": "BZR",
"mount_label": "Bazaar"
},
{
"name": "discussion",
"mount_point": "discussion",
"url": "/p/t12eksandbox/discussion/",
"icons": {
"24": "images/forums_24.png",
"32": "images/forums_32.png",
"48": "images/forums_48.png"
},
"installable": true,
"tool_label": "Discussion",
"mount_label": "Discussion"
},
{
"name": "tickets",
"mount_point": "support-requests",
"url": "/p/t12eksandbox/support-requests/",
"icons": {
"24": "images/tickets_24.png",
"32": "images/tickets_32.png",
"48": "images/tickets_48.png"
},
"installable": true,
"tool_label": "Tickets",
"mount_label": "Support Requests"
},
{
"name": "tickets",
"mount_point": "feature-requests",
"url": "/p/t12eksandbox/feature-requests/",
"icons": {
"24": "images/tickets_24.png",
"32": "images/tickets_32.png",
"48": "images/tickets_48.png"
},
"installable": true,
"tool_label": "Tickets",
"mount_label": "Feature Requests"
},
{
"name": "link",
"mount_point": "donate",
"url": "/p/t12eksandbox/donate/",
"icons": {
"24": "images/ext_24.png",
"32": "images/ext_32.png",
"48": "images/ext_48.png"
},
"installable": true,
"tool_label": "External Link",
"mount_label": "Donate"
},
{
"name": "tickets",
"mount_point": "patches",
"url": "/p/t12eksandbox/patches/",
"icons": {
"24": "images/tickets_24.png",
"32": "images/tickets_32.png",
"48": "images/tickets_48.png"
},
"installable": true,
"tool_label": "Tickets",
"mount_label": "Patches"
},
{
"name": "tickets",
"mount_point": "bugs",
"url": "/p/t12eksandbox/bugs/",
"icons": {
"24": "images/tickets_24.png",
"32": "images/tickets_32.png",
"48": "images/tickets_48.png"
},
"installable": true,
"tool_label": "Tickets",
"mount_label": "Bugs"
},
{
"name": "activity",
"mount_point": "activity",
"url": "/p/t12eksandbox/activity/",
"icons": {
"24": "images/admin_24.png",
"32": "images/admin_32.png",
"48": "images/admin_48.png"
},
"installable": false,
"tool_label": "Tool",
"mount_label": "Activity"
}
],
"labels": [],
"categories": {
"audience": [],
"developmentstatus": [
{
"id": 10,
"shortname": "beta",
"fullname": "4 - Beta",
"fullpath": "Development Status :: 4 - Beta"
},
{
"id": 7,
"shortname": "planning",
"fullname": "1 - Planning",
"fullpath": "Development Status :: 1 - Planning"
}
],
"environment": [],
"language": [],
"license": [
{
"id": 196,
"shortname": "other",
"fullname": "Other License",
"fullpath": "License :: Other License"
}
],
"translation": [],
"os": [],
"database": [
{
"id": 524,
"shortname": "db_net_mysql",
"fullname": "MySQL",
"fullpath": "Database Environment :: Network-based DBMS :: MySQL"
}
],
"topic": [
{
"id": 575,
"shortname": "testing",
"fullname": "Testing",
"fullpath": "Topic :: Software Development :: Testing"
},
{
"id": 97,
"shortname": "scientific",
"fullname": "Scientific/Engineering",
"fullpath": "Topic :: Scientific/Engineering"
}
]
},
"icon_url": null,
"screenshots": [
{
"url": "https://sourceforge.net/p/t12eksandbox/screenshot/224498.jpg",
"thumbnail_url": "https://sourceforge.net/p/t12eksandbox/screenshot/224498.jpg/thumb",
"caption": "aimage2"
},
{
"url": "https://sourceforge.net/p/t12eksandbox/screenshot/224496.jpg",
"thumbnail_url": "https://sourceforge.net/p/t12eksandbox/screenshot/224496.jpg/thumb",
"caption": "3Kimage3"
}
]
}

View file

@ -35,6 +35,8 @@ TEST_PROJECTS = {
"mramm": "p",
"os3dmodels": "p",
"random-mercurial": "p",
"t12eksandbox": "p",
"ocaml-lpd": "p",
}
URLS_MATCHER = {
@ -67,6 +69,10 @@ def get_cvs_info_page(datadir):
return Path(datadir, "aaron.html").read_text()
def get_bzr_repo_page(datadir, repo_name):
    """Return the saved Bazaar repository page for ``repo_name``.

    The page is read from ``<datadir>/<repo_name>.html``.

    Args:
        datadir: directory holding the test data files
        repo_name: base name (without the ``.html`` suffix) of the page to load

    Returns:
        The content of the page, as a string.
    """
    # Decode explicitly as UTF-8: the saved pages declare a utf-8 charset and
    # contain non-ASCII characters (e.g. the copyright sign), while the default
    # encoding used by ``read_text`` depends on the locale.
    return Path(datadir, f"{repo_name}.html").read_text(encoding="utf-8")
def _check_request_headers(request):
    """Tell whether ``request`` carries the expected ``User-Agent`` header.

    Returns ``True`` only when the header value equals ``USER_AGENT``; a
    request without that header yields ``False``.
    """
    sent_user_agent = request.headers.get("User-Agent")
    return sent_user_agent == USER_AGENT
@ -85,7 +91,14 @@ def _check_listed_origins(lister, swh_scheduler):
"https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
"https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
"http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"),
"http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo": ("bzr", "2021-01-27"),
"http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox": (
"bzr",
"2011-02-09",
),
"http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk": (
"bzr",
"2011-04-17",
),
"rsync://a.cvs.sourceforge.net/cvsroot/aaron/aaron": ("cvs", "2013-03-07"),
"rsync://a.cvs.sourceforge.net/cvsroot/aaron/www": ("cvs", "2013-03-07"),
}
@ -126,6 +139,16 @@ def test_sourceforge_lister_full(swh_scheduler, requests_mock, datadir):
text=get_cvs_info_page(datadir),
additional_matcher=_check_request_headers,
)
requests_mock.get(
re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"),
text=get_bzr_repo_page(datadir, "t12eksandbox"),
additional_matcher=_check_request_headers,
)
requests_mock.get(
re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"),
text=get_bzr_repo_page(datadir, "ocaml-lpd"),
additional_matcher=_check_request_headers,
)
stats = lister.run()
# - os3dmodels (2 repos),
@ -133,10 +156,11 @@ def test_sourceforge_lister_full(swh_scheduler, requests_mock, datadir):
# - mojunk (3 repos),
# - backapps/website (1 repo),
# - random-mercurial (1 repo).
# - bzr-repo (1 repo).
# - t12eksandbox (1 repo).
# - ocaml-lpd (1 repo).
# adobe and backapps itself have no repos.
assert stats.pages == 7
assert stats.origins == 13
assert stats.pages == 8
assert stats.origins == 14
expected_state = {
"subsitemap_last_modified": {
"https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18",
@ -196,6 +220,18 @@ def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, m
additional_matcher=_check_request_headers,
)
requests_mock.get(
re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"),
text=get_bzr_repo_page(datadir, "t12eksandbox"),
additional_matcher=_check_request_headers,
)
requests_mock.get(
re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"),
text=get_bzr_repo_page(datadir, "ocaml-lpd"),
additional_matcher=_check_request_headers,
)
faked_listed_origins = [
# mramm: changed
ListedOrigin(
@ -263,8 +299,14 @@ def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, m
ListedOrigin(
lister_id=lister.lister_obj.id,
visit_type="bzr",
url="http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo",
last_update=iso8601.parse_date("2021-01-27"),
url="http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox",
last_update=iso8601.parse_date("2011-02-09"),
),
ListedOrigin(
lister_id=lister.lister_obj.id,
visit_type="bzr",
url="http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk",
last_update=iso8601.parse_date("2011-04-17"),
),
ListedOrigin(
lister_id=lister.lister_obj.id,
@ -359,16 +401,29 @@ def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir)
additional_matcher=_check_request_headers,
)
requests_mock.get(
re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"),
text=get_bzr_repo_page(datadir, "t12eksandbox"),
additional_matcher=_check_request_headers,
)
requests_mock.get(
re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"),
text=get_bzr_repo_page(datadir, "ocaml-lpd"),
additional_matcher=_check_request_headers,
)
stats = lister.run()
# - os3dmodels (2 repos),
# - mramm (3 repos),
# - mojunk (3 repos),
# - backapps/website (1 repo),
# - random-mercurial (1 repo).
# - bzr-repo (1 repo).
# - t12eksandbox (1 repo).
# - ocaml-lpd (1 repo).
# adobe and backapps itself have no repos.
assert stats.pages == 7
assert stats.origins == 13
assert stats.pages == 8
assert stats.origins == 14
_check_listed_origins(lister, swh_scheduler)
@ -426,6 +481,16 @@ def test_sourceforge_lister_project_error(
json=functools.partial(get_project_json, datadir),
additional_matcher=_check_request_headers,
)
requests_mock.get(
re.compile("http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox"),
text=get_bzr_repo_page(datadir, "t12eksandbox"),
additional_matcher=_check_request_headers,
)
requests_mock.get(
re.compile("http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd"),
text=get_bzr_repo_page(datadir, "ocaml-lpd"),
additional_matcher=_check_request_headers,
)
# Make all `mramm` requests fail
# `mramm` is in subsitemap 0, which ensures we keep listing after an error.
requests_mock.get(
@ -442,11 +507,12 @@ def test_sourceforge_lister_project_error(
# - mojunk (3 repos),
# - backapps/website (1 repo),
# - random-mercurial (1 repo).
# - bzr-repo (1 repo).
# - t12eksandbox (1 repo).
# - ocaml-lpd (1 repo).
# adobe and backapps itself have no repos.
# Did *not* list mramm
assert stats.pages == 5
assert stats.origins == 8
assert stats.pages == 6
assert stats.origins == 9
scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
res = {o.url: (o.visit_type, str(o.last_update.date())) for o in scheduler_origins}
@ -459,5 +525,12 @@ def test_sourceforge_lister_project_error(
"https://git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
"https://svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
"http://hg.code.sf.net/p/random-mercurial/hg": ("hg", "2019-05-02"),
"http://bzr-repo.bzr.sourceforge.net/bzrroot/bzr-repo": ("bzr", "2021-01-27"),
"http://t12eksandbox.bzr.sourceforge.net/bzr/t12eksandbox": (
"bzr",
"2011-02-09",
),
"http://ocaml-lpd.bzr.sourceforge.net/bzr/ocaml-lpd/trunk": (
"bzr",
"2011-04-17",
),
}