gitweb: Strip any prefix preceding the git clone URL

Some gitweb instances display a string prefix (for instance "fallback: " or
"fast: ") before the git clone URLs, so strip any such prefix in order to
extract the URLs properly.

Related to swh/infra/sysadm-environment#5051.
This commit is contained in:
Antoine Lambert 2023-09-26 14:18:36 +02:00
parent 88611642fc
commit 59a979642f
2 changed files with 9 additions and 3 deletions

View file

@ -130,6 +130,12 @@ class GitwebLister(StatelessLister[Repositories]):
urls = []
for row in bs.find_all("tr", {"class": "metadata_url"}):
url = row.contents[-1].string.strip()
for scheme in ("http", "https", "git"):
# remove any string prefix before origin
pos = url.find(f"{scheme}://")
if pos != -1:
url = url[pos:]
break
if "," in url:
urls_ = [s.strip() for s in url.split(",") if s]

View file

@ -43,8 +43,8 @@ summary | <a href="https://git.distorted.org.uk/~mdw/firewall/shortlog">shortlog
<tr id="metadata_desc"><td>description</td><td>Firewall scripts for distorted.org.uk.</td></tr>
<tr id="metadata_owner"><td>owner</td><td>Mark Wooding</td></tr>
<tr id="metadata_lchange"><td>last change</td><td><span class="datetime">Thu, 16 Mar 2023 18:09:32 +0000</span> (18:09 +0000)</td></tr>
<tr class="metadata_url"><td>URL</td><td>https://git.distorted.org.uk/~mdw/firewall</td></tr>
<tr class="metadata_url"><td></td><td>git://git.distorted.org.uk/~mdw/firewall</td></tr>
<tr class="metadata_url"><td>URL</td><td>fallback: https://git.distorted.org.uk/~mdw/firewall</td></tr>
<tr class="metadata_url"><td></td><td>fast: git://git.distorted.org.uk/~mdw/firewall</td></tr>
</table>
<div class="header">
<a class="title" href="https://git.distorted.org.uk/~mdw/firewall/shortlog">shortlog</a>
@ -164,4 +164,4 @@ window.onload = function () {
};
</script>
</body>
</html>
</html>