diff --git a/swh/lister/gitweb/lister.py b/swh/lister/gitweb/lister.py index 1710ba7..79fd7bf 100644 --- a/swh/lister/gitweb/lister.py +++ b/swh/lister/gitweb/lister.py @@ -130,6 +130,12 @@ class GitwebLister(StatelessLister[Repositories]): urls = [] for row in bs.find_all("tr", {"class": "metadata_url"}): url = row.contents[-1].string.strip() + for scheme in ("http", "https", "git"): + # remove any string prefix before origin + pos = url.find(f"{scheme}://") + if pos != -1: + url = url[pos:] + break if "," in url: urls_ = [s.strip() for s in url.split(",") if s] diff --git a/swh/lister/gitweb/tests/data/https_git.distorted.org.uk/~mdw_firewall b/swh/lister/gitweb/tests/data/https_git.distorted.org.uk/~mdw_firewall index 6113b2a..dbc00a0 100644 --- a/swh/lister/gitweb/tests/data/https_git.distorted.org.uk/~mdw_firewall +++ b/swh/lister/gitweb/tests/data/https_git.distorted.org.uk/~mdw_firewall @@ -43,8 +43,8 @@ summary | shortlog descriptionFirewall scripts for distorted.org.uk. ownerMark Wooding last changeThu, 16 Mar 2023 18:09:32 +0000 (18:09 +0000) -URLhttps://git.distorted.org.uk/~mdw/firewall -git://git.distorted.org.uk/~mdw/firewall +URLfallback: https://git.distorted.org.uk/~mdw/firewall +fast: git://git.distorted.org.uk/~mdw/firewall
shortlog @@ -164,4 +164,4 @@ window.onload = function () { }; - \ No newline at end of file +