From 59a979642f102ca399b3b0d53e3af44a33fcf55a Mon Sep 17 00:00:00 2001 From: Antoine Lambert Date: Tue, 26 Sep 2023 14:18:36 +0200 Subject: [PATCH] gitweb: Ensure to strip any prefix before git clone URL Some gitweb instances display a string prefix before the git clone URLs, so strip any such prefix in order to extract the URLs properly. Related to swh/infra/sysadm-environment#5051. --- swh/lister/gitweb/lister.py | 6 ++++++ .../tests/data/https_git.distorted.org.uk/~mdw_firewall | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/swh/lister/gitweb/lister.py b/swh/lister/gitweb/lister.py index 1710ba7..79fd7bf 100644 --- a/swh/lister/gitweb/lister.py +++ b/swh/lister/gitweb/lister.py @@ -130,6 +130,12 @@ class GitwebLister(StatelessLister[Repositories]): urls = [] for row in bs.find_all("tr", {"class": "metadata_url"}): url = row.contents[-1].string.strip() + for scheme in ("http", "https", "git"): + # remove any string prefix before origin + pos = url.find(f"{scheme}://") + if pos != -1: + url = url[pos:] + break if "," in url: urls_ = [s.strip() for s in url.split(",") if s] diff --git a/swh/lister/gitweb/tests/data/https_git.distorted.org.uk/~mdw_firewall b/swh/lister/gitweb/tests/data/https_git.distorted.org.uk/~mdw_firewall index 6113b2a..dbc00a0 100644 --- a/swh/lister/gitweb/tests/data/https_git.distorted.org.uk/~mdw_firewall +++ b/swh/lister/gitweb/tests/data/https_git.distorted.org.uk/~mdw_firewall @@ -43,8 +43,8 @@ summary | shortlog descriptionFirewall scripts for distorted.org.uk. ownerMark Wooding last changeThu, 16 Mar 2023 18:09:32 +0000 (18:09 +0000) -URLhttps://git.distorted.org.uk/~mdw/firewall -git://git.distorted.org.uk/~mdw/firewall +URLfallback: https://git.distorted.org.uk/~mdw/firewall +fast: git://git.distorted.org.uk/~mdw/firewall
shortlog @@ -164,4 +164,4 @@ window.onload = function () { }; - \ No newline at end of file +