cgit: Ensure the clone url is searched on the right tab

On some forges, the default tab of a repository page is not the
summary tab, so the clone urls are not detected and the repository
is ignored.

Related to T4544
This commit is contained in:
Vincent SELLIER 2022-09-20 09:58:15 +02:00
parent bd35d54398
commit 9b3e565cf7
No known key found for this signature in database
GPG key ID: 3F13C434EADAD17D
8 changed files with 249 additions and 0 deletions

View file

@ -169,6 +169,22 @@ class CGitLister(StatelessLister[Repositories]):
)
return None
# check if we are on the summary tab, if not, go to this tab
tab = bs.find("table", {"class": "tabs"})
if tab:
summary_a = tab.find("a", string="summary")
if summary_a:
summary_url = urljoin(repository_url, summary_a["href"]).strip("/")
if summary_url != repository_url:
logger.debug(
"%s : Active tab is not the summary, trying to load the summary page",
repository_url,
)
return self._get_origin_from_repository_url(summary_url)
else:
logger.debug("No summary tab found on %s", repository_url)
# origin urls are listed on the repository page
# TODO check if forcing https is better or not ?
# <link rel='vcs-git' href='git://...' title='...'/>
@ -177,6 +193,7 @@ class CGitLister(StatelessLister[Repositories]):
urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
if not urls:
logger.debug("No git urls found on %s", repository_url)
return None
# look for the http/https url, if any, and use it as origin_url

View file

@ -0,0 +1 @@
These files are a partial dump of https://git.acdw.net

View file

@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang='en'>
<head>
<title>friendware by acdw</title>
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
<meta name='robots' content='index, nofollow'/>
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
</head>
<body>
<div id='cgit'><table id='header'>
<tr>
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
<td class='main'>friendware by acdw</td></tr>
<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
<table class='tabs'><tr><td>
<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/'>
<input type='search' name='q' size='10' value=''/>
<input type='submit' value='search'/>
</form></td></tr></table>
<div class='content'><table summary='repository list' class='list nowrap'><tr class='nohover'><th class='left'><a href='/?s=name'>Name</a></th><th class='left'><a href='/?s=desc'>Description</a></th><th class='left'><a href='/?s=idle'>Idle</a></th><th class='left'>Links</th></tr>
<tr><td class='toplevel-repo'><a title='sfeed' href='/sfeed/'>sfeed</a></td><td><a href='/sfeed/'>My sfeed scripts
</a></td><td><span class='age-mins' title='2022-09-19 19:28:30 +0000'>28 min.</span></td><td><a class='button' href='/sfeed/summary/'>summary</a> <a class='button' href='/sfeed/log/'>log</a> <a class='button' href='/sfeed/tree/'>tree</a></td></tr>
<tr><td class='toplevel-repo'><a title='foo' href='/foo/'>foo</a></td><td><a href='/foo/'>Non existing repository
</a></td><td><span class='age-mins' title='2022-09-19 19:28:30 +0000'>28 min.</span></td><td><a class='button' href='/foo/summary/'>summary</a> <a class='button' href='/foo/log/'>log</a> <a class='button' href='/foo/tree/'>tree</a></td></tr>
</table></div> <!-- class=content -->
<div class="footer">
&copy; 2022 C. Duckworth.
generated by
<a href="/cgit" >cgit</a>.
comments, issues, and patches welcome at
&lt;
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
&gt;.
</div>
</div> <!-- id=cgit -->
</body>
</html>

View file

@ -0,0 +1,33 @@
<!DOCTYPE html>
<html lang='en'>
<head>
<title></title>
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
<meta name='robots' content='index, nofollow'/>
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
</head>
<body>
<div id='cgit'><table id='header'>
<tr>
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
<td class='main'>friendware by acdw</td></tr>
<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
<table class='tabs'><tr><td>
<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/foo/'>
<input type='search' name='q' size='10' value=''/>
<input type='submit' value='search'/>
</form></td></tr></table>
<div class='content'><div class='error'>No repositories found</div>
</div> <!-- class=content -->
<div class="footer">
&copy; 2022 C. Duckworth.
generated by
<a href="/cgit" >cgit</a>.
comments, issues, and patches welcome at
&lt;
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
&gt;.
</div>
</div> <!-- id=cgit -->
</body>
</html>

View file

@ -0,0 +1,33 @@
<!DOCTYPE html>
<html lang='en'>
<head>
<title></title>
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
<meta name='robots' content='index, nofollow'/>
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
</head>
<body>
<div id='cgit'><table id='header'>
<tr>
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
<td class='main'>friendware by acdw</td></tr>
<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
<table class='tabs'><tr><td>
<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/foo/'>
<input type='search' name='q' size='10' value=''/>
<input type='submit' value='search'/>
</form></td></tr></table>
<div class='content'><div class='error'>No repositories found</div>
</div> <!-- class=content -->
<div class="footer">
&copy; 2022 C. Duckworth.
generated by
<a href="/cgit" >cgit</a>.
comments, issues, and patches welcome at
&lt;
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
&gt;.
</div>
</div> <!-- id=cgit -->
</body>
</html>

View file

@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang='en'>
<head>
<title>sfeed - My sfeed scripts
</title>
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
<meta name='robots' content='index, nofollow'/>
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
<link rel='alternate' title='Atom feed' href='https://git.acdw.net/sfeed/atom/?h=main' type='application/atom+xml'/>
<link rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'/>
</head>
<body>
<div id='cgit'><table id='header'>
<tr>
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
<td class='main'><a href='/'>index</a> : <a title='sfeed' href='/sfeed/'>sfeed</a></td><td class='form'><form method='get'>
<select name='h' onchange='this.form.submit();'>
<option value='7e389913489916166a0c9f590a1afb1737adcbf2'>this commit</option>
<optgroup label='branches'><option value='main' selected='selected'>main</option>
</optgroup></select> <input type='submit' value='switch'/></form></td></tr>
<tr><td class='sub'>My sfeed scripts
</td><td class='sub right'>Case Duckworth</td></tr></table>
<table class='tabs'><tr><td>
<a class='active' href='/sfeed/'>about</a> <a href='/sfeed/summary/'>summary</a> <a href='/sfeed/refs/'>refs</a> <a href='/sfeed/log/'>log</a> <a href='/sfeed/tree/'>tree</a> <a href='/sfeed/commit/'>commit</a> <a href='/sfeed/diff/'>diff</a> <a href='/sfeed/stats/'>stats</a></td><td class='form'><form class='right' method='get' action='/sfeed/log/'>
<select name='qt'>
<option value='grep'>log msg</option>
<option value='author'>author</option>
<option value='committer'>committer</option>
<option value='range'>range</option>
</select>
<input class='txt' type='search' size='10' name='q' value=''/>
<input type='submit' value='search'/>
</form>
</td></tr></table>
<div class='content'><div id='summary'><h1 id="sfeed">sfeed</h1>
<p>Turns out, <a href="https://codemadness.org/sfeed-simple-feed-parser.html">sfeed</a> is cool! You can see what this repo generates at <a href="https://acdw.casa/planet/" class="uri">https://acdw.casa/planet/</a>.</p>
</div></div> <!-- class=content -->
<div class="footer">
&copy; 2022 C. Duckworth.
generated by
<a href="/cgit" >cgit</a>.
comments, issues, and patches welcome at
&lt;
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
&gt;.
</div>
</div> <!-- id=cgit -->
</body>
</html>

View file

@ -0,0 +1,63 @@
<!DOCTYPE html>
<html lang='en'>
<head>
<title>sfeed - My sfeed scripts
</title>
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
<meta name='robots' content='index, nofollow'/>
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
<link rel='alternate' title='Atom feed' href='https://git.acdw.net/sfeed/atom/?h=main' type='application/atom+xml'/>
<link rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'/>
</head>
<body>
<div id='cgit'><table id='header'>
<tr>
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
<td class='main'><a href='/'>index</a> : <a title='sfeed' href='/sfeed/'>sfeed</a></td><td class='form'><form method='get'>
<select name='h' onchange='this.form.submit();'>
<option value='7e389913489916166a0c9f590a1afb1737adcbf2'>this commit</option>
<optgroup label='branches'><option value='main' selected='selected'>main</option>
</optgroup></select> <input type='submit' value='switch'/></form></td></tr>
<tr><td class='sub'>My sfeed scripts
</td><td class='sub right'>Case Duckworth</td></tr></table>
<table class='tabs'><tr><td>
<a href='/sfeed/'>about</a> <a class='active' href='/sfeed/summary/'>summary</a> <a href='/sfeed/refs/'>refs</a> <a href='/sfeed/log/'>log</a> <a href='/sfeed/tree/'>tree</a> <a href='/sfeed/commit/'>commit</a> <a href='/sfeed/diff/'>diff</a> <a href='/sfeed/stats/'>stats</a></td><td class='form'><form class='right' method='get' action='/sfeed/log/'>
<select name='qt'>
<option value='grep'>log msg</option>
<option value='author'>author</option>
<option value='committer'>committer</option>
<option value='range'>range</option>
</select>
<input class='txt' type='search' size='10' name='q' value=''/>
<input type='submit' value='search'/>
</form>
</td></tr></table>
<div class='content'><table summary='repository info' class='list nowrap'><tr class='nohover'><th class='left'>Branch</th><th class='left'>Commit message</th><th class='left'>Author</th><th class='left' colspan='2'>Age</th></tr>
<tr><td><a href='/sfeed/log/'>main</a></td><td><a href='/sfeed/commit/'>Add APOD</a></td><td>Case Duckworth</td><td colspan='2'><span class='age-mins' title='2022-09-19 14:28:30 -0500'>38 min.</span></td></tr>
<tr class='nohover'><td colspan='3'>&nbsp;</td></tr><tr class='nohover'><td colspan='3'>&nbsp;</td></tr><tr class='nohover'><th class='left'>Age</th><th class='left'>Commit message</th><th class='left'>Author</th></tr>
<tr><td><span class='age-mins' title='2022-09-19 14:28:30 -0500'>38 min.</span></td><td><a href='/sfeed/commit/?id=7e389913489916166a0c9f590a1afb1737adcbf2'>Add APOD</a><span class='decoration'> <a class='deco' href='/sfeed/commit/?id=7e389913489916166a0c9f590a1afb1737adcbf2'>HEAD</a> <a class='branch-deco' href='/sfeed/log/'>main</a></span></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-15 14:42:28 -0500'>4 days</span></td><td><a href='/sfeed/commit/?id=da8103330e7ec902994d72c6b1a3ec5fbad5c9d3'>Change fresh item colors</a></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-15 14:38:35 -0500'>4 days</span></td><td><a href='/sfeed/commit/?id=4bea5f02d3b13cac53ddaa60d8202083fbe80aeb'>Indentation</a></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-14 09:53:56 -0500'>5 days</span></td><td><a href='/sfeed/commit/?id=d5348fa3f2080bfd0fa217f2311d61aa99e34860'>Add Tab Completion</a></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-14 08:51:36 -0500'>5 days</span></td><td><a href='/sfeed/commit/?id=c3e8226c62e107d9db6dda4d669b3676cfc6cde7'>Add Lonnie Johnson</a></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-12 17:39:32 -0500'>7 days</span></td><td><a href='/sfeed/commit/?id=3debe199e3dbd5020da1768d297777423652e6fd'>Add miniature calendar; metafilter</a></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-10 23:20:29 -0500'>9 days</span></td><td><a href='/sfeed/commit/?id=7eed94e4c96f9fbd4a03dd1dc11a67514d64b404'>Add active listening</a></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-09 20:01:46 -0500'>10 days</span></td><td><a href='/sfeed/commit/?id=c6056e5676b3e9970dbe1fc681d3f5ea447734fc'>Add tilde.town blog</a></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-07 23:57:26 -0500'>12 days</span></td><td><a href='/sfeed/commit/?id=06c33ee36f71f7f92ab7c918f02efd262b83d276'>Add zserge</a></td><td>Case Duckworth</td></tr>
<tr><td><span class='age-days' title='2022-09-07 23:56:50 -0500'>12 days</span></td><td><a href='/sfeed/commit/?id=0cd234e1faeefa14cb44206a3f8776190d35b2e9'>Remove duplicate</a></td><td>Case Duckworth</td></tr>
<tr class='nohover'><td colspan='3'><a href='/sfeed/log/'>[...]</a></td></tr>
<tr class='nohover'><td colspan='3'>&nbsp;</td></tr><tr class='nohover'><th class='left' colspan='3'>Clone</th></tr>
<tr><td colspan='3'><a rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'>https://git.acdw.net/sfeed</a></td></tr>
</table></div> <!-- class=content -->
<div class="footer">
&copy; 2022 C. Duckworth.
generated by
<a href="/cgit" >cgit</a>.
comments, issues, and patches welcome at
&lt;
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
&gt;.
</div>
</div> <!-- id=cgit -->
</body>
</html>

View file

@ -265,3 +265,16 @@ def test_lister_cgit_get_pages_with_pages_and_retry(
# we should have 16 repos (listed on 3 pages)
assert len(repos) == 3
assert len(flattened_repos) == 16
def test_lister_cgit_summary_not_default(requests_mock_datadir, swh_scheduler):
    """A repository is still listed when its default tab is not "summary".

    Runs the lister against https://git.acdw.net/cgit (responses are expected
    to come from the ``requests_mock_datadir`` fixture) and checks that the
    run reports exactly one page containing exactly one origin.
    """
    instance_url = "https://git.acdw.net/cgit"
    expected_stats = ListerStats(pages=1, origins=1)

    lister = CGitLister(swh_scheduler, url=instance_url)

    assert lister.run() == expected_stats