cgit: Ensure the clone url is searched on the right tab
For some forges, the default tab of a repository's detail page is not the summary tab, so the clone URLs are not detected and the repository is ignored. Related to T4544.
This commit is contained in:
parent
bd35d54398
commit
9b3e565cf7
8 changed files with 249 additions and 0 deletions
|
@ -169,6 +169,22 @@ class CGitLister(StatelessLister[Repositories]):
|
|||
)
|
||||
return None
|
||||
|
||||
# check if we are on the summary tab, if not, go to this tab
|
||||
tab = bs.find("table", {"class": "tabs"})
|
||||
if tab:
|
||||
summary_a = tab.find("a", string="summary")
|
||||
if summary_a:
|
||||
summary_url = urljoin(repository_url, summary_a["href"]).strip("/")
|
||||
|
||||
if summary_url != repository_url:
|
||||
logger.debug(
|
||||
"%s : Active tab is not the summary, trying to load the summary page",
|
||||
repository_url,
|
||||
)
|
||||
return self._get_origin_from_repository_url(summary_url)
|
||||
else:
|
||||
logger.debug("No summary tab found on %s", repository_url)
|
||||
|
||||
# origin urls are listed on the repository page
|
||||
# TODO check if forcing https is better or not ?
|
||||
# <link rel='vcs-git' href='git://...' title='...'/>
|
||||
|
@ -177,6 +193,7 @@ class CGitLister(StatelessLister[Repositories]):
|
|||
urls = [x["href"] for x in bs.find_all("a", {"rel": "vcs-git"})]
|
||||
|
||||
if not urls:
|
||||
logger.debug("No git urls found on %s", repository_url)
|
||||
return None
|
||||
|
||||
# look for the http/https url, if any, and use it as origin_url
|
||||
|
|
1
swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md
Normal file
1
swh/lister/cgit/tests/data/https_git.acdw.net/Readme.md
Normal file
|
@ -0,0 +1 @@
|
|||
These files are a partial dump of https://git.acdw.net/cgit
|
40
swh/lister/cgit/tests/data/https_git.acdw.net/cgit
Normal file
40
swh/lister/cgit/tests/data/https_git.acdw.net/cgit
Normal file
|
@ -0,0 +1,40 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang='en'>
|
||||
<head>
|
||||
<title>friendware by acdw</title>
|
||||
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
|
||||
<meta name='robots' content='index, nofollow'/>
|
||||
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
|
||||
</head>
|
||||
<body>
|
||||
<div id='cgit'><table id='header'>
|
||||
<tr>
|
||||
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
|
||||
<td class='main'>friendware by acdw</td></tr>
|
||||
<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
|
||||
<table class='tabs'><tr><td>
|
||||
<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/'>
|
||||
<input type='search' name='q' size='10' value=''/>
|
||||
<input type='submit' value='search'/>
|
||||
</form></td></tr></table>
|
||||
<div class='content'><table summary='repository list' class='list nowrap'><tr class='nohover'><th class='left'><a href='/?s=name'>Name</a></th><th class='left'><a href='/?s=desc'>Description</a></th><th class='left'><a href='/?s=idle'>Idle</a></th><th class='left'>Links</th></tr>
|
||||
<tr><td class='toplevel-repo'><a title='sfeed' href='/sfeed/'>sfeed</a></td><td><a href='/sfeed/'>My sfeed scripts
|
||||
</a></td><td><span class='age-mins' title='2022-09-19 19:28:30 +0000'>28 min.</span></td><td><a class='button' href='/sfeed/summary/'>summary</a> <a class='button' href='/sfeed/log/'>log</a> <a class='button' href='/sfeed/tree/'>tree</a></td></tr>
|
||||
|
||||
<tr><td class='toplevel-repo'><a title='foo' href='/foo/'>foo</a></td><td><a href='/foo/'>Non existing repository
|
||||
</a></td><td><span class='age-mins' title='2022-09-19 19:28:30 +0000'>28 min.</span></td><td><a class='button' href='/foo/summary/'>summary</a> <a class='button' href='/foo/log/'>log</a> <a class='button' href='/foo/tree/'>tree</a></td></tr>
|
||||
|
||||
|
||||
</table></div> <!-- class=content -->
|
||||
<div class="footer">
|
||||
© 2022 C. Duckworth.
|
||||
generated by
|
||||
<a href="/cgit" >cgit</a>.
|
||||
comments, issues, and patches welcome at
|
||||
<
|
||||
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
|
||||
>.
|
||||
</div>
|
||||
</div> <!-- id=cgit -->
|
||||
</body>
|
||||
</html>
|
33
swh/lister/cgit/tests/data/https_git.acdw.net/foo
Normal file
33
swh/lister/cgit/tests/data/https_git.acdw.net/foo
Normal file
|
@ -0,0 +1,33 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang='en'>
|
||||
<head>
|
||||
<title></title>
|
||||
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
|
||||
<meta name='robots' content='index, nofollow'/>
|
||||
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
|
||||
</head>
|
||||
<body>
|
||||
<div id='cgit'><table id='header'>
|
||||
<tr>
|
||||
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
|
||||
<td class='main'>friendware by acdw</td></tr>
|
||||
<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
|
||||
<table class='tabs'><tr><td>
|
||||
<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/foo/'>
|
||||
<input type='search' name='q' size='10' value=''/>
|
||||
<input type='submit' value='search'/>
|
||||
</form></td></tr></table>
|
||||
<div class='content'><div class='error'>No repositories found</div>
|
||||
</div> <!-- class=content -->
|
||||
<div class="footer">
|
||||
© 2022 C. Duckworth.
|
||||
generated by
|
||||
<a href="/cgit" >cgit</a>.
|
||||
comments, issues, and patches welcome at
|
||||
<
|
||||
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
|
||||
>.
|
||||
</div>
|
||||
</div> <!-- id=cgit -->
|
||||
</body>
|
||||
</html>
|
33
swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary
Normal file
33
swh/lister/cgit/tests/data/https_git.acdw.net/foo_summary
Normal file
|
@ -0,0 +1,33 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang='en'>
|
||||
<head>
|
||||
<title></title>
|
||||
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
|
||||
<meta name='robots' content='index, nofollow'/>
|
||||
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
|
||||
</head>
|
||||
<body>
|
||||
<div id='cgit'><table id='header'>
|
||||
<tr>
|
||||
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
|
||||
<td class='main'>friendware by acdw</td></tr>
|
||||
<tr><td class='sub'>the real software was the friends we made along the way</td></tr></table>
|
||||
<table class='tabs'><tr><td>
|
||||
<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/foo/'>
|
||||
<input type='search' name='q' size='10' value=''/>
|
||||
<input type='submit' value='search'/>
|
||||
</form></td></tr></table>
|
||||
<div class='content'><div class='error'>No repositories found</div>
|
||||
</div> <!-- class=content -->
|
||||
<div class="footer">
|
||||
© 2022 C. Duckworth.
|
||||
generated by
|
||||
<a href="/cgit" >cgit</a>.
|
||||
comments, issues, and patches welcome at
|
||||
<
|
||||
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
|
||||
>.
|
||||
</div>
|
||||
</div> <!-- id=cgit -->
|
||||
</body>
|
||||
</html>
|
49
swh/lister/cgit/tests/data/https_git.acdw.net/sfeed
Normal file
49
swh/lister/cgit/tests/data/https_git.acdw.net/sfeed
Normal file
|
@ -0,0 +1,49 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang='en'>
|
||||
<head>
|
||||
<title>sfeed - My sfeed scripts
|
||||
</title>
|
||||
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
|
||||
<meta name='robots' content='index, nofollow'/>
|
||||
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
|
||||
<link rel='alternate' title='Atom feed' href='https://git.acdw.net/sfeed/atom/?h=main' type='application/atom+xml'/>
|
||||
<link rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'/>
|
||||
</head>
|
||||
<body>
|
||||
<div id='cgit'><table id='header'>
|
||||
<tr>
|
||||
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
|
||||
<td class='main'><a href='/'>index</a> : <a title='sfeed' href='/sfeed/'>sfeed</a></td><td class='form'><form method='get'>
|
||||
<select name='h' onchange='this.form.submit();'>
|
||||
<option value='7e389913489916166a0c9f590a1afb1737adcbf2'>this commit</option>
|
||||
<optgroup label='branches'><option value='main' selected='selected'>main</option>
|
||||
</optgroup></select> <input type='submit' value='switch'/></form></td></tr>
|
||||
<tr><td class='sub'>My sfeed scripts
|
||||
</td><td class='sub right'>Case Duckworth</td></tr></table>
|
||||
<table class='tabs'><tr><td>
|
||||
<a class='active' href='/sfeed/'>about</a> <a href='/sfeed/summary/'>summary</a> <a href='/sfeed/refs/'>refs</a> <a href='/sfeed/log/'>log</a> <a href='/sfeed/tree/'>tree</a> <a href='/sfeed/commit/'>commit</a> <a href='/sfeed/diff/'>diff</a> <a href='/sfeed/stats/'>stats</a></td><td class='form'><form class='right' method='get' action='/sfeed/log/'>
|
||||
<select name='qt'>
|
||||
<option value='grep'>log msg</option>
|
||||
<option value='author'>author</option>
|
||||
<option value='committer'>committer</option>
|
||||
<option value='range'>range</option>
|
||||
</select>
|
||||
<input class='txt' type='search' size='10' name='q' value=''/>
|
||||
<input type='submit' value='search'/>
|
||||
</form>
|
||||
</td></tr></table>
|
||||
<div class='content'><div id='summary'><h1 id="sfeed">sfeed</h1>
|
||||
<p>Turns out, <a href="https://codemadness.org/sfeed-simple-feed-parser.html">sfeed</a> is cool! You can see what this repo generates at <a href="https://acdw.casa/planet/" class="uri">https://acdw.casa/planet/</a>.</p>
|
||||
</div></div> <!-- class=content -->
|
||||
<div class="footer">
|
||||
© 2022 C. Duckworth.
|
||||
generated by
|
||||
<a href="/cgit" >cgit</a>.
|
||||
comments, issues, and patches welcome at
|
||||
<
|
||||
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
|
||||
>.
|
||||
</div>
|
||||
</div> <!-- id=cgit -->
|
||||
</body>
|
||||
</html>
|
63
swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary
Normal file
63
swh/lister/cgit/tests/data/https_git.acdw.net/sfeed_summary
Normal file
|
@ -0,0 +1,63 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang='en'>
|
||||
<head>
|
||||
<title>sfeed - My sfeed scripts
|
||||
</title>
|
||||
<meta name='generator' content='cgit 1.4.1-21-gabe81'/>
|
||||
<meta name='robots' content='index, nofollow'/>
|
||||
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
|
||||
<link rel='alternate' title='Atom feed' href='https://git.acdw.net/sfeed/atom/?h=main' type='application/atom+xml'/>
|
||||
<link rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'/>
|
||||
</head>
|
||||
<body>
|
||||
<div id='cgit'><table id='header'>
|
||||
<tr>
|
||||
<td class='logo' rowspan='2'><a href='/'><img src='/avatar.png' alt='cgit logo'/></a></td>
|
||||
<td class='main'><a href='/'>index</a> : <a title='sfeed' href='/sfeed/'>sfeed</a></td><td class='form'><form method='get'>
|
||||
<select name='h' onchange='this.form.submit();'>
|
||||
<option value='7e389913489916166a0c9f590a1afb1737adcbf2'>this commit</option>
|
||||
<optgroup label='branches'><option value='main' selected='selected'>main</option>
|
||||
</optgroup></select> <input type='submit' value='switch'/></form></td></tr>
|
||||
<tr><td class='sub'>My sfeed scripts
|
||||
</td><td class='sub right'>Case Duckworth</td></tr></table>
|
||||
<table class='tabs'><tr><td>
|
||||
<a href='/sfeed/'>about</a> <a class='active' href='/sfeed/summary/'>summary</a> <a href='/sfeed/refs/'>refs</a> <a href='/sfeed/log/'>log</a> <a href='/sfeed/tree/'>tree</a> <a href='/sfeed/commit/'>commit</a> <a href='/sfeed/diff/'>diff</a> <a href='/sfeed/stats/'>stats</a></td><td class='form'><form class='right' method='get' action='/sfeed/log/'>
|
||||
<select name='qt'>
|
||||
<option value='grep'>log msg</option>
|
||||
<option value='author'>author</option>
|
||||
<option value='committer'>committer</option>
|
||||
<option value='range'>range</option>
|
||||
</select>
|
||||
<input class='txt' type='search' size='10' name='q' value=''/>
|
||||
<input type='submit' value='search'/>
|
||||
</form>
|
||||
</td></tr></table>
|
||||
<div class='content'><table summary='repository info' class='list nowrap'><tr class='nohover'><th class='left'>Branch</th><th class='left'>Commit message</th><th class='left'>Author</th><th class='left' colspan='2'>Age</th></tr>
|
||||
<tr><td><a href='/sfeed/log/'>main</a></td><td><a href='/sfeed/commit/'>Add APOD</a></td><td>Case Duckworth</td><td colspan='2'><span class='age-mins' title='2022-09-19 14:28:30 -0500'>38 min.</span></td></tr>
|
||||
<tr class='nohover'><td colspan='3'> </td></tr><tr class='nohover'><td colspan='3'> </td></tr><tr class='nohover'><th class='left'>Age</th><th class='left'>Commit message</th><th class='left'>Author</th></tr>
|
||||
<tr><td><span class='age-mins' title='2022-09-19 14:28:30 -0500'>38 min.</span></td><td><a href='/sfeed/commit/?id=7e389913489916166a0c9f590a1afb1737adcbf2'>Add APOD</a><span class='decoration'> <a class='deco' href='/sfeed/commit/?id=7e389913489916166a0c9f590a1afb1737adcbf2'>HEAD</a> <a class='branch-deco' href='/sfeed/log/'>main</a></span></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-15 14:42:28 -0500'>4 days</span></td><td><a href='/sfeed/commit/?id=da8103330e7ec902994d72c6b1a3ec5fbad5c9d3'>Change fresh item colors</a></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-15 14:38:35 -0500'>4 days</span></td><td><a href='/sfeed/commit/?id=4bea5f02d3b13cac53ddaa60d8202083fbe80aeb'>Indentation</a></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-14 09:53:56 -0500'>5 days</span></td><td><a href='/sfeed/commit/?id=d5348fa3f2080bfd0fa217f2311d61aa99e34860'>Add Tab Completion</a></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-14 08:51:36 -0500'>5 days</span></td><td><a href='/sfeed/commit/?id=c3e8226c62e107d9db6dda4d669b3676cfc6cde7'>Add Lonnie Johnson</a></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-12 17:39:32 -0500'>7 days</span></td><td><a href='/sfeed/commit/?id=3debe199e3dbd5020da1768d297777423652e6fd'>Add miniature calendar; metafilter</a></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-10 23:20:29 -0500'>9 days</span></td><td><a href='/sfeed/commit/?id=7eed94e4c96f9fbd4a03dd1dc11a67514d64b404'>Add active listening</a></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-09 20:01:46 -0500'>10 days</span></td><td><a href='/sfeed/commit/?id=c6056e5676b3e9970dbe1fc681d3f5ea447734fc'>Add tilde.town blog</a></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-07 23:57:26 -0500'>12 days</span></td><td><a href='/sfeed/commit/?id=06c33ee36f71f7f92ab7c918f02efd262b83d276'>Add zserge</a></td><td>Case Duckworth</td></tr>
|
||||
<tr><td><span class='age-days' title='2022-09-07 23:56:50 -0500'>12 days</span></td><td><a href='/sfeed/commit/?id=0cd234e1faeefa14cb44206a3f8776190d35b2e9'>Remove duplicate</a></td><td>Case Duckworth</td></tr>
|
||||
<tr class='nohover'><td colspan='3'><a href='/sfeed/log/'>[...]</a></td></tr>
|
||||
<tr class='nohover'><td colspan='3'> </td></tr><tr class='nohover'><th class='left' colspan='3'>Clone</th></tr>
|
||||
<tr><td colspan='3'><a rel='vcs-git' href='https://git.acdw.net/sfeed' title='sfeed Git repository'>https://git.acdw.net/sfeed</a></td></tr>
|
||||
</table></div> <!-- class=content -->
|
||||
<div class="footer">
|
||||
© 2022 C. Duckworth.
|
||||
generated by
|
||||
<a href="/cgit" >cgit</a>.
|
||||
comments, issues, and patches welcome at
|
||||
<
|
||||
<a href="mailto:git@acdw.net" >git <em>at</em> acdw <em>dot</em> net</a>
|
||||
>.
|
||||
</div>
|
||||
</div> <!-- id=cgit -->
|
||||
</body>
|
||||
</html>
|
|
@ -265,3 +265,16 @@ def test_lister_cgit_get_pages_with_pages_and_retry(
|
|||
# we should have 16 repos (listed on 3 pages)
|
||||
assert len(repos) == 3
|
||||
assert len(flattened_repos) == 16
|
||||
|
||||
|
||||
def test_lister_cgit_summary_not_default(requests_mock_datadir, swh_scheduler):
    """Check that a repository is listed even when it opens on a non-summary tab.

    The index fixture for this instance advertises two repositories:

    - ``sfeed``, whose landing page has the "about" tab active; its clone
      link (``<a rel='vcs-git'>``) only appears on the summary page.
    - ``foo``, whose pages report "No repositories found" and carry no
      clone link at all.

    Exactly one origin is therefore expected from a single listed page.
    """
    lister = CGitLister(swh_scheduler, url="https://git.acdw.net/cgit")

    assert lister.run() == ListerStats(pages=1, origins=1)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue