swh.lister.cgit: Remove repo page visit step
Remove the need to visit every repository page to extract the origin url by introducing a `url_prefix` parameter. The origin url has the format <prefix>/<repo_name>, where the prefix is the same for all the repositories of a particular cgit instance.
This commit is contained in:
parent
7e3c79bb1d
commit
0bf24469b7
9 changed files with 215 additions and 223 deletions
|
@ -3,8 +3,8 @@
|
|||
# See top-level LICENSE file for more information
|
||||
|
||||
import random
|
||||
import logging
|
||||
from bs4 import BeautifulSoup
|
||||
from collections import defaultdict
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
@ -18,38 +18,60 @@ class CGitLister(ListerOnePageApiTransport, SimpleLister):
|
|||
MODEL = CGitModel
|
||||
LISTER_NAME = 'cgit'
|
||||
PAGE = None
|
||||
url_prefix_present = True
|
||||
|
||||
def __init__(self, url, instance=None, url_prefix=None,
             override_config=None):
    """Inits Class with PAGE url and origin url prefix.

    Args:
        url (str): URL of the CGit instance.
        instance (str): Name of cgit instance. When empty, the hostname
                        parsed from `url` is used instead.
        url_prefix (str): Prefix of the origin_url. Origin link of the
                          repos of some special instances do not match
                          the url of the repository page, they have origin
                          url in the format <url_prefix>/<repo_name>.
                          When None, `url` is used as prefix and
                          `url_prefix_present` is set to False.
        override_config: forwarded unchanged to `SimpleLister.__init__`.

    """
    self.PAGE = url
    if url_prefix is None:
        self.url_prefix = url
        # Overrides the class-level default (True) for this instance only.
        self.url_prefix_present = False
    else:
        self.url_prefix = url_prefix

    # Normalise so that `url_prefix + repo_name` always yields a valid url.
    if not self.url_prefix.endswith('/'):
        self.url_prefix += '/'

    # Distinct local name: do not shadow the `url` parameter.
    parsed_url = urlparse(self.PAGE)
    self.url_netloc = find_netloc(parsed_url)

    if not instance:
        instance = parsed_url.hostname
    self.instance = instance

    ListerOnePageApiTransport.__init__(self)
    SimpleLister.__init__(self, override_config=override_config)
|
||||
|
||||
def list_packages(self, response):
|
||||
"""List the actual cgit instance origins from the response.
|
||||
|
||||
Find the repos in all the pages by parsing over the HTML of
|
||||
the `base_url`. Find the details for all the repos and return
|
||||
them in the format of list of dictionaries.
|
||||
Find repositories metadata by parsing the html page (response's raw
|
||||
content). If there are links in the html page, retrieve those
|
||||
repositories metadata from those pages as well. Return the
|
||||
repositories as list of dictionaries.
|
||||
|
||||
Args:
|
||||
response (Response): http api request response.
|
||||
|
||||
Returns:
|
||||
List of repository origin urls (as dict) included in the response.
|
||||
|
||||
"""
|
||||
repos_details = []
|
||||
repos = get_repo_list(response)
|
||||
soup = make_repo_soup(response)
|
||||
pages = self.get_page(soup)
|
||||
if len(pages) > 1:
|
||||
repos.extend(self.get_all_pages(pages))
|
||||
|
||||
for repo in repos:
|
||||
for repo in self.yield_repo_from_responses(response):
|
||||
repo_name = repo.a.text
|
||||
repo_url = self.get_url(repo)
|
||||
origin_url = find_origin_url(repo_url)
|
||||
origin_url = self.find_origin_url(repo, repo_name)
|
||||
|
||||
try:
|
||||
time = repo.span['title']
|
||||
|
@ -58,57 +80,93 @@ class CGitLister(ListerOnePageApiTransport, SimpleLister):
|
|||
|
||||
if origin_url is not None:
|
||||
repos_details.append({
|
||||
'name': repo_name,
|
||||
'time': time,
|
||||
'origin_url': origin_url,
|
||||
})
|
||||
'name': repo_name,
|
||||
'time': time,
|
||||
'origin_url': origin_url,
|
||||
})
|
||||
|
||||
random.shuffle(repos_details)
|
||||
return repos_details
|
||||
|
||||
def get_page(self, soup):
|
||||
"""Find URL of all pages
|
||||
def yield_repo_from_responses(self, response):
    """Yield repositories from all pages of the cgit instance.

    Yields the repository rows found in the given response first. Then,
    when the pagination of that page references more than one page, yields
    the repository rows of every page after the first one.

    Args:
        response (Response): server response.

    Yields:
        Beautifulsoup objects of repository rows.

    """
    html = response.text
    yield from get_repo_list(html)

    pages = self.get_pages(make_soup(html))
    if len(pages) > 1:
        # The first entry is skipped: its rows were already yielded above.
        yield from self.get_repos_from_pages(pages[1:])
|
||||
|
||||
def find_origin_url(self, repo, repo_name):
    """Finds the origin url for a repository

    Args:
        repo (Beautifulsoup): Beautifulsoup object of the repository
                              row present in base url.
        repo_name (str): Repository name.

    Returns:
        string: origin url. When `url_prefix_present` is true this is
        `url_prefix` followed by `repo_name`; otherwise it is whatever
        `get_url` returns for the repository row.

    """
    if self.url_prefix_present:
        return self.url_prefix + repo_name

    return self.get_url(repo)
|
||||
|
||||
def get_pages(self, url_soup):
    """Find URL of all pages.

    Finds URL of pages that are present by parsing over the HTML of
    pagination present at the end of the page.

    Args:
        url_soup (Beautifulsoup): a beautifulsoup object of base URL

    Returns:
        list: URL of pages present for a cgit instance. When the content
        block holds no `li` element, the list only contains `self.PAGE`.

    """
    pages = url_soup.find('div', {"class": "content"}).find_all('li')

    if not pages:
        return [self.PAGE]

    return [self.get_url(page) for page in pages]
|
||||
|
||||
def get_repos_from_pages(self, pages):
    """Find repos from all pages.

    Request the available repos from the pages. This yields
    the available repositories found as beautiful object representation.

    Every url in `pages` is requested; callers that already processed the
    first page are expected to leave it out of `pages`.

    Args:
        pages ([str]): list of urls of the pages to fetch for a
                       particular cgit instance.

    Yields:
        Beautifulsoup objects of repository (url) rows present in the
        given pages.

    """
    for page in pages:
        response = requests.get(page)
        if not response.ok:
            # Best effort: a failing page is reported and skipped so the
            # remaining pages are still listed.
            logging.warning('Failed to retrieve repositories from page %s',
                            page)
            continue

        yield from get_repo_list(response.text)
|
||||
|
||||
def get_url(self, repo):
|
||||
"""Finds url of a repo page.
|
||||
|
@ -150,19 +208,19 @@ class CGitLister(ListerOnePageApiTransport, SimpleLister):
|
|||
|
||||
|
||||
def find_netloc(url):
|
||||
"""Finds the network location from then base_url
|
||||
"""Finds the network location from then url.
|
||||
|
||||
All the url in the repo are relative to the network location part of base
|
||||
url, so we need to compute it to reconstruct all the urls.
|
||||
URL in the repo are relative to the network location part of base
|
||||
URL, so we need to compute it to reconstruct URLs.
|
||||
|
||||
Args:
|
||||
url (urllib): urllib object of base_url
|
||||
url (urllib): urllib object of url.
|
||||
|
||||
Returns:
|
||||
string: Scheme and Network location part in the base URL.
|
||||
|
||||
Example:
|
||||
For base_url = https://git.kernel.org/pub/scm/
|
||||
For url = https://git.kernel.org/pub/scm/
|
||||
>>> find_netloc(url)
|
||||
'https://git.kernel.org'
|
||||
|
||||
|
@ -171,106 +229,23 @@ def find_netloc(url):
|
|||
|
||||
|
||||
def get_repo_list(response):
    """Find repositories (as beautifulsoup object) available within the server
       response.

    Args:
        response (str): HTML text of the server response. It is handed
                        as is to `make_soup`.

    Returns:
        List all repositories as beautifulsoup object within the response:
        the `tr` elements, matched with an empty `class` filter, found
        inside the `div` of class "content".

    """
    repo_soup = make_soup(response)
    return repo_soup \
        .find('div', {"class": "content"}).find_all("tr", {"class": ""})
|
||||
|
||||
|
||||
def make_soup(response):
    """Instantiates a beautiful soup object from the response text.

    Args:
        response (str): HTML text to parse. It is passed directly to
                        BeautifulSoup, so callers pass the text of a
                        server response, not the response object itself.

    Returns:
        BeautifulSoup object built with the "html.parser" parser.

    """
    return BeautifulSoup(response, features="html.parser")
|
||||
|
||||
|
||||
def find_origin_url(repo_url):
|
||||
"""Finds origin url for a repo.
|
||||
|
||||
Finds the origin url for a particular repo by parsing over the page of
|
||||
that repo.
|
||||
|
||||
Args:
|
||||
repo_url: URL of the repo.
|
||||
|
||||
Returns:
|
||||
string: Origin url for the repo.
|
||||
|
||||
Examples:
|
||||
|
||||
>>> find_origin_url(
|
||||
'http://git.savannah.gnu.org/cgit/fbvbconv-py.git/')
|
||||
'https://git.savannah.gnu.org/git/fbvbconv-py.git'
|
||||
|
||||
"""
|
||||
|
||||
response = requests.get(repo_url)
|
||||
repo_soup = make_repo_soup(response)
|
||||
|
||||
origin_urls = find_all_origin_url(repo_soup)
|
||||
return priority_origin_url(origin_urls)
|
||||
|
||||
|
||||
def find_all_origin_url(soup):
|
||||
"""Finds all possible origin url for a repo.
|
||||
|
||||
Finds all the origin url for a particular repo by parsing over the html of
|
||||
repo page.
|
||||
|
||||
Args:
|
||||
soup: a beautifulsoup object repo representation.
|
||||
|
||||
Returns:
|
||||
dictionary: All possible origin urls for a repository (dict with
|
||||
key 'protocol', value the associated url).
|
||||
|
||||
Examples:
|
||||
If soup is beautifulsoup object of the html code at
|
||||
http://git.savannah.gnu.org/cgit/fbvbconv-py.git/
|
||||
|
||||
>>> print(find_all_origin_url(soup))
|
||||
{ 'https': 'https://git.savannah.gnu.org/git/fbvbconv-py.git',
|
||||
'ssh': 'ssh://git.savannah.gnu.org/srv/git/fbvbconv-py.git',
|
||||
'git': 'git://git.savannah.gnu.org/fbvbconv-py.git'}
|
||||
"""
|
||||
origin_urls = defaultdict(dict)
|
||||
found_clone_word = False
|
||||
|
||||
for i in soup.find_all('tr'):
|
||||
if found_clone_word:
|
||||
link = i.text
|
||||
protocol = link[:link.find(':')]
|
||||
origin_urls[protocol] = link
|
||||
if i.text == 'Clone':
|
||||
found_clone_word = True
|
||||
|
||||
return origin_urls
|
||||
|
||||
|
||||
def priority_origin_url(origin_url):
|
||||
"""Finds the highest priority link for a particular repo.
|
||||
|
||||
Priority order is https>http>git>ssh.
|
||||
|
||||
Args:
|
||||
origin_urls (Dict): All possible origin urls for a repository
|
||||
(key 'protocol', value the associated url)
|
||||
|
||||
Returns:
|
||||
Url (str) with the highest priority.
|
||||
|
||||
"""
|
||||
for protocol in ['https', 'http', 'git', 'ssh']:
|
||||
if protocol in origin_url:
|
||||
return origin_url[protocol]
|
||||
return BeautifulSoup(response, features="html.parser")
|
||||
|
|
|
@ -7,9 +7,11 @@ from swh.scheduler.celery_backend.config import app
|
|||
from .lister import CGitLister
|
||||
|
||||
|
||||
def new_lister(url='https://git.kernel.org/',
               url_prefix=None,
               instance='kernal', **kw):
    """Build a CGitLister for the given cgit instance.

    Args:
        url (str): URL of the cgit instance to list.
        url_prefix (str): optional prefix of the origin urls, forwarded
                          to CGitLister.
        instance (str): name of the cgit instance.
        **kw: extra keyword arguments forwarded to CGitLister.

    Returns:
        CGitLister: the instantiated lister.

    """
    # NOTE(review): the default instance name 'kernal' looks like a
    # misspelling of 'kernel'. It is a runtime value and is deliberately
    # left unchanged here; confirm before renaming.
    return CGitLister(url=url, instance=instance, url_prefix=url_prefix,
                      **kw)
|
||||
|
||||
|
||||
@app.task(name=__name__ + '.CGitListerTask')
|
||||
|
|
|
@ -1,47 +0,0 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang='en'>
|
||||
<head>
|
||||
<title>fbvbconv-py.git - Unnamed repository; edit this file 'description' to name the repository.</title>
|
||||
<meta name='generator' content='cgit v1.0-41-gc330'/>
|
||||
<meta name='robots' content='index, nofollow'/>
|
||||
<link rel='stylesheet' type='text/css' href='/cgit/cgit.css'/>
|
||||
<link rel='shortcut icon' href='/gitweb/git-favicon.png'/>
|
||||
<link rel='alternate' title='Atom feed' href='http://git.savannah.gnu.org/cgit/fbvbconv-py.git/atom/?h=master' type='application/atom+xml'/>
|
||||
<link rel='vcs-git' href='git://git.savannah.gnu.org/fbvbconv-py.git' title='fbvbconv-py.git Git repository'/>
|
||||
<link rel='vcs-git' href='https://git.savannah.gnu.org/git/fbvbconv-py.git' title='fbvbconv-py.git Git repository'/>
|
||||
<link rel='vcs-git' href='ssh://git.savannah.gnu.org/srv/git/fbvbconv-py.git' title='fbvbconv-py.git Git repository'/>
|
||||
</head>
|
||||
<body>
|
||||
<div id='cgit'><table id='header'>
|
||||
<tr>
|
||||
<td class='logo' rowspan='2'><a href='/cgit/'><img src='/cgit/cgit.png' alt='cgit logo'/></a></td>
|
||||
<td class='main'><a href='/cgit/'>index</a> : <a title='fbvbconv-py.git' href='/cgit/fbvbconv-py.git/'>fbvbconv-py.git</a></td><td class='form'><form method='get'>
|
||||
<select name='h' onchange='this.form.submit();'>
|
||||
<option value='master' selected='selected'>master</option>
|
||||
</select> <input type='submit' value='switch'/></form></td></tr>
|
||||
<tr><td class='sub'>Unnamed repository; edit this file 'description' to name the repository.</td><td class='sub right'></td></tr></table>
|
||||
<table class='tabs'><tr><td>
|
||||
<a class='active' href='/cgit/fbvbconv-py.git/'>summary</a><a href='/cgit/fbvbconv-py.git/refs/'>refs</a><a href='/cgit/fbvbconv-py.git/log/'>log</a><a href='/cgit/fbvbconv-py.git/tree/'>tree</a><a href='/cgit/fbvbconv-py.git/commit/'>commit</a><a href='/cgit/fbvbconv-py.git/diff/'>diff</a></td><td class='form'><form class='right' method='get' action='/cgit/fbvbconv-py.git/log/'>
|
||||
<select name='qt'>
|
||||
<option value='grep'>log msg</option>
|
||||
<option value='author'>author</option>
|
||||
<option value='committer'>committer</option>
|
||||
<option value='range'>range</option>
|
||||
</select>
|
||||
<input class='txt' type='text' size='10' name='q' value=''/>
|
||||
<input type='submit' value='search'/>
|
||||
</form>
|
||||
</td></tr></table>
|
||||
<div class='content'><table summary='repository info' class='list nowrap'><tr class='nohover'><th class='left'>Branch</th><th class='left'>Commit message</th><th class='left'>Author</th><th class='left' colspan='2'>Age</th></tr>
|
||||
<tr><td><a href='/cgit/fbvbconv-py.git/log/'>master</a></td><td><a href='/cgit/fbvbconv-py.git/commit/'>initial import</a></td><td>Johannes Stezenbach</td><td colspan='2'><span class='age-years' title='2017-06-02 09:57:38 +0200'>2 years</span></td></tr>
|
||||
<tr class='nohover'><td colspan='5'> </td></tr><tr class='nohover'><td colspan='5'> </td></tr><tr class='nohover'><th class='left'>Age</th><th class='left'>Commit message</th><th class='left'>Author</th><th class='left'>Files</th><th class='left'>Lines</th></tr>
|
||||
<tr><td><span title='2017-06-02 09:57:38 +0200'>2017-06-02</span></td><td><a href='/cgit/fbvbconv-py.git/commit/?id=9766bcfae598e3e077f321bade823028eb5553bb'>initial import</a><span class='decoration'><a class='deco' href='/cgit/fbvbconv-py.git/commit/?id=9766bcfae598e3e077f321bade823028eb5553bb'>HEAD</a><a class='branch-deco' href='/cgit/fbvbconv-py.git/log/'>master</a></span></td><td>Johannes Stezenbach</td><td>3</td><td><span class='deletions'>-0</span>/<span class='insertions'>+889</span></td></tr>
|
||||
<tr class='nohover'><td colspan='5'> </td></tr><tr class='nohover'><th class='left' colspan='5'>Clone</th></tr>
|
||||
<tr><td colspan='5'><a rel='vcs-git' href='git://git.savannah.gnu.org/fbvbconv-py.git' title='fbvbconv-py.git Git repository'>git://git.savannah.gnu.org/fbvbconv-py.git</a></td></tr>
|
||||
<tr><td colspan='5'><a rel='vcs-git' href='https://git.savannah.gnu.org/git/fbvbconv-py.git' title='fbvbconv-py.git Git repository'>https://git.savannah.gnu.org/git/fbvbconv-py.git</a></td></tr>
|
||||
<tr><td colspan='5'><a rel='vcs-git' href='ssh://git.savannah.gnu.org/srv/git/fbvbconv-py.git' title='fbvbconv-py.git Git repository'>ssh://git.savannah.gnu.org/srv/git/fbvbconv-py.git</a></td></tr>
|
||||
</table></div> <!-- class=content -->
|
||||
<div class='footer'>generated by <a href='https://git.zx2c4.com/cgit/about/'>cgit v1.0-41-gc330</a> at 2019-06-19 10:51:46 +0000</div>
|
||||
</div> <!-- id=cgit -->
|
||||
</body>
|
||||
</html>
|
15
swh/lister/cgit/tests/repo_list.txt
Normal file
15
swh/lister/cgit/tests/repo_list.txt
Normal file
|
@ -0,0 +1,15 @@
|
|||
<tr><td class="toplevel-repo"><a href="/openembedded-core/" title="openembedded-core">openembedded-core</a></td><td><a href="/openembedded-core/">OpenEmbedded Core layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-26 13:04:31 +0000">5 hours</span></td><td><a class="button" href="/openembedded-core/">summary</a><a class="button" href="/openembedded-core/log/">log</a><a class="button" href="/openembedded-core/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/openembedded-core-contrib/" title="openembedded-core-contrib">openembedded-core-contrib</a></td><td><a href="/openembedded-core-contrib/">OpenEmbedded Core user contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-26 12:57:22 +0000">5 hours</span></td><td><a class="button" href="/openembedded-core-contrib/">summary</a><a class="button" href="/openembedded-core-contrib/log/">log</a><a class="button" href="/openembedded-core-contrib/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/meta-openembedded/" title="meta-openembedded">meta-openembedded</a></td><td><a href="/meta-openembedded/">Collection of OpenEmbedded layers</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-25 21:22:33 +0000">21 hours</span></td><td><a class="button" href="/meta-openembedded/">summary</a><a class="button" href="/meta-openembedded/log/">log</a><a class="button" href="/meta-openembedded/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/meta-openembedded-contrib/" title="meta-openembedded-contrib">meta-openembedded-contrib</a></td><td><a href="/meta-openembedded-contrib/">OpenEmbedded layers collection contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-hours" title="2019-06-25 21:22:33 +0000">21 hours</span></td><td><a class="button" href="/meta-openembedded-contrib/">summary</a><a class="button" href="/meta-openembedded-contrib/log/">log</a><a class="button" href="/meta-openembedded-contrib/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/bitbake/" title="bitbake">bitbake</a></td><td><a href="/bitbake/">Bitbake Development tree</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-days" title="2019-06-19 17:12:23 +0000">7 days</span></td><td><a class="button" href="/bitbake/">summary</a><a class="button" href="/bitbake/log/">log</a><a class="button" href="/bitbake/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/bitbake-contrib/" title="bitbake-contrib">bitbake-contrib</a></td><td><a href="/bitbake-contrib/">Bitbake user contribution trees</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-days" title="2019-06-18 15:30:38 +0000">8 days</span></td><td><a class="button" href="/bitbake-contrib/">summary</a><a class="button" href="/bitbake-contrib/log/">log</a><a class="button" href="/bitbake-contrib/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/meta-handheld/" title="meta-handheld">meta-handheld</a></td><td><a href="/meta-handheld/">Handheld device meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-months" title="2018-10-01 21:25:11 +0000">9 months</span></td><td><a class="button" href="/meta-handheld/">summary</a><a class="button" href="/meta-handheld/log/">log</a><a class="button" href="/meta-handheld/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/meta-opie/" title="meta-opie">meta-opie</a></td><td><a href="/meta-opie/">OPIE meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2016-06-12 03:58:09 +0000">3 years</span></td><td><a class="button" href="/meta-opie/">summary</a><a class="button" href="/meta-opie/log/">log</a><a class="button" href="/meta-opie/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/openembedded/" title="openembedded">openembedded</a></td><td><a href="/openembedded/">Classic OpenEmbedded Development Tree</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2015-05-05 08:44:03 +0000">4 years</span></td><td><a class="button" href="/openembedded/">summary</a><a class="button" href="/openembedded/log/">log</a><a class="button" href="/openembedded/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/openembedded-web-frontpages/" title="openembedded-web-frontpages">openembedded-web-frontpages</a></td><td><a href="/openembedded-web-frontpages/">OpenEmbedded Website Source Code</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2014-08-24 13:39:24 +0000">5 years</span></td><td><a class="button" href="/openembedded-web-frontpages/">summary</a><a class="button" href="/openembedded-web-frontpages/log/">log</a><a class="button" href="/openembedded-web-frontpages/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/openembedded-admin/" title="openembedded-admin">openembedded-admin</a></td><td><a href="/openembedded-admin/">OE Admin tools</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2013-10-21 21:20:18 +0000">6 years</span></td><td><a class="button" href="/openembedded-admin/">summary</a><a class="button" href="/openembedded-admin/log/">log</a><a class="button" href="/openembedded-admin/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/meta-micro/" title="meta-micro">meta-micro</a></td><td><a href="/meta-micro/">Micro distribution meta layer</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2012-09-08 21:51:18 +0000">7 years</span></td><td><a class="button" href="/meta-micro/">summary</a><a class="button" href="/meta-micro/log/">log</a><a class="button" href="/meta-micro/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/eclipsetools/" title="eclipsetools">eclipsetools</a></td><td><a href="/eclipsetools/">Eclipse tools for OpenEmbedded</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2011-11-05 09:35:20 +0000">8 years</span></td><td><a class="button" href="/eclipsetools/">summary</a><a class="button" href="/eclipsetools/log/">log</a><a class="button" href="/eclipsetools/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/oetest/" title="oetest">oetest</a></td><td><a href="/oetest/">Test utilities for OpenEmbedded</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td><span class="age-years" title="2009-08-14 14:10:25 +0000">10 years</span></td><td><a class="button" href="/oetest/">summary</a><a class="button" href="/oetest/log/">log</a><a class="button" href="/oetest/tree/">tree</a></td></tr>
|
||||
<tr><td class="toplevel-repo"><a href="/oebuildstats/" title="oebuildstats">oebuildstats</a></td><td><a href="/oebuildstats/">OE Build Stats</a></td><td><a href="/?q=OpenEmbedded">OpenEmbedded</a></td><td></td><td><a class="button" href="/oebuildstats/">summary</a><a class="button" href="/oebuildstats/log/">log</a><a class="button" href="/oebuildstats/tree/">tree</a></td></tr>
|
41
swh/lister/cgit/tests/response.html
Normal file
41
swh/lister/cgit/tests/response.html
Normal file
|
@ -0,0 +1,41 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang='en'>
|
||||
<head>
|
||||
<title>OpenEmbedded Git Repository Browser</title>
|
||||
<meta name='generator' content='cgit v1.2'/>
|
||||
<meta name='robots' content='index, nofollow'/>
|
||||
<link rel='stylesheet' type='text/css' href='/cgit.css'/>
|
||||
<link rel='shortcut icon' href='/favicon.ico'/>
|
||||
</head>
|
||||
<body>
|
||||
<div id='cgit'><table id='header'>
|
||||
<tr>
|
||||
<td class='logo' rowspan='2'><a href='/'><img src='/oe.png' alt='cgit logo'/></a></td>
|
||||
<td class='main'>OpenEmbedded Git Repository Browser</td></tr>
|
||||
<tr><td class='sub'>A web frontend for git repositories</td></tr></table>
|
||||
<table class='tabs'><tr><td>
|
||||
<a class='active' href='/'>index</a></td><td class='form'><form method='get' action='/'>
|
||||
<input type='search' name='q' size='10' value=''/>
|
||||
<input type='submit' value='search'/>
|
||||
</form></td></tr></table>
|
||||
<div class='content'><table summary='repository list' class='list nowrap'><tr class='nohover'><th class='left'><a href='/?s=name'>Name</a></th><th class='left'><a href='/?s=desc'>Description</a></th><th class='left'><a href='/?s=owner'>Owner</a></th><th class='left'><a href='/?s=idle'>Idle</a></th><th class='left'>Links</th></tr>
|
||||
<tr><td class='toplevel-repo'><a title='openembedded-core' href='/openembedded-core/'>openembedded-core</a></td><td><a href='/openembedded-core/'>OpenEmbedded Core layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-26 13:04:31 +0000'>5 hours</span></td><td><a class='button' href='/openembedded-core/'>summary</a><a class='button' href='/openembedded-core/log/'>log</a><a class='button' href='/openembedded-core/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='openembedded-core-contrib' href='/openembedded-core-contrib/'>openembedded-core-contrib</a></td><td><a href='/openembedded-core-contrib/'>OpenEmbedded Core user contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-26 12:57:22 +0000'>5 hours</span></td><td><a class='button' href='/openembedded-core-contrib/'>summary</a><a class='button' href='/openembedded-core-contrib/log/'>log</a><a class='button' href='/openembedded-core-contrib/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='meta-openembedded' href='/meta-openembedded/'>meta-openembedded</a></td><td><a href='/meta-openembedded/'>Collection of OpenEmbedded layers</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-25 21:22:33 +0000'>21 hours</span></td><td><a class='button' href='/meta-openembedded/'>summary</a><a class='button' href='/meta-openembedded/log/'>log</a><a class='button' href='/meta-openembedded/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='meta-openembedded-contrib' href='/meta-openembedded-contrib/'>meta-openembedded-contrib</a></td><td><a href='/meta-openembedded-contrib/'>OpenEmbedded layers collection contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-hours' title='2019-06-25 21:22:33 +0000'>21 hours</span></td><td><a class='button' href='/meta-openembedded-contrib/'>summary</a><a class='button' href='/meta-openembedded-contrib/log/'>log</a><a class='button' href='/meta-openembedded-contrib/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='bitbake' href='/bitbake/'>bitbake</a></td><td><a href='/bitbake/'>Bitbake Development tree</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-days' title='2019-06-19 17:12:23 +0000'>7 days</span></td><td><a class='button' href='/bitbake/'>summary</a><a class='button' href='/bitbake/log/'>log</a><a class='button' href='/bitbake/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='bitbake-contrib' href='/bitbake-contrib/'>bitbake-contrib</a></td><td><a href='/bitbake-contrib/'>Bitbake user contribution trees</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-days' title='2019-06-18 15:30:38 +0000'>8 days</span></td><td><a class='button' href='/bitbake-contrib/'>summary</a><a class='button' href='/bitbake-contrib/log/'>log</a><a class='button' href='/bitbake-contrib/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='meta-handheld' href='/meta-handheld/'>meta-handheld</a></td><td><a href='/meta-handheld/'>Handheld device meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-months' title='2018-10-01 21:25:11 +0000'>9 months</span></td><td><a class='button' href='/meta-handheld/'>summary</a><a class='button' href='/meta-handheld/log/'>log</a><a class='button' href='/meta-handheld/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='meta-opie' href='/meta-opie/'>meta-opie</a></td><td><a href='/meta-opie/'>OPIE meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2016-06-12 03:58:09 +0000'>3 years</span></td><td><a class='button' href='/meta-opie/'>summary</a><a class='button' href='/meta-opie/log/'>log</a><a class='button' href='/meta-opie/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='openembedded' href='/openembedded/'>openembedded</a></td><td><a href='/openembedded/'>Classic OpenEmbedded Development Tree</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2015-05-05 08:44:03 +0000'>4 years</span></td><td><a class='button' href='/openembedded/'>summary</a><a class='button' href='/openembedded/log/'>log</a><a class='button' href='/openembedded/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='openembedded-web-frontpages' href='/openembedded-web-frontpages/'>openembedded-web-frontpages</a></td><td><a href='/openembedded-web-frontpages/'>OpenEmbedded Website Source Code</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2014-08-24 13:39:24 +0000'>5 years</span></td><td><a class='button' href='/openembedded-web-frontpages/'>summary</a><a class='button' href='/openembedded-web-frontpages/log/'>log</a><a class='button' href='/openembedded-web-frontpages/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='openembedded-admin' href='/openembedded-admin/'>openembedded-admin</a></td><td><a href='/openembedded-admin/'>OE Admin tools</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2013-10-21 21:20:18 +0000'>6 years</span></td><td><a class='button' href='/openembedded-admin/'>summary</a><a class='button' href='/openembedded-admin/log/'>log</a><a class='button' href='/openembedded-admin/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='meta-micro' href='/meta-micro/'>meta-micro</a></td><td><a href='/meta-micro/'>Micro distribution meta layer</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2012-09-08 21:51:18 +0000'>7 years</span></td><td><a class='button' href='/meta-micro/'>summary</a><a class='button' href='/meta-micro/log/'>log</a><a class='button' href='/meta-micro/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='eclipsetools' href='/eclipsetools/'>eclipsetools</a></td><td><a href='/eclipsetools/'>Eclipse tools for OpenEmbedded</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2011-11-05 09:35:20 +0000'>8 years</span></td><td><a class='button' href='/eclipsetools/'>summary</a><a class='button' href='/eclipsetools/log/'>log</a><a class='button' href='/eclipsetools/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='oetest' href='/oetest/'>oetest</a></td><td><a href='/oetest/'>Test utilities for OpenEmbedded</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td><span class='age-years' title='2009-08-14 14:10:25 +0000'>10 years</span></td><td><a class='button' href='/oetest/'>summary</a><a class='button' href='/oetest/log/'>log</a><a class='button' href='/oetest/tree/'>tree</a></td></tr>
|
||||
<tr><td class='toplevel-repo'><a title='oebuildstats' href='/oebuildstats/'>oebuildstats</a></td><td><a href='/oebuildstats/'>OE Build Stats</a></td><td><a href='/?q=OpenEmbedded'>OpenEmbedded</a></td><td></td><td><a class='button' href='/oebuildstats/'>summary</a><a class='button' href='/oebuildstats/log/'>log</a><a class='button' href='/oebuildstats/tree/'>tree</a></td></tr>
|
||||
</table></div> <!-- class=content -->
|
||||
<div class='footer'>generated by <a href='https://git.zx2c4.com/cgit/about/'>cgit v1.2</a> (<a href='https://git-scm.com/'>git 2.18.0</a>) at 2019-06-26 18:03:12 +0000</div>
|
||||
</div> <!-- id=cgit -->
|
||||
</body>
|
||||
</html>
|
|
@ -3,43 +3,20 @@
|
|||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from swh.lister.cgit.lister import priority_origin_url, find_all_origin_url
|
||||
from swh.lister.cgit.lister import find_netloc
|
||||
from swh.lister.cgit.lister import find_netloc, get_repo_list
|
||||
|
||||
|
||||
def test_find_all_origin_url():
    """Check the per-protocol origin URLs found in the fixture page.

    The fixture HTML is parsed with BeautifulSoup and handed to
    ``find_all_origin_url``; the result is expected to map each of the
    ``https``, ``ssh`` and ``git`` keys to the URL listed below.
    """
    # Read the fixture inside a context manager so the file handle is
    # closed even when reading or parsing raises.
    with open('swh/lister/cgit/tests/api_response.html') as f:
        soup = BeautifulSoup(f.read(), features="html.parser")

    expected_output = {'https': 'https://git.savannah.gnu.org/git/'
                                'fbvbconv-py.git',
                       'ssh': 'ssh://git.savannah.gnu.org/srv/git/'
                              'fbvbconv-py.git',
                       'git': 'git://git.savannah.gnu.org/fbvbconv-py.git'}

    output = find_all_origin_url(soup)

    # Only the expected protocols are checked; extra keys in the output
    # are tolerated.
    for protocol, url in expected_output.items():
        assert url == output[protocol]
|
||||
|
||||
|
||||
def test_priority_origin_url():
    """Check which URL ``priority_origin_url`` picks from a protocol map.

    Three inputs are exercised: https together with git, git together
    with ssh, and an empty mapping.
    """
    https_and_git = {
        'https': 'https://kernel.googlesource.com/pub/scm/docs/'
                 'man-pages/man-pages.git',
        'git': 'git://git.kernel.org/pub/scm/docs/man-pages/'
               'man-pages.git',
    }
    git_and_ssh = {
        'git': 'git://git.savannah.gnu.org/perl-pesel.git',
        'ssh': 'ssh://git.savannah.gnu.org/srv/git/perl-pesel.git',
    }
    no_urls = {}

    # The https URL is expected when both https and git are offered.
    picked_from_first = priority_origin_url(https_and_git)
    assert (picked_from_first ==
            'https://kernel.googlesource.com/pub/scm/docs/man-pages/'
            'man-pages.git')

    # The git URL is expected when only git and ssh are offered.
    picked_from_second = priority_origin_url(git_and_ssh)
    assert (picked_from_second ==
            'git://git.savannah.gnu.org/perl-pesel.git')

    # Nothing to choose from yields None.
    picked_from_third = priority_origin_url(no_urls)
    assert picked_from_third is None
|
||||
def test_get_repo_list():
    """Check ``get_repo_list`` against the expected entries on disk.

    ``response.html`` is the page handed to ``get_repo_list``;
    ``repo_list.txt`` holds one expected entry per line, compared with
    the string form of each returned item, in order.
    """
    # Context managers close both fixture files deterministically.
    with open('swh/lister/cgit/tests/response.html') as f:
        repos = get_repo_list(f.read())

    with open('swh/lister/cgit/tests/repo_list.txt') as f:
        # Strip only the line terminator: slicing off the last character
        # would damage a final line that has no trailing newline.
        expected_repos = [line.rstrip('\n') for line in f]

    # Compare lengths first, since zip() would silently stop at the
    # shorter sequence.
    assert len(repos) == len(expected_repos)
    for repo, expected in zip(repos, expected_repos):
        assert str(repo) == expected
|
||||
|
||||
|
||||
def test_find_netloc():
|
||||
|
|
|
@ -11,19 +11,43 @@ def test_ping(swh_app, celery_session_worker):
|
|||
|
||||
|
||||
@patch('swh.lister.cgit.tasks.CGitLister')
def test_lister_no_url_prefix(lister, swh_app, celery_session_worker):
    """Run the CGit lister task without a ``url_prefix`` argument.

    The lister class is patched out; the test checks that the task
    succeeds and that the lister is built with ``url_prefix=None``
    before being run exactly once.
    """
    # setup the mocked CGitLister
    lister.return_value = lister
    lister.run.return_value = None

    res = swh_app.send_task(
        'swh.lister.cgit.tasks.CGitListerTask',
        kwargs=dict(url='https://git.kernel.org/', instance='kernel'))
    assert res
    res.wait()
    assert res.successful()

    # No url_prefix was sent, so the lister is expected to receive None.
    lister.assert_called_once_with(
        url='https://git.kernel.org/',
        url_prefix=None,
        instance='kernel')
    lister.db_last_index.assert_not_called()
    lister.run.assert_called_once_with()
|
||||
|
||||
|
||||
@patch('swh.lister.cgit.tasks.CGitLister')
def test_lister_with_url_prefix(lister, swh_app, celery_session_worker):
    """Run the CGit lister task with an explicit ``url_prefix``.

    The lister class is patched out; the test checks that the task
    succeeds and that the keyword arguments sent with the task reach
    the lister constructor unchanged before it is run exactly once.
    """
    # Keyword arguments sent with the task and expected on the lister.
    task_kwargs = {
        'url': 'https://cgit.kde.org/',
        'url_prefix': 'https://anongit.kde.org/',
        'instance': 'kde',
    }

    # The patched class returns itself, so calls on the "instance" are
    # recorded on the same mock.
    lister.return_value = lister
    lister.run.return_value = None

    result = swh_app.send_task(
        'swh.lister.cgit.tasks.CGitListerTask',
        kwargs=dict(task_kwargs))
    assert result
    result.wait()
    assert result.successful()

    lister.assert_called_once_with(**task_kwargs)
    lister.db_last_index.assert_not_called()
    lister.run.assert_called_once_with()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue