cgit: Don't stop the listing when a repository page is not available
Related to T2988
This commit is contained in:
parent
91fcde8341
commit
f6f9f1ca28
2 changed files with 30 additions and 2 deletions
|
@@ -10,6 +10,7 @@ from urllib.parse import urljoin, urlparse
|
|||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from requests.exceptions import HTTPError
|
||||
|
||||
from swh.lister import USER_AGENT
|
||||
from swh.lister.pattern import StatelessLister
|
||||
|
@@ -76,6 +77,7 @@ class CGitLister(StatelessLister[Repositories]):
|
|||
next_page: Optional[str] = self.url
|
||||
while next_page:
|
||||
bs_idx = self._get_and_parse(next_page)
|
||||
|
||||
page_results = []
|
||||
|
||||
for tr in bs_idx.find("div", {"class": "content"}).find_all(
|
||||
|
@@ -113,7 +115,7 @@ class CGitLister(StatelessLister[Repositories]):
|
|||
|
||||
for repository in repositories:
|
||||
origin_url = self._get_origin_from_repository_url(repository["url"])
|
||||
if not origin_url:
|
||||
if origin_url is None:
|
||||
continue
|
||||
|
||||
yield ListedOrigin(
|
||||
|
@@ -125,7 +127,15 @@ class CGitLister(StatelessLister[Repositories]):
|
|||
|
||||
def _get_origin_from_repository_url(self, repository_url: str) -> Optional[str]:
|
||||
"""Extract the git url from the repository page"""
|
||||
bs = self._get_and_parse(repository_url)
|
||||
try:
|
||||
bs = self._get_and_parse(repository_url)
|
||||
except HTTPError as e:
|
||||
logger.warning(
|
||||
"Unexpected HTTP status code %s on %s",
|
||||
e.response.status_code,
|
||||
e.response.url,
|
||||
)
|
||||
return None
|
||||
|
||||
# origin urls are listed on the repository page
|
||||
# TODO check if forcing https is better or not ?
|
||||
|
|
|
@@ -7,6 +7,7 @@ from typing import List
|
|||
|
||||
import pytest
|
||||
|
||||
from swh.core.pytest_plugin import requests_mock_datadir_factory
|
||||
from swh.lister import __version__
|
||||
from swh.lister.cgit.lister import CGitLister, _parse_last_updated_date
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
@@ -142,3 +143,20 @@ def test_lister_cgit_date_parsing(date_str, expected_date):
|
|||
repository = {"url": "url", "last_updated_date": date_str}
|
||||
|
||||
assert _parse_last_updated_date(repository) == expected_date
|
||||
|
||||
|
||||
requests_mock_datadir_missing_url = requests_mock_datadir_factory(
|
||||
ignore_urls=["https://git.tizen/cgit/adaptation/ap_samsung/audio-hal-e4x12/",]
|
||||
)
|
||||
|
||||
|
||||
def test_lister_cgit_get_origin_from_repo_failing(
    requests_mock_datadir_missing_url, swh_scheduler
):
    """Run the cgit lister against an instance with one repository page left out.

    The ``requests_mock_datadir_missing_url`` fixture is created with a single
    repository page URL in its ``ignore_urls`` list.  The run is still expected
    to complete and to report 3 pages and 15 origins.
    """
    lister = CGitLister(swh_scheduler, url="https://git.tizen/cgit/")

    # Run the lister first, then build the expected stats to compare against.
    observed_stats = lister.run()

    assert observed_stats == ListerStats(pages=3, origins=15)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue