diff --git a/setup.py b/setup.py
index 456cd0e..983feb0 100755
--- a/setup.py
+++ b/setup.py
@@ -89,6 +89,7 @@ setup(
         lister.pypi=swh.lister.pypi:register
         lister.rubygems=swh.lister.rubygems:register
         lister.sourceforge=swh.lister.sourceforge:register
+        lister.stagit=swh.lister.stagit:register
         lister.tuleap=swh.lister.tuleap:register
         lister.maven=swh.lister.maven:register
     """,
diff --git a/swh/lister/stagit/__init__.py b/swh/lister/stagit/__init__.py
new file mode 100644
index 0000000..6a93edf
--- /dev/null
+++ b/swh/lister/stagit/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (C) 2023 The Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+    """Return the lister class and the task modules of the stagit lister."""
+    from .lister import StagitLister
+
+    return {
+        "lister": StagitLister,
+        "task_modules": [f"{__name__}.tasks"],
+    }
diff --git a/swh/lister/stagit/lister.py b/swh/lister/stagit/lister.py
new file mode 100644
index 0000000..b87ca66
--- /dev/null
+++ b/swh/lister/stagit/lister.py
@@ -0,0 +1,185 @@
+# Copyright (C) 2023 The Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from datetime import datetime, timezone
+import logging
+import re
+from typing import Any, Dict, Iterator, List, Optional
+from urllib.parse import urljoin, urlparse
+
+from bs4 import BeautifulSoup
+from requests.exceptions import HTTPError
+
+from swh.lister.pattern import CredentialsType, StatelessLister
+from swh.scheduler.interface import SchedulerInterface
+from swh.scheduler.model import ListedOrigin
+
+logger = logging.getLogger(__name__)
+
+Repositories = List[Dict[str, Any]]
+
+
+class StagitLister(StatelessLister[Repositories]):
+    """Lister class for Stagit forge instances.
+
+    This lister will retrieve the list of published git repositories by
+    parsing the HTML page(s) of the index retrieved at `url`.
+
+    """
+
+    LISTER_NAME = "stagit"
+
+    def __init__(
+        self,
+        scheduler: SchedulerInterface,
+        url: Optional[str] = None,
+        instance: Optional[str] = None,
+        credentials: Optional[CredentialsType] = None,
+        max_origins_per_page: Optional[int] = None,
+        max_pages: Optional[int] = None,
+        enable_origins: bool = True,
+    ):
+        """Lister class for Stagit repositories.
+
+        Args:
+            url: (Optional) Root URL of the Stagit instance, i.e. url of the index of
+                published git repositories on this instance. Defaults to
+                :file:`https://{instance}` if unset.
+            instance: Name of stagit instance. Defaults to url's network location
+                if unset.
+
+        """
+        super().__init__(
+            scheduler=scheduler,
+            url=url,
+            instance=instance,
+            credentials=credentials,
+            max_origins_per_page=max_origins_per_page,
+            max_pages=max_pages,
+            enable_origins=enable_origins,
+        )
+
+        # "text/html" is the registered media type for HTML pages;
+        # "application/html" is not a registered type.
+        self.session.headers.update({"Accept": "text/html"})
+
+    def _get_and_parse(self, url: str) -> BeautifulSoup:
+        """Get the given url and parse the retrieved HTML using BeautifulSoup"""
+        response = self.http_request(url)
+        return BeautifulSoup(response.text, features="html.parser")
+
+    def get_pages(self) -> Iterator[Repositories]:
+        """Generate git 'project' URLs found on the current Stagit server.
+
+        Yields:
+            A single page holding one dict per linked row of the index table,
+            with the ``url`` of the repository page and the raw ``last_update``
+            text of the row's last cell (None when the row has no cell).
+
+        """
+        bs_idx = self._get_and_parse(self.url)
+
+        index_table = bs_idx.find("table", {"id": re.compile("index")})
+        if index_table is None:
+            # Unexpected page layout: report it instead of failing with an
+            # AttributeError on None.
+            logger.warning("No repository index table found at %s", self.url)
+            return
+
+        # A single trailing slash makes relative hrefs resolve below the index
+        # URL rather than replace its last path segment.
+        base_url = self.url.rstrip("/") + "/"
+
+        page_results = []
+
+        for tr in index_table.find_all("tr"):
+            link = tr.find("a")
+            if not link or not link.get("href"):
+                continue
+
+            repo_description_url = urljoin(base_url, link["href"])
+
+            # This retrieves the date in format "%Y-%m-%d %H:%M"
+            tds = tr.find_all("td")
+            last_update = tds[-1].text.strip() if tds else None
+
+            page_results.append(
+                {"url": repo_description_url, "last_update": last_update}
+            )
+
+        yield page_results
+
+    def get_origins_from_page(
+        self, repositories: Repositories
+    ) -> Iterator[ListedOrigin]:
+        """Convert a page of stagit repositories into a list of ListedOrigins."""
+        assert self.lister_obj.id is not None
+
+        for repo in repositories:
+            origin_url = self._get_origin_from_repository_url(repo["url"])
+            if origin_url is None:
+                continue
+
+            yield ListedOrigin(
+                lister_id=self.lister_obj.id,
+                url=origin_url,
+                visit_type="git",
+                last_update=_parse_date(repo["last_update"]),
+            )
+
+    def _get_origin_from_repository_url(self, repository_url: str) -> Optional[str]:
+        """Extract the git url from the repository page.
+
+        Returns:
+            The first "git clone" URL of the page whose scheme is https, http or
+            git; None if fetching the page raises an HTTPError or if the page
+            advertises no such URL.
+
+        """
+        try:
+            bs = self._get_and_parse(repository_url)
+        except HTTPError as e:
+            logger.warning(
+                "Unexpected HTTP status code %s on %s",
+                e.response.status_code,
+                e.response.url,
+            )
+            return None
+
+        for row in bs.find_all("tr", {"class": "url"}):
+            for td in row.find_all("td"):
+                if not td.text.startswith("git clone"):
+                    continue
+                link = td.find("a")
+                # A "git clone" cell without an anchor carries no usable URL.
+                href = link.get("href") if link else None
+                if href and urlparse(href).scheme in ("https", "http", "git"):
+                    return href
+
+        return None
+
+
+def _parse_date(date: Optional[str]) -> Optional[datetime]:
+    """Parse the last update date.
+
+    Args:
+        date: Date text expected in the "%Y-%m-%d %H:%M" format.
+
+    Returns:
+        The parsed datetime tagged as UTC, or None if `date` is empty or does
+        not match the expected format.
+
+    """
+    if not date:
+        return None
+
+    try:
+        return datetime.strptime(date.strip(), "%Y-%m-%d %H:%M").replace(
+            tzinfo=timezone.utc
+        )
+    except ValueError:
+        logger.warning(
+            "Could not parse last_update date: %s",
+            date,
+        )
+        return None
diff --git a/swh/lister/stagit/tasks.py b/swh/lister/stagit/tasks.py
new file mode 100644
index 0000000..48a3840
--- /dev/null
+++ b/swh/lister/stagit/tasks.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2023 The Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Dict
+
+from celery import shared_task
+
+from .lister import StagitLister
+
+
+@shared_task(name=f"{__name__}.StagitListerTask")
+def list_stagit(**lister_args) -> Dict[str, str]:
+    """Lister task for Stagit instances"""
+    lister = StagitLister.from_configfile(**lister_args)
+    return lister.run().dict()
diff --git
a/swh/lister/stagit/tests/__init__.py b/swh/lister/stagit/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/README b/swh/lister/stagit/tests/data/https_codemadness.org/README new file mode 100644 index 0000000..2c03c9c --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/README @@ -0,0 +1,2 @@ +These files are a partial dump of https://codemadness.org/git/. + diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/foobar b/swh/lister/stagit/tests/data/https_codemadness.org/foobar new file mode 100644 index 0000000..55b0619 --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/foobar @@ -0,0 +1,33 @@ + + + + + +Repositories + + + + + + + +
Repositories
+
+
+
+ + + +
NameDescriptionOwnerLast commit
bmfbmf (Bayesian Mail Filter) 0.9.4 fork + patches +2020-02-04 22:03
dmenumy customized version of dmenu (hiltjo branch) +Hiltjo Posthuma2022-05-01 16:38
dwmmy customized version of dwm (hiltjo branch) +Hiltjo Posthuma2023-04-10 10:34
stagitstatic git page generator +Hiltjo Posthuma2020-03-03 23:49
Hiltjo Posthuma2021-07-20 13:20
twitch-gotwitch.tv web application in Go +Hiltjo Posthuma2019-05-02 18:14
webdumpText-based web client/page dump (experiment) +Hiltjo Posthuma2023-03-20 20:32
www.codemadness.orgwww.codemadness.org saait content files +Hiltjo Posthuma2023-05-20 09:50
xmlparserXML parser +Hiltjo Posthuma2023-05-14 21:59
xscreenshotscreen capture tool +
+
+ + diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/foobar_bmf_log.html b/swh/lister/stagit/tests/data/https_codemadness.org/foobar_bmf_log.html new file mode 120000 index 0000000..b7ff337 --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/foobar_bmf_log.html @@ -0,0 +1 @@ +git_bmf_log.html \ No newline at end of file diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/foobar_dmenu_log.html b/swh/lister/stagit/tests/data/https_codemadness.org/foobar_dmenu_log.html new file mode 120000 index 0000000..06517b8 --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/foobar_dmenu_log.html @@ -0,0 +1 @@ +git_dmenu_log.html \ No newline at end of file diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/foobar_dwm_log.html b/swh/lister/stagit/tests/data/https_codemadness.org/foobar_dwm_log.html new file mode 120000 index 0000000..d167b8b --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/foobar_dwm_log.html @@ -0,0 +1 @@ +git_dwm_log.html \ No newline at end of file diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/foobar_stagit_log.html b/swh/lister/stagit/tests/data/https_codemadness.org/foobar_stagit_log.html new file mode 120000 index 0000000..63036ac --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/foobar_stagit_log.html @@ -0,0 +1 @@ +git_stagit_log.html \ No newline at end of file diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/git b/swh/lister/stagit/tests/data/https_codemadness.org/git new file mode 100644 index 0000000..c6660bf --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/git @@ -0,0 +1,28 @@ + + + + + +Repositories + + + + + + + +
Repositories
+
+
+
+ + + +
NameDescriptionOwnerLast commit
bmfbmf (Bayesian Mail Filter) 0.9.4 fork + patches +2020-02-04 22:03
dmenumy customized version of dmenu (hiltjo branch) +Hiltjo Posthuma2022-05-01 16:38
dwmmy customized version of dwm (hiltjo branch) +Hiltjo Posthuma2023-04-10 10:34
stagitstatic git page generator +Hiltjo Posthuma2020-03-03 23:49
+
+ + diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/git_bmf_log.html b/swh/lister/stagit/tests/data/https_codemadness.org/git_bmf_log.html new file mode 100644 index 0000000..9c1345a --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/git_bmf_log.html @@ -0,0 +1,60 @@ + + + + + +Log - bmf - bmf (Bayesian Mail Filter) 0.9.4 fork + patches + + + + + + + +

bmf

bmf (Bayesian Mail Filter) 0.9.4 fork + patches +
git clone git://git.codemadness.org/bmf
+Log | Files | Refs | README | LICENSE
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateCommit messageAuthorFiles+-
2020-02-04 22:03update TODOHiltjo Posthuma1+2-0
2020-02-04 21:59add a bulk test mode option (-b)Hiltjo Posthuma3+83-3
2019-01-26 19:10README: typo applicatios -> applicationsHiltjo Posthuma1+2-3
2019-01-26 18:39fix -d parameterJulian Schweinsberg2+2-2
2018-11-09 10:18fix statdump call parameterHiltjo Posthuma1+1-1
2018-11-09 10:17fix statdump declarationHiltjo Posthuma1+1-1
2018-11-08 17:12statdump: use standard I/O functions for bufferingHiltjo Posthuma1+7-17
2018-11-08 17:07fix uninitialized memory when parsing bogofilter headerHiltjo Posthuma1+2-1
2018-10-27 18:05function declaration: use the same parameter namesHiltjo Posthuma4+14-15
2018-10-27 18:02set rdonly earlier for unveil, make open() error more clearHiltjo Posthuma2+3-3
2018-10-27 17:56fix unveil(2) permissions and path name + misc code fixesHiltjo Posthuma6+25-29
2018-10-27 17:33dbh_open -> dbtext_db_openHiltjo Posthuma2+2-2
2018-10-27 17:31merge dbh and dbtext (WIP)Hiltjo Posthuma7+511-566
2018-10-27 17:14initial unveil(2) support + some code-cleanup and remove unused functionsHiltjo Posthuma7+35-35
2018-10-27 17:13whoops, fix regression in opening "database"Hiltjo Posthuma1+1-1
2018-10-27 16:37many improvementsHiltjo Posthuma17+144-610
2018-10-25 10:41fix some undefined behaviour with ctype functionsHiltjo Posthuma1+16-15
2018-09-29 11:15improve MakefileHiltjo Posthuma1+77-28
2018-09-29 11:14define PACKAGE macro in C fileHiltjo Posthuma1+2-0
2018-09-29 10:52remove unused code, leftover -f flag documentationHiltjo Posthuma7+2-91
2018-09-29 10:49dbg: simplify verbose functionHiltjo Posthuma1+3-8
2018-09-23 12:39config.h: disable pledge for non-OpenBSDHiltjo Posthuma1+5-0
2018-09-23 12:37Makefile: order dependencies, remove unneeded [ -d ] checkHiltjo Posthuma1+3-3
2018-09-23 12:36improve code-styleHiltjo Posthuma7+1531-1663
2018-09-23 12:29bmf.c: improve some code-styleHiltjo Posthuma1+250-282
2018-09-23 12:19update TODOHiltjo Posthuma1+9-6
2018-09-22 18:05remove -i and -f from usageHiltjo Posthuma1+0-2
2018-09-22 16:51Makefile: install in /usr/localHiltjo Posthuma1+2-4
2018-09-22 16:49pledge test mode, no need to reopen goodlist and spamlist for each message in test modeHiltjo Posthuma2+52-16
2018-09-22 16:27tweak pledgeHiltjo Posthuma1+2-2
2018-09-22 16:27remove code leftover from -i optionHiltjo Posthuma1+0-17
2018-09-22 16:12remove NDEBUG codeHiltjo Posthuma3+2-281
2018-09-22 16:05small cleanup, remove DB_USER, DB_PASSHiltjo Posthuma2+7-10
2018-09-22 15:57changesHiltjo Posthuma15+60-2004
2018-09-22 15:49add patches from OpenBSD portHiltjo Posthuma2+20-1
2018-09-22 15:46import bmf 0.9.4Hiltjo Posthuma31+5898-0
+ + diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/git_dmenu_log.html b/swh/lister/stagit/tests/data/https_codemadness.org/git_dmenu_log.html new file mode 100644 index 0000000..52f6116 --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/git_dmenu_log.html @@ -0,0 +1,125 @@ + + + + + +Log - dmenu - my customized version of dmenu (hiltjo branch) + + + + + + + +

dmenu

my customized version of dmenu (hiltjo branch) +
git clone git://git.codemadness.org/dmenu
+Log | Files | Refs | README | LICENSE
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateCommit messageAuthorFiles+-
2022-05-01 16:38Makefile: add manual path for OpenBSDHiltjo Posthuma1+1-0
2022-04-30 11:19fix incorrect comment, math is hardHiltjo Posthuma1+1-1
2022-04-29 18:15inputw: improve correctness and startup performance, by NRKHiltjo Posthuma1+2-8
2022-03-28 15:38drw_text: account for fallback fonts in ellipsis_widthNRK1+4-2
2022-03-27 19:02drw_text: don't segfault when called with 0 widthNRK1+1-1
2022-03-26 16:58fix UB with the function iscntrl()Hiltjo Posthuma1+1-1
2022-03-26 16:57Revert "avoid redraw when there's no change"Hiltjo Posthuma1+2-3
2022-03-25 21:51avoid redraw when there's no changeNRK1+3-2
2022-03-25 21:51free all allocated items, use %zu for size_tNRK1+6-3
2022-03-23 18:37drw_text: improve performance when there's no matchNRK1+12-1
2022-03-23 18:37inputw: improve correctness and startup performanceNRK1+9-10
2022-03-23 20:04significantly improve performance on large stringsNRK1+10-3
2022-03-23 20:00introduce drw_fontset_getwidth_clamp()NRK2+18-2
2022-03-23 18:37drw_text: improve both performance and correctnessNRK1+28-28
2022-03-01 21:45remove false-positive warning for int comparison as boolHiltjo Posthuma1+1-1
2022-02-11 11:26bump version to 5.1Hiltjo Posthuma2+2-2
2022-02-08 20:45code-style: rm newline (oops)Hiltjo Posthuma1+0-1
2022-02-08 18:32revert using strcasestr and use a more optimized portable versionHiltjo Posthuma2+21-2
2022-02-07 09:36follow-up fix: add -D_GNU_SOURCE for strcasestr for some systemsHiltjo Posthuma1+1-1
2022-02-06 23:21improve performance of case-insensitive matchingHiltjo Posthuma1+1-12
2021-08-20 21:05Revert "Improve speed of drw_text when provided with large strings"Hiltjo Posthuma1+2-5
2021-08-09 16:39add support for more keypad keysHiltjo Posthuma1+11-0
2021-08-08 01:34Improve speed of drw_text when provided with large stringsMiles Alan1+5-2
2021-07-25 01:55remove always true condition in if statementGuilherme Janczak1+1-1
2020-09-02 16:30bump version to 5.0Hiltjo Posthuma2+2-2
2020-06-11 16:45Fix memory leaks in drwHiltjo Posthuma1+1-0
2019-03-03 12:08revert IME supportHiltjo Posthuma1+3-12
2019-02-12 21:58improve xopenim error messageHiltjo Posthuma1+1-1
2019-02-12 21:13make dmenu_path script executableHiltjo Posthuma1+0-0
2019-02-12 18:10fix crash when XOpenIM returns NULLHiltjo Posthuma1+11-2
2019-02-03 23:29Close when the embedding window is destroyedQuentin Rameau2+7-2
2019-02-02 12:54Prepared 4.9 release.Anselm R Garbe3+18-4
2019-01-27 14:28dmenu.1: document improved fastgrab behaviour from previous patchHiltjo Posthuma1+2-2
2019-01-26 14:49Use slow path if stdin is a ttydok1+1-3
2018-07-21 10:47dmenu_path: always use the cachedirQuentin Rameau1+6-6
2018-06-02 15:09Makefile: just show the compiler outputHiltjo Posthuma1+28-38
2018-06-02 15:01Do not strip at link stageHiltjo Posthuma1+1-1
2018-05-25 11:07code-style for pledge: check the return code -1, not < 0Hiltjo Posthuma1+1-1
2018-05-25 11:03code-style for pledge(2)Hiltjo Posthuma1+5-6
2018-05-25 10:04Pledge on OpenBSDHiltjo Posthuma1+9-0
2018-05-09 19:27Use bold for keyboard shortcuts in dmenu.1David Demelier1+28-28
2018-04-22 12:18Fix cursor drawn position with wide glyphsQuentin Rameau1+1-1
2018-03-22 10:18Makefile: bikesheddingly replace ${} with $()Quentin Rameau2+43-43
2018-03-19 14:42Handle IME inputQuentin Rameau1+4-1
2018-03-16 15:51Fix handling of input stringsHiltjo Posthuma1+21-14
2018-03-15 09:16Update LICENSEQuentin Rameau1+6-4
2018-03-14 18:48bump version to 4.8Hiltjo Posthuma1+1-1
2018-03-13 16:15add key bindings for moving to the word start or endQuentin Rameau2+46-0
2018-01-04 22:45Fix regression in 84a1bc5Hiltjo Posthuma1+2-1
2018-01-04 12:27Instantiate j var outside #ifdef XINEMARA directive because it is used in loop outside directiveVincent Carluer1+2-3
2017-11-03 20:10drw: drw_scm_create: use Clr typeHiltjo Posthuma1+1-1
2017-11-03 14:31fix a possible free of a uninitialize variable in paste()Hiltjo Posthuma1+6-4
2017-11-03 20:05init colors using SchemeLastHiltjo Posthuma1+2-3
2017-11-03 16:49Set class name on menu windowOmar Sandoval1+2-0
2017-05-02 16:32release 4.7Hiltjo Posthuma1+1-1
2016-12-07 14:45Revert "fix input text matching"Andrew Gregory1+1-1
2016-11-25 12:38Xinerama: correct variable declarations in preprocessor conditionalThomas Gardner1+6-5
2016-11-05 10:36die() on calloc failureHiltjo Posthuma1+1-1
2016-10-08 12:08add embedding support with -w optionQuentin Rameau2+58-12
2016-10-08 12:42dmenu.1: group single optionsQuentin Rameau2+3-6
2016-10-08 12:36dmenu.1: fix -l optionQuentin Rameau1+1-1
2016-08-12 12:39die() consistency: always add newlineHiltjo Posthuma3+9-7
2016-07-26 21:13fix crash if negative monitor (< -1) was passedHiltjo Posthuma1+2-2
2016-07-26 21:02Revert "Print highlighted input text only on single match"Hiltjo Posthuma1+3-13
2016-07-26 10:48Print highlighted input text only on single matchQuentin Rameau1+13-3
2016-07-25 09:33Partially revert 44c7de3: fix items text width offset calculationQuentin Rameau1+2-2
2016-06-28 15:56config.def.h: style improvement, use color Scheme enumHiltjo Posthuma1+5-5
2016-06-28 05:11fix: Do not crash on e.g. dmenu < /dev/nullS. Gilles1+1-1
2016-05-21 19:51import new drw from libsl and minor fixes.Markus Teich5+231-236
2016-02-22 14:03arg.h: fixed argv checks orderLucas Gabriel Vuotto1+2-2
2016-01-11 12:26TypofixKlemens Nanni1+1-1
2015-12-19 18:58Shut up glibc about _BSD_SOURCE being deprecatedQuentin Rameau1+1-1
2015-12-19 08:32Add config option for word delimitersQuentin Rameau2+10-2
2015-11-08 22:37arg.h: remove unused ARGNUM* macrosHiltjo Posthuma1+0-14
2015-11-08 22:03unboolify dmenuHiltjo Posthuma2+7-9
2015-11-08 15:46Makefile: package config.def.h on make distHiltjo Posthuma1+3-2
2015-11-08 15:44remove .hgtags fileHiltjo Posthuma1+0-49
2015-11-07 11:53dmenu.1: clarify monitor numbers are starting from 0 (first mon)Hiltjo Posthuma1+2-1
2015-11-07 11:43fix: multimon: always spawn client on first monitor if specified with -m 0Hiltjo Posthuma1+1-1
2015-10-20 20:56drw: cleanup drw_text, prevent gcc warning false-positive of unused varHiltjo Posthuma1+14-21
2015-10-20 20:55drw: simplify drw_font_xcreate and prevent a potential unneeded allocationHiltjo Posthuma1+19-22
2015-10-20 20:53drw: a valid (non-NULL) Drw and Fnt context must be passedHiltjo Posthuma1+3-16
2015-10-20 20:51add sbase-style ecalloc(), calloc: or dieHiltjo Posthuma3+18-12
2015-10-18 16:37free schemes as arrayHiltjo Posthuma1+6-6
2015-10-04 12:47fix input text matchingQuentin Rameau1+1-1
2015-10-04 12:01fix incorrect ordering of match resultsDavide Del Zompo1+3-2
2015-10-04 10:32fix paste from clipboard (ctrl+shift+y)Hiltjo Posthuma1+1-0
2015-09-27 22:38minor style fixHiltjo Posthuma1+2-1
2015-09-27 22:19Remove function prototypes and reorder functions accordinglyFRIGN1+64-80
2015-09-27 22:15Untypedef struct itemFRIGN1+9-10
2015-09-27 22:18Fix the conversion from microseconds to nanosecondsQuentin Rameau1+1-1
2015-09-27 22:06Replace deprecated usleep() with nanosleep()Quentin Rameau1+3-2
2015-09-27 22:02config.mk: use -std=c99Hiltjo Posthuma1+1-1
2015-09-27 21:57dmenu: style improvementsHiltjo Posthuma1+213-204
2015-09-27 21:56drw style improvementsHiltjo Posthuma2+106-93
2015-09-27 21:02separate program-specific c99 bool and X11Hiltjo Posthuma2+10-11
2015-09-27 20:55config.mk: fix _XOPEN_SOURCE=700 for getline()Hiltjo Posthuma1+1-1
2015-09-27 20:38config.mk: improve feature test checkHiltjo Posthuma1+1-1
2015-08-06 02:19Fixed typo introduced by shared codeEric Pruitt1+1-1
2015-07-19 19:34stest: get rid of getopt, use suckless arg.hHiltjo Posthuma3+141-53
497 more commits remaining, fetch the repository
+ + diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/git_dwm_log.html b/swh/lister/stagit/tests/data/https_codemadness.org/git_dwm_log.html new file mode 100644 index 0000000..ceeb30a --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/git_dwm_log.html @@ -0,0 +1,125 @@ + + + + + +Log - dwm - my customized version of dwm (hiltjo branch) + + + + + + + +

dwm

my customized version of dwm (hiltjo branch) +
git clone git://git.codemadness.org/dwm
+Log | Files | Refs | README | LICENSE
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateCommit messageAuthorFiles+-
2022-08-26 12:48Remove dmenumon variableStein2+1-4
2022-08-18 16:13config.def.h: make keys and buttons constNRK1+2-2
2022-08-15 12:31Remove blw variable in favour of calculating the value when neededStein1+3-3
2022-08-11 09:15Make floating windows spawn within the monitor's window areaStein1+5-5
2022-08-09 08:38Simplify client y-offset correctionStein1+1-3
2022-08-08 08:43sync code-style patch from libslHiltjo Posthuma1+12-11
2022-08-05 22:27code-style: simplify some checksNRK1+8-15
2022-07-29 23:26spawn: reduce 2 lines, change fprintf() + perror() + exit() to die("... :")explosion-mental1+1-3
2022-08-01 09:42unmanage: stop listening for events for unmanaged windowsStein1+1-0
2022-07-22 07:18Revert "do not call signal-unsafe function inside sighanlder"Hiltjo Posthuma1+2-2
2022-07-14 01:26do not call signal-unsafe function inside sighanlderNRK1+2-2
2022-07-14 01:27use named parameter for func prototypeNRK1+2-2
2022-05-10 17:07sync latest drw.{c,h} changes from dmenuHiltjo Posthuma2+59-30
2022-05-01 16:37Makefile: add manual path for OpenBSDHiltjo Posthuma1+1-0
2022-04-26 08:42manage: Make sure c->isfixed is applied before floating checksChris Down1+1-0
2022-04-26 13:50LICENSE: add Chris DownHiltjo Posthuma1+1-0
2022-04-26 08:30Revert "manage: For isfloating/oldstate check/set, ensure trans client actually exists"Hiltjo Posthuma1+1-1
2022-02-21 14:58Update monitor positions also on removalSanttu Lakkala1+34-34
2022-03-17 15:56manage: propertynotify: Reduce cost of unused size hintsChris Down1+5-3
2022-02-21 06:10manage: For isfloating/oldstate check/set, ensure trans client actually existsMiles Alan1+1-1
2022-03-11 14:40fix mem leak in cleanup()NRK1+1-0
2022-01-07 11:39bump version to 6.3Hiltjo Posthuma1+1-1
2021-12-18 16:58drawbar: Don't expend effort drawing bar if it is occludedChris Down1+3-0
2021-08-20 21:09Revert "Improve speed of drw_text when provided with large strings"Hiltjo Posthuma1+2-5
2021-08-09 16:24Improve speed of drw_text when provided with large stringsMiles Alan1+5-2
2021-07-12 21:44Add a configuration option for fullscreen lockingQuentin Rameau2+2-1
2020-07-02 19:18Do not allow focus to drift from fullscreen client via focusstack()Chris Down1+1-1
2020-03-03 22:23Fix x coordinate calculation in buttonpress.Ian Remmler1+1-1
2020-07-08 16:05dwm.1: fix wrong text in man pageHiltjo Posthuma1+1-1
2020-06-11 13:28Fix memory leaks in drwAlex Flierl1+1-0
2020-04-23 07:50dwm crashes when opening 50+ clients (tile layout)bakkeby1+4-2
2020-04-22 14:48drawbar: Don't shadow sw globalChris Down1+4-4
2020-04-22 14:48getatomprop: Add forward declarationChris Down1+1-0
2020-04-20 15:41setmfact: Unify bounds for compile-time and runtime mfactChris Down1+1-1
2020-04-03 13:36config.mk: fix POSIX_C_SOURCE macro for feature test for snprintf()Hiltjo Posthuma1+1-1
2019-02-02 12:50Prepare 6.2 release.Anselm R Garbe3+17-3
2018-06-02 15:15pledge: add rpath promise for the ugly Xft font fallbackHiltjo Posthuma1+1-1
2018-06-02 15:04Makefile: just show the compiler outputHiltjo Posthuma1+17-26
2018-06-02 13:12Do not strip at link stageKlemens Nanni1+1-1
2018-05-25 05:56Pledge on OpenBSDKlemens Nanni1+4-0
2018-05-25 04:56config.def.h: ClkTagBar missing from commentHiltjo Posthuma1+1-1
2018-05-02 22:09Function declarations in correct order.Christopher Drelich1+1-1
2018-05-12 17:14remove old TODO and BUGS entriesHiltjo Posthuma4+4-58
2018-03-14 20:03update README: remove mentioning the old dextra repoHiltjo Posthuma1+0-3
2018-03-14 17:58All functions in alphabetical order except for this one.Christopher Drelich1+9-9
2018-03-14 16:44ColBorder has been moved to the enum with ColFg and ColBg.Christopher Drelich2+1-2
2017-12-27 12:36dont NUL terminate _NET_WM_NAMEHiltjo Posthuma1+1-1
2017-11-03 20:20sync dmenu drw.{c,h} code: use Clr* (was Scm)Hiltjo Posthuma3+13-13
2017-11-03 15:36gettextprop: check result of XGetTextProperty (undefined behaviour for XFree)Hiltjo Posthuma1+1-2
2017-11-03 16:58Set class name on status barOmar Sandoval1+2-0
2017-10-10 21:10simplify isfixed conditionsDaniel Cousens1+1-2
2017-05-08 19:08yet another cleanupAnselm R Garbe2+35-35
2017-01-07 16:21Don't restrict snap in mousemoveMarkus Teich1+11-14
2017-01-07 16:21Button passthrough when client is not focusedMarkus Teich1+11-10
2017-01-07 16:21cleanupMarkus Teich3+32-36
2016-12-05 09:16applied Markus' tagset purge of alternative view on _NET_ACTIVE_WINDOW eventAnselm R Garbe1+18-20
2016-12-05 09:09applied Ivan Delalande's NET_SUPPORTING_WM_CHECK patch for gtk3 compatibilityAnselm R Garbe1+14-2
2016-12-05 09:05applied Ian Remmler's man page adjustment suggestionsAnselm R Garbe1+5-4
2016-12-05 09:01applied Markus' decouple color-scheme patchAnselm R Garbe2+8-6
2016-12-05 08:54applied Markus' clarify status text padding patchAnselm R Garbe1+2-2
2016-11-14 10:49LICENSE: update peopleQuentin Rameau1+10-6
2016-11-14 10:18dwm.1: add keybinding for spawning dmenuQuentin Rameau1+5-0
2016-11-05 10:34die() on calloc failureHiltjo Posthuma1+1-1
2016-08-12 12:35die() consistency: always add newlineHiltjo Posthuma3+10-8
2016-06-28 16:04config.def.h: style improvement, use color Scheme enumHiltjo Posthuma1+4-4
2016-05-22 20:33import new drw from libsl and minor fixes.Markus Teich5+209-208
2016-05-25 23:33Configure geometry before applying rulesEric Pruitt1+7-6
2015-12-19 19:25fix fullscreen clients not resized on X display resolution changeHiltjo Posthuma1+6-1
2015-12-19 19:04Shut up glibc about _BSD_SOURCE being deprecatedQuentin Rameau1+1-1
2015-11-08 22:11code-style consistencyHiltjo Posthuma1+503-433
2015-11-08 21:48unboolificationHiltjo Posthuma2+77-78
2015-11-08 19:38sort include + whitespace fixHiltjo Posthuma1+3-3
2015-11-07 13:04separate program-specific c99 bool and X11Hiltjo Posthuma2+77-75
2015-11-08 15:52Makefile: package all files with make distHiltjo Posthuma1+2-2
2015-11-07 13:09setfullscreen: don't process the property twiceQuentin Rameau1+2-2
2015-10-20 21:34sync updated drw code from dmenuHiltjo Posthuma5+164-158
2015-10-20 21:30dwm: use ecalloc, prevent theoretical overflowHiltjo Posthuma1+3-6
2015-10-20 21:28cleanup, dont use c++ style commentsHiltjo Posthuma1+4-4
2015-10-20 21:27dwm: cleanup: free schemes and cursors as arrayHiltjo Posthuma1+8-9
2015-10-20 21:10config.h: use common default font, pass Xft font name to dmenuHiltjo Posthuma1+2-4
2015-10-20 21:01config.mk: add $FREETYPELIBS and $FREETYPEINC, simpler to override (ports and *BSDs)Hiltjo Posthuma1+8-2
2015-03-06 04:26Add Xft and follback-fonts support to graphics libEric Pruitt6+299-103
2014-11-23 14:25removed .hgtags, thanks Dimitris for spottingAnselm R Garbe1+0-73
2014-08-11 05:24applied Hiltjo's resize/move limitationAnselm R Garbe2+12-2
2014-05-29 16:05same as before with dwm.c as wellAnselm R Garbe1+1-1
2014-05-29 16:02updated copyright notice in LICENSE fileAnselm R Garbe1+1-1
2013-08-27 18:39applied Lukas' focus suggestion at startup, thanksAnselm R Garbe1+1-0
2013-08-02 20:40applied improved version of Martti Kühne's dmenu/multi monitor approach from dwm, no dmenuspawn requiredAnselm R Garbe2+4-1
2013-07-20 07:08do not take our font declaration as default for stAnselm R Garbe1+1-1
2013-06-23 19:53applied Jochen's drw_text patch, thanksAnselm R Garbe1+1-1
2013-06-19 17:35applied Julian's enum approach, however renamed theme into scheme resp. Theme into ClrSchemeAnselm R Garbe3+37-40
2013-06-16 13:20finished libsl/drw integrationAnselm R Garbe3+185-264
2013-05-02 15:31include font argument for st by defaultAnselm R Garbe1+1-1
2013-05-01 13:45added st to SEE ALSO sectionAnselm R Garbe1+2-1
2013-05-01 13:41use st as default terminal from now onAnselm R Garbe2+2-2
2013-05-01 13:39shut up about deprecated Xlib functionsAnselm R Garbe1+1-1
2013-04-17 19:21renamed draw into drwAnselm R Garbe6+265-268
2012-12-09 18:11continued with draw.c and draw.h implementation, now the integration beginsAnselm R Garbe2+71-29
2012-12-08 09:13continued with draw.c abstraction, also started util.{h,c} implementation, that will be used by draw.c as wellAnselm R Garbe6+80-29
2012-11-18 16:52removed DDC, all is Draw-dependentanselm@garbe.us2+40-82
1612 more commits remaining, fetch the repository
+ + diff --git a/swh/lister/stagit/tests/data/https_codemadness.org/git_stagit_log.html b/swh/lister/stagit/tests/data/https_codemadness.org/git_stagit_log.html new file mode 100644 index 0000000..ce1d670 --- /dev/null +++ b/swh/lister/stagit/tests/data/https_codemadness.org/git_stagit_log.html @@ -0,0 +1,125 @@ + + + + + +Log - stagit - static git page generator + + + + + + + +

stagit

static git page generator +
git clone git://git.codemadness.org/stagit
+Log | Files | Refs | README | LICENSE
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DateCommit messageAuthorFiles+-
2023-04-10 10:33in Atom (RFC4287) an atomTextConstruct specifies the type text as the defaultHiltjo Posthuma1+2-2
2022-08-16 11:17prefix usage with the common prefix "usage: "Hiltjo Posthuma2+2-2
2022-08-07 08:10bump version to 1.2Hiltjo Posthuma1+1-1
2022-07-26 21:50Makefile: add workaround for distros still using older libgit2 versionsHiltjo Posthuma1+4-0
2022-05-27 19:29Revert "remain compatible with slightly older libgit versions for now"Hiltjo Posthuma2+0-4
2022-05-24 12:07remain compatible with slightly older libgit versions for nowHiltjo Posthuma2+4-0
2022-05-24 08:58Allow git to run on an other user repositoryAnton Lindqvist2+4-0
2022-04-02 15:35bump version to 1.1Hiltjo Posthuma1+1-1
2022-03-19 11:22improve stream read and write error handlingHiltjo Posthuma2+42-9
2022-03-15 15:58add dark mode support for the example stylesheetHiltjo Posthuma1+48-0
2022-01-03 11:22bump LICENSE yearHiltjo Posthuma1+1-1
2022-01-03 11:21libgit2 config opts: set the search to an empty pathHiltjo Posthuma2+8-0
2021-12-14 19:51do not percent-encode: ',' or '-' or '.' it looks uglyHiltjo Posthuma2+4-4
2021-11-30 17:13bump version to 1.0Hiltjo Posthuma1+1-1
2021-11-16 17:17Print the number of remaining commitsQuentin Rameau1+16-11
2021-11-16 13:24ignore '\r' in writing the blob aswellHiltjo Posthuma1+3-2
2021-11-16 13:16percent encode characters in path namesHiltjo Posthuma2+50-6
2021-11-16 10:44encode the name, it could contain XML entitiesHiltjo Posthuma1+6-4
2021-08-03 17:22man pages: add EXAMPLES sectionHiltjo Posthuma2+16-2
2021-07-30 23:09small typo fixes and url -> URLHiltjo Posthuma3+4-4
2021-05-27 10:41bump version to 0.9.6Hiltjo Posthuma1+1-1
2021-05-18 09:42man page: codemadness is the primary server. make logo brandless (not 2f30)Hiltjo Posthuma3+3-2
2021-05-18 08:38README: improve a bit the usage examplesQuentin Rameau1+8-3
2021-05-05 17:15do not simplify the history by first-parentHiltjo Posthuma2+0-3
2021-03-25 17:17tiny comment changeHiltjo Posthuma1+1-1
2021-03-25 17:13add function to print a single line, ignoring \r and \nHiltjo Posthuma1+22-1
2021-03-19 10:29add meta viewport on stagit-index tooHiltjo Posthuma1+1-0
2021-03-14 15:23bump version to 0.9.5Hiltjo Posthuma1+1-1
2021-03-05 11:47LICENSE: updateHiltjo Posthuma1+0-1
2021-03-05 11:44change STAGIT_BASEURL to an -u option and also update the example scriptHiltjo Posthuma4+13-13
2021-03-05 10:51add $STAGIT_BASEURL environment variable to make Atom links absoluteHiltjo Posthuma2+15-3
2021-03-05 10:50README: mention tags.xml featureHiltjo Posthuma1+2-1
2021-01-09 13:59micro-optimization: fputc (function) -> putc (macro/inline function)Hiltjo Posthuma2+8-8
2021-01-08 13:32LICENSE: bump yearHiltjo Posthuma1+1-1
2020-11-28 11:28fix warning with libgit2 v0.99+, remain compatible with older versionsHiltjo Posthuma1+7-8
2020-11-16 22:24add abbreviated commit hash to submodule fileOscar Benedito1+5-2
2020-11-15 20:11add meta viewport for scaling on mobileHiltjo Posthuma1+1-0
2020-11-15 20:11style.css: improve contrastHiltjo Posthuma1+3-3
2020-11-15 19:59use size_t to count linesHiltjo Posthuma1+7-7
2020-11-15 19:58avoid shadowed `name' global variableHiltjo Posthuma1+3-3
2020-11-15 19:58refs_cmp: remove unneeded castHiltjo Posthuma1+1-2
2020-11-15 19:58use LEN() macros for arraysHiltjo Posthuma1+4-2
2020-08-17 14:09bump version to 0.9.4Hiltjo Posthuma1+1-1
2020-08-10 14:09fix a small memleak in writeatom()Hiltjo Posthuma1+6-10
2020-08-08 18:01stagit-index: remove unveil support for argvHiltjo Posthuma1+0-4
2020-08-05 22:11fix submodule lookup in bare reposkst1+2-3
2020-07-20 12:15regression: do not show unset or empty tagsHiltjo Posthuma1+1-1
2020-07-19 14:41refactor get reference, add another feed for tags/releasesHiltjo Posthuma2+146-86
2020-07-19 12:07sort branches and tags by time (descending)Hiltjo Posthuma1+92-72
2020-02-23 19:30bump version to 0.9.3Hiltjo Posthuma1+1-1
2020-02-21 12:46atom.xml: improve output format a bitHiltjo Posthuma1+2-2
2020-02-12 22:20improve includes, stagit-index does not need compat.hHiltjo Posthuma2+3-7
2019-12-29 18:02style.css: highlight anchor ids, useful for linking highlighting lines in a diffHiltjo Posthuma1+4-0
2019-12-22 11:53small code-style white-space/newlineHiltjo Posthuma1+1-0
2019-12-01 17:31add OpenBSD unveil supportHiltjo Posthuma2+10-0
2019-12-01 17:26some README tweaks and rewording, reorderingHiltjo Posthuma1+9-7
2019-12-01 17:26Makefile improvementsHiltjo Posthuma2+20-37
2019-05-05 17:11README: works also on NetBSDLeonardo Taccari1+1-1
2019-04-23 18:53bump version to 0.9.2Hiltjo Posthuma1+1-1
2019-03-16 12:07escape HTML in pathsHiltjo Posthuma1+12-4
2019-03-09 11:39pedantic snprintf() improvementHiltjo Posthuma2+4-4
2019-03-01 10:49fix issues in example scriptsHiltjo Posthuma2+2-2
2019-02-22 10:13bump version to 0.9.1Hiltjo Posthuma1+1-1
2019-02-09 14:02Makefile: add DOCPREFIX for installing docs in portsHiltjo Posthuma2+11-9
2019-02-07 20:00change order of commits in log from most recent to old to applied orderHiltjo Posthuma4+4-7
2019-02-03 14:24fix compile: libgit2 0.28 changed giterr_last to git_error_lastHiltjo Posthuma2+2-6
2018-12-25 12:44bump version to 0.9Hiltjo Posthuma1+1-1
2018-12-25 12:39style: keep default browser style for links, except patch chunksHiltjo Posthuma1+4-5
2018-11-18 17:08in the diffstat prefix the type of change, allow CSS stylingHiltjo Posthuma2+18-1
2018-11-18 17:07don't use a heuristic for renames, the content must match exactlyHiltjo Posthuma1+3-1
2018-11-18 17:06detect filetype changes in diff (for example a normal file to symlink)Hiltjo Posthuma1+3-1
2018-11-11 17:23detect more names for README and LICENSEHiltjo Posthuma1+30-16
2018-11-11 17:09detect copies and renames based on the threshold of changesHiltjo Posthuma1+9-0
2018-08-19 20:03simplify range checkHiltjo Posthuma1+1-4
2018-08-19 12:06stagit: log: indicate when using the -l option and there are more commitsHiltjo Posthuma1+4-0
2018-05-25 16:43assume OpenBSD 5.9+, always try pledge(2) on OpenBSDHiltjo Posthuma3+4-11
2018-05-09 16:51Clarify usage regarding -c and -l mutual exclusionQuentin Rameau1+1-1
2018-04-07 12:14bump version to 0.8Hiltjo Posthuma1+1-1
2018-04-06 17:28stagit-index: pledge after git_libgit2_initHiltjo Posthuma1+4-3
2018-03-26 18:08pledge after git_libgit2_initHiltjo Posthuma1+8-5
2018-03-18 14:34optimization: only diff the tree when it is needed for the diffstat...Hiltjo Posthuma1+27-17
2018-01-28 15:27README: improve tarball generation by tag exampleHiltjo Posthuma1+8-2
2018-01-21 15:47stagit: add -l option: limit the amount of commits for the log.html fileHiltjo Posthuma2+50-13
2018-01-21 14:23bump LICENSE to 2018Hiltjo Posthuma1+1-1
2018-01-20 12:18style.css: improve compatibility with older browsers, namely dilloHiltjo Posthuma1+5-0
2017-12-02 18:04Remove a non-portable syntax in MakefileQuentin Rameau1+1-1
2017-11-25 15:06Makefile: bump version to 0.7.2Hiltjo Posthuma1+1-1
2017-11-25 15:02post-receive: fix warning in script for example when pushing deleted tagsHiltjo Posthuma1+1-0
2017-11-25 11:09Sort style.css in alphabetical orderAaron Marcher1+2-2
2017-11-19 13:58Makefile: bump version to 0.7.1Hiltjo Posthuma1+1-1
2017-11-19 13:52fix: add "fattr" pledge for chmod(2) for cachefileHiltjo Posthuma1+5-7
2017-11-18 11:53Makefile: bump version to 0.7Hiltjo Posthuma1+1-1
2017-11-11 00:34Makefile: remove unused $SCRIPTSHiltjo Posthuma1+5-5
2017-11-10 19:55fix file permissions for cachefile and respect umask(2)Hiltjo Posthuma1+10-2
2017-11-10 19:53more verbose errors: show which file failed on openHiltjo Posthuma1+2-2
2017-10-31 22:25README: clarify some consHiltjo Posthuma1+15-6
2017-10-31 22:20post-receive hook: force UTF-8 localeHiltjo Posthuma1+4-0
2017-09-04 16:14allow umask to handle permissionsHiltjo Posthuma1+1-1
2017-08-09 16:52ignore object 0000000000000000000000000000000000000000Hiltjo Posthuma1+2-0
2017-08-02 16:54bump version for a release soonHiltjo Posthuma1+1-1
263 more commits remaining, fetch the repository
+ + diff --git a/swh/lister/stagit/tests/test_lister.py b/swh/lister/stagit/tests/test_lister.py new file mode 100644 index 0000000..15fbdcb --- /dev/null +++ b/swh/lister/stagit/tests/test_lister.py @@ -0,0 +1,128 @@ +# Copyright (C) 2023 The Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +from typing import List + +import pytest + +from swh.lister import __version__ +from swh.lister.pattern import ListerStats +from swh.lister.stagit.lister import StagitLister, _parse_date + +MAIN_INSTANCE = "codemadness.org" +MAIN_INSTANCE_URL = f"https://{MAIN_INSTANCE}/git" + + +def test_lister_stagit_instantiate(swh_scheduler): + """Building a lister with either a url or an instance is supported.""" + url = MAIN_INSTANCE_URL + lister = StagitLister(swh_scheduler, url=url) + assert lister is not None + assert lister.url == url + + assert StagitLister(swh_scheduler, instance=MAIN_INSTANCE) is not None + assert lister is not None + assert lister.url == url + + +def test_lister_stagit_fail_to_instantiate(swh_scheduler): + """Building a lister with neither a url nor an instance should raise""" + # ... 
It will raise without any of those + with pytest.raises(ValueError, match="'url' or 'instance'"): + StagitLister(swh_scheduler) + + +def test_lister_stagit_get_pages(requests_mock_datadir, swh_scheduler): + """Computing the number of repositories scraped during a listing.""" + url = MAIN_INSTANCE_URL + lister_stagit = StagitLister(swh_scheduler, url=url) + + expected_nb_origins = 4 + + repos: List[List[str]] = list(lister_stagit.get_pages()) + flattened_repos = sum(repos, []) + assert len(flattened_repos) == expected_nb_origins + + for listed_url in flattened_repos: + assert MAIN_INSTANCE in listed_url["url"] + + +def test_lister_stagit_run(requests_mock_datadir, swh_scheduler): + """Stagit lister nominal listing case.""" + + url = MAIN_INSTANCE_URL + lister_stagit = StagitLister(swh_scheduler, url=url) + + stats = lister_stagit.run() + + expected_nb_origins = 4 # main page will get filtered out + assert stats == ListerStats(pages=1, origins=expected_nb_origins) + + # test page parsing + scheduler_origins = swh_scheduler.get_listed_origins( + lister_stagit.lister_obj.id + ).results + assert len(scheduler_origins) == expected_nb_origins + + # test listed repositories + for listed_origin in scheduler_origins: + assert listed_origin.visit_type == "git" + assert MAIN_INSTANCE in listed_origin.url + assert listed_origin.last_update is not None + + # test user agent content + for request in requests_mock_datadir.request_history: + assert "User-Agent" in request.headers + user_agent = request.headers["User-Agent"] + assert "Software Heritage stagit lister" in user_agent + assert __version__ in user_agent + + +def test_lister_stagit_get_pages_with_pages_and_retry( + requests_mock_datadir, requests_mock, datadir, mocker, swh_scheduler +): + """Rate-limited pages are retried after some time so listing can proceed.""" + url = MAIN_INSTANCE_URL + with open(os.path.join(datadir, f"https_{MAIN_INSTANCE}/git"), "rb") as page: + requests_mock.get( + url, + [ + {"content": None, 
"status_code": 429}, + {"content": None, "status_code": 429}, + {"content": page.read(), "status_code": 200}, + ], + ) + + lister_stagit = StagitLister(swh_scheduler, url=url) + + mocker.patch.object(lister_stagit.http_request.retry, "sleep") + + pages: List[List[str]] = list(lister_stagit.get_pages()) + flattened_repos = sum(pages, []) + assert len(pages) == 1 + assert len(flattened_repos) == 4 + + +def test_lister_stagit_get_origin_from_repo_failing( + swh_scheduler, requests_mock_datadir +): + """Instances whose summary does not return anything are filtered out.""" + # This instance has some more origins which no longer return their summary + lister_stagit = StagitLister(swh_scheduler, url=f"https://{MAIN_INSTANCE}/foobar") + + stats = lister_stagit.run() + + # so they are filtered out, only the 4 we know are thus listed + expected_nb_origins = 4 + assert stats == ListerStats(pages=1, origins=expected_nb_origins) + + +def test__parse_date(): + assert _parse_date(None) is None + assert _parse_date("No commits") is None + + date = _parse_date("2022-08-26 12:48") + assert date is not None + assert date.tzinfo is not None diff --git a/swh/lister/stagit/tests/test_tasks.py b/swh/lister/stagit/tests/test_tasks.py new file mode 100644 index 0000000..1e5cc34 --- /dev/null +++ b/swh/lister/stagit/tests/test_tasks.py @@ -0,0 +1,30 @@ +# Copyright (C) 2023 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_gitweb_lister_task( + swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker +): + # setup the mocked StagitLister (the class the stagit tasks module instantiates) + lister = mocker.patch("swh.lister.stagit.tasks.StagitLister") + lister.from_configfile.return_value = lister + lister.run.return_value = ListerStats(pages=10, origins=500) + + kwargs = dict( + 
url="https://git.codemadness.org", instance="codemadness.org", max_pages=1 + ) + + res = swh_scheduler_celery_app.send_task( + "swh.lister.stagit.tasks.StagitListerTask", + kwargs=kwargs, + ) + assert res + res.wait() + assert res.successful() + + lister.from_configfile.assert_called_once_with(**kwargs) + lister.run.assert_called_once_with() diff --git a/swh/lister/tests/test_cli.py b/swh/lister/tests/test_cli.py index 8ce3c60..00b11f2 100644 --- a/swh/lister/tests/test_cli.py +++ b/swh/lister/tests/test_cli.py @@ -48,6 +48,9 @@ lister_args = { "gitiles": { "instance": "gerrit.googlesource.com", }, + "stagit": { + "url": "https://git.codemadness.org", + }, }