gitlab: allow ignoring projects with certain path prefixes
Some GitLab instances use specific namespaces for transient repositories that are not worth archiving. For example, gitlab.org has a set of QA namespaces used for integration testing of their production deployments, and Drupal has an `issues/` namespace containing forks of repositories that are only used for collaboration on merge requests and are not very useful to archive.
This commit is contained in:
parent
64267f8f50
commit
5ea79ee3e0
2 changed files with 45 additions and 1 deletions
|
@ -6,7 +6,7 @@
|
|||
from dataclasses import asdict, dataclass
|
||||
import logging
|
||||
import random
|
||||
from typing import Any, Dict, Iterator, Optional, Tuple
|
||||
from typing import Any, Dict, Iterator, List, Optional, Tuple
|
||||
from urllib.parse import parse_qs, urlencode, urlparse
|
||||
|
||||
import iso8601
|
||||
|
@ -92,6 +92,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
|
|||
instance: a specific instance name (e.g. gitlab, tor, git-kernel, ...),
|
||||
url network location will be used if not provided
|
||||
incremental: defines if incremental listing is activated or not
|
||||
ignored_project_prefixes: List of prefixes of project paths to ignore
|
||||
|
||||
"""
|
||||
|
||||
|
@ -103,6 +104,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
|
|||
instance: Optional[str] = None,
|
||||
credentials: Optional[CredentialsType] = None,
|
||||
incremental: bool = False,
|
||||
ignored_project_prefixes: Optional[List[str]] = None,
|
||||
):
|
||||
if name is not None:
|
||||
self.LISTER_NAME = name
|
||||
|
@ -115,6 +117,9 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
|
|||
self.incremental = incremental
|
||||
self.last_page: Optional[str] = None
|
||||
self.per_page = 100
|
||||
self.ignored_project_prefixes: Optional[Tuple[str, ...]] = None
|
||||
if ignored_project_prefixes:
|
||||
self.ignored_project_prefixes = tuple(ignored_project_prefixes)
|
||||
|
||||
self.session.headers.update({"Accept": "application/json"})
|
||||
|
||||
|
@ -203,6 +208,10 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
|
|||
|
||||
repositories = page_result.repositories if page_result.repositories else []
|
||||
for repo in repositories:
|
||||
if self.ignored_project_prefixes and repo["path_with_namespace"].startswith(
|
||||
self.ignored_project_prefixes
|
||||
):
|
||||
continue
|
||||
visit_type = repo.get("vcs_type", "git")
|
||||
visit_type = VCS_MAPPING.get(visit_type, visit_type)
|
||||
yield ListedOrigin(
|
||||
|
|
|
@ -356,3 +356,38 @@ def test_lister_gitlab_url_computation(url, swh_scheduler):
|
|||
)
|
||||
def test__parse_id_after(url, expected_result):
|
||||
assert _parse_id_after(url) == expected_result
|
||||
|
||||
|
||||
def test_lister_gitlab_ignored_project_prefixes(datadir, swh_scheduler, requests_mock):
    """Projects whose path starts with an ignored prefix must not be listed."""
    instance = "gitlab.com"
    lister = GitLabLister(
        swh_scheduler,
        url=api_url(instance),
        instance=instance,
        ignored_project_prefixes=["jonan/"],
    )

    # Serve a single fixture page for the lister's first page URL.
    page = gitlab_page_response(datadir, instance, 1)
    requests_mock.get(
        lister.page_url(),
        [{"json": page}],
        additional_matcher=_match_request,
    )

    stats = lister.run()

    # The fixture page holds 2 projects under jonan/, which must be skipped.
    expected_nb_origins = len(page) - 2
    assert stats == ListerStats(pages=1, origins=expected_nb_origins)

    recorded = lister.scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(recorded) == expected_nb_origins

    # Every recorded origin belongs to the instance but not to the ignored
    # namespace.
    for origin in recorded:
        assert origin.visit_type == "git"
        assert origin.url.startswith(f"https://{instance}")
        assert not origin.url.startswith(f"https://{instance}/jonan/")
        assert origin.last_update is not None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue