gitlab: Add test on pagination

Related to T2987
This commit is contained in:
Antoine R. Dumont (@ardumont) 2021-01-23 15:51:18 +01:00
parent 1390a513f2
commit 84dd616ab6
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
4 changed files with 159 additions and 34 deletions

View file

@ -0,0 +1,42 @@
[
{
"avatar_url": null,
"created_at": "2021-01-02T13:25:44.175Z",
"default_branch": "master",
"description": "Miscellaneous utils that are commonly used in multiple projects.",
"forks_count": 0,
"http_url_to_repo": "https://gite.lirmm.fr/yuquan/roboticsutils.git",
"id": 4456,
"last_activity_at": "2021-01-14T11:32:50.672Z",
"name": "RoboticsUtils",
"name_with_namespace": "Wang Yuquan / RoboticsUtils",
"namespace": {},
"path": "roboticsutils",
"path_with_namespace": "yuquan/roboticsutils",
"readme_url": null,
"ssh_url_to_repo": "git@gite.lirmm.fr:yuquan/roboticsutils.git",
"star_count": 0,
"tag_list": [],
"web_url": "https://gite.lirmm.fr/yuquan/roboticsutils"
},
{
"avatar_url": "https://gite.lirmm.fr/uploads/-/system/project/avatar/4444/pacq.jpg",
"created_at": "2020-12-15T15:20:15.494Z",
"default_branch": "master",
"description": "",
"forks_count": 0,
"http_url_to_repo": "https://gite.lirmm.fr/constraint-acquisition-team/pacq.git",
"id": 4444,
"last_activity_at": "2020-12-15T19:43:53.678Z",
"name": "PACQ",
"name_with_namespace": "Constraint Acquisition Team / PACQ",
"namespace": {},
"path": "pacq",
"path_with_namespace": "constraint-acquisition-team/pacq",
"readme_url": "https://gite.lirmm.fr/constraint-acquisition-team/pacq/-/blob/master/README.md",
"ssh_url_to_repo": "git@gite.lirmm.fr:constraint-acquisition-team/pacq.git",
"star_count": 0,
"tag_list": [],
"web_url": "https://gite.lirmm.fr/constraint-acquisition-team/pacq"
}
]

View file

@ -0,0 +1,42 @@
[
{
"avatar_url": null,
"created_at": "2020-12-15T09:20:11.133Z",
"default_branch": "master",
"description": "",
"forks_count": 0,
"http_url_to_repo": "https://gite.lirmm.fr/mgardeisen/citest.git",
"id": 4440,
"last_activity_at": "2021-01-21T14:37:31.022Z",
"name": "CItest",
"name_with_namespace": "Marine Gardeisen / CItest",
"namespace": {},
"path": "citest",
"path_with_namespace": "mgardeisen/citest",
"readme_url": "https://gite.lirmm.fr/mgardeisen/citest/-/blob/master/README.md",
"ssh_url_to_repo": "git@gite.lirmm.fr:mgardeisen/citest.git",
"star_count": 0,
"tag_list": [],
"web_url": "https://gite.lirmm.fr/mgardeisen/citest"
},
{
"avatar_url": null,
"created_at": "2020-12-11T09:54:02.710Z",
"default_branch": "master",
"description": "Can be used to enforce the conventional commits specification on a package, generate a changelog recommend the next version to release\r\nSee see https://conventionalcommits.org",
"forks_count": 0,
"http_url_to_repo": "https://gite.lirmm.fr/pid/environments/conventional_commits.git",
"id": 4428,
"last_activity_at": "2021-01-08T11:11:54.178Z",
"name": "conventional_commits",
"name_with_namespace": "pid / environments / conventional_commits",
"namespace": {},
"path": "conventional_commits",
"path_with_namespace": "pid/environments/conventional_commits",
"readme_url": "https://gite.lirmm.fr/pid/environments/conventional_commits/-/blob/master/README.md",
"ssh_url_to_repo": "git@gite.lirmm.fr:pid/environments/conventional_commits.git",
"star_count": 0,
"tag_list": [],
"web_url": "https://gite.lirmm.fr/pid/environments/conventional_commits"
}
]

View file

@ -3,62 +3,103 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import logging
from pathlib import Path
from typing import Dict, List
import pytest
from swh.lister import USER_AGENT
from swh.lister.gitlab.lister import GitLabLister, _parse_page_id
from swh.lister.pattern import ListerStats
logger = logging.getLogger(__name__)
@pytest.fixture
def lister_gitlab(swh_scheduler):
    """GitLab lister instance targeting the gitlab.com api (v4)."""
    return GitLabLister(swh_scheduler, url="https://gitlab.com/api/v4/")
def api_url(instance: str) -> str:
    """Build the api (v4) base url of a gitlab instance.

    Args:
        instance: hostname of the gitlab instance (e.g. "gitlab.com")

    Returns:
        The https api url of that instance, ending with a trailing slash.

    """
    base_url = f"https://{instance}/api/v4/"
    return base_url
# class GitLabListerTester(HttpListerTesterBase, unittest.TestCase):
# Lister = GitLabLister
# test_re = re.compile(r"^.*/projects.*page=(\d+).*")
# lister_subdir = "gitlab"
# good_api_response_file = "data/gitlab.com/api_response.json"
# bad_api_response_file = "data/gitlab.com/api_empty_response.json"
# first_index = 1
# entries_per_page = 10
# convert_type = int
# def response_headers(self, request):
# headers = {"RateLimit-Remaining": "1"}
# if self.request_index(request) == self.first_index:
# headers.update(
# {"x-next-page": "3",}
# )
# return headers
# def mock_rate_quota(self, n, request, context):
# self.rate_limit += 1
# context.status_code = 403
# context.headers["RateLimit-Remaining"] = "0"
# one_second = int((datetime.now() + timedelta(seconds=1.5)).timestamp())
# context.headers["RateLimit-Reset"] = str(one_second)
# return '{"error":"dummy"}'
def url_page(api_url: str, page_id: int) -> str:
    """Build the url of one page of the projects listing.

    Args:
        api_url: api base url, expected to end with a trailing slash
        page_id: identifier of the page to list

    Returns:
        The projects endpoint url for that page, sorted by ascending id with
        20 entries per page.

    """
    page_url = f"{api_url}projects?page={page_id}&order_by=id&sort=asc&per_page=20"
    return page_url
def test_lister_gitlab(lister_gitlab, requests_mock_datadir):
def _match_request(request):
    """Tell whether the request carries the lister's User-Agent header."""
    user_agent = request.headers.get("User-Agent")
    return user_agent == USER_AGENT
def test_lister_gitlab(datadir, swh_scheduler, requests_mock):
    """Gitlab lister supports full listing

    A single page (without any link to a next page) is served for the
    instance, so the lister must stop after that page and record every
    project it contains.

    """
    instance = "gitlab.com"
    url = api_url(instance)

    response = gitlab_page_response(datadir, instance, 1)

    # Only requests carrying the lister's user agent are answered
    requests_mock.get(
        url_page(url, 1), [{"json": response}], additional_matcher=_match_request,
    )

    lister = GitLabLister(swh_scheduler, url=url, instance=instance)

    listed_result = lister.run()

    # Expectations are derived from the dataset rather than hard-coded, so
    # the test keeps passing when the fixture file gets updated
    expected_nb_origins = len(response)
    assert listed_result == ListerStats(pages=1, origins=expected_nb_origins)

    scheduler_origins = lister.scheduler.get_listed_origins(
        lister.lister_obj.id
    ).origins
    assert len(scheduler_origins) == expected_nb_origins

    for listed_origin in scheduler_origins:
        assert listed_origin.visit_type == "git"
        assert listed_origin.url.startswith(f"https://{instance}")
def gitlab_page_response(datadir, instance: str, page_id: int) -> List[Dict]:
    """Return list of repositories (out of test dataset)

    Args:
        datadir: root folder of the test dataset
        instance: hostname of the gitlab instance, the dataset of which is
            read from the `https_<instance>` subfolder
        page_id: identifier of the page to read, stored in the
            `api_response_page<page_id>.json` file

    Returns:
        The parsed json content of the page file, or an empty list when that
        file does not exist.

    """
    datapath = Path(datadir, f"https_{instance}", f"api_response_page{page_id}.json")
    # `exists` must be called: the bare bound method is always truthy, which
    # made the empty-list fallback unreachable (reading a missing file raised)
    return json.loads(datapath.read_text()) if datapath.exists() else []
def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
    """Gitlab lister follows the next page link when listing

    The first page advertises the second one through its `Link` header; the
    second page has no such header.

    """
    instance = "gite.lirmm.fr"
    url = api_url(instance)

    response1 = gitlab_page_response(datadir, instance, 1)
    response2 = gitlab_page_response(datadir, instance, 2)

    # First page: its Link header points to the second page
    first_page = {
        "json": response1,
        "headers": {"Link": f"<{url_page(url, 2)}>; rel=next"},
    }
    requests_mock.get(
        url_page(url, 1), [first_page], additional_matcher=_match_request,
    )

    # Second page: no Link header
    second_page = {"json": response2}
    requests_mock.get(
        url_page(url, 2), [second_page], additional_matcher=_match_request,
    )

    lister = GitLabLister(swh_scheduler, url=url)
    listed_result = lister.run()

    expected_nb_origins = len(response1) + len(response2)
    expected_stats = ListerStats(pages=2, origins=expected_nb_origins)
    assert listed_result == expected_stats

    listed_origins = lister.scheduler.get_listed_origins(lister.lister_obj.id)
    scheduler_origins = listed_origins.origins
    assert len(scheduler_origins) == expected_nb_origins

    for origin in scheduler_origins:
        assert origin.visit_type == "git"
        assert origin.url.startswith(f"https://{instance}")
@pytest.mark.parametrize(