gitlab: Add test on incremental implementation
Note that the current implementation restarts each new visit from the last next_page link seen (that link is what is stored in the lister state, so the URL does not have to be recomputed). This means that this page will be fetched at least twice: once during the previous visit and once again at the start of the next one. This should not pose any problem, as the listing is idempotent. Related to T2987
This commit is contained in:
parent
84dd616ab6
commit
1f911401a1
3 changed files with 90 additions and 2 deletions
|
@ -100,6 +100,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
|
|||
instance=instance,
|
||||
)
|
||||
self.incremental = incremental
|
||||
self.last_page: Optional[str] = None
|
||||
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update(
|
||||
|
@ -136,12 +137,13 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
|
|||
|
||||
def get_pages(self) -> Iterator[PageResult]:
|
||||
next_page: Optional[str]
|
||||
if self.incremental and self.state is not None:
|
||||
if self.incremental and self.state and self.state.last_seen_next_link:
|
||||
next_page = self.state.last_seen_next_link
|
||||
else:
|
||||
next_page = f"{self.url}projects?page=1&order_by=id&sort=asc&per_page=20"
|
||||
|
||||
while next_page:
|
||||
self.last_page = next_page
|
||||
page_result = self.get_page_result(next_page)
|
||||
yield page_result
|
||||
next_page = page_result.next_page
|
||||
|
@ -171,11 +173,14 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
|
|||
if self.incremental:
|
||||
# link: https://${project-api}/?...&page=2x...
|
||||
next_page = page_result.next_page
|
||||
if not next_page and self.last_page:
|
||||
next_page = self.last_page
|
||||
|
||||
if next_page:
|
||||
page_id = _parse_page_id(next_page)
|
||||
previous_next_page = self.state.last_seen_next_link
|
||||
previous_page_id = _parse_page_id(previous_next_page)
|
||||
|
||||
if previous_next_page is None or (
|
||||
previous_page_id and page_id and previous_page_id < page_id
|
||||
):
|
||||
|
@ -194,7 +199,7 @@ class GitLabLister(Lister[GitLabListerState, PageResult]):
|
|||
scheduler_state = self.get_state_from_scheduler()
|
||||
previous_next_page_id = _parse_page_id(scheduler_state.last_seen_next_link)
|
||||
|
||||
if (
|
||||
if (not previous_next_page_id and next_page_id) or (
|
||||
previous_next_page_id
|
||||
and next_page_id
|
||||
and previous_next_page_id < next_page_id
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
[
|
||||
{
|
||||
"avatar_url": null,
|
||||
"created_at": "2020-11-18T18:26:08.538Z",
|
||||
"default_branch": "master",
|
||||
"description": "PID WRapper for urdfdom and urdfdom_headers project, provided by ROS but independent from ROS.",
|
||||
"forks_count": 0,
|
||||
"http_url_to_repo": "https://gite.lirmm.fr/rob-miscellaneous/wrappers/urdfdom.git",
|
||||
"id": 4363,
|
||||
"last_activity_at": "2020-11-19T08:56:18.573Z",
|
||||
"name": "urdfdom",
|
||||
"name_with_namespace": "rob-miscellaneous / wrappers / urdfdom",
|
||||
"namespace": {},
|
||||
"path": "urdfdom",
|
||||
"path_with_namespace": "rob-miscellaneous/wrappers/urdfdom",
|
||||
"readme_url": "https://gite.lirmm.fr/rob-miscellaneous/wrappers/urdfdom/-/blob/master/README.md",
|
||||
"ssh_url_to_repo": "git@gite.lirmm.fr:rob-miscellaneous/wrappers/urdfdom.git",
|
||||
"star_count": 0,
|
||||
"tag_list": [],
|
||||
"web_url": "https://gite.lirmm.fr/rob-miscellaneous/wrappers/urdfdom"
|
||||
}
|
||||
]
|
|
@ -102,6 +102,67 @@ def test_lister_gitlab_with_pages(swh_scheduler, requests_mock, datadir):
|
|||
assert listed_origin.url.startswith(f"https://{instance}")
|
||||
|
||||
|
||||
def test_lister_gitlab_incremental(swh_scheduler, requests_mock, datadir):
|
||||
"""Gitlab lister supports pagination
|
||||
|
||||
"""
|
||||
instance = "gite.lirmm.fr"
|
||||
url = api_url(instance)
|
||||
|
||||
url_page1 = url_page(url, 1)
|
||||
response1 = gitlab_page_response(datadir, instance, 1)
|
||||
url_page2 = url_page(url, 2)
|
||||
response2 = gitlab_page_response(datadir, instance, 2)
|
||||
url_page3 = url_page(url, 3)
|
||||
response3 = gitlab_page_response(datadir, instance, 3)
|
||||
|
||||
requests_mock.get(
|
||||
url_page1,
|
||||
[{"json": response1, "headers": {"Link": f"<{url_page2}>; rel=next"}}],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
requests_mock.get(
|
||||
url_page2, [{"json": response2}], additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
lister = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True)
|
||||
listed_result = lister.run()
|
||||
|
||||
expected_nb_origins = len(response1) + len(response2)
|
||||
assert listed_result == ListerStats(pages=2, origins=expected_nb_origins)
|
||||
assert lister.state.last_seen_next_link == url_page2
|
||||
|
||||
lister2 = GitLabLister(swh_scheduler, url=url, instance=instance, incremental=True)
|
||||
requests_mock.reset()
|
||||
# Lister will start back at the last stop
|
||||
requests_mock.get(
|
||||
url_page2,
|
||||
[{"json": response2, "headers": {"Link": f"<{url_page3}>; rel=next"}}],
|
||||
additional_matcher=_match_request,
|
||||
)
|
||||
requests_mock.get(
|
||||
url_page3, [{"json": response3}], additional_matcher=_match_request,
|
||||
)
|
||||
|
||||
listed_result2 = lister2.run()
|
||||
|
||||
assert listed_result2 == ListerStats(
|
||||
pages=2, origins=len(response2) + len(response3)
|
||||
)
|
||||
assert lister2.state.last_seen_next_link == url_page3
|
||||
|
||||
assert lister.lister_obj.id == lister2.lister_obj.id
|
||||
scheduler_origins = lister2.scheduler.get_listed_origins(
|
||||
lister2.lister_obj.id
|
||||
).origins
|
||||
|
||||
assert len(scheduler_origins) == len(response1) + len(response2) + len(response3)
|
||||
|
||||
for listed_origin in scheduler_origins:
|
||||
assert listed_origin.visit_type == "git"
|
||||
assert listed_origin.url.startswith(f"https://{instance}")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"url,expected_result",
|
||||
[
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue