gitea, gogs: Ensure query parameters are not duplicated in API URLs

Gitea API return next pagination link with all query parameters provided
to an API request.

As we were also passing a dict of fixed query parameters to the page_request
method, some query parameters ended up having multiple instances in the URL
for fetching a new page of repositories data. So each time a new page was
requested, new instances of these parameters were appended to the URL which
could result in a really long URL if the number of pages to retrieve is high
and make the request fail.

Also remove a debug log already present in http_request method.
This commit is contained in:
Antoine Lambert 2024-06-05 15:14:07 +02:00
parent aaae1a6b0b
commit 323e277482
2 changed files with 17 additions and 7 deletions

View file

@ -1,4 +1,4 @@
# Copyright (C) 2023 The Software Heritage developers
# Copyright (C) 2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -45,7 +45,6 @@ def _parse_page_id(url: Optional[str]) -> int:
class GogsLister(Lister[GogsListerState, GogsListerPage]):
"""List origins from the Gogs
Gogs API documentation: https://github.com/gogs/docs-api
@ -125,10 +124,8 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
return asdict(state)
def page_request(
self, url: str, params: Dict[str, Any]
self, url: str, params: Optional[Dict[str, Any]] = None
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
logger.debug("Fetching URL %s with params %s", url, params)
try:
response = self.http_request(url, params=params)
except HTTPError as http_error:
@ -177,7 +174,12 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
yield GogsListerPage(repos=repos, next_link=next_link)
if next_link is not None:
body, links = self.page_request(next_link, self.query_params)
parsed_url = urlparse(next_link)
query_params = {**self.query_params, **parse_qs(parsed_url.query)}
next_link = parsed_url._replace(
query=urlencode(query_params, doseq=True)
).geturl()
body, links = self.page_request(next_link)
def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
"""Convert a page of Gogs repositories into a list of ListedOrigins"""

View file

@ -1,4 +1,4 @@
# Copyright (C) 2022-2023 The Software Heritage developers
# Copyright (C) 2022-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -7,6 +7,7 @@ import json
from pathlib import Path
from typing import List
from unittest.mock import Mock
from urllib.parse import parse_qs, urlparse
import pytest
from requests import HTTPError
@ -145,6 +146,13 @@ def test_gogs_full_listing(
lister.get_state_from_scheduler().last_seen_next_link == P3
) # P3 didn't provide any next link so it remains the last_seen_next_link
# check each query parameter has a single instance in Gogs API URLs
for request in requests_mock.request_history:
assert all(
len(values) == 1
for values in parse_qs(urlparse(request.url).query).values()
)
def test_gogs_auth_instance(
swh_scheduler, requests_mock, trygogs_p1, trygogs_p2, trygogs_p3_empty