gitea, gogs: Ensure query parameters are not duplicated in API URLs
Gitea API return next pagination link with all query parameters provided to an API request. As we were also passing a dict of fixed query parameters to the page_request method, some query parameters ended up having multiple instances in the URL for fetching a new page of repositories data. So each time a new page was requested, new instances of these parameters were appended to the URL which could result in a really long URL if the number of pages to retrieve is high and make the request fail. Also remove a debug log already present in http_request method.
This commit is contained in:
parent
aaae1a6b0b
commit
323e277482
2 changed files with 17 additions and 7 deletions
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2023 The Software Heritage developers
|
||||
# Copyright (C) 2024 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -45,7 +45,6 @@ def _parse_page_id(url: Optional[str]) -> int:
|
|||
|
||||
|
||||
class GogsLister(Lister[GogsListerState, GogsListerPage]):
|
||||
|
||||
"""List origins from the Gogs
|
||||
|
||||
Gogs API documentation: https://github.com/gogs/docs-api
|
||||
|
@ -125,10 +124,8 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
|
|||
return asdict(state)
|
||||
|
||||
def page_request(
|
||||
self, url: str, params: Dict[str, Any]
|
||||
self, url: str, params: Optional[Dict[str, Any]] = None
|
||||
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||
logger.debug("Fetching URL %s with params %s", url, params)
|
||||
|
||||
try:
|
||||
response = self.http_request(url, params=params)
|
||||
except HTTPError as http_error:
|
||||
|
@ -177,7 +174,12 @@ class GogsLister(Lister[GogsListerState, GogsListerPage]):
|
|||
yield GogsListerPage(repos=repos, next_link=next_link)
|
||||
|
||||
if next_link is not None:
|
||||
body, links = self.page_request(next_link, self.query_params)
|
||||
parsed_url = urlparse(next_link)
|
||||
query_params = {**self.query_params, **parse_qs(parsed_url.query)}
|
||||
next_link = parsed_url._replace(
|
||||
query=urlencode(query_params, doseq=True)
|
||||
).geturl()
|
||||
body, links = self.page_request(next_link)
|
||||
|
||||
def get_origins_from_page(self, page: GogsListerPage) -> Iterator[ListedOrigin]:
|
||||
"""Convert a page of Gogs repositories into a list of ListedOrigins"""
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# Copyright (C) 2022-2024 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
@ -7,6 +7,7 @@ import json
|
|||
from pathlib import Path
|
||||
from typing import List
|
||||
from unittest.mock import Mock
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
import pytest
|
||||
from requests import HTTPError
|
||||
|
@ -145,6 +146,13 @@ def test_gogs_full_listing(
|
|||
lister.get_state_from_scheduler().last_seen_next_link == P3
|
||||
) # P3 didn't provide any next link so it remains the last_seen_next_link
|
||||
|
||||
# check each query parameter has a single instance in Gogs API URLs
|
||||
for request in requests_mock.request_history:
|
||||
assert all(
|
||||
len(values) == 1
|
||||
for values in parse_qs(urlparse(request.url).query).values()
|
||||
)
|
||||
|
||||
|
||||
def test_gogs_auth_instance(
|
||||
swh_scheduler, requests_mock, trygogs_p1, trygogs_p2, trygogs_p3_empty
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue