pubdev: Update User-Agent request header value

Use a value that follows the good practice recommended by the pub.dev REST API documentation.

https://github.com/dart-lang/pub/blob/master/doc/repository-spec-v2.md
This commit is contained in:
Antoine Lambert 2022-09-07 12:15:28 +02:00
parent 44560c2383
commit c819cc237d
2 changed files with 17 additions and 3 deletions

View file

@@ -14,9 +14,15 @@ from swh.lister.utils import throttling_retry
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from .. import USER_AGENT
from .. import __version__
from ..pattern import CredentialsType, StatelessLister
# User-Agent header value identifying this lister: tool name, package version
# and a contact URL. Format chosen to match the recommendation in the pub.dev
# repository specification:
# https://github.com/dart-lang/pub/blob/master/doc/repository-spec-v2.md#metadata-headers
USER_AGENT = (
f"Software Heritage PubDev Lister v{__version__} "
"(+https://www.softwareheritage.org/contact)"
)
logger = logging.getLogger(__name__)
# Aliasing the page results returned by `get_pages` method from the lister.

View file

@@ -3,7 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.pubdev.lister import PubDevLister
from swh.lister.pubdev.lister import USER_AGENT, PubDevLister
expected_origins = {
"https://pub.dev/packages/Autolinker",
@@ -28,11 +28,19 @@ def test_pubdev_lister(datadir, requests_mock_datadir, swh_scheduler):
assert origin.last_update is not None
def _match_request(request):
    """Tell whether ``request`` carries the expected ``User-Agent`` header.

    Compares the header value read from ``request.headers`` with the
    ``USER_AGENT`` constant and returns the result of that equality test.
    """
    sent_user_agent = request.headers.get("User-Agent")
    return sent_user_agent == USER_AGENT
def test_pubdev_lister_skip_package(
datadir, requests_mock_datadir, swh_scheduler, requests_mock
):
requests_mock.get("https://pub.dev/api/packages/Autolinker", status_code=404)
requests_mock.get(
"https://pub.dev/api/packages/Autolinker",
status_code=404,
additional_matcher=_match_request,
)
lister = PubDevLister(scheduler=swh_scheduler)
res = lister.run()