Update value of User-Agent HTTP request header used by listers

That HTTP header value now contains both the lister name and a link to
our contact form, so that sysadmins can easily reach us if needed.

The following template is used to generate it:

"Software Heritage <lister_name> lister v<swh-lister version>
 (+https://www.softwareheritage.org/contact)"
This commit is contained in:
Antoine Lambert 2022-09-22 15:43:20 +02:00
parent db6ce12e9e
commit d5c30a3ce3
11 changed files with 38 additions and 36 deletions

View file

@ -12,15 +12,8 @@ from requests.exceptions import HTTPError
from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin
from .. import __version__
from ..pattern import CredentialsType, StatelessLister
# https://github.com/dart-lang/pub/blob/master/doc/repository-spec-v2.md#metadata-headers
USER_AGENT = (
f"Software Heritage PubDev Lister v{__version__} "
"(+https://www.softwareheritage.org/contact)"
)
logger = logging.getLogger(__name__)
# Aliasing the page results returned by `get_pages` method from the lister.
@ -51,12 +44,7 @@ class PubDevLister(StatelessLister[PubDevListerPage]):
url=self.BASE_URL,
)
self.session.headers.update(
{
"Accept": "application/json",
"User-Agent": USER_AGENT,
}
)
self.session.headers.update({"Accept": "application/json"})
def get_pages(self) -> Iterator[PubDevListerPage]:
"""Yield an iterator which returns 'page'

View file

@ -3,7 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.lister.pubdev.lister import USER_AGENT, PubDevLister
from swh.lister import USER_AGENT_TEMPLATE
from swh.lister.pubdev.lister import PubDevLister
expected_origins = {
"https://pub.dev/packages/Autolinker",
@ -29,7 +30,10 @@ def test_pubdev_lister(datadir, requests_mock_datadir, swh_scheduler):
def _match_request(request):
return request.headers.get("User-Agent") == USER_AGENT
return (
request.headers.get("User-Agent")
== USER_AGENT_TEMPLATE % PubDevLister.LISTER_NAME
)
def test_pubdev_lister_skip_package(