Use a uniform User-Agent on all listers

This also adds tests to make sure that we properly send our version number to
upstreams.
This commit is contained in:
Nicolas Dandrimont 2019-11-22 13:12:58 +01:00
parent 62dc4dc257
commit ff7fdf24db
9 changed files with 48 additions and 8 deletions

View file

@ -15,6 +15,9 @@ try:
except pkg_resources.DistributionNotFound:
__version__ = 'devel'
USER_AGENT_TEMPLATE = 'Software Heritage Lister (%s)'
USER_AGENT = USER_AGENT_TEMPLATE % __version__
LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
for entry_point in pkg_resources.iter_entry_points('swh.workers')

View file

@ -13,6 +13,7 @@ from requests.adapters import HTTPAdapter
from .models import CGitModel
from swh.core.utils import grouper
from swh.lister import USER_AGENT
from swh.lister.core.lister_base import ListerBase
@ -72,6 +73,9 @@ class CGitLister(ListerBase):
self.instance = instance
self.session = Session()
self.session.mount(self.url, HTTPAdapter(max_retries=3))
self.session.headers = {
'User-Agent': USER_AGENT,
}
def run(self):
total = 0

View file

@ -3,6 +3,9 @@
# See top-level LICENSE file for more information
from swh.lister import __version__
def test_lister_no_page(requests_mock_datadir, swh_listers):
lister = swh_listers['cgit']
@ -64,3 +67,16 @@ def test_lister_run(requests_mock_datadir, swh_listers):
assert kwargs == {}
assert row['policy'] == 'recurring'
assert row['priority'] is None
def test_lister_requests(requests_mock_datadir, swh_listers):
    """Run the cgit lister and check the User-Agent of every request sent.

    Each recorded request must carry a User-Agent header that names the
    Software Heritage Lister and embeds the package version.
    """
    cgit_lister = swh_listers['cgit']
    cgit_lister.url = 'https://git.tizen/cgit/'
    cgit_lister.run()

    history = requests_mock_datadir.request_history
    # With no recorded request the loop below would pass vacuously.
    assert len(history) != 0

    for sent in history:
        headers = sent.headers
        assert 'User-Agent' in headers
        agent = headers['User-Agent']
        assert 'Software Heritage Lister' in agent
        assert __version__ in agent

View file

@ -14,7 +14,7 @@ import xmltodict
from typing import Optional, Union
from swh.lister import __version__
from swh.lister import USER_AGENT_TEMPLATE, __version__
from .abstractattribute import AbstractAttribute
from .lister_base import FetchError
@ -45,7 +45,7 @@ class ListerHttpTransport(abc.ABC):
MAY BE OVERRIDDEN if request headers are needed.
"""
return {
'User-Agent': 'Software Heritage lister (%s)' % self.lister_version
'User-Agent': USER_AGENT_TEMPLATE % self.lister_version
}
def request_instance_credentials(self):

View file

@ -326,6 +326,17 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
self.get_api_response(self.first_index)
self.assertEqual(sleepmock.call_count, 2)
@requests_mock.Mocker()
def test_request_headers(self, http_mocker):
    """Check the User-Agent header of every request sent by a lister run.

    Each request recorded by the mocker must carry a User-Agent header
    that names the Software Heritage Lister and embeds the package
    version.
    """
    fl = self.create_fl_with_db(http_mocker)
    fl.run()
    # With no recorded request the loop below would pass vacuously.
    self.assertNotEqual(len(http_mocker.request_history), 0)
    for request in http_mocker.request_history:
        # unittest assertions, like the neighbouring checks: unlike bare
        # `assert` they are not stripped under `python -O`, and they
        # report both operands on failure.
        self.assertIn('User-Agent', request.headers)
        user_agent = request.headers['User-Agent']
        self.assertIn('Software Heritage Lister', user_agent)
        self.assertIn(swh.lister.__version__, user_agent)
def scheduled_tasks_test(self, next_api_response_file, next_last_index,
http_mocker):
"""Check that no loading tasks get disabled when processing a new

View file

@ -54,7 +54,12 @@ class GitHubLister(IndexingHttpLister):
return [self.get_model_from_repo(repo) for repo in repos]
def request_headers(self):
return {'Accept': 'application/vnd.github.v3+json'}
"""(Override) Set requests headers to send when querying the GitHub API
"""
headers = super().request_headers()
headers['Accept'] = 'application/vnd.github.v3+json'
return headers
def disable_deleted_repo_tasks(self, index, next_index, keep_these):
""" (Overrides) Fix provided index value to avoid erroneously disabling

View file

@ -69,8 +69,9 @@ class NpmListerBase(IndexingHttpLister):
registry.
"""
return {'User-Agent': 'Software Heritage npm lister',
'Accept': 'application/json'}
headers = super().request_headers()
headers['Accept'] = 'application/json'
return headers
def _compute_urls(self, repo_name):
"""Return a tuple (package_url, package_metadata_url)

View file

@ -40,7 +40,6 @@ def test_incremental(lister, seq, save, swh_app, celery_session_worker):
# setup the mocked NpmLister
lister.return_value = lister
lister.run.return_value = None
lister.request_headers.return_value = []
seq.return_value = 42
save.side_effect = mock_save

View file

@ -57,8 +57,9 @@ class PhabricatorLister(IndexingHttpLister):
(Override) Set requests headers to send when querying the
Phabricator API
"""
return {'User-Agent': 'Software Heritage phabricator lister',
'Accept': 'application/json'}
headers = super().request_headers()
headers['Accept'] = 'application/json'
return headers
def get_model_from_repo(self, repo):
url = get_repo_url(repo['attachments']['uris']['uris'])