Use a uniform User-Agent on all listers
This also adds tests to make sure that we properly send our version number to upstreams.
This commit is contained in:
parent
62dc4dc257
commit
ff7fdf24db
9 changed files with 48 additions and 8 deletions
|
@ -15,6 +15,9 @@ try:
|
|||
except pkg_resources.DistributionNotFound:
|
||||
__version__ = 'devel'
|
||||
|
||||
# Template for the User-Agent header value; the %s placeholder is filled with
# a version string.
USER_AGENT_TEMPLATE = 'Software Heritage Lister (%s)'
|
||||
# User-Agent string built from the template and the module-level __version__.
USER_AGENT = USER_AGENT_TEMPLATE % __version__
|
||||
|
||||
|
||||
LISTERS = {entry_point.name.split('.', 1)[1]: entry_point
|
||||
for entry_point in pkg_resources.iter_entry_points('swh.workers')
|
||||
|
|
|
@ -13,6 +13,7 @@ from requests.adapters import HTTPAdapter
|
|||
from .models import CGitModel
|
||||
|
||||
from swh.core.utils import grouper
|
||||
from swh.lister import USER_AGENT
|
||||
from swh.lister.core.lister_base import ListerBase
|
||||
|
||||
|
||||
|
@ -72,6 +73,9 @@ class CGitLister(ListerBase):
|
|||
self.instance = instance
|
||||
self.session = Session()
|
||||
self.session.mount(self.url, HTTPAdapter(max_retries=3))
|
||||
self.session.headers = {
|
||||
'User-Agent': USER_AGENT,
|
||||
}
|
||||
|
||||
def run(self):
|
||||
total = 0
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
from swh.lister import __version__
|
||||
|
||||
|
||||
def test_lister_no_page(requests_mock_datadir, swh_listers):
|
||||
lister = swh_listers['cgit']
|
||||
|
||||
|
@ -64,3 +67,16 @@ def test_lister_run(requests_mock_datadir, swh_listers):
|
|||
assert kwargs == {}
|
||||
assert row['policy'] == 'recurring'
|
||||
assert row['priority'] is None
|
||||
|
||||
|
||||
def test_lister_requests(requests_mock_datadir, swh_listers):
    """Check that every request issued while running the cgit lister carries
    the Software Heritage Lister User-Agent, including the package version.

    """
    cgit_lister = swh_listers['cgit']
    cgit_lister.url = 'https://git.tizen/cgit/'
    cgit_lister.run()

    history = requests_mock_datadir.request_history
    # at least one request must have been recorded, else the loop checks nothing
    assert len(history) != 0
    for sent in history:
        assert 'User-Agent' in sent.headers
        agent = sent.headers['User-Agent']
        assert 'Software Heritage Lister' in agent
        assert __version__ in agent
|
||||
|
|
|
@ -14,7 +14,7 @@ import xmltodict
|
|||
|
||||
from typing import Optional, Union
|
||||
|
||||
from swh.lister import __version__
|
||||
from swh.lister import USER_AGENT_TEMPLATE, __version__
|
||||
|
||||
from .abstractattribute import AbstractAttribute
|
||||
from .lister_base import FetchError
|
||||
|
@ -45,7 +45,7 @@ class ListerHttpTransport(abc.ABC):
|
|||
MAY BE OVERRIDDEN if request headers are needed.
|
||||
"""
|
||||
return {
|
||||
'User-Agent': 'Software Heritage lister (%s)' % self.lister_version
|
||||
'User-Agent': USER_AGENT_TEMPLATE % self.lister_version
|
||||
}
|
||||
|
||||
def request_instance_credentials(self):
|
||||
|
|
|
@ -326,6 +326,17 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
|
|||
self.get_api_response(self.first_index)
|
||||
self.assertEqual(sleepmock.call_count, 2)
|
||||
|
||||
@requests_mock.Mocker()
def test_request_headers(self, http_mocker):
    """Check that each request sent during a lister run carries the uniform
    User-Agent header, including the swh.lister version.

    Args:
        http_mocker: the requests_mock.Mocker instance injected by the
            decorator; its request_history is inspected after the run.

    """
    fl = self.create_fl_with_db(http_mocker)
    fl.run()
    # guard against a vacuous pass: the loop below checks nothing if no
    # request was recorded
    self.assertNotEqual(len(http_mocker.request_history), 0)
    for request in http_mocker.request_history:
        self.assertIn('User-Agent', request.headers)
        user_agent = request.headers['User-Agent']
        self.assertIn('Software Heritage Lister', user_agent)
        self.assertIn(swh.lister.__version__, user_agent)
|
||||
|
||||
def scheduled_tasks_test(self, next_api_response_file, next_last_index,
|
||||
http_mocker):
|
||||
"""Check that no loading tasks get disabled when processing a new
|
||||
|
|
|
@ -54,7 +54,12 @@ class GitHubLister(IndexingHttpLister):
|
|||
return [self.get_model_from_repo(repo) for repo in repos]
|
||||
|
||||
def request_headers(self):
    """(Override) Set requests headers to send when querying the GitHub API

    Starts from the headers returned by the parent class and sets the
    'Accept' entry to the GitHub v3 JSON media type, so the parent's
    headers are kept instead of being replaced.

    Returns:
        the parent's header mapping with 'Accept' set

    """
    headers = super().request_headers()
    headers['Accept'] = 'application/vnd.github.v3+json'
    return headers
|
||||
|
||||
def disable_deleted_repo_tasks(self, index, next_index, keep_these):
|
||||
""" (Overrides) Fix provided index value to avoid erroneously disabling
|
||||
|
|
|
@ -69,8 +69,9 @@ class NpmListerBase(IndexingHttpLister):
|
|||
registry.
|
||||
|
||||
"""
|
||||
return {'User-Agent': 'Software Heritage npm lister',
|
||||
'Accept': 'application/json'}
|
||||
headers = super().request_headers()
|
||||
headers['Accept'] = 'application/json'
|
||||
return headers
|
||||
|
||||
def _compute_urls(self, repo_name):
|
||||
"""Return a tuple (package_url, package_metadata_url)
|
||||
|
|
|
@ -40,7 +40,6 @@ def test_incremental(lister, seq, save, swh_app, celery_session_worker):
|
|||
# setup the mocked NpmLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
lister.request_headers.return_value = []
|
||||
seq.return_value = 42
|
||||
save.side_effect = mock_save
|
||||
|
||||
|
|
|
@ -57,8 +57,9 @@ class PhabricatorLister(IndexingHttpLister):
|
|||
(Override) Set requests headers to send when querying the
|
||||
Phabricator API
|
||||
"""
|
||||
return {'User-Agent': 'Software Heritage phabricator lister',
|
||||
'Accept': 'application/json'}
|
||||
headers = super().request_headers()
|
||||
headers['Accept'] = 'application/json'
|
||||
return headers
|
||||
|
||||
def get_model_from_repo(self, repo):
|
||||
url = get_repo_url(repo['attachments']['uris']['uris'])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue