lister: Type correctly the 'indexable' column
instead of converting that column as a string As a side effect, bitbucket wise, we provided improperly the after query parameter as a date not url encoded. This resulted in improper api response from bitbucket's (we received from time to time the same next index as the current one). Related T1826
This commit is contained in:
parent
b99617f976
commit
3d473c307c
8 changed files with 85 additions and 44 deletions
|
@ -6,7 +6,6 @@ import logging
|
|||
import iso8601
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from urllib import parse
|
||||
|
||||
from swh.lister.bitbucket.models import BitBucketModel
|
||||
|
@ -15,7 +14,6 @@ from swh.lister.core.indexing_lister import IndexingHttpLister
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DEFAULT_BITBUCKET_PAGE = 10
|
||||
|
||||
|
||||
|
@ -24,7 +22,7 @@ class BitBucketLister(IndexingHttpLister):
|
|||
MODEL = BitBucketModel
|
||||
LISTER_NAME = 'bitbucket'
|
||||
instance = 'bitbucket'
|
||||
default_min_bound = datetime.utcfromtimestamp(0).isoformat()
|
||||
default_min_bound = datetime.utcfromtimestamp(0)
|
||||
|
||||
def __init__(self, api_baseurl, override_config=None, per_page=100):
|
||||
super().__init__(
|
||||
|
@ -39,7 +37,7 @@ class BitBucketLister(IndexingHttpLister):
|
|||
def get_model_from_repo(self, repo):
|
||||
return {
|
||||
'uid': repo['uuid'],
|
||||
'indexable': repo['created_on'],
|
||||
'indexable': iso8601.parse_date(repo['created_on']),
|
||||
'name': repo['name'],
|
||||
'full_name': repo['full_name'],
|
||||
'html_url': repo['links']['html']['href'],
|
||||
|
@ -48,38 +46,38 @@ class BitBucketLister(IndexingHttpLister):
|
|||
}
|
||||
|
||||
def get_next_target_from_response(self, response):
|
||||
"""This will read the 'next' link from the api response if any
|
||||
and return it as a datetime.
|
||||
|
||||
Args:
|
||||
reponse (Response): requests' response from api call
|
||||
|
||||
Returns:
|
||||
next date as a datetime
|
||||
|
||||
"""
|
||||
body = response.json()
|
||||
if 'next' in body:
|
||||
return parse.unquote(body['next'].split('after=')[1])
|
||||
next_ = body.get('next')
|
||||
if next_ is not None:
|
||||
next_ = parse.urlparse(next_)
|
||||
return iso8601.parse_date(parse.parse_qs(next_.query)['after'][0])
|
||||
|
||||
def transport_response_simplified(self, response):
|
||||
repos = response.json()['values']
|
||||
return [self.get_model_from_repo(repo) for repo in repos]
|
||||
|
||||
def request_uri(self, identifier):
|
||||
identifier = parse.quote(identifier.isoformat())
|
||||
return super().request_uri(identifier or '1970-01-01')
|
||||
|
||||
def is_within_bounds(self, inner, lower=None, upper=None):
|
||||
# values are expected to be str dates
|
||||
try:
|
||||
inner = iso8601.parse_date(inner)
|
||||
if lower:
|
||||
lower = iso8601.parse_date(lower)
|
||||
if upper:
|
||||
upper = iso8601.parse_date(upper)
|
||||
if lower is None and upper is None:
|
||||
return True
|
||||
elif lower is None:
|
||||
ret = inner <= upper
|
||||
elif upper is None:
|
||||
ret = inner >= lower
|
||||
else:
|
||||
ret = lower <= inner <= upper
|
||||
except Exception as e:
|
||||
logger.error(str(e) + ': %s, %s, %s',
|
||||
('inner=%s%s' % (type(inner), inner)),
|
||||
('lower=%s%s' % (type(lower), lower)),
|
||||
('upper=%s%s' % (type(upper), upper)))
|
||||
raise
|
||||
|
||||
# values are expected to be datetimes
|
||||
if lower is None and upper is None:
|
||||
ret = True
|
||||
elif lower is None:
|
||||
ret = inner <= upper
|
||||
elif upper is None:
|
||||
ret = inner >= lower
|
||||
else:
|
||||
ret = lower <= inner <= upper
|
||||
return ret
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from sqlalchemy import Column, String
|
||||
from sqlalchemy import Column, String, DateTime
|
||||
|
||||
from swh.lister.core.models import IndexingModelBase
|
||||
|
||||
|
@ -12,4 +12,4 @@ class BitBucketModel(IndexingModelBase):
|
|||
__tablename__ = 'bitbucket_repo'
|
||||
|
||||
uid = Column(String, primary_key=True)
|
||||
indexable = Column(String, index=True)
|
||||
indexable = Column(DateTime(timezone=True), index=True)
|
||||
|
|
|
@ -1,29 +1,63 @@
|
|||
# Copyright (C) 2017-2018 the Software Heritage developers
|
||||
# Copyright (C) 2017-2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import re
|
||||
import unittest
|
||||
|
||||
from datetime import timedelta
|
||||
|
||||
from urllib.parse import unquote
|
||||
|
||||
import iso8601
|
||||
import requests_mock
|
||||
|
||||
from swh.lister.bitbucket.lister import BitBucketLister
|
||||
from swh.lister.core.tests.test_lister import HttpListerTester
|
||||
|
||||
|
||||
def convert_type(req_index):
|
||||
"""Convert the req_index to its right type according to the model's
|
||||
"indexable" column.
|
||||
|
||||
"""
|
||||
return iso8601.parse_date(unquote(req_index))
|
||||
|
||||
|
||||
class BitBucketListerTester(HttpListerTester, unittest.TestCase):
|
||||
Lister = BitBucketLister
|
||||
test_re = re.compile(r'/repositories\?after=([^?&]+)')
|
||||
lister_subdir = 'bitbucket'
|
||||
good_api_response_file = 'api_response.json'
|
||||
bad_api_response_file = 'api_empty_response.json'
|
||||
first_index = '2008-07-12T07:44:01.476818+00:00'
|
||||
last_index = '2008-07-19T06:16:43.044743+00:00'
|
||||
first_index = convert_type('2008-07-12T07:44:01.476818+00:00')
|
||||
last_index = convert_type('2008-07-19T06:16:43.044743+00:00')
|
||||
entries_per_page = 10
|
||||
convert_type = staticmethod(convert_type)
|
||||
|
||||
@requests_mock.Mocker()
|
||||
def test_fetch_none_nodb(self, http_mocker):
|
||||
"""Overridden because index is not an integer nor a string
|
||||
|
||||
"""
|
||||
http_mocker.get(self.test_re, text=self.mock_response)
|
||||
fl = self.get_fl()
|
||||
|
||||
self.disable_scheduler(fl)
|
||||
self.disable_db(fl)
|
||||
|
||||
# stores no results
|
||||
fl.run(min_bound=self.first_index - timedelta(days=3),
|
||||
max_bound=self.first_index)
|
||||
|
||||
def test_is_within_bounds(self):
|
||||
fl = self.get_fl()
|
||||
self.assertTrue(fl.is_within_bounds(
|
||||
'2008-07-15', self.first_index, self.last_index))
|
||||
iso8601.parse_date('2008-07-15'),
|
||||
self.first_index, self.last_index))
|
||||
self.assertFalse(fl.is_within_bounds(
|
||||
'2008-07-20', self.first_index, self.last_index))
|
||||
iso8601.parse_date('2008-07-20'),
|
||||
self.first_index, self.last_index))
|
||||
self.assertFalse(fl.is_within_bounds(
|
||||
'2008-07-11', self.first_index, self.last_index))
|
||||
iso8601.parse_date('2008-07-11'),
|
||||
self.first_index, self.last_index))
|
||||
|
|
|
@ -38,6 +38,12 @@ class HttpListerTesterBase(abc.ABC):
|
|||
first_index = AbstractAttribute('First index in good_api_response')
|
||||
entries_per_page = AbstractAttribute('Number of results in good response')
|
||||
LISTER_NAME = 'fake-lister'
|
||||
convert_type = str
|
||||
"""static method used to convert the "request_index" to its right type (for
|
||||
indexing listers for example, this is in accordance with the model's
|
||||
"indexable" column).
|
||||
|
||||
"""
|
||||
|
||||
# May need to override this if the headers are used for something
|
||||
def response_headers(self, request):
|
||||
|
@ -66,7 +72,7 @@ class HttpListerTesterBase(abc.ABC):
|
|||
def request_index(self, request):
|
||||
m = self.test_re.search(request.path_url)
|
||||
if m and (len(m.groups()) > 0):
|
||||
return m.group(1)
|
||||
return self.convert_type(m.group(1))
|
||||
|
||||
def mock_response(self, request, context):
|
||||
self.fl.reset_backoff()
|
||||
|
@ -76,7 +82,7 @@ class HttpListerTesterBase(abc.ABC):
|
|||
context.headers.update(custom_headers)
|
||||
req_index = self.request_index(request)
|
||||
|
||||
if req_index == str(self.first_index):
|
||||
if req_index == self.first_index:
|
||||
response_file = self.good_api_response_file
|
||||
else:
|
||||
response_file = self.bad_api_response_file
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2017-2018 the Software Heritage developers
|
||||
# Copyright (C) 2017-2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
@ -19,10 +19,11 @@ class GitHubListerTester(HttpListerTester, unittest.TestCase):
|
|||
first_index = 26
|
||||
last_index = 368
|
||||
entries_per_page = 100
|
||||
convert_type = int
|
||||
|
||||
def response_headers(self, request):
|
||||
headers = {'X-RateLimit-Remaining': '1'}
|
||||
if self.request_index(request) == str(self.first_index):
|
||||
if self.request_index(request) == self.first_index:
|
||||
headers.update({
|
||||
'Link': '<https://api.github.com/repositories?since=367>;'
|
||||
' rel="next",'
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2017-2018 the Software Heritage developers
|
||||
# Copyright (C) 2017-2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
@ -18,10 +18,11 @@ class GitLabListerTester(HttpListerTesterBase, unittest.TestCase):
|
|||
bad_api_response_file = 'api_empty_response.json'
|
||||
first_index = 1
|
||||
entries_per_page = 10
|
||||
convert_type = int
|
||||
|
||||
def response_headers(self, request):
|
||||
headers = {'RateLimit-Remaining': '1'}
|
||||
if self.request_index(request) == str(self.first_index):
|
||||
if self.request_index(request) == self.first_index:
|
||||
headers.update({
|
||||
'x-next-page': '3',
|
||||
})
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2018 the Software Heritage developers
|
||||
# Copyright (C) 2018-2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
@ -33,7 +33,7 @@ class NpmIncrementalListerTester(HttpListerTesterBase, unittest.TestCase):
|
|||
lister_subdir = 'npm'
|
||||
good_api_response_file = 'api_inc_response.json'
|
||||
bad_api_response_file = 'api_inc_empty_response.json'
|
||||
first_index = 6920642
|
||||
first_index = '6920642'
|
||||
entries_per_page = 100
|
||||
|
||||
@requests_mock.Mocker()
|
||||
|
|
|
@ -21,6 +21,7 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
|
|||
first_index = 1
|
||||
last_index = 12
|
||||
entries_per_page = 10
|
||||
convert_type = int
|
||||
|
||||
def get_fl(self, override_config=None):
|
||||
"""(Override) Retrieve an instance of fake lister (fl).
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue