github/lister: Prevent erroneous scheduler tasks disabling

Closes T2014
This commit is contained in:
Antoine Lambert 2019-09-19 14:21:43 +02:00
parent 7572228f7c
commit 04d8fdf8df
6 changed files with 7661 additions and 735 deletions

View file

@ -308,6 +308,32 @@ class HttpListerTester(HttpListerTesterBase, abc.ABC):
self.get_api_response(self.first_index)
self.assertEqual(sleepmock.call_count, 2)
def scheduled_tasks_test(self, next_api_response_file, next_last_index,
                         http_mocker):
    """Check that no loading tasks get disabled when processing a new
    page of repositories returned by a forge API.

    The lister is run a first time, then the tester attributes are
    updated to describe the next page and the lister is run a second
    time with ``min_bound`` set to the previous last index.

    Args:
        next_api_response_file: value assigned to
            ``self.good_api_response_file`` before the second run
        next_last_index: value assigned to ``self.last_index`` before
            the second run
        http_mocker: forwarded as-is to ``self.create_fl_with_db``
    """
    fl = self.create_fl_with_db(http_mocker)

    # process first page of repositories listing
    fl.run()

    # process second page of repositories listing
    prev_last_index = self.last_index
    self.first_index = self.last_index
    self.last_index = next_last_index
    self.good_api_response_file = next_api_response_file
    fl.run(min_bound=prev_last_index)

    # check expected number of ingested repos and loading tasks
    ingested_repos = list(fl.db_query_range(0, self.last_index))
    self.assertEqual(len(ingested_repos), len(self.scheduler_tasks))
    self.assertEqual(len(ingested_repos), 2 * self.entries_per_page)

    # check tasks are not disabled; assertNotEqual reports the offending
    # status on failure, which assertTrue on a comparison does not
    for task in self.scheduler_tasks:
        self.assertNotEqual(task['status'], 'disabled')
class HttpSimpleListerTester(HttpListerTesterBase, abc.ABC):
"""Base testing class for subclass of

View file

@ -49,3 +49,14 @@ class GitHubLister(IndexingHttpLister):
def request_headers(self):
return {'Accept': 'application/vnd.github.v3+json'}
def disable_deleted_repo_tasks(self, index, next_index, keep_these):
    """(Overrides) Shift the provided index by one before delegating to
    the parent implementation, to avoid erroneously disabling some
    scheduler tasks.
    """
    # The repository ids listed by the Github API are strictly greater
    # than the 'since' parameter. Without the increment, the latest task
    # created for the previous page would get disabled when a new page of
    # repositories is processed.
    first_listed_index = index + 1
    return super().disable_deleted_repo_tasks(
        first_listed_index, next_index, keep_these)

File diff suppressed because it is too large Load diff

View file

@ -4,6 +4,9 @@
import re
import unittest
import requests_mock
from datetime import datetime, timedelta
from swh.lister.core.tests.test_lister import HttpListerTester
@ -14,10 +17,10 @@ class GitHubListerTester(HttpListerTester, unittest.TestCase):
Lister = GitHubLister
test_re = re.compile(r'/repositories\?since=([^?&]+)')
lister_subdir = 'github'
good_api_response_file = 'api_response.json'
good_api_response_file = 'api_first_response.json'
bad_api_response_file = 'api_empty_response.json'
first_index = 0
last_index = 368
last_index = 369
entries_per_page = 100
convert_type = int
@ -25,17 +28,16 @@ class GitHubListerTester(HttpListerTester, unittest.TestCase):
headers = {'X-RateLimit-Remaining': '1'}
if self.request_index(request) == self.first_index:
headers.update({
'Link': '<https://api.github.com/repositories?since=367>;'
'Link': '<https://api.github.com/repositories?since=%s>;'
' rel="next",'
'<https://api.github.com/repositories{?since}>;'
' rel="first"'
' rel="first"' % self.last_index
})
else:
headers.update({
'Link': '<https://api.github.com/repositories{?since}>;'
' rel="first"'
})
return headers
def mock_rate_quota(self, n, request, context):
@ -45,3 +47,7 @@ class GitHubListerTester(HttpListerTester, unittest.TestCase):
one_second = int((datetime.now() + timedelta(seconds=1.5)).timestamp())
context.headers['X-RateLimit-Reset'] = str(one_second)
return '{"error":"dummy"}'
@requests_mock.Mocker()
def test_scheduled_tasks(self, http_mocker):
    """Run the shared scheduled-tasks check against the second page of
    the GitHub listing fixtures.
    """
    self.scheduled_tasks_test(
        next_api_response_file='api_next_response.json',
        next_last_index=876,
        http_mocker=http_mocker,
    )

View file

@ -76,26 +76,7 @@ class PhabricatorListerTester(HttpListerTester, unittest.TestCase):
@requests_mock.Mocker()
def test_scheduled_tasks(self, http_mocker):
fl = self.create_fl_with_db(http_mocker)
# process first page of repositories listing
fl.run()
# process second page of repositories listing
prev_last_index = self.last_index
self.first_index = self.last_index
self.last_index = 23
self.good_api_response_file = 'api_next_response.json'
fl.run(min_bound=prev_last_index)
# check expected number of ingested repos and loading tasks
ingested_repos = list(fl.db_query_range(0, self.last_index))
self.assertEqual(len(ingested_repos), len(self.scheduler_tasks))
self.assertEqual(len(ingested_repos), 2 * self.entries_per_page)
# check tasks are not disabled
for task in self.scheduler_tasks:
self.assertTrue(task['status'] != 'disabled')
self.scheduled_tasks_test('api_next_response.json', 23, http_mocker)
@requests_mock.Mocker()
def test_scheduled_tasks_multiple_instances(self, http_mocker):