From 81fd5f9c5d941ee8896e096e55be01e2dce25f63 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)"
Date: Thu, 12 Jul 2018 14:23:14 +0200
Subject: [PATCH] swh.lister.gitlab.tasks: Fix range computations

---
Reviewer note: utils.split_range() is rewritten so that an empty interval
(total_pages == 0) yields nothing. The first version read the loop variable
after a loop that never ran and failed with UnboundLocalError. Results for
all other inputs are unchanged. A regression test is added. The post-image
blob ids on the "index" lines of utils.py and test_utils.py were not
regenerated after this amendment.

 swh/lister/gitlab/tasks.py     | 15 ++++++---------
 swh/lister/tests/test_utils.py | 19 +++++++++++++++++++
 swh/lister/utils.py            | 21 +++++++++++++++++++++
 3 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/swh/lister/gitlab/tasks.py b/swh/lister/gitlab/tasks.py
index c0dcfb6..9aee77c 100644
--- a/swh/lister/gitlab/tasks.py
+++ b/swh/lister/gitlab/tasks.py
@@ -6,6 +6,7 @@ import random
 
 from celery import group
 
+from .. import utils
 from ..core.tasks import ListerTaskBase, RangeListerTask
 from .lister import GitLabLister
 
@@ -29,17 +30,13 @@ class FullGitLabRelister(GitLabListerTask):
     """
     task_queue = 'swh_lister_gitlab_refresh'
 
+    # nb pages
+    nb_pages = 10
+
     def run_task(self, *args, **kwargs):
         lister = self.new_lister(*args, **kwargs)
-        total, _, per_page = lister.get_pages_information()
-
-        ranges = []
-        prev_index = None
-        for index in range(0, total, per_page):
-            if index is not None and prev_index is not None:
-                ranges.append((prev_index, index))
-            prev_index = index
-
+        _, total_pages, _ = lister.get_pages_information()
+        ranges = list(utils.split_range(total_pages, self.nb_pages))
         random.shuffle(ranges)
         range_task = RangeGitLabLister()
         group(range_task.s(minv, maxv, *args, **kwargs)
diff --git a/swh/lister/tests/test_utils.py b/swh/lister/tests/test_utils.py
index 6fa07c6..3bd8939 100644
--- a/swh/lister/tests/test_utils.py
+++ b/swh/lister/tests/test_utils.py
@@ -49,3 +49,22 @@ class UtilsTest(unittest.TestCase):
         self.assertIsNone(utils.get({}, None))
         with self.assertRaises(AttributeError):
             self.assertIsNone(utils.get(None, ['a']))
+
+    @istest
+    def split_range(self):
+        actual_ranges = list(utils.split_range(14, 5))
+        self.assertEqual(actual_ranges, [(0, 5), (5, 10), (10, 14)])
+
+        actual_ranges = list(utils.split_range(19, 10))
+        self.assertEqual(actual_ranges, [(0, 10), (10, 19)])
+
+        # empty interval: no range is produced and nothing is raised
+        self.assertEqual(list(utils.split_range(0, 10)), [])
+
+    @istest
+    def split_range_errors(self):
+        with self.assertRaises(TypeError):
+            list(utils.split_range(None, 1))
+
+        with self.assertRaises(TypeError):
+            list(utils.split_range(100, None))
diff --git a/swh/lister/utils.py b/swh/lister/utils.py
index 996c0af..fba2d23 100644
--- a/swh/lister/utils.py
+++ b/swh/lister/utils.py
@@ -12,3 +12,24 @@ def get(d, keys):
         if v is not None:
             return v
     return None
+
+
+def split_range(total_pages, nb_pages):
+    """Split the interval [0, total_pages) into consecutive sub-ranges.
+
+    Args:
+        total_pages (int): upper bound (excluded) of the interval to split
+        nb_pages (int): maximum width of each sub-range
+
+    Yields:
+        (start, end) tuples of width nb_pages covering [0, total_pages)
+        in order; the last one is shortened to stop at total_pages.
+        Nothing is yielded when total_pages is 0.
+
+    Raises:
+        TypeError: on first iteration, if an argument is not an integer
+        ValueError: on first iteration, if nb_pages is 0
+
+    """
+    for start in range(0, total_pages, nb_pages):
+        yield start, min(start + nb_pages, total_pages)