From 6662ae8db5f1367975bc1f3337b6841448fc342d Mon Sep 17 00:00:00 2001 From: "Antoine R. Dumont (@ardumont)" Date: Sat, 22 Jun 2019 00:14:34 +0200 Subject: [PATCH] indexing_lister: Allow defining the flush packet size Prior to this commit, indexing lister instances flushed after every packet of 20. This can now be defined per subclass. --- swh/lister/bitbucket/lister.py | 3 +++ swh/lister/core/indexing_lister.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py index c2e0292..d7dfe51 100644 --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -30,6 +30,9 @@ class BitBucketLister(IndexingHttpLister): if per_page != DEFAULT_BITBUCKET_PAGE: self.PATH_TEMPLATE = '%s&pagelen=%s' % ( self.PATH_TEMPLATE, per_page) + # to stay consistent with prior behavior (20 * 10 repositories then) + self.flush_packet_db = int( + (self.flush_packet_db * DEFAULT_BITBUCKET_PAGE) / per_page) def get_model_from_repo(self, repo): return { diff --git a/swh/lister/core/indexing_lister.py b/swh/lister/core/indexing_lister.py index 3dcd8e9..aa913e4 100644 --- a/swh/lister/core/indexing_lister.py +++ b/swh/lister/core/indexing_lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2017 the Software Heritage developers +# Copyright (C) 2015-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -16,6 +16,7 @@ logger = logging.getLogger(__name__) class IndexingLister(ListerBase): + flush_packet_db = 20 """Lister* intermediate class for any service that follows the pattern: - The service must report at least one stable unique identifier, known @@ -222,7 +223,7 @@ class IndexingLister(ListerBase): yield i for i in ingest_indexes(): - if (i % 20) == 0: + if (i % self.flush_packet_db) == 0: logger.debug('Flushing updates at index %s', i) self.db_session.commit() self.db_session = self.mk_session()