indexing_lister: Allow defining the flush packet size

Prior to this commit, indexing lister instances flushed their updates every
20 indexes. The flush packet size can now be defined per subclass.
This commit is contained in:
Antoine R. Dumont (@ardumont) 2019-06-22 00:14:34 +02:00
parent 5ec3067b0d
commit 6662ae8db5
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
2 changed files with 6 additions and 2 deletions

View file

@ -30,6 +30,9 @@ class BitBucketLister(IndexingHttpLister):
if per_page != DEFAULT_BITBUCKET_PAGE:
self.PATH_TEMPLATE = '%s&pagelen=%s' % (
self.PATH_TEMPLATE, per_page)
# to stay consistent with prior behavior (20 * 10 repositories then)
self.flush_packet_db = int(
(self.flush_packet_db * DEFAULT_BITBUCKET_PAGE) / per_page)
def get_model_from_repo(self, repo):
return {

View file

@ -1,4 +1,4 @@
# Copyright (C) 2015-2017 the Software Heritage developers
# Copyright (C) 2015-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@ -16,6 +16,7 @@ logger = logging.getLogger(__name__)
class IndexingLister(ListerBase):
flush_packet_db = 20
"""Lister* intermediate class for any service that follows the pattern:
- The service must report at least one stable unique identifier, known
@ -222,7 +223,7 @@ class IndexingLister(ListerBase):
yield i
for i in ingest_indexes():
if (i % 20) == 0:
if (i % self.flush_packet_db) == 0:
logger.debug('Flushing updates at index %s', i)
self.db_session.commit()
self.db_session = self.mk_session()