relister: Consistently fix the behavior of first-time relisting

If nothing has been listed prior to a full relisting, there is actually nothing
to relist, so the relister in question does nothing.

In that context, the IndexingLister class's `db_partition_indices` method now
returns an empty list instead of raising a ValueError when there is nothing to
list.

Related T1826
Related e129e48
This commit is contained in:
Antoine R. Dumont (@ardumont) 2019-06-21 19:42:17 +02:00
parent 6662ae8db5
commit b99617f976
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
6 changed files with 44 additions and 39 deletions

View file

@ -2,10 +2,11 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import logging
import iso8601
from datetime import datetime
from urllib import parse
from swh.lister.bitbucket.models import BitBucketModel
@ -23,6 +24,7 @@ class BitBucketLister(IndexingHttpLister):
MODEL = BitBucketModel
LISTER_NAME = 'bitbucket'
instance = 'bitbucket'
default_min_bound = datetime.utcfromtimestamp(0).isoformat()
def __init__(self, api_baseurl, override_config=None, per_page=100):
super().__init__(
@ -54,21 +56,6 @@ class BitBucketLister(IndexingHttpLister):
repos = response.json()['values']
return [self.get_model_from_repo(repo) for repo in repos]
def db_first_index(self):
    """Return the first index to list from.

    Delegates to the parent class. For the first time listing there is no
    data in db, so when the parent yields a falsy value this falls back to
    the bitbucket starting year.
    """
    first_index = super().db_first_index()
    if first_index:
        return first_index
    return '2008-01-01T00:00:00Z'
def db_last_index(self):
    """Return the last index to list up to.

    Delegates to the parent class. For the first time listing there is no
    data in db, so when the parent yields a falsy value this falls back to
    the time of the first run as max date.

    Returns:
        The parent's ``db_last_index()`` value when truthy, otherwise the
        current time as a timezone-aware (UTC) ISO 8601 string.
    """
    # Import the module under a private alias: at file level the bare name
    # ``datetime`` is also imported as the class (``from datetime import
    # datetime``), and ``datetime.datetime`` / ``datetime.timezone`` lookups
    # on the class would raise AttributeError.
    import datetime as _dt

    last_index = super().db_last_index()
    if last_index:
        return last_index
    # Only computed when nothing is stored yet, as in the `or` form.
    return _dt.datetime.now(tz=_dt.timezone.utc).isoformat()
def request_uri(self, identifier):
    """Build the request URI for ``identifier`` through the parent class.

    A falsy ``identifier`` (e.g. ``None`` or an empty string) is replaced by
    the fixed default ``'1970-01-01'`` before delegating.
    """
    start = identifier if identifier else '1970-01-01'
    return super().request_uri(start)

View file

@ -30,8 +30,17 @@ def range_bitbucket_lister(start, end, **lister_args):
@app.task(name=__name__ + '.FullBitBucketRelister', bind=True)
def full_bitbucket_relister(self, split=None, **lister_args):
"""Relist from the beginning of what's already been listed.
It's not to be called for an initial listing.
"""
lister = new_lister(**lister_args)
ranges = lister.db_partition_indices(split or GROUP_SPLIT)
if not ranges:
self.log.info('Nothing to list')
return
random.shuffle(ranges)
promise = group(range_bitbucket_lister.s(minv, maxv, **lister_args)
for minv, maxv in ranges)()