indexing lister: Avoid generating empty or duplicate ranges when partitioning

This commit is contained in:
Nicolas Dandrimont 2019-11-12 17:44:07 +01:00
parent 2c5528ef59
commit 773cd337f1
2 changed files with 26 additions and 9 deletions

View file

@ -139,18 +139,22 @@ class IndexingLister(ListerBase):
partition_width = (max_index - min_index) / n_partitions
partitions = [
[
format_bound(min_index + i * partition_width),
format_bound(min_index + (i+1) * partition_width),
] for i in range(n_partitions)
# Generate n_partitions + 1 bounds for n_partitions partitions
bounds = [
format_bound(min_index + i * partition_width)
for i in range(n_partitions + 1)
]
# Remove bounds for lowest and highest partition
partitions[0][0] = None
partitions[-1][1] = None
# Trim duplicate bounds
bounds.append(None)
bounds = [cur
for cur, next in zip(bounds[:-1], bounds[1:])
if cur != next]
return [tuple(partition) for partition in partitions]
# Remove bounds for lowest and highest partition
bounds[0] = bounds[-1] = None
return list(zip(bounds[:-1], bounds[1:]))
def db_first_index(self):
"""Look in the db for the smallest indexable value

View file

@ -63,6 +63,19 @@ def test_db_partition_indices_zero_first():
assert partitions[-1] == (9000, None)
def test_db_partition_indices_small_index_range():
    """Check partitioning when far more partitions are requested than fit.

    The mocked lister reports indices running from 0 to 5; asking for 100
    partitions is expected to produce exactly five contiguous ranges, with
    the first lower bound and the last upper bound left open (``None``).
    """
    lister = MockedIndexingListerDbPartitionIndices(
        num_entries=5000, first_index=0, last_index=5,
    )
    assert lister

    expected_ranges = [(None, 1), (1, 2), (2, 3), (3, 4), (4, None)]
    assert lister.db_partition_indices(100) == expected_ranges
def test_db_partition_indices_date_indices():
# 24 hour delta
first = datetime.datetime.fromisoformat('2019-11-01T00:00:00+00:00')