indexing lister: Avoid generating empty or duplicate ranges when partitioning
This commit is contained in:
parent
2c5528ef59
commit
773cd337f1
2 changed files with 26 additions and 9 deletions
|
@ -139,18 +139,22 @@ class IndexingLister(ListerBase):
|
|||
|
||||
partition_width = (max_index - min_index) / n_partitions
|
||||
|
||||
partitions = [
|
||||
[
|
||||
format_bound(min_index + i * partition_width),
|
||||
format_bound(min_index + (i+1) * partition_width),
|
||||
] for i in range(n_partitions)
|
||||
# Generate n_partitions + 1 bounds for n_partitions partitions
|
||||
bounds = [
|
||||
format_bound(min_index + i * partition_width)
|
||||
for i in range(n_partitions + 1)
|
||||
]
|
||||
|
||||
# Remove bounds for lowest and highest partition
|
||||
partitions[0][0] = None
|
||||
partitions[-1][1] = None
|
||||
# Trim duplicate bounds
|
||||
bounds.append(None)
|
||||
bounds = [cur
|
||||
for cur, next in zip(bounds[:-1], bounds[1:])
|
||||
if cur != next]
|
||||
|
||||
return [tuple(partition) for partition in partitions]
|
||||
# Remove bounds for lowest and highest partition
|
||||
bounds[0] = bounds[-1] = None
|
||||
|
||||
return list(zip(bounds[:-1], bounds[1:]))
|
||||
|
||||
def db_first_index(self):
|
||||
"""Look in the db for the smallest indexable value
|
||||
|
|
|
@ -63,6 +63,19 @@ def test_db_partition_indices_zero_first():
|
|||
assert partitions[-1] == (9000, None)
|
||||
|
||||
|
||||
def test_db_partition_indices_small_index_range():
    """Partitioning a narrow index range must not yield empty or duplicate ranges.

    The mocked lister spans indices 0 through 5, yet 100 partitions are
    requested.  Only five distinct ranges are expected back, with the
    outermost bounds left open (``None``).
    """
    lister = MockedIndexingListerDbPartitionIndices(
        num_entries=5000, first_index=0, last_index=5
    )
    assert lister

    expected_ranges = [(None, 1), (1, 2), (2, 3), (3, 4), (4, None)]
    computed_ranges = lister.db_partition_indices(100)

    assert computed_ranges == expected_ranges
|
||||
|
||||
|
||||
def test_db_partition_indices_date_indices():
|
||||
# 24 hour delta
|
||||
first = datetime.datetime.fromisoformat('2019-11-01T00:00:00+00:00')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue