pattern: Make lister flush regularly origins to scheduler

As origins is a generator, the previous behavior would try to consume the overall
generator to send the records.

This groups and sends batch of 100 origins to the scheduler for writing.

Related to T3003
This commit is contained in:
Antoine R. Dumont (@ardumont) 2021-01-28 16:51:56 +01:00
parent f862004700
commit 0ad37740d9
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8

View file

@ -7,6 +7,7 @@ from dataclasses import dataclass
from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, TypeVar
from swh.core.config import load_from_envvar
from swh.core.utils import grouper
from swh.scheduler import get_scheduler, model
from swh.scheduler.interface import SchedulerInterface
@ -221,8 +222,12 @@ class Lister(Generic[StateType, PageType]):
Returns:
the number of listed origins recorded in the scheduler
"""
ret = self.scheduler.record_listed_origins(origins)
return len(ret)
count = 0
for batch_origins in grouper(origins, n=100):
ret = self.scheduler.record_listed_origins(batch_origins)
count += len(ret)
return count
@classmethod
def from_config(cls, scheduler: Dict[str, Any], **config: Any):