crates: Record lister state only if all crates were processed
Previously, the lister state was recorded regardless of whether errors occurred while listing crates, because the finalize method is called even when an exception is raised during listing. As a consequence, some crates could be missed, since the incremental listing restarts from the dump date of the last processed crate database. So ensure all crates have been processed by the lister before recording its state.
This commit is contained in:
parent
aafaebd5de
commit
c6aa490fc1
2 changed files with 20 additions and 4 deletions
|
@ -82,6 +82,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
|
|||
enable_origins=enable_origins,
|
||||
)
|
||||
self.index_metadata: Dict[str, str] = {}
|
||||
self.all_crates_processed = False
|
||||
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> CratesListerState:
|
||||
index_last_update = d.get("index_last_update")
|
||||
|
@ -210,6 +211,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
|
|||
page.append(self.page_entry_dict(v))
|
||||
|
||||
yield page
|
||||
self.all_crates_processed = True
|
||||
|
||||
def get_origins_from_page(self, page: CratesListerPage) -> Iterator[ListedOrigin]:
|
||||
"""Iterate on all crate pages and yield ListedOrigin instances."""
|
||||
|
@ -255,8 +257,7 @@ class CratesLister(Lister[CratesListerState, CratesListerPage]):
|
|||
)
|
||||
|
||||
def finalize(self) -> None:
|
||||
last: datetime = iso8601.parse_date(self.index_metadata["timestamp"])
|
||||
|
||||
if not self.state.index_last_update:
|
||||
if not self.state.index_last_update and self.all_crates_processed:
|
||||
last = iso8601.parse_date(self.index_metadata["timestamp"])
|
||||
self.state.index_last_update = last
|
||||
self.updated = True
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# Copyright (C) 2022-2024 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import iso8601
|
||||
import pytest
|
||||
|
||||
from swh.lister.crates.lister import CratesLister, CratesListerState
|
||||
|
||||
|
@ -250,3 +252,16 @@ def test_crates_lister_incremental_nothing_new(
|
|||
|
||||
assert res.pages == 0
|
||||
assert res.origins == 0
|
||||
|
||||
|
||||
def test_crates_lister_error_when_processing_crate(
|
||||
swh_scheduler, requests_mock_datadir, mocker
|
||||
):
|
||||
"""Lister state should not be recorded to scheduler if an error occurred
|
||||
when processing crate data."""
|
||||
lister = CratesLister(scheduler=swh_scheduler)
|
||||
mocker.patch.object(lister, "page_entry_dict").side_effect = IndexError()
|
||||
with pytest.raises(IndexError):
|
||||
lister.run()
|
||||
|
||||
assert lister.get_state_from_scheduler().index_last_update is None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue