Puppet: Lister implements incremental mode
Use the with_release_since API argument to retrieve the modules that have been updated since the last time the lister was executed. Related to T4519.
This commit is contained in:
parent
e8699422d7
commit
e1f3f87c73
4 changed files with 382 additions and 6 deletions
|
@ -22,6 +22,10 @@ It returns a paginated list of results and a `next` url.
|
|||
|
||||
The API follows the `OpenApi 3.0 specifications`.
|
||||
|
||||
The lister is incremental: it uses the ``with_release_since`` API argument, whose value is an
|
||||
ISO date derived from the last time the lister was executed, stored as
|
||||
``lister.state.last_listing_date``.
|
||||
|
||||
Page listing
|
||||
------------
|
||||
|
||||
|
|
|
@ -3,15 +3,18 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from datetime import datetime
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import logging
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import iso8601
|
||||
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from ..pattern import CredentialsType, StatelessLister
|
||||
from ..pattern import CredentialsType, Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -19,7 +22,15 @@ logger = logging.getLogger(__name__)
|
|||
PuppetListerPage = List[Dict[str, Any]]
|
||||
|
||||
|
||||
class PuppetLister(StatelessLister[PuppetListerPage]):
|
||||
@dataclass
class PuppetListerState:
    """State kept between runs so the Puppet lister can operate incrementally."""

    # Date when the Puppet lister was last executed; ``None`` by default.
    last_listing_date: Optional[datetime] = None
|
||||
|
||||
|
||||
class PuppetLister(Lister[PuppetListerState, PuppetListerPage]):
|
||||
"""The Puppet lister list origins from 'Puppet Forge'"""
|
||||
|
||||
LISTER_NAME = "puppet"
|
||||
|
@ -39,6 +50,21 @@ class PuppetLister(StatelessLister[PuppetListerPage]):
|
|||
instance=self.INSTANCE,
|
||||
url=self.BASE_URL,
|
||||
)
|
||||
# Store the datetime the lister runs for incremental purpose
|
||||
self.listing_date = datetime.now()
|
||||
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> PuppetListerState:
    """Build a ``PuppetListerState`` from its dictionary form.

    Args:
        d: mapping of state fields; ``last_listing_date``, when present and
            not ``None``, is parsed with ``iso8601.parse_date``.

    Returns:
        The state built from the fields of ``d``.
    """
    # Work on a shallow copy so the caller's dictionary is left untouched.
    fields = dict(d)
    last_listing_date = fields.get("last_listing_date")
    if last_listing_date is not None:
        fields["last_listing_date"] = iso8601.parse_date(last_listing_date)
    return PuppetListerState(**fields)
|
||||
|
||||
def state_to_dict(self, state: PuppetListerState) -> Dict[str, Any]:
    """Turn ``state`` into a dictionary.

    ``last_listing_date`` is emitted as an ISO-formatted string, or ``None``
    when the state holds no date.
    """
    listing_date = state.last_listing_date
    serialized: Optional[str] = (
        None if listing_date is None else listing_date.isoformat()
    )
    return {"last_listing_date": serialized}
|
||||
|
||||
def get_pages(self) -> Iterator[PuppetListerPage]:
|
||||
"""Yield an iterator which returns 'page'
|
||||
|
@ -52,9 +78,21 @@ class PuppetLister(StatelessLister[PuppetListerPage]):
|
|||
"""
|
||||
# limit = 100 is the max value for pagination
|
||||
limit: int = 100
|
||||
response = self.http_request(
|
||||
f"{self.BASE_URL}v3/modules", params={"limit": limit}
|
||||
)
|
||||
params: Dict[str, Any] = {"limit": limit}
|
||||
|
||||
if self.state.last_listing_date:
|
||||
# Incremental mode filter query
|
||||
# To ensure we don't miss records between two lister runs, ``last_str`` must be
|
||||
# set with an offset of -15 hours, which is the lowest UTC offset recorded in the
|
||||
# tzdb
|
||||
last_str = (
|
||||
self.state.last_listing_date.astimezone(timezone(timedelta(hours=-15)))
|
||||
.date()
|
||||
.isoformat()
|
||||
)
|
||||
params["with_release_since"] = last_str
|
||||
|
||||
response = self.http_request(f"{self.BASE_URL}v3/modules", params=params)
|
||||
data: Dict[str, Any] = response.json()
|
||||
yield data["results"]
|
||||
|
||||
|
@ -111,3 +149,7 @@ class PuppetLister(StatelessLister[PuppetListerPage]):
|
|||
last_update=last_update,
|
||||
extra_loader_arguments={"artifacts": artifacts},
|
||||
)
|
||||
|
||||
def finalize(self) -> None:
    """Store this run's listing date in the state and flag the lister as updated."""
    current_run_date = self.listing_date
    self.state.last_listing_date = current_run_date
    self.updated = True
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -3,6 +3,8 @@
|
|||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
from swh.lister.puppet.lister import PuppetLister
|
||||
|
||||
# flake8: noqa: B950
|
||||
|
@ -118,3 +120,45 @@ def test_puppet_lister(datadir, requests_mock_datadir, swh_scheduler):
|
|||
)
|
||||
for expected in sorted(expected_origins, key=lambda expected: expected["url"])
|
||||
]
|
||||
|
||||
|
||||
def test_puppet_lister_incremental(datadir, requests_mock_datadir, swh_scheduler):
    """Run the lister twice: a first full listing, then an incremental one."""

    # First run
    lister = PuppetLister(scheduler=swh_scheduler)
    res = lister.run()

    assert res.pages == 2
    assert res.origins == 1 + 1 + 1

    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    assert len(scheduler_origins) == len(expected_origins)

    assert [
        (
            scheduled.visit_type,
            scheduled.url,
            scheduled.extra_loader_arguments["artifacts"],
        )
        for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url)
    ] == [
        (
            "puppet",
            expected["url"],
            expected["artifacts"],
        )
        for expected in sorted(expected_origins, key=lambda expected: expected["url"])
    ]

    # Second run
    lister = PuppetLister(scheduler=swh_scheduler)
    # Force lister.state.last_listing_date for correct fixture loading.
    # The datetime is built timezone-aware from the start: calling
    # ``astimezone`` on a naive datetime interprets it in the machine's local
    # timezone, so the date derived from it would depend on where the test runs.
    lister.state.last_listing_date = datetime(
        2022, 9, 26, 18, 0, tzinfo=timezone(timedelta(hours=-7))
    )
    res = lister.run()

    assert res.pages == 1
    assert res.origins == 1
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue