debian: Provide last_update to produced ListedOrigin models

Use the value of the "Last-Modified" header from the HTTP response
to the Debian sources index request.

This prevents the creation of loading tasks for Debian packages that
have not changed since the last listing.

Related to T2400
This commit is contained in:
Antoine Lambert 2021-12-03 16:09:36 +01:00
parent 605b13a676
commit 93f17d4d9c
2 changed files with 13 additions and 1 deletions

View file

@ -7,6 +7,7 @@
import bz2
from collections import defaultdict
from dataclasses import dataclass, field
from email.utils import parsedate_to_datetime
import gzip
from itertools import product
import logging
@ -134,6 +135,10 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
response = requests.get(url, stream=True)
logging.debug("Fetched URL: %s, status code: %s", url, response.status_code)
if response.status_code == 200:
last_modified = response.headers.get("Last-Modified")
self.last_sources_update = (
parsedate_to_datetime(last_modified) if last_modified else None
)
decompressor = decompressors.get(compression)
if decompressor:
data = decompressor(response.raw).readlines()
@ -224,6 +229,7 @@ class DebianLister(Lister[DebianListerState, DebianPageType]):
url=origin_url,
visit_type="deb",
extra_loader_arguments={"packages": {}},
last_update=self.last_sources_update,
)
# origin will be yielded at the end of that method
origins_to_send[origin_url] = self.listed_origins[origin_url]

View file

@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from collections import defaultdict
from email.utils import formatdate
import os
from pathlib import Path
from typing import Dict, List, Set, Tuple
@ -78,7 +79,11 @@ def _init_test(
if compression:
requests_mock.get(idx_url, status_code=404)
else:
requests_mock.get(idx_url, text=sources)
requests_mock.get(
idx_url,
text=sources,
headers={"Last-Modified": formatdate(usegmt=True)},
)
for idx_url, _ in lister.debian_index_urls(suite, _components[1]):
requests_mock.get(idx_url, status_code=404)
@ -122,6 +127,7 @@ def _check_listed_origins(
]
assert filtered_origins
assert filtered_origins[0].last_update is not None
packages = filtered_origins[0].extra_loader_arguments["packages"]
# check the version info are available
assert package_version_key in packages