feat(fedora): Introduce fedora lister
Summary: Lister to ingest fedora mirrors (.rpm) Reviewers: #reviewers, vlorentz Subscribers: vlorentz, olasd Maniphest Tasks: T4448 Differential Revision: https://forge.softwareheritage.org/D8386
This commit is contained in:
parent
ea146ce297
commit
6ad61aec23
16 changed files with 729 additions and 0 deletions
|
@ -26,6 +26,7 @@ following Python modules:
|
|||
- `swh.lister.pypi`
|
||||
- `swh.lister.tuleap`
|
||||
- `swh.lister.gogs`
|
||||
- `swh.lister.fedora`
|
||||
|
||||
Dependencies
|
||||
------------
|
||||
|
|
6
mypy.ini
6
mypy.ini
|
@ -48,3 +48,9 @@ ignore_missing_imports = True
|
|||
|
||||
[mypy-psycopg2.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-repomd.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-defusedxml.*]
|
||||
ignore_missing_imports = True
|
||||
|
|
|
@ -9,3 +9,4 @@ lxml
|
|||
dulwich
|
||||
testing.postgresql
|
||||
psycopg2
|
||||
repomd
|
||||
|
|
1
setup.py
1
setup.py
|
@ -86,6 +86,7 @@ setup(
|
|||
lister.tuleap=swh.lister.tuleap:register
|
||||
lister.maven=swh.lister.maven:register
|
||||
lister.gogs=swh.lister.gogs:register
|
||||
lister.fedora=swh.lister.fedora:register
|
||||
""",
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
|
|
13
swh/lister/fedora/__init__.py
Normal file
13
swh/lister/fedora/__init__.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
def register():
    """Describe the Fedora lister plugin: its lister class and its task modules.

    The lister class is imported inside the function, so importing this
    package alone does not import the lister module.
    """
    from .lister import FedoraLister

    task_modules = [f"{__name__}.tasks"]
    return dict(lister=FedoraLister, task_modules=task_modules)
|
252
swh/lister/fedora/lister.py
Normal file
252
swh/lister/fedora/lister.py
Normal file
|
@ -0,0 +1,252 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
import logging
|
||||
from typing import Any, Dict, Iterator, List, Set, Type
|
||||
from urllib.error import HTTPError
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import repomd
|
||||
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from ..pattern import Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
Release = int
|
||||
Edition = str
|
||||
PkgName = str
|
||||
PkgVersion = str
|
||||
FedoraOrigin = str
|
||||
FedoraPageType = Type[repomd.Repo]
|
||||
"""Each page is a list of packages from a given Fedora (release, edition) pair"""
|
||||
|
||||
|
||||
def get_editions(release: Release) -> List[Edition]:
    """Return the edition directory names to list for a given release.

    Directories that don't contain .rpm files are left out on purpose:
    Docker, CloudImages, Atomic*, Spins, Live, Cloud_Atomic, Silverblue.
    """
    editions = ["Everything"]
    if release < 20:
        editions.append("Fedora")
        return editions

    editions += ["Server", "Workstation"]
    if release >= 28:
        editions.append("Modular")
    return editions
|
||||
|
||||
|
||||
def get_last_modified(pkg: repomd.Package) -> datetime:
    """Get timezone aware last modified time in UTC from RPM package metadata.

    The timestamp is read from the ``build`` attribute of the package's
    ``common:time`` metadata element.

    Args:
        pkg: package entry of a parsed repository

    Returns:
        the build time as a timezone-aware datetime in UTC
    """
    build_ts = pkg._element.find("common:time", namespaces=repomd._ns).get("build")
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; building the
    # aware datetime directly yields the same value.
    return datetime.fromtimestamp(int(build_ts), tz=timezone.utc)
|
||||
|
||||
|
||||
def get_checksums(pkg: repomd.Package) -> Dict[str, str]:
    """Return the checksum of an rpm archive as a one-entry ``{algorithm: digest}`` dict.

    The algorithm name "sha" found in the metadata is reported as "sha1".
    """
    checksum = pkg._element.find("common:checksum", namespaces=repomd._ns)
    algorithm = checksum.get("type")
    return {"sha1" if algorithm == "sha" else algorithm: checksum.text}
|
||||
|
||||
|
||||
@dataclass
class FedoraListerState:
    """State of Fedora lister"""

    # Maps a package name to all the versions found during the last listing.
    package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict)
|
||||
|
||||
|
||||
class FedoraLister(Lister[FedoraListerState, FedoraPageType]):
    """
    List source packages for given Fedora releases.

    The lister will create a snapshot for each package name from all its
    available versions.

    If a package snapshot is different from the last listing operation,
    it will be sent to the scheduler that will create a loading task
    to archive newly found source code.

    The versions of one package are spread over several (release, edition)
    pages, so origins are accumulated while pages are processed and are only
    emitted together with the last available page.

    Args:
        scheduler: instance of SchedulerInterface
        instance: lister instance name, forwarded to the base lister
        url: fedora package archives mirror URL
        releases: list of fedora releases to process
    """

    LISTER_NAME = "fedora"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        instance: str = "fedora",
        url: str = "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
        releases: List[Release] = [34, 35, 36],
    ):
        super().__init__(
            scheduler=scheduler,
            url=url,
            instance=instance,
            credentials={},
        )

        # Copy the argument: the default list object is shared between calls
        # and a caller-provided list must not be aliased by the lister.
        self.releases = list(releases)

        # will hold all listed origins info
        self.listed_origins: Dict[FedoraOrigin, ListedOrigin] = {}
        # will hold updated origins since last listing
        self.origins_to_send: Set[FedoraOrigin] = set()
        # will contain the lister state after a call to run
        self.package_versions: Dict[PkgName, Set[PkgVersion]] = {}
        # True while the page being processed is the last one of the listing
        self.last_page = False
        # True once the accumulated origins have all been emitted
        self.listing_complete = False

    def state_from_dict(self, d: Dict[str, Any]) -> FedoraListerState:
        """Build the lister state from its serialized ``{name: [versions]}`` form."""
        return FedoraListerState(package_versions={k: set(v) for k, v in d.items()})

    def state_to_dict(self, state: FedoraListerState) -> Dict[str, Any]:
        """Serialize the lister state as ``{name: [versions]}``.

        Versions are sorted so that the serialized state is deterministic.
        """
        return {k: sorted(v) for k, v in state.package_versions.items()}

    def page_request(self, release: Release, edition: Edition) -> FedoraPageType:
        """Return parsed packages for a given fedora release and edition.

        Raises:
            HTTPError: if the repository metadata cannot be fetched
        """
        # source packages are not stored under the same directory for
        # releases older than 24
        subdir = "source/SRPMS/" if release < 24 else "source/tree/"
        index_url = urljoin(self.url, f"{release}/{edition}/{subdir}")

        repo = repomd.load(index_url)  # raises if repomd.xml cannot be fetched

        logger.debug(
            "Fetched metadata from url: %s, found %d packages", index_url, len(repo)
        )
        # TODO: Extract more fields like "provides" and "requires" from *primary.xml
        # as extrinsic metadata using the pkg._element.findtext method
        return repo

    def _enter_page(
        self,
        release: Release,
        edition: Edition,
        page: FedoraPageType,
        last_page: bool,
    ) -> FedoraPageType:
        """Record which page is about to be processed and return it."""
        self.current_release = release
        self.current_edition = edition
        self.last_page = last_page
        return page

    def get_pages(self) -> Iterator[FedoraPageType]:
        """Return an iterator on parsed fedora packages, one page per (release, edition) pair

        (release, edition) pairs whose metadata is missing (HTTP 404) are
        skipped. A fetched page is only yielded once it is known whether
        another page follows it: this is how the last *available* page gets
        flagged, even when the trailing pairs are missing. The price is that
        two parsed pages are alive while the next one is being fetched.
        """
        held = None  # (release, edition, page) fetched but not yielded yet

        for release in self.releases:
            for edition in get_editions(release):
                logger.debug("Listing fedora release %s edition %s", release, edition)
                try:
                    repo = self.page_request(release, edition)
                except HTTPError as http_error:
                    if http_error.getcode() != 404:
                        raise
                    logger.debug(
                        "No packages metadata found for fedora release %s edition %s",
                        release,
                        edition,
                    )
                    continue

                if held is not None:
                    # another page exists, so the held one is not the last
                    yield self._enter_page(*held, last_page=False)
                held = (release, edition, repo)

        if held is not None:
            yield self._enter_page(*held, last_page=True)

    def origin_url_for_package(self, package_name: PkgName) -> FedoraOrigin:
        """Return the origin url for the given package"""
        return f"https://src.fedoraproject.org/rpms/{package_name}"

    def get_origins_from_page(self, page: FedoraPageType) -> Iterator[ListedOrigin]:
        """Convert a page of fedora package sources into an iterator of ListedOrigin.

        Nothing is yielded before the last page: package versions found in
        the page are merged into the per-origin loader arguments, and the
        origins to update are emitted all at once with the last page.
        """
        assert self.lister_obj.id is not None

        origins_to_send = set()

        # iterate on each package's metadata
        for pkg_metadata in page:
            # extract package metadata
            package_name = pkg_metadata.name
            package_version = pkg_metadata.version
            package_build_time = get_last_modified(pkg_metadata)
            package_download_path = pkg_metadata.location

            # build origin url
            origin_url = self.origin_url_for_package(package_name)
            # create package version key as expected by the fedora (rpm) loader
            package_version_key = pkg_metadata.vr

            # this is the first time a package is listed
            if origin_url not in self.listed_origins:
                # create a ListedOrigin object for it that can be later
                # updated with new package versions info
                self.listed_origins[origin_url] = ListedOrigin(
                    lister_id=self.lister_obj.id,
                    url=origin_url,
                    visit_type="rpm",
                    extra_loader_arguments={"packages": {}},
                    last_update=package_build_time,
                )

                # init set that will contain all listed package versions
                self.package_versions[package_name] = set()

            # candidate for sending; dropped again below if nothing is new
            origins_to_send.add(origin_url)

            origin = self.listed_origins[origin_url]

            # update package metadata in parameter that will be provided
            # to the rpm loader
            origin.extra_loader_arguments["packages"][package_version_key] = {
                "name": package_name,
                "version": package_version,
                "url": urljoin(page.baseurl, package_download_path),
                "release": self.current_release,
                "edition": self.current_edition,
                "buildTime": package_build_time.isoformat(),
                "checksums": get_checksums(pkg_metadata),
            }

            # keep the most recent build time as the origin's last update
            if origin.last_update is not None and package_build_time > origin.last_update:
                origin.last_update = package_build_time

            # add package version key to the set of found versions
            self.package_versions[package_name].add(package_version_key)

            # package has already been listed during a previous listing process
            if package_name in self.state.package_versions:
                new_versions = (
                    self.package_versions[package_name]
                    - self.state.package_versions[package_name]
                )
                # no new versions so far, no need to send the origin to the scheduler
                if not new_versions:
                    origins_to_send.discard(origin_url)

        logger.debug(
            "Found %s packages to update (new ones or packages with new versions).",
            len(origins_to_send),
        )
        logger.debug(
            "Current total number of listed packages is equal to %s.",
            len(self.listed_origins),
        )

        self.origins_to_send.update(origins_to_send)

        if not self.last_page:
            return

        # yield listed origins when all fedora releases and editions processed
        yield from (
            self.listed_origins[origin_url] for origin_url in self.origins_to_send
        )
        # only reached once the consumer has pulled every origin
        self.listing_complete = True

    def finalize(self):
        """Set the lister state once a listing has been completed.

        When the accumulated origins were not emitted (no page could be
        fetched, or the listing stopped early), the previous state is kept:
        recording the versions seen so far would make them look already
        listed to the next run although their origins were never sent.
        """
        if not self.listing_complete:
            self.updated = False
            return

        # set mapping between listed package names and versions as lister state
        self.state.package_versions = self.package_versions
        self.updated = len(self.listed_origins) > 0
|
21
swh/lister/fedora/tasks.py
Normal file
21
swh/lister/fedora/tasks.py
Normal file
|
@ -0,0 +1,21 @@
|
|||
# Copyright (C) 2022 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from celery import shared_task
|
||||
|
||||
from .lister import FedoraLister
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".FullFedoraRelister")
def list_fedora_full(**lister_args) -> Dict[str, int]:
    """Full update of a Fedora instance

    The keyword arguments are forwarded to ``FedoraLister.from_configfile``;
    the statistics returned by the run are handed back as a dict.
    """
    stats = FedoraLister.from_configfile(**lister_args).run()
    return stats.dict()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".ping")
def _ping() -> str:
    """Trivial task answering "OK", usable to check that tasks get executed."""
    answer = "OK"
    return answer
|
0
swh/lister/fedora/tests/__init__.py
Normal file
0
swh/lister/fedora/tests/__init__.py
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,55 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
|
||||
<revision>1499286311</revision>
|
||||
<data type="primary">
|
||||
<checksum type="sha256">4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3</checksum>
|
||||
<open-checksum type="sha256">db616ad8e4219e23dfc05cd515e017cdc0d59144689ac606951fa42cbb06ae65</open-checksum>
|
||||
<location href="repodata/4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3-primary.xml.gz"/>
|
||||
<timestamp>1499286305</timestamp>
|
||||
<size>5425131</size>
|
||||
<open-size>30064034</open-size>
|
||||
</data>
|
||||
<data type="filelists">
|
||||
<checksum type="sha256">17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c</checksum>
|
||||
<open-checksum type="sha256">7caabd1205a72d26422756211dcd536336cef643f7f73eb15a470b02ff09a194</open-checksum>
|
||||
<location href="repodata/17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c-filelists.xml.gz"/>
|
||||
<timestamp>1499286305</timestamp>
|
||||
<size>1650273</size>
|
||||
<open-size>6419422</open-size>
|
||||
</data>
|
||||
<data type="other">
|
||||
<checksum type="sha256">8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb</checksum>
|
||||
<open-checksum type="sha256">786b8d4fa759f0ade3eaab1bde390d12c950dfe217eda1773400f3a3d461522b</open-checksum>
|
||||
<location href="repodata/8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb-other.xml.gz"/>
|
||||
<timestamp>1499286305</timestamp>
|
||||
<size>4396102</size>
|
||||
<open-size>33165783</open-size>
|
||||
</data>
|
||||
<data type="primary_db">
|
||||
<checksum type="sha256">1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457</checksum>
|
||||
<open-checksum type="sha256">dc8dbac072ac1412f0ecface57fa57c5ddcac14acc880fe9b467164be733e963</open-checksum>
|
||||
<location href="repodata/1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457-primary.sqlite.bz2"/>
|
||||
<timestamp>1499286309</timestamp>
|
||||
<size>7071217</size>
|
||||
<open-size>26177536</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="filelists_db">
|
||||
<checksum type="sha256">5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11</checksum>
|
||||
<open-checksum type="sha256">b293d51dd4e6eb4128e40b6ce228c62b169b1d47be535e56f69b8ad622c4a6ca</open-checksum>
|
||||
<location href="repodata/5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11-filelists.sqlite.bz2"/>
|
||||
<timestamp>1499286307</timestamp>
|
||||
<size>2227395</size>
|
||||
<open-size>5529600</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="other_db">
|
||||
<checksum type="sha256">f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292</checksum>
|
||||
<open-checksum type="sha256">3f5d4619dcabe945b773c1c98ea40b8ead53340291bd504ab3faabfc7b57bb99</open-checksum>
|
||||
<location href="repodata/f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292-other.sqlite.bz2"/>
|
||||
<timestamp>1499286311</timestamp>
|
||||
<size>5264843</size>
|
||||
<open-size>27930624</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
</repomd>
|
|
@ -0,0 +1,85 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
|
||||
<revision>1651698851</revision>
|
||||
<data type="primary">
|
||||
<checksum type="sha256">42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c</checksum>
|
||||
<open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
|
||||
<location href="repodata/42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c-primary.xml.gz"/>
|
||||
<timestamp>1651698827</timestamp>
|
||||
<size>7144060</size>
|
||||
<open-size>45898728</open-size>
|
||||
</data>
|
||||
<data type="filelists">
|
||||
<checksum type="sha256">fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92</checksum>
|
||||
<open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
|
||||
<location href="repodata/fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92-filelists.xml.gz"/>
|
||||
<timestamp>1651698827</timestamp>
|
||||
<size>1934835</size>
|
||||
<open-size>7458268</open-size>
|
||||
</data>
|
||||
<data type="other">
|
||||
<checksum type="sha256">461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09</checksum>
|
||||
<open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
|
||||
<location href="repodata/461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09-other.xml.gz"/>
|
||||
<timestamp>1651698827</timestamp>
|
||||
<size>3779969</size>
|
||||
<open-size>33166564</open-size>
|
||||
</data>
|
||||
<data type="primary_db">
|
||||
<checksum type="sha256">ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74</checksum>
|
||||
<open-checksum type="sha256">c752f5132f2cc5f4f137dade787154316f9503ae816212b8fabf5733cc2d344d</open-checksum>
|
||||
<location href="repodata/ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74-primary.sqlite.xz"/>
|
||||
<timestamp>1651698851</timestamp>
|
||||
<size>9058624</size>
|
||||
<open-size>41562112</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="filelists_db">
|
||||
<checksum type="sha256">1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8</checksum>
|
||||
<open-checksum type="sha256">e9b5c17e6004a78d20146aa54fa5ac93a01f4f2a95117588d649e92cfc008473</open-checksum>
|
||||
<location href="repodata/1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8-filelists.sqlite.xz"/>
|
||||
<timestamp>1651698834</timestamp>
|
||||
<size>1809496</size>
|
||||
<open-size>6471680</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="other_db">
|
||||
<checksum type="sha256">850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f</checksum>
|
||||
<open-checksum type="sha256">d13c6da8f7ad2c9060fd5b811b86facc9e926ec9273c0e135c4fe1110f784cdc</open-checksum>
|
||||
<location href="repodata/850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f-other.sqlite.xz"/>
|
||||
<timestamp>1651698838</timestamp>
|
||||
<size>4285108</size>
|
||||
<open-size>27897856</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="primary_zck">
|
||||
<checksum type="sha256">fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b</checksum>
|
||||
<open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
|
||||
<header-checksum type="sha256">2074f3da25ad0d45cf2776ad35dd22a6c63fafff319143c2f7dfefa98b99d651</header-checksum>
|
||||
<location href="repodata/fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b-primary.xml.zck"/>
|
||||
<timestamp>1651698828</timestamp>
|
||||
<size>6030441</size>
|
||||
<open-size>45898728</open-size>
|
||||
<header-size>231</header-size>
|
||||
</data>
|
||||
<data type="filelists_zck">
|
||||
<checksum type="sha256">6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9</checksum>
|
||||
<open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
|
||||
<header-checksum type="sha256">55fc5e75acd903f01cf18328fec9c6f995bd8f80c5b085aa3e0fe116bb89e891</header-checksum>
|
||||
<location href="repodata/6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9-filelists.xml.zck"/>
|
||||
<timestamp>1651698829</timestamp>
|
||||
<size>1735208</size>
|
||||
<open-size>7458268</open-size>
|
||||
<header-size>136</header-size>
|
||||
</data>
|
||||
<data type="other_zck">
|
||||
<checksum type="sha256">c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5</checksum>
|
||||
<open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
|
||||
<header-checksum type="sha256">93624d227c24ff4eb2332fcb038e7157e08ed051b654820def75c5511a1ce191</header-checksum>
|
||||
<location href="repodata/c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5-other.xml.zck"/>
|
||||
<timestamp>1651698829</timestamp>
|
||||
<size>3019451</size>
|
||||
<open-size>33166564</open-size>
|
||||
<header-size>206</header-size>
|
||||
</data>
|
||||
</repomd>
|
231
swh/lister/fedora/tests/test_lister.py
Normal file
231
swh/lister/fedora/tests/test_lister.py
Normal file
|
@ -0,0 +1,231 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from unittest.mock import MagicMock
|
||||
from urllib.error import HTTPError
|
||||
|
||||
import pytest
|
||||
|
||||
from swh.lister.fedora.lister import FedoraLister, Release, get_editions
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
|
||||
|
||||
def mock_repomd(datadir, mocker, use_altered_fedora36=False):
    """Mocks the .xml files fetched by repomd for the next lister run

    Successive ``read()`` calls on the mocked response return, in order, the
    repomd and primary metadata of fedora 26 then of fedora 36.
    """
    if use_altered_fedora36:
        primary36 = "primary36-altered.xml.gz"
    else:
        primary36 = "primary36.xml.gz"

    fixture_dir = Path(datadir, "archives.fedoraproject.org")
    filenames = ("repomd26.xml", "primary26.xml.gz", "repomd36.xml", primary36)

    response = MagicMock()
    response.read.side_effect = [
        (fixture_dir / filename).read_bytes() for filename in filenames
    ]
    # the response is its own context manager
    response.__enter__.return_value = response
    mocker.patch("repomd.urllib.request.urlopen").return_value = response
|
||||
|
||||
|
||||
def rpm_url(release, path, edition="Everything"):
    """Return the download URL of a source rpm on the fedora archives mirror.

    Args:
        release: fedora release number
        path: path of the rpm file below the ``Packages/`` directory
        edition: fedora edition holding the package

    Returns:
        the absolute URL of the rpm file
    """
    return (
        "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/"
        f"{release}/{edition}/source/tree/Packages/{path}"
    )
|
||||
|
||||
|
||||
@pytest.fixture
def pkg_versions():
    """Expected ``packages`` loader argument of each listed origin, keyed by origin URL."""

    def entry(name, version, release, build_time, rpm_path, **checksums):
        # one package version as expected in the loader arguments
        return {
            "name": name,
            "version": version,
            "release": release,
            "edition": "Everything",
            "buildTime": build_time,
            "url": rpm_url(release, rpm_path),
            "checksums": checksums,
        }

    origin_prefix = "https://src.fedoraproject.org/rpms/"

    return {
        origin_prefix + "0install": {
            "2.11-4.fc26": entry(
                "0install",
                "2.11",
                26,
                "2017-02-10T04:59:31+00:00",
                "0/0install-2.11-4.fc26.src.rpm",
                # note: we intentionally altered the original
                # primary26.xml file to test sha1 usage
                sha1="a6fdef5d1026dea208eeeba148f55ac2f545989b",
            ),
        },
        origin_prefix + "0xFFFF": {
            "0.3.9-15.fc26": entry(
                "0xFFFF",
                "0.3.9",
                26,
                "2017-02-10T05:01:53+00:00",
                "0/0xFFFF-0.3.9-15.fc26.src.rpm",
                sha256="96f9c163c0402d2b30e5343c8397a6d50e146c85a446804396b119ef9698231f",
            ),
            "0.9-4.fc36": entry(
                "0xFFFF",
                "0.9",
                36,
                "2022-01-19T19:13:53+00:00",
                "0/0xFFFF-0.9-4.fc36.src.rpm",
                sha256="45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd",
            ),
        },
        origin_prefix + "2ping": {
            "4.5.1-2.fc36": entry(
                "2ping",
                "4.5.1",
                36,
                "2022-01-19T19:12:21+00:00",
                "2/2ping-4.5.1-2.fc36.src.rpm",
                sha256="2ce028d944ebea1cab8c6203c9fed882792478b42fc34682b886a9db16e9de28",
            ),
        },
    }
|
||||
|
||||
|
||||
def run_lister(
    swh_scheduler: SchedulerInterface,
    releases: List[Release],
    pkg_versions: dict,
    origin_count: int,
    updated: bool = True,
):
    """Runs the lister and tests that the listed origins are correct."""
    lister = FedoraLister(scheduler=swh_scheduler, releases=releases)
    stats = lister.run()

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    stored_state = lister.get_state_from_scheduler()

    # One edition from each release (we mocked get_editions)
    expected_pages = len(releases) if updated else 0
    assert stats.pages == expected_pages
    assert stats.origins == origin_count

    packages_by_origin = {
        origin.url: origin.extra_loader_arguments["packages"] for origin in listed
    }
    assert packages_by_origin == pkg_versions

    # the state is keyed by package name, i.e. the last segment of the origin url
    expected_state = {
        origin_url.rsplit("/", 1)[-1]: set(versions)
        for origin_url, versions in pkg_versions.items()
    }
    assert stored_state.package_versions == expected_state
    assert lister.updated == updated
|
||||
|
||||
|
||||
def test_get_editions():
    """Each range of releases maps to its own list of editions."""
    expected = {
        18: ["Everything", "Fedora"],
        26: ["Everything", "Server", "Workstation"],
        34: ["Everything", "Server", "Workstation", "Modular"],
    }
    for release, editions in expected.items():
        assert get_editions(release) == editions
|
||||
|
||||
|
||||
@pytest.mark.parametrize("status_code", [400, 404, 500])
def test_fedora_lister_http_error(
    swh_scheduler: SchedulerInterface, mocker: MagicMock, status_code: int
):
    """
    Simulates handling of HTTP Errors while fetching of packages for fedora releases.

    A 404 is skipped silently by the lister and leads to an empty listing;
    any other status is expected to propagate out of the run.
    """
    from http import HTTPStatus

    releases = [18]

    def side_effect(url):
        # the reason phrase matches the simulated status code
        raise HTTPError(
            url,
            status_code,
            HTTPStatus(status_code).phrase,
            {"content-type": "text/html"},
            StringIO(),
        )

    urlopen_patch = mocker.patch("repomd.urllib.request.urlopen")
    urlopen_patch.side_effect = side_effect

    expected_pkgs: dict = {}

    def list_nothing():
        run_lister(
            swh_scheduler, releases, expected_pkgs, origin_count=0, updated=False
        )

    if status_code == 404:
        list_nothing()
    else:
        with pytest.raises(HTTPError):
            list_nothing()
|
||||
|
||||
|
||||
def test_full_lister_fedora(
    swh_scheduler: SchedulerInterface,
    mocker: MagicMock,
    datadir: Path,
    pkg_versions: dict,
):
    """
    Simulates a full listing of packages for fedora releases.
    """
    # restrict every release to a single edition, hence one page per release
    mocker.patch(
        "swh.lister.fedora.lister.get_editions", return_value=["Everything"]
    )
    mock_repomd(datadir, mocker)

    run_lister(swh_scheduler, [26, 36], pkg_versions, origin_count=3)
|
||||
|
||||
|
||||
def test_incremental_lister(
    swh_scheduler: SchedulerInterface,
    mocker: MagicMock,
    datadir: Path,
    pkg_versions: dict,
):
    """
    Simulates an incremental listing of packages for fedora releases.
    """
    releases = [26, 36]
    mocker.patch(
        "swh.lister.fedora.lister.get_editions", return_value=["Everything"]
    )

    # First run
    mock_repomd(datadir, mocker)
    run_lister(swh_scheduler, releases, pkg_versions, origin_count=3)

    # Second run (no updates)
    mock_repomd(datadir, mocker)
    run_lister(swh_scheduler, releases, pkg_versions, origin_count=0)

    # Use an altered version of primary36.xml in which we updated the version
    # of package 0xFFFF to 0.10:
    mock_repomd(datadir, mocker, use_altered_fedora36=True)

    # Add new version to the set of expected pkg versions:
    origin_url = "https://src.fedoraproject.org/rpms/0xFFFF"
    pkg_versions[origin_url]["0.10-4.fc36"] = {
        "name": "0xFFFF",
        "version": "0.10",
        "release": 36,
        "edition": "Everything",
        "buildTime": "2022-01-19T19:13:53+00:00",
        "url": rpm_url(36, "0/0xFFFF-0.10-4.fc36.src.rpm"),
        "checksums": {
            "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
        },
    }

    # Third run (0xFFFF in fedora36 editions got updated and it needs to be listed)
    run_lister(swh_scheduler, releases, pkg_versions, origin_count=1)
|
60
swh/lister/fedora/tests/test_tasks.py
Normal file
60
swh/lister/fedora/tests/test_tasks.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
# Copyright (C) 2022 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
||||
|
||||
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The ping task of the fedora lister runs and answers "OK"."""
    task_name = "swh.lister.fedora.tasks.ping"
    res = swh_scheduler_celery_app.send_task(task_name)
    assert res

    res.wait()
    assert res.successful()
    assert res.result == "OK"
|
||||
|
||||
|
||||
@patch("swh.lister.fedora.tasks.FedoraLister")
def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """The full listing task builds the lister from its kwargs and runs it once."""
    # the patched class acts as its own instance
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    kwargs = {
        "url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
    }
    res = swh_scheduler_celery_app.send_task(
        "swh.lister.fedora.tasks.FullFedoraRelister", kwargs=kwargs
    )
    assert res

    res.wait()
    assert res.successful()

    lister.from_configfile.assert_called_once_with(**kwargs)
    lister.run.assert_called_once_with()
|
||||
|
||||
|
||||
@patch("swh.lister.fedora.tasks.FedoraLister")
def test_full_listing_params(
    lister, swh_scheduler_celery_app, swh_scheduler_celery_worker
):
    """The full listing task forwards all of its parameters to the lister."""
    # the patched class acts as its own instance
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    kwargs = dict(
        url="https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/",
        instance="archives.fedoraproject.org",
        # releases are integers: the lister compares them numerically
        releases=[36],
    )
    res = swh_scheduler_celery_app.send_task(
        "swh.lister.fedora.tasks.FullFedoraRelister",
        kwargs=kwargs,
    )
    assert res
    res.wait()
    assert res.successful()

    lister.from_configfile.assert_called_once_with(**kwargs)
    lister.run.assert_called_once_with()
|
|
@ -39,6 +39,9 @@ lister_args = {
|
|||
"url": "https://guix.gnu.org/sources.json",
|
||||
"origin_upstream": "https://git.savannah.gnu.org/cgit/guix.git/",
|
||||
},
|
||||
"fedora": {
|
||||
"url": "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases//",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue