rpm: Turn fedora lister into a generic Red Hat based distribution one
As Red Hat based linux distributions share the same type of package repository, rework the fedora lister into a generic one to list RPM source packages and their versions from numerous distributions. For a given distribution, the RPM lister will fetch package metadata from a list of release identifiers and a list of software components. Source packages are then processed and relevant info is extracted to be sent to the RPM loader. Once all releases and components have been processed, the lister has collected all versions for each package name and sends that info to the scheduler, which will create RPM loading tasks afterwards. Nevertheless, as there is no generic way to list all releases and components for a given distribution, nor to guess the right URL to retrieve package metadata from, that info needs to be manually provided to the lister as input parameters. Some examples of those parameters for various distributions can be found in the config directory of the lister. Regarding the produced origin URLs, as there is no way to find valid HTTP ones for all distributions, the same behavior as with the debian lister is used and they have the following form: rpm://{instance}/packages/{package_name} where the instance variable corresponds to the name of the listed distribution such as Fedora, CentOS, or openSUSE. Related to swh/meta#5011.
This commit is contained in:
parent
fcfb7004db
commit
95714f6f37
23 changed files with 1096 additions and 577 deletions
13
swh/lister/rpm/__init__.py
Normal file
13
swh/lister/rpm/__init__.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
def register():
    """Describe the RPM lister plugin.

    Returns:
        A dictionary with the lister class under ``"lister"`` and the list of
        celery task module names under ``"task_modules"``.
    """
    # imported lazily so that importing this package does not load the lister
    from .lister import RPMLister

    registration = {
        "lister": RPMLister,
        "task_modules": [f"{__name__}.tasks"],
    }
    return registration
|
100
swh/lister/rpm/config/centos.yml
Normal file
100
swh/lister/rpm/config/centos.yml
Normal file
|
@ -0,0 +1,100 @@
|
|||
# RPM lister parameters to process CentOS source packages
|
||||
|
||||
url: https://www.centos.org
|
||||
instance: CentOS
|
||||
rpm_src_data:
|
||||
- base_url: https://vault.centos.org/
|
||||
releases:
|
||||
- "3.7"
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "4.0"
|
||||
- "4.1"
|
||||
- "4.2"
|
||||
- "4.3"
|
||||
- "4.4"
|
||||
- "4.5"
|
||||
- "4.6"
|
||||
- "4.7"
|
||||
- "4.8"
|
||||
- "4.9"
|
||||
- "5.0"
|
||||
- "5.1"
|
||||
- "5.2"
|
||||
- "5.3"
|
||||
- "5.4"
|
||||
- "5.5"
|
||||
- "5.6"
|
||||
- "5.7"
|
||||
- "5.8"
|
||||
- "5.9"
|
||||
- "5.10"
|
||||
- "5.11"
|
||||
- "6.0"
|
||||
- "6.1"
|
||||
- "6.2"
|
||||
- "6.3"
|
||||
- "6.4"
|
||||
- "6.5"
|
||||
- "6.6"
|
||||
- "6.7"
|
||||
- "6.8"
|
||||
- "6.9"
|
||||
- "6.10"
|
||||
- "7.0.1406"
|
||||
- "7.1.1503"
|
||||
- "7.2.1511"
|
||||
- "7.3.1611"
|
||||
- "7.4.1708"
|
||||
- "7.5.1804"
|
||||
- "7.6.1810"
|
||||
- "7.7.1908"
|
||||
- "7.8.2003"
|
||||
- "7.9.2009"
|
||||
- "8-stream"
|
||||
- "8.0.1905"
|
||||
- "8.1.1911"
|
||||
- "8.2.2004"
|
||||
- "8.3.2011"
|
||||
- "8.4.2105"
|
||||
- "8.5.2111"
|
||||
components:
|
||||
- AppStream
|
||||
- BaseOS
|
||||
- HighAvailability
|
||||
- PowerTools
|
||||
- SCL
|
||||
- addons
|
||||
- centosplus
|
||||
- contrib
|
||||
- cr
|
||||
- csgfs
|
||||
- dotnet
|
||||
- extras
|
||||
- fasttrack
|
||||
- opstools
|
||||
- os
|
||||
- rt
|
||||
- testing
|
||||
- updates
|
||||
- xen4
|
||||
|
||||
index_url_templates:
|
||||
- $base_url/$release/$component/Source/
|
||||
- $base_url/$release/$component/SRPMS/
|
||||
- $base_url/$release/$component/x86_64/
|
||||
|
||||
- base_url: https://mirror.stream.centos.org
|
||||
releases:
|
||||
- 9-stream
|
||||
components:
|
||||
- AppStream
|
||||
- BaseOS
|
||||
- CRB
|
||||
- HighAvailability
|
||||
- NFV
|
||||
- RT
|
||||
- ResilientStorage
|
||||
|
||||
index_url_templates:
|
||||
- $base_url/$release/$component/source/tree/
|
77
swh/lister/rpm/config/fedora.yml
Normal file
77
swh/lister/rpm/config/fedora.yml
Normal file
|
@ -0,0 +1,77 @@
|
|||
# RPM lister parameters to process Fedora source packages
|
||||
|
||||
url: https://fedoraproject.org
|
||||
instance: "Fedora"
|
||||
rpm_src_data:
|
||||
- base_url: https://archives.fedoraproject.org/pub/archive/fedora/linux/
|
||||
releases:
|
||||
- "2"
|
||||
- "3"
|
||||
- "4"
|
||||
- "5"
|
||||
- "6"
|
||||
components:
|
||||
- core
|
||||
- extras
|
||||
index_url_templates:
|
||||
- $base_url/$component/$release/SRPMS
|
||||
- $base_url/$component/$release/source/SRPMS
|
||||
- $base_url/$component/$release/x86_64/os/
|
||||
|
||||
- base_url: https://archives.fedoraproject.org/pub/archive/fedora/linux/
|
||||
releases:
|
||||
- "7"
|
||||
- "8"
|
||||
- "9"
|
||||
- "10"
|
||||
- "11"
|
||||
- "12"
|
||||
- "13"
|
||||
- "14"
|
||||
- "15"
|
||||
- "16"
|
||||
- "17"
|
||||
- "18"
|
||||
- "19"
|
||||
- "20"
|
||||
- "21"
|
||||
- "22"
|
||||
- "23"
|
||||
- "24"
|
||||
- "25"
|
||||
- "26"
|
||||
- "27"
|
||||
- "28"
|
||||
- "29"
|
||||
- "30"
|
||||
- "31"
|
||||
- "32"
|
||||
- "33"
|
||||
- "34"
|
||||
- "35"
|
||||
components:
|
||||
- Everything
|
||||
- Server
|
||||
- Workstation
|
||||
- Modular
|
||||
- Fedora
|
||||
index_url_templates:
|
||||
- $base_url/releases/$release/$component/source/tree/
|
||||
- $base_url/updates/$release/$component/source/tree/
|
||||
- $base_url/releases/$release/$component/source/SRPMS/
|
||||
- $base_url/updates/$release/SRPMS/
|
||||
|
||||
- base_url: https://dl.fedoraproject.org/pub/fedora/linux/
|
||||
releases:
|
||||
- "36"
|
||||
- "37"
|
||||
- "38"
|
||||
components:
|
||||
- Everything
|
||||
- Server
|
||||
- Workstation
|
||||
- Modular
|
||||
- Fedora
|
||||
index_url_templates:
|
||||
- $base_url/releases/$release/$component/source/tree/
|
||||
- $base_url/updates/$release/$component/source/tree/
|
26
swh/lister/rpm/config/opensuse.yml
Normal file
26
swh/lister/rpm/config/opensuse.yml
Normal file
|
@ -0,0 +1,26 @@
|
|||
# RPM lister parameters to process openSUSE source packages
|
||||
|
||||
url: http://opensuse.org
|
||||
instance: openSUSE
|
||||
rpm_src_data:
|
||||
- base_url: http://download.opensuse.org/source/
|
||||
releases:
|
||||
- tumbleweed
|
||||
- jump/15.2
|
||||
- leap/15.0-Current
|
||||
- leap/15.0
|
||||
- leap/15.1
|
||||
- leap/15.2
|
||||
- leap/15.3
|
||||
- leap/15.4
|
||||
- leap/15.5
|
||||
- leap/42.2
|
||||
- leap/42.3-Current
|
||||
- leap/42.3
|
||||
components:
|
||||
- oss
|
||||
- non-oss
|
||||
index_url_templates:
|
||||
- $base_url/distribution/$release/repo/$component/
|
||||
- $base_url/distribution/$release/repo/$component/suse/
|
||||
- $base_url/$release/repo/$component/
|
156
swh/lister/rpm/config/oracle.yml
Normal file
156
swh/lister/rpm/config/oracle.yml
Normal file
|
@ -0,0 +1,156 @@
|
|||
# RPM lister parameters to process Oracle Linux source packages
|
||||
|
||||
url: https://www.oracle.com/linux
|
||||
instance: OracleLinux
|
||||
rpm_src_data:
|
||||
- base_url: https://yum.oracle.com/repo/EnterpriseLinux/
|
||||
releases:
|
||||
- EL5
|
||||
components:
|
||||
- addons
|
||||
- oracle_addons
|
||||
- unsupported
|
||||
- 0/base
|
||||
- 1/base
|
||||
- 2/base
|
||||
- 3/base
|
||||
- 4/base
|
||||
- 5/base
|
||||
|
||||
index_url_templates:
|
||||
- $base_url/$release/$component/x86_64
|
||||
|
||||
- base_url: https://yum.oracle.com/repo/OracleLinux/
|
||||
releases:
|
||||
- OL5
|
||||
- OL6
|
||||
- OL7
|
||||
- OL8
|
||||
- OL9
|
||||
components:
|
||||
- 0/base
|
||||
- 0/baseos/base
|
||||
- 1/base
|
||||
- 1/baseos/base
|
||||
- 10/base
|
||||
- 11/base
|
||||
- 2/base
|
||||
- 2/baseos/base
|
||||
- 3/base
|
||||
- 3/baseos/base
|
||||
- 4/base
|
||||
- 4/baseos/base
|
||||
- 4/security/validation
|
||||
- 5/base
|
||||
- 5/baseos/base
|
||||
- 6/base
|
||||
- 6/baseos/base
|
||||
- 7/base
|
||||
- 7/baseos/base
|
||||
- 8/base
|
||||
- 8/baseos/base
|
||||
- 8/security/validation
|
||||
- 9/base
|
||||
- MODRHCK
|
||||
- MySQL
|
||||
- MySQL56
|
||||
- MySQL57_community
|
||||
- MySQL80/community
|
||||
- MySQL80/connectors/community
|
||||
- MySQL80/tools/community
|
||||
- MySQL80_community
|
||||
- RDMA
|
||||
- SoftwareCollections
|
||||
- UEK/latest
|
||||
- UEKR3
|
||||
- UEKR3/latest
|
||||
- UEKR3_OFED20
|
||||
- UEKR4
|
||||
- UEKR4/OFED
|
||||
- UEKR4/archive
|
||||
- UEKR5
|
||||
- UEKR5/RDMA
|
||||
- UEKR5/archive
|
||||
- UEKR6
|
||||
- UEKR6/RDMA
|
||||
- UEKR7
|
||||
- UEKR7/RDMA
|
||||
- addons
|
||||
- appstream
|
||||
- appstream/developer
|
||||
- automation2
|
||||
- baseos/developer
|
||||
- baseos/latest
|
||||
- beta
|
||||
- ceph
|
||||
- ceph30
|
||||
- codeready/builder
|
||||
- codeready/builder/developer
|
||||
- developer
|
||||
- developer/EPEL
|
||||
- developer/EPEL/modular
|
||||
- developer/UEKR5
|
||||
- developer/UEKR6
|
||||
- developer/UEKR7
|
||||
- developer/golang117
|
||||
- developer/golang118
|
||||
- developer/golang119
|
||||
- developer/kvm/utils
|
||||
- developer/nodejs12
|
||||
- developer/olcne
|
||||
- developer/php74
|
||||
- developer_EPEL
|
||||
- developer_gluster310
|
||||
- developer_gluster312
|
||||
- distro/builder
|
||||
- gluster/appstream
|
||||
- gluster312
|
||||
- gluster41
|
||||
- gluster5
|
||||
- gluster6
|
||||
- gluster8
|
||||
- kvm/appstream
|
||||
- kvm/utils
|
||||
- latest
|
||||
- latest/archive
|
||||
- leapp
|
||||
- ofed_UEK
|
||||
- olcne
|
||||
- olcne11
|
||||
- olcne12
|
||||
- olcne13
|
||||
- olcne14
|
||||
- olcne15
|
||||
- olcne16
|
||||
- openstack10
|
||||
- openstack21
|
||||
- openstack30
|
||||
- openstack40
|
||||
- openstack40_extras
|
||||
- openstack50
|
||||
- openstack50_extras
|
||||
- optional
|
||||
- optional/archive
|
||||
- optional/beta
|
||||
- oracle/instantclient
|
||||
- oracle/instantclient21
|
||||
- oraclelinuxmanager210/client
|
||||
- oraclelinuxmanager210/server
|
||||
- ovirt42
|
||||
- ovirt42/extras
|
||||
- ovirt43
|
||||
- ovirt43/extras
|
||||
- ovirt44
|
||||
- ovirt44/extras
|
||||
- security/validation
|
||||
- spacewalk210/client
|
||||
- spacewalk210/server
|
||||
- spacewalk24/client
|
||||
- spacewalk24/server
|
||||
- spacewalk26/client
|
||||
- spacewalk26/server
|
||||
- spacewalk27/client
|
||||
- spacewalk27/server
|
||||
|
||||
index_url_templates:
|
||||
- $base_url/$release/$component/x86_64
|
38
swh/lister/rpm/config/rockylinux.yml
Normal file
38
swh/lister/rpm/config/rockylinux.yml
Normal file
|
@ -0,0 +1,38 @@
|
|||
# RPM lister parameters to process Rocky Linux source packages
|
||||
|
||||
url: https://rockylinux.org
|
||||
instance: RockyLinux
|
||||
rpm_src_data:
|
||||
- base_url: https://download.rockylinux.org/
|
||||
releases:
|
||||
- "8.3"
|
||||
- "8.4"
|
||||
- "8.4-RC1"
|
||||
- "8.5"
|
||||
- "8.6"
|
||||
- "8.7"
|
||||
- "8.8"
|
||||
- "9.0"
|
||||
- "9.1"
|
||||
- "9.2"
|
||||
components:
|
||||
- AppStream
|
||||
- BaseOS
|
||||
- Devel
|
||||
- HighAvailability
|
||||
- Minimal
|
||||
- PowerTools
|
||||
- ResilientStorage
|
||||
- CRB
|
||||
- NFV
|
||||
- RT
|
||||
- SAP
|
||||
- SAPHANA
|
||||
- devel
|
||||
- extras
|
||||
- plus
|
||||
- nfv
|
||||
- rockyrpi
|
||||
index_url_templates:
|
||||
- $base_url/vault/rocky/$release/$component/source/tree/
|
||||
- $base_url/pub/rocky/$release/$component/source/tree/
|
314
swh/lister/rpm/lister.py
Normal file
314
swh/lister/rpm/lister.py
Normal file
|
@ -0,0 +1,314 @@
|
|||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from itertools import product
|
||||
import logging
|
||||
from string import Template
|
||||
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import repomd
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
from ..pattern import Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Type aliases (all plain ``str``) naming the role a string plays in this module
Release = str
Component = str
PkgName = str
PkgVersion = str
RPMOrigin = str

# ``None`` is a sentinel page: ``RPMLister.get_pages`` yields it once after all
# repositories were processed, and ``RPMLister.get_origins_from_page`` then
# emits the origins accumulated from the previous pages.
RPMPageType = Optional[Tuple[Release, Component, repomd.Repo]]
"""Each page is a list of packages for a given (release, component) pair
from a Red Hat based distribution."""
|
||||
|
||||
|
||||
class RPMSourceData(TypedDict):
    """Dictionary holding relevant data for listing RPM source packages.

    See content of the lister config directory to get examples of RPM
    source data for famous RedHat based distributions.
    """

    base_url: str
    """Base URL of a RPM repository"""
    releases: List[Release]
    """List of release identifiers for a Red Hat based distribution"""
    components: List[Component]
    """List of components for a Red Hat based distribution"""
    index_url_templates: List[str]
    """List of URL templates to discover source packages metadata, the
    following variables can be substituted in them: ``base_url``, ``release``
    and ``component``, see :class:`string.Template` for more details about the
    format. The generated URLs must target directories containing a sub-directory
    named ``repodata``, which contains packages metadata, in order to be
    successfully processed by the lister."""
|
||||
|
||||
|
||||
def _get_last_modified(pkg: repomd.Package) -> datetime:
    """Get timezone aware last modified time in UTC from RPM package metadata.

    Args:
        pkg: package entry parsed by ``repomd``

    Returns:
        The value of the ``build`` attribute of the package ``common:time``
        element, interpreted as a POSIX timestamp, as an aware UTC datetime.
    """
    ts = pkg._element.find("common:time", namespaces=repomd._ns).get("build")
    # build the aware datetime directly: datetime.utcfromtimestamp() is
    # deprecated since Python 3.12 and yields the same instant once tagged UTC
    return datetime.fromtimestamp(int(ts), tz=timezone.utc)
|
||||
|
||||
|
||||
def _get_checksums(pkg: repomd.Package) -> Dict[str, str]:
    """Return the checksum of an rpm archive as a ``{type: digest}`` mapping.

    The checksum type is read from the ``type`` attribute of the package
    ``common:checksum`` element and the digest from its text; the type name
    ``sha`` is reported as ``sha1``.
    """
    checksum_elt = pkg._element.find("common:checksum", namespaces=repomd._ns)
    algo = checksum_elt.get("type")
    return {"sha1" if algo == "sha" else algo: checksum_elt.text}
|
||||
|
||||
|
||||
@dataclass
class RPMListerState:
    """Persistent state of the RPM lister, used for incremental listing"""

    package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict)
    """Mapping from each package name to the set of versions that were found
    during the last listing"""
|
||||
|
||||
|
||||
class RPMLister(Lister[RPMListerState, RPMPageType]):
    """
    List source packages for a Red Hat based linux distribution.

    The lister creates a snapshot for each package from all its available versions.

    In incremental mode, only packages with different snapshot since the last listing
    operation will be sent to the scheduler that will create loading tasks to archive
    newly found source code.

    Args:
        scheduler: instance of SchedulerInterface
        url: Red Hat based distribution info URL
        instance: name of Red Hat based distribution
        rpm_src_data: list of dictionaries holding data required to list RPM source packages,
            see examples in the config directory.
        incremental: if :const:`True`, only packages with new versions are sent to the
            scheduler when relisting
        max_origins_per_page: forwarded unchanged to the base lister constructor
        max_pages: forwarded unchanged to the base lister constructor
        enable_origins: forwarded unchanged to the base lister constructor
    """

    LISTER_NAME = "rpm"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        url: str,
        instance: str,
        rpm_src_data: List[RPMSourceData],
        incremental: bool = False,
        max_origins_per_page: Optional[int] = None,
        max_pages: Optional[int] = None,
        enable_origins: bool = True,
    ):
        super().__init__(
            scheduler=scheduler,
            url=url,
            instance=instance,
            credentials={},
            max_origins_per_page=max_origins_per_page,
            max_pages=max_pages,
            enable_origins=enable_origins,
        )

        self.rpm_src_data = rpm_src_data
        self.incremental = incremental

        # origins built while processing pages, indexed by origin URL
        self.listed_origins: Dict[RPMOrigin, ListedOrigin] = {}
        # URLs of the origins to yield once all pages were processed
        self.origins_to_send: Set[RPMOrigin] = set()
        # versions found during the current listing, per package name
        self.package_versions: Dict[PkgName, Set[PkgVersion]] = {}

    def state_from_dict(self, d: Dict[str, Any]) -> RPMListerState:
        """Build the lister state from its serialized form, turning each list of
        versions back into a set."""
        return RPMListerState(package_versions={k: set(v) for k, v in d.items()})

    def state_to_dict(self, state: RPMListerState) -> Dict[str, Any]:
        """Serialize the lister state, turning each set of versions into a list."""
        return {k: list(v) for k, v in state.package_versions.items()}

    def repo_request(
        self,
        index_url_template: Template,
        base_url: str,
        release: Release,
        component: Component,
    ) -> Optional[repomd.Repo]:
        """Return parsed packages for a given distribution release and component.

        Args:
            index_url_template: template of the URL of the directory expected to
                contain the repository metadata
            base_url: value substituted for ``$base_url``, any trailing slash is
                stripped first
            release: value substituted for ``$release``
            component: value substituted for ``$component``

        Returns:
            The repository loaded by ``repomd``, or :const:`None` when loading
            the metadata from the generated URL failed for any reason.

        Raises:
            KeyError: if the template references a variable other than
                ``base_url``, ``release`` or ``component``
        """

        index_url = index_url_template.substitute(
            base_url=base_url.rstrip("/"), release=release, component=component
        )

        try:
            repo = repomd.load(index_url)  # raises an error if no repomd.xml is found
        except Exception:
            # a failure is expected for most generated URLs as all templates are
            # tried for each (release, component) pair
            logger.debug("Repository metadata not found at URL %s", index_url)
            return None
        else:
            logger.debug(
                "Fetched metadata from url: %s, found %d packages", index_url, len(repo)
            )
            return repo

    def get_pages(self) -> Iterator[RPMPageType]:
        """Return an iterator on parsed rpm packages, one page per (release, component) pair.

        A final :const:`None` page is yielded once every repository was tried, it
        signals :meth:`get_origins_from_page` to emit the collected origins.
        """
        for rpm_src_data in self.rpm_src_data:
            index_url_templates = [
                Template(index_url_template)
                for index_url_template in rpm_src_data["index_url_templates"]
            ]
            # try all possible package repository URLs for each (release, component) pair
            for release, component, index_url_template in product(
                rpm_src_data["releases"],
                rpm_src_data["components"],
                index_url_templates,
            ):
                repo = self.repo_request(
                    index_url_template,
                    rpm_src_data["base_url"],
                    release,
                    component,
                )
                if repo is not None:
                    # valid package repository found, yield page
                    yield (release, component, repo)

        yield None

    def origin_url_for_package(self, package_name: PkgName) -> RPMOrigin:
        """Return the origin url for the given package."""
        # TODO: Use a better origin URL before deploying the lister to production
        # https://gitlab.softwareheritage.org/swh/devel/swh-model/-/issues/4632
        return f"rpm://{self.instance}/packages/{package_name}"

    def get_origins_from_page(self, page: RPMPageType) -> Iterator[ListedOrigin]:
        """Convert a page of rpm package sources into an iterator of ListedOrigin.

        Origins are only yielded for the final :const:`None` page; for any other
        page, the packages it contains are merged into the origins collected so far.
        """
        assert self.lister_obj.id is not None

        if page is None:
            # all pages processed, yield listed origins
            for origin_url in self.origins_to_send:
                yield self.listed_origins[origin_url]
            return

        release, component, repo = page

        logger.debug(
            "Listing %s release %s component %s from repository metadata located at %s",
            self.instance,
            release,
            component,
            repo.baseurl,
        )

        origins_to_send: Set[RPMOrigin] = set()
        # origins listed for the first time in this run while processing this page
        first_seen_origins: Set[RPMOrigin] = set()

        # iterate on each package's metadata
        for pkg_metadata in repo:

            if pkg_metadata.arch != "src":
                # not a source package, skip it
                continue

            # extract package metadata
            package_name = pkg_metadata.name

            # we extract the intrinsic version of the package for the rpm loader
            # to avoid creating different releases targeting the same directory
            # 2.12-10.el8 => 2.12-10
            package_version_split = pkg_metadata.vr.rsplit("-", maxsplit=1)
            package_version = "-".join(
                [
                    package_version_split[0],
                    package_version_split[1].split(".", maxsplit=1)[0],
                ]
            )

            # create package version key as expected by the rpm loader
            package_version_key = f"{release}/{component}/{package_version}"

            package_build_time = _get_last_modified(pkg_metadata)
            package_download_url = urljoin(
                repo.baseurl.rstrip("/") + "/", pkg_metadata.location
            )
            checksums = _get_checksums(pkg_metadata)

            # build origin url
            origin_url = self.origin_url_for_package(package_name)

            # this is the first time a package is listed
            if origin_url not in self.listed_origins:
                # create a ListedOrigin object for it that can be later
                # updated with new package versions info
                self.listed_origins[origin_url] = ListedOrigin(
                    lister_id=self.lister_obj.id,
                    url=origin_url,
                    visit_type="rpm",
                    extra_loader_arguments={"packages": {}},
                    last_update=package_build_time,
                )

                # init set that will contain all listed package versions
                self.package_versions[package_name] = set()
                first_seen_origins.add(origin_url)

            # origins will be yielded when all pages processed
            origins_to_send.add(origin_url)

            # update package metadata in parameter that will be provided
            # to the rpm loader
            self.listed_origins[origin_url].extra_loader_arguments["packages"][
                package_version_key
            ] = {
                "name": package_name,
                "version": package_version,
                "url": package_download_url,
                "build_time": package_build_time.isoformat(),
                "checksums": checksums,
            }

            last_update = self.listed_origins[origin_url].last_update
            if last_update is not None and package_build_time > last_update:
                self.listed_origins[origin_url].last_update = package_build_time

            # add package version key to the set of found versions
            self.package_versions[package_name].add(package_version_key)

            # package has already been listed during a previous listing process
            if self.incremental and package_name in self.state.package_versions:
                new_versions = (
                    self.package_versions[package_name]
                    - self.state.package_versions[package_name]
                )
                # no new versions so far, no need to send the origin to the scheduler
                if not new_versions:
                    origins_to_send.remove(origin_url)

        # only count first seen origins that are still going to be sent: in
        # incremental mode some may have been removed above, which would
        # otherwise make the "new versions" figure below negative
        new_origins_count = len(origins_to_send & first_seen_origins)

        logger.debug(
            "Found %s packages to update (%s new ones and %s packages with new versions).",
            len(origins_to_send),
            new_origins_count,
            len(origins_to_send) - new_origins_count,
        )
        logger.debug(
            "Current total number of listed source packages is equal to %s.",
            len(self.listed_origins),
        )

        self.origins_to_send.update(origins_to_send)

    def finalize(self):
        """Record the versions found during this listing as the new lister state
        when running in incremental mode."""
        if self.incremental:
            # set mapping between listed package names and versions as lister state
            self.state.package_versions = self.package_versions
            self.updated = len(self.listed_origins) > 0
|
28
swh/lister/rpm/tasks.py
Normal file
28
swh/lister/rpm/tasks.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
# Copyright (C) 2022-2023 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from celery import shared_task
|
||||
|
||||
from .lister import RPMLister
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".FullRPMLister")
def list_rpm_full(**lister_args) -> Dict[str, int]:
    """Run a full listing of the source packages of a Red Hat based
    distribution and return the listing statistics as a dictionary."""
    stats = RPMLister.from_configfile(**lister_args).run()
    return stats.dict()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".IncrementalRPMLister")
def list_rpm_incremental(**lister_args) -> Dict[str, int]:
    """Run an incremental listing of the source packages of a Red Hat based
    distribution and return the listing statistics as a dictionary."""
    stats = RPMLister.from_configfile(**lister_args, incremental=True).run()
    return stats.dict()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".ping")
def _ping() -> str:
    """Trivial task that always answers ``"OK"``."""
    reply = "OK"
    return reply
|
0
swh/lister/rpm/tests/__init__.py
Normal file
0
swh/lister/rpm/tests/__init__.py
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,55 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
|
||||
<revision>1499286311</revision>
|
||||
<data type="primary">
|
||||
<checksum type="sha256">4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3</checksum>
|
||||
<open-checksum type="sha256">db616ad8e4219e23dfc05cd515e017cdc0d59144689ac606951fa42cbb06ae65</open-checksum>
|
||||
<location href="repodata/4f677623c24912d86848f86837d398979b5adc2a51d9a2170f11fe42a257f3d3-primary.xml.gz"/>
|
||||
<timestamp>1499286305</timestamp>
|
||||
<size>5425131</size>
|
||||
<open-size>30064034</open-size>
|
||||
</data>
|
||||
<data type="filelists">
|
||||
<checksum type="sha256">17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c</checksum>
|
||||
<open-checksum type="sha256">7caabd1205a72d26422756211dcd536336cef643f7f73eb15a470b02ff09a194</open-checksum>
|
||||
<location href="repodata/17296af99a4b80bc67fccabe71ecefa02b76e8409372d936c054b8c9de312b6c-filelists.xml.gz"/>
|
||||
<timestamp>1499286305</timestamp>
|
||||
<size>1650273</size>
|
||||
<open-size>6419422</open-size>
|
||||
</data>
|
||||
<data type="other">
|
||||
<checksum type="sha256">8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb</checksum>
|
||||
<open-checksum type="sha256">786b8d4fa759f0ade3eaab1bde390d12c950dfe217eda1773400f3a3d461522b</open-checksum>
|
||||
<location href="repodata/8f1ed139aeaa57f5bc280ce97b82f690e4008c122b4793791ca18e513268b6eb-other.xml.gz"/>
|
||||
<timestamp>1499286305</timestamp>
|
||||
<size>4396102</size>
|
||||
<open-size>33165783</open-size>
|
||||
</data>
|
||||
<data type="primary_db">
|
||||
<checksum type="sha256">1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457</checksum>
|
||||
<open-checksum type="sha256">dc8dbac072ac1412f0ecface57fa57c5ddcac14acc880fe9b467164be733e963</open-checksum>
|
||||
<location href="repodata/1d2c0be48c35e55669b410cb4dbe767ae4850b4c610e95ca9aee67f7eb31e457-primary.sqlite.bz2"/>
|
||||
<timestamp>1499286309</timestamp>
|
||||
<size>7071217</size>
|
||||
<open-size>26177536</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="filelists_db">
|
||||
<checksum type="sha256">5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11</checksum>
|
||||
<open-checksum type="sha256">b293d51dd4e6eb4128e40b6ce228c62b169b1d47be535e56f69b8ad622c4a6ca</open-checksum>
|
||||
<location href="repodata/5e1259759b9bedefc1ff14b81760524841402776e6c1b33014f4f5d6feb40d11-filelists.sqlite.bz2"/>
|
||||
<timestamp>1499286307</timestamp>
|
||||
<size>2227395</size>
|
||||
<open-size>5529600</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="other_db">
|
||||
<checksum type="sha256">f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292</checksum>
|
||||
<open-checksum type="sha256">3f5d4619dcabe945b773c1c98ea40b8ead53340291bd504ab3faabfc7b57bb99</open-checksum>
|
||||
<location href="repodata/f6b30bdfe96d2137542704288de1345c01ea14397eb187126d4474648bad5292-other.sqlite.bz2"/>
|
||||
<timestamp>1499286311</timestamp>
|
||||
<size>5264843</size>
|
||||
<open-size>27930624</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
</repomd>
|
|
@ -0,0 +1,85 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<repomd xmlns="http://linux.duke.edu/metadata/repo" xmlns:rpm="http://linux.duke.edu/metadata/rpm">
|
||||
<revision>1651698851</revision>
|
||||
<data type="primary">
|
||||
<checksum type="sha256">42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c</checksum>
|
||||
<open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
|
||||
<location href="repodata/42155056c6d7b1f0e5437bb2a92c48e6d21a02ee8f09acc726e705c26e960a3c-primary.xml.gz"/>
|
||||
<timestamp>1651698827</timestamp>
|
||||
<size>7144060</size>
|
||||
<open-size>45898728</open-size>
|
||||
</data>
|
||||
<data type="filelists">
|
||||
<checksum type="sha256">fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92</checksum>
|
||||
<open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
|
||||
<location href="repodata/fc915adcdf5710f9f80dfffcec8f03088f09cf80fbc9c801d5a8f45f1f31bb92-filelists.xml.gz"/>
|
||||
<timestamp>1651698827</timestamp>
|
||||
<size>1934835</size>
|
||||
<open-size>7458268</open-size>
|
||||
</data>
|
||||
<data type="other">
|
||||
<checksum type="sha256">461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09</checksum>
|
||||
<open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
|
||||
<location href="repodata/461db9fa87e564d75d74c0dfbf006ea5d18ed646d4cb8dee1c69a4d95dd08d09-other.xml.gz"/>
|
||||
<timestamp>1651698827</timestamp>
|
||||
<size>3779969</size>
|
||||
<open-size>33166564</open-size>
|
||||
</data>
|
||||
<data type="primary_db">
|
||||
<checksum type="sha256">ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74</checksum>
|
||||
<open-checksum type="sha256">c752f5132f2cc5f4f137dade787154316f9503ae816212b8fabf5733cc2d344d</open-checksum>
|
||||
<location href="repodata/ac60dd254bfc7557eb646a116bf8083b49fee8e942e1ef50dff7f74004897e74-primary.sqlite.xz"/>
|
||||
<timestamp>1651698851</timestamp>
|
||||
<size>9058624</size>
|
||||
<open-size>41562112</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="filelists_db">
|
||||
<checksum type="sha256">1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8</checksum>
|
||||
<open-checksum type="sha256">e9b5c17e6004a78d20146aa54fa5ac93a01f4f2a95117588d649e92cfc008473</open-checksum>
|
||||
<location href="repodata/1a279b88531d9c2e24c0bfc9a0d6b4357d70301c24fa42f649c726ed1af1d6a8-filelists.sqlite.xz"/>
|
||||
<timestamp>1651698834</timestamp>
|
||||
<size>1809496</size>
|
||||
<open-size>6471680</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="other_db">
|
||||
<checksum type="sha256">850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f</checksum>
|
||||
<open-checksum type="sha256">d13c6da8f7ad2c9060fd5b811b86facc9e926ec9273c0e135c4fe1110f784cdc</open-checksum>
|
||||
<location href="repodata/850ad17efdebe5f9ccbef03c8aec4e7589bb6a1ca9a6249578968d60ad094a4f-other.sqlite.xz"/>
|
||||
<timestamp>1651698838</timestamp>
|
||||
<size>4285108</size>
|
||||
<open-size>27897856</open-size>
|
||||
<database_version>10</database_version>
|
||||
</data>
|
||||
<data type="primary_zck">
|
||||
<checksum type="sha256">fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b</checksum>
|
||||
<open-checksum type="sha256">a5841e7086be579d58e2dbb7628caebba32d9defa85739455d518bfaf90e39b0</open-checksum>
|
||||
<header-checksum type="sha256">2074f3da25ad0d45cf2776ad35dd22a6c63fafff319143c2f7dfefa98b99d651</header-checksum>
|
||||
<location href="repodata/fc4205cf1cca7f0c157d1aa9a1348a1742ca7df671fbf7ccccd79221d473145b-primary.xml.zck"/>
|
||||
<timestamp>1651698828</timestamp>
|
||||
<size>6030441</size>
|
||||
<open-size>45898728</open-size>
|
||||
<header-size>231</header-size>
|
||||
</data>
|
||||
<data type="filelists_zck">
|
||||
<checksum type="sha256">6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9</checksum>
|
||||
<open-checksum type="sha256">a96a4739268e250e3c3461da716472503ed5ed8b27161fec9a143d4a8ccf5767</open-checksum>
|
||||
<header-checksum type="sha256">55fc5e75acd903f01cf18328fec9c6f995bd8f80c5b085aa3e0fe116bb89e891</header-checksum>
|
||||
<location href="repodata/6c77673bb8823bf04fd4520c421fd0fc84567db9f23b8aa19f600b0688e46dd9-filelists.xml.zck"/>
|
||||
<timestamp>1651698829</timestamp>
|
||||
<size>1735208</size>
|
||||
<open-size>7458268</open-size>
|
||||
<header-size>136</header-size>
|
||||
</data>
|
||||
<data type="other_zck">
|
||||
<checksum type="sha256">c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5</checksum>
|
||||
<open-checksum type="sha256">1733c3011a0323fadac711dd25176c9934698176605c3e516b6aabb9b5775e00</open-checksum>
|
||||
<header-checksum type="sha256">93624d227c24ff4eb2332fcb038e7157e08ed051b654820def75c5511a1ce191</header-checksum>
|
||||
<location href="repodata/c87c1b085ef287ba69b1f244d3fff56fc5efc01ffd1d7c10ee22328117651cd5-other.xml.zck"/>
|
||||
<timestamp>1651698829</timestamp>
|
||||
<size>3019451</size>
|
||||
<open-size>33166564</open-size>
|
||||
<header-size>206</header-size>
|
||||
</data>
|
||||
</repomd>
|
283
swh/lister/rpm/tests/test_lister.py
Normal file
283
swh/lister/rpm/tests/test_lister.py
Normal file
|
@ -0,0 +1,283 @@
|
|||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from pathlib import Path
|
||||
from string import Template
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
from urllib3.exceptions import HTTPError
|
||||
|
||||
from swh.lister.rpm.lister import Component, Release, RPMLister
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
|
||||
# URL given to the lister under test as its ``url`` argument.
FEDORA_URL = "https://fedoraproject.org/"
# Value substituted for ``$base_url`` in the index URL templates below.
FEDORA_ARCHIVE_URL = "https://archives.fedoraproject.org/pub/archive/fedora/linux"

# ``string.Template`` patterns for the candidate locations of the packages
# metadata; placeholders are ``$base_url``, ``$release`` and ``$component``
# (the last pattern has no ``$component``).
FEDORA_INDEX_URL_TEMPLATES = [
    "$base_url/releases/$release/$component/source/tree/",
    "$base_url/updates/$release/$component/source/tree/",
    "$base_url/releases/$release/$component/source/SRPMS/",
    "$base_url/updates/$release/SRPMS/",
]
|
||||
|
||||
|
||||
def mock_repomd(mocker, side_effect):
    """Mocks the .xml files fetched by repomd for the next lister run.

    Args:
        mocker: pytest-mock ``mocker`` fixture
        side_effect: value assigned to the ``side_effect`` of the mocked
            response's ``read`` method; with ``unittest.mock`` an iterable
            yields one item per call and exception instances are raised
    """
    cm = mocker.MagicMock()
    cm.read.side_effect = side_effect
    # Entering the mock as a context manager hands back the same mock, so
    # ``read`` is served from ``side_effect`` in both usages.
    cm.__enter__.return_value = cm
    mocker.patch("repomd.urllib.request.urlopen").return_value = cm
|
||||
|
||||
|
||||
def mock_fedora_repomd(datadir, mocker, use_altered_fedora36=False):
    """Mocks the repomd responses for the Fedora 26 and 36 test datasets.

    Args:
        datadir: directory holding the ``archives.fedoraproject.org`` test files
        mocker: pytest-mock ``mocker`` fixture
        use_altered_fedora36: when true, serve ``primary36-altered.xml.gz``
            instead of ``primary36.xml.gz`` for the second release
    """
    repodata = [
        ["repomd26.xml", "primary26.xml.gz"],
        ["repomd36.xml", "primary36.xml.gz"],
    ]
    if use_altered_fedora36:
        repodata[1][1] = "primary36-altered.xml.gz"

    side_effect = []

    for paths in repodata:
        side_effect += [
            Path(datadir, "archives.fedoraproject.org", path).read_bytes()
            for path in paths
        ]
        # After the files of a release, queue one error per remaining index
        # URL template.
        # NOTE(review): presumably the lister probes every template for each
        # release and only the first one is meant to succeed -- confirm.
        side_effect += [HTTPError() for _ in range(len(FEDORA_INDEX_URL_TEMPLATES) - 1)]

    mock_repomd(mocker, side_effect)
|
||||
|
||||
|
||||
def rpm_repodata_url(release, component):
    """Returns the first index URL template expanded for the Fedora archive.

    Args:
        release: value substituted for ``$release``
        component: value substituted for ``$component``
    """
    return Template(FEDORA_INDEX_URL_TEMPLATES[0]).substitute(
        base_url=FEDORA_ARCHIVE_URL, release=release, component=component
    )
|
||||
|
||||
|
||||
def rpm_src_package_url(release, component, path):
    """Returns the URL of a source package located at ``path`` below the
    ``Packages/`` directory of the given release and component."""
    return f"{rpm_repodata_url(release, component)}Packages/{path}"
|
||||
|
||||
|
||||
def rpm_package_origin_url(package_name, instance="Fedora"):
    """Returns the ``rpm://{instance}/packages/{package_name}`` origin URL
    expected for a package."""
    return f"rpm://{instance}/packages/{package_name}"
|
||||
|
||||
|
||||
@pytest.fixture
def pkg_versions():
    """Expected package versions, keyed by origin URL.

    Each origin maps ``{release}/{component}/{version}`` keys to the package
    info (name, version, build time, source package URL and checksums) that
    the tests compare against the ``packages`` loader argument.
    """
    return {
        rpm_package_origin_url("0install"): {
            "26/Everything/2.11-4": {
                "name": "0install",
                "version": "2.11-4",
                "build_time": "2017-02-10T04:59:31+00:00",
                "url": rpm_src_package_url(
                    release="26",
                    component="Everything",
                    path="0/0install-2.11-4.fc26.src.rpm",
                ),
                "checksums": {
                    # note: we intentionally altered the original
                    # primary26.xml file to test sha1 usage
                    "sha1": "a6fdef5d1026dea208eeeba148f55ac2f545989b",
                },
            }
        },
        rpm_package_origin_url("0xFFFF"): {
            "26/Everything/0.3.9-15": {
                "name": "0xFFFF",
                "version": "0.3.9-15",
                "build_time": "2017-02-10T05:01:53+00:00",
                "url": rpm_src_package_url(
                    release="26",
                    component="Everything",
                    path="0/0xFFFF-0.3.9-15.fc26.src.rpm",
                ),
                "checksums": {
                    "sha256": "96f9c163c0402d2b30e5343c8397a6d50e146c85a446804396b119ef9698231f"
                },
            },
            "36/Everything/0.9-4": {
                "name": "0xFFFF",
                "version": "0.9-4",
                "build_time": "2022-01-19T19:13:53+00:00",
                "url": rpm_src_package_url(
                    release="36",
                    component="Everything",
                    path="0/0xFFFF-0.9-4.fc36.src.rpm",
                ),
                "checksums": {
                    "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
                },
            },
        },
        rpm_package_origin_url("2ping"): {
            "36/Everything/4.5.1-2": {
                "name": "2ping",
                "version": "4.5.1-2",
                "build_time": "2022-01-19T19:12:21+00:00",
                "url": rpm_src_package_url(
                    release="36",
                    component="Everything",
                    path="2/2ping-4.5.1-2.fc36.src.rpm",
                ),
                "checksums": {
                    "sha256": "2ce028d944ebea1cab8c6203c9fed882792478b42fc34682b886a9db16e9de28"
                },
            }
        },
    }
|
||||
|
||||
|
||||
def run_lister(
    swh_scheduler: SchedulerInterface,
    releases: List[Release],
    components: List[Component],
    pkg_versions: dict,
    origin_count: int,
    incremental: bool = False,
    updated: bool = True,
):
    """Runs the lister and tests that the listed origins are correct.

    Args:
        swh_scheduler: scheduler the lister records its origins and state in
        releases: release identifiers to list
        components: software components to list
        pkg_versions: expected ``packages`` loader argument of every origin
            known to the scheduler, keyed by origin URL
        origin_count: number of origins the run is expected to report
        incremental: whether to run the lister in incremental mode
        updated: whether packages metadata are expected to be found; drives
            the expected page count and the expected ``lister.updated`` value
    """
    lister = RPMLister(
        scheduler=swh_scheduler,
        url=FEDORA_URL,
        instance="Fedora",
        rpm_src_data=[
            {
                "base_url": FEDORA_ARCHIVE_URL,
                "releases": releases,
                "components": components,
                "index_url_templates": FEDORA_INDEX_URL_TEMPLATES,
            }
        ],
        incremental=incremental,
    )

    stats = lister.run()
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    lister_state = lister.get_state_from_scheduler()
    # Expected state: last segment of each origin URL (the package name)
    # mapped to the set of its version keys.
    state_pkg_versions = {k.split("/")[-1]: set(v) for k, v in pkg_versions.items()}

    # One component from each release plus extra null page to flush origins
    assert stats.pages == (len(releases) + 1 if updated else 1)
    assert stats.origins == origin_count

    assert {
        o.url: o.extra_loader_arguments["packages"] for o in scheduler_origins
    } == pkg_versions

    # NOTE(review): both assertions are assumed to belong to the incremental
    # branch -- confirm against the upstream test module.
    if incremental:
        assert lister_state.package_versions == state_pkg_versions
        assert lister.updated == updated
|
||||
|
||||
|
||||
# NOTE(review): ``status_code`` is never used in the test body, so the three
# parametrized cases are identical; either drop the parametrization or make
# the mocked error depend on the code.
@pytest.mark.parametrize("status_code", [400, 404, 500])
def test_fedora_lister_http_error(swh_scheduler, mocker, status_code):
    """
    Simulates handling of HTTP Errors while fetching packages for fedora releases.
    """

    release = "18"
    component = "Everything"

    # One error per index URL template: no metadata can be fetched at all.
    mock_repomd(
        mocker,
        side_effect=[HTTPError() for _ in range(len(FEDORA_INDEX_URL_TEMPLATES))],
    )

    run_lister(
        swh_scheduler,
        releases=[release],
        components=[component],
        pkg_versions={},
        origin_count=0,
        updated=False,
    )
|
||||
|
||||
|
||||
def test_full_rpm_lister(
    swh_scheduler,
    mocker,
    datadir,
    pkg_versions,
):
    """
    Simulates a full listing of packages for fedora releases.
    """

    mock_fedora_repomd(datadir, mocker)
    # Three origins are expected, one per package in ``pkg_versions``.
    run_lister(
        swh_scheduler,
        releases=["26", "36"],
        components=["Everything"],
        pkg_versions=pkg_versions,
        origin_count=3,
    )
|
||||
|
||||
|
||||
def test_incremental_rpm_lister(
    swh_scheduler,
    mocker,
    datadir,
    pkg_versions,
):
    """
    Simulates an incremental listing of packages for fedora releases.
    """

    # First run
    mock_fedora_repomd(datadir, mocker)
    run_lister(
        swh_scheduler,
        releases=["26", "36"],
        components=["Everything"],
        pkg_versions=pkg_versions,
        origin_count=3,
        incremental=True,
    )
    # Second run (no updates)
    mock_fedora_repomd(datadir, mocker)
    run_lister(
        swh_scheduler,
        releases=["26", "36"],
        components=["Everything"],
        pkg_versions=pkg_versions,
        origin_count=0,
        incremental=True,
    )

    # Use an altered version of primary36.xml in which we updated the version
    # of package 0xFFFF to 0.10:
    mock_fedora_repomd(datadir, mocker, use_altered_fedora36=True)
    # Add new version to the set of expected pkg versions:
    pkg_versions[rpm_package_origin_url("0xFFFF")].update(
        {
            "36/Everything/0.10-4": {
                "name": "0xFFFF",
                "version": "0.10-4",
                "build_time": "2022-01-19T19:13:53+00:00",
                "url": rpm_src_package_url(
                    release="36",
                    component="Everything",
                    path="0/0xFFFF-0.10-4.fc36.src.rpm",
                ),
                "checksums": {
                    "sha256": "45eee8d990d502324ae665233c320b8a5469c25d735f1862e094c1878d6ff2cd"
                },
            }
        }
    )

    # Third run (0xFFFF in fedora36 component got updated and it needs to be listed)
    run_lister(
        swh_scheduler,
        releases=["26", "36"],
        components=["Everything"],
        pkg_versions=pkg_versions,
        origin_count=1,
        incremental=True,
    )
|
67
swh/lister/rpm/tests/test_tasks.py
Normal file
67
swh/lister/rpm/tests/test_tasks.py
Normal file
|
@ -0,0 +1,67 @@
|
|||
# Copyright (C) 2022-2023 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
||||
from .test_lister import FEDORA_ARCHIVE_URL, FEDORA_INDEX_URL_TEMPLATES, FEDORA_URL
|
||||
|
||||
|
||||
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
    """Checks that the ``ping`` task of the RPM lister completes and returns "OK"."""
    res = swh_scheduler_celery_app.send_task("swh.lister.rpm.tasks.ping")
    assert res
    res.wait()
    assert res.successful()
    assert res.result == "OK"
|
||||
|
||||
|
||||
# Keyword arguments sent to the listing tasks below; the tests patch the
# lister class and only check that these values are passed through unchanged.
LISTER_KWARGS = dict(
    url=FEDORA_URL,
    instance="fedora",
    rpm_src_data=[
        {
            "base_url": FEDORA_ARCHIVE_URL,
            "releases": ["36"],
            "components": ["Everything"],
            "index_url_templates": FEDORA_INDEX_URL_TEMPLATES,
        }
    ],
)
|
||||
|
||||
|
||||
def test_full_listing(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker):
    """Checks that the ``FullRPMLister`` task builds the lister from the task
    arguments and runs it once."""
    lister = mocker.patch("swh.lister.rpm.tasks.RPMLister")
    # The patched class doubles as the instance returned by ``from_configfile``.
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    res = swh_scheduler_celery_app.send_task(
        "swh.lister.rpm.tasks.FullRPMLister",
        kwargs=LISTER_KWARGS,
    )
    assert res
    res.wait()
    assert res.successful()

    lister.from_configfile.assert_called_once_with(**LISTER_KWARGS)
    lister.run.assert_called_once_with()
|
||||
|
||||
|
||||
def test_incremental_listing(
    swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker
):
    """Checks that the ``IncrementalRPMLister`` task builds the lister from the
    task arguments plus ``incremental=True`` and runs it once."""
    lister = mocker.patch("swh.lister.rpm.tasks.RPMLister")
    # The patched class doubles as the instance returned by ``from_configfile``.
    lister.from_configfile.return_value = lister
    lister.run.return_value = ListerStats(pages=10, origins=500)

    res = swh_scheduler_celery_app.send_task(
        "swh.lister.rpm.tasks.IncrementalRPMLister",
        kwargs=LISTER_KWARGS,
    )
    assert res
    res.wait()
    assert res.successful()

    lister.from_configfile.assert_called_once_with(**LISTER_KWARGS, incremental=True)
    lister.run.assert_called_once_with()
|
Loading…
Add table
Add a link
Reference in a new issue