debian: Reimplement lister using new Lister API
Port the debian lister to the `swh.lister.pattern.Lister` API. The new implementation produces one instance of the ListedOrigin model per package, notably containing the set of parameters expected by the debian loader. The lister is also stateful, meaning that only new packages and those with newly found versions since the last listing will be returned. Closes T2979.
This commit is contained in:
parent
6cd31769c1
commit
bb0184c004
15 changed files with 732 additions and 787 deletions
|
@ -1,76 +1,16 @@
|
|||
# Copyright (C) 2019 The Software Heritage developers
|
||||
# Copyright (C) 2019-2021 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import logging
|
||||
from typing import Any, List, Mapping
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def debian_init(
    db_engine,
    override_conf: Mapping[str, Any] = {},
    distribution_name: str = "Debian",
    suites: List[str] = ["stretch", "buster", "bullseye"],
    components: List[str] = ["main", "contrib", "non-free"],
):
    """Initialize the debian data model.

    Registers the distribution when it does not exist yet, then creates one
    ``Area`` row for every ``suite/component`` combination that is not already
    registered for that distribution.

    Args:
        db_engine: SQLAlchemy manipulation database object
        override_conf: Override conf to pass to instantiate a lister
            (not read by this function)
        distribution_name: Distribution to initialize
        suites: Default suites to register with the lister
        components: Default components to register per suite

    """
    # NOTE: the list/dict defaults above are only read, never mutated, so
    # sharing them across calls is harmless here.
    from sqlalchemy.orm import sessionmaker

    from swh.lister.debian.models import Area, Distribution

    db_session = sessionmaker(bind=db_engine)()
    try:
        distrib = (
            db_session.query(Distribution)
            .filter(Distribution.name == distribution_name)
            .one_or_none()
        )

        if distrib is None:
            distrib = Distribution(
                name=distribution_name,
                type="deb",
                mirror_uri="http://deb.debian.org/debian/",
            )
            db_session.add(distrib)

        # Check the existing
        existing_area = {
            area.name
            for area in db_session.query(Area)
            .filter(Area.distribution == distrib)
            .all()
        }

        logger.debug("Area already known: %s", ", ".join(existing_area))

        # Create only the new ones
        for suite in suites:
            for component in components:
                area_name = f"{suite}/{component}"
                if area_name in existing_area:
                    logger.debug("Area '%s' already set, skipping", area_name)
                    continue
                db_session.add(Area(name=area_name, distribution=distrib))

        db_session.commit()
    finally:
        # release the session even when a query or the commit raises
        db_session.close()
|
||||
from typing import Any, Mapping
|
||||
|
||||
|
||||
def register() -> Mapping[str, Any]:
|
||||
from .lister import DebianLister
|
||||
|
||||
return {
|
||||
"models": [DebianLister.MODEL],
|
||||
"models": [],
|
||||
"lister": DebianLister,
|
||||
"task_modules": ["%s.tasks" % __name__],
|
||||
"init": debian_init,
|
||||
}
|
||||
|
|
|
@ -1,260 +1,287 @@
|
|||
# Copyright (C) 2017-2019 The Software Heritage developers
|
||||
# Copyright (C) 2017-2021 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
||||
import bz2
|
||||
from collections import defaultdict
|
||||
import datetime
|
||||
from dataclasses import dataclass, field
|
||||
import gzip
|
||||
from itertools import product
|
||||
import logging
|
||||
import lzma
|
||||
from typing import Any, Dict, Mapping, Optional
|
||||
from typing import Any, Callable, Dict, Iterator, List, Set, Tuple
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from debian.deb822 import Sources
|
||||
from requests import Response
|
||||
from sqlalchemy.orm import joinedload, load_only
|
||||
from sqlalchemy.schema import CreateTable, DropTable
|
||||
import requests
|
||||
|
||||
from swh.lister.core.lister_base import FetchError, ListerBase
|
||||
from swh.lister.core.lister_transports import ListerHttpTransport
|
||||
from swh.lister.debian.models import (
|
||||
AreaSnapshot,
|
||||
Distribution,
|
||||
DistributionSnapshot,
|
||||
Package,
|
||||
TempPackage,
|
||||
)
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
from swh.scheduler.model import ListedOrigin
|
||||
|
||||
decompressors = {
|
||||
from .. import USER_AGENT
|
||||
from ..pattern import Lister
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
decompressors: Dict[str, Callable[[Any], Any]] = {
|
||||
"gz": lambda f: gzip.GzipFile(fileobj=f),
|
||||
"bz2": bz2.BZ2File,
|
||||
"xz": lzma.LZMAFile,
|
||||
}
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
Suite = str
|
||||
Component = str
|
||||
PkgName = str
|
||||
PkgVersion = str
|
||||
DebianOrigin = str
|
||||
DebianPageType = Iterator[Sources]
|
||||
|
||||
|
||||
class DebianLister(ListerHttpTransport, ListerBase):
|
||||
MODEL = Package
|
||||
PATH_TEMPLATE = None
|
||||
@dataclass
class DebianListerState:
    """Stateful data kept by the debian lister between two listings"""

    package_versions: Dict[PkgName, Set[PkgVersion]] = field(default_factory=dict)
    """Maps each package name to the set of versions found for it during
    the last listing"""
|
||||
|
||||
|
||||
class DebianLister(Lister[DebianListerState, DebianPageType]):
|
||||
"""
|
||||
List source packages for a given debian or derivative distribution.
|
||||
|
||||
The lister will create a snapshot for each package name from all its
|
||||
available versions.
|
||||
|
||||
If a package snapshot is different from the last listing operation,
|
||||
it will be sent to the scheduler that will create a loading task
|
||||
to archive newly found source code.
|
||||
|
||||
Args:
|
||||
scheduler: instance of SchedulerInterface
|
||||
distribution: identifier of listed distribution (e.g. Debian, Ubuntu)
|
||||
mirror_url: debian package archives mirror URL
|
||||
suites: list of distribution suites to process
|
||||
components: list of package components to process
|
||||
"""
|
||||
|
||||
LISTER_NAME = "debian"
|
||||
instance = "debian"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
scheduler: SchedulerInterface,
|
||||
distribution: str = "Debian",
|
||||
date: Optional[datetime.datetime] = None,
|
||||
override_config: Mapping = {},
|
||||
mirror_url: str = "http://deb.debian.org/debian/",
|
||||
suites: List[Suite] = ["stretch", "buster", "bullseye"],
|
||||
components: List[Component] = ["main", "contrib", "non-free"],
|
||||
):
|
||||
"""Initialize the debian lister for a given distribution at a given
|
||||
date.
|
||||
|
||||
Args:
|
||||
distribution: name of the distribution (e.g. "Debian")
|
||||
date: date the snapshot is taken (defaults to now if empty)
|
||||
override_config: Override configuration (which takes precedence
|
||||
over the parameters if provided)
|
||||
|
||||
"""
|
||||
ListerHttpTransport.__init__(self, url="notused")
|
||||
ListerBase.__init__(self, override_config=override_config)
|
||||
self.distribution = override_config.get("distribution", distribution)
|
||||
self.date = override_config.get("date", date) or datetime.datetime.now(
|
||||
tz=datetime.timezone.utc
|
||||
super().__init__(
|
||||
scheduler=scheduler, url=mirror_url, instance=distribution,
|
||||
)
|
||||
|
||||
def transport_request(self, identifier) -> Response:
|
||||
"""Subvert ListerHttpTransport.transport_request, to try several
|
||||
index URIs in turn.
|
||||
# to ensure urljoin will produce valid Sources URL
|
||||
if not self.url.endswith("/"):
|
||||
self.url += "/"
|
||||
|
||||
The Debian repository format supports several compression algorithms
|
||||
across the ages, so we try several URIs.
|
||||
self.distribution = distribution
|
||||
self.suites = suites
|
||||
self.components = components
|
||||
|
||||
Once we have found a working URI, we break and set `self.decompressor`
|
||||
to the one that matched.
|
||||
self.session = requests.Session()
|
||||
self.session.headers.update({"User-Agent": USER_AGENT})
|
||||
|
||||
Returns:
|
||||
a requests Response object.
|
||||
# will hold all listed origins info
|
||||
self.listed_origins: Dict[DebianOrigin, ListedOrigin] = {}
|
||||
# will contain origin urls that have already been listed
|
||||
# in a previous page
|
||||
self.sent_origins: Set[DebianOrigin] = set()
|
||||
# will contain already listed package info that need to be sent
|
||||
# to the scheduler for update in the commit_page method
|
||||
self.origins_to_update: Dict[DebianOrigin, ListedOrigin] = {}
|
||||
# will contain the lister state after a call to run
|
||||
self.package_versions: Dict[PkgName, Set[PkgVersion]] = {}
|
||||
|
||||
Raises:
|
||||
FetchError: when all the URIs failed to be retrieved.
|
||||
"""
|
||||
response = None
|
||||
compression = None
|
||||
def state_from_dict(self, d: Dict[str, Any]) -> DebianListerState:
    """Rebuild the lister state from its dictionary form.

    Every value of ``d`` is turned into a set, keyed by the same key.
    """
    versions_by_package = {name: set(versions) for name, versions in d.items()}
    return DebianListerState(package_versions=versions_by_package)
|
||||
|
||||
for uri, compression in self.area.index_uris():
|
||||
response = super().transport_request(uri)
|
||||
def state_to_dict(self, state: "DebianListerState") -> Dict[str, Any]:
    """Convert the lister state into a plain dictionary of lists.

    Each set of versions is emitted as a sorted list so that the same state
    always produces the same dictionary (set iteration order is arbitrary).

    Args:
        state: lister state whose ``package_versions`` mapping is converted

    Returns:
        a dictionary mapping each package name to the sorted list of its
        versions
    """
    return {
        name: sorted(versions) for name, versions in state.package_versions.items()
    }
|
||||
|
||||
def debian_index_urls(
    self, suite: Suite, component: Component
) -> Iterator[Tuple[str, str]]:
    """Yield the candidate Sources file URLs for a suite and component.

    Each item is a ``(url, compression)`` tuple: the xz, bz2 and gz variants
    come first, followed by the uncompressed file paired with an empty
    compression string.
    """
    sources_url = urljoin(self.url, f"dists/{suite}/{component}/source/Sources")
    yield from ((f"{sources_url}.{ext}", ext) for ext in ("xz", "bz2", "gz"))
    yield (sources_url, "")
|
||||
|
||||
def page_request(self, suite: Suite, component: Component) -> DebianPageType:
|
||||
"""Return parsed package Sources file for a given debian suite and component."""
|
||||
for url, compression in self.debian_index_urls(suite, component):
|
||||
response = requests.get(url, stream=True)
|
||||
logging.debug("Fetched URL: %s, status code: %s", url, response.status_code)
|
||||
if response.status_code == 200:
|
||||
break
|
||||
else:
|
||||
raise FetchError("Could not retrieve index for %s" % self.area)
|
||||
self.decompressor = decompressors.get(compression)
|
||||
return response
|
||||
raise Exception(
|
||||
"Could not retrieve sources index for %s/%s", suite, component
|
||||
)
|
||||
|
||||
def request_uri(self, identifier):
|
||||
# In the overridden transport_request, we pass
|
||||
# ListerBase.transport_request() the full URI as identifier, so we
|
||||
# need to return it here.
|
||||
return identifier
|
||||
|
||||
def request_params(self, identifier) -> Dict[str, Any]:
|
||||
# Enable streaming to allow wrapping the response in the decompressor
|
||||
# in transport_response_simplified.
|
||||
params = super().request_params(identifier)
|
||||
params["stream"] = True
|
||||
return params
|
||||
|
||||
def transport_response_simplified(self, response):
|
||||
"""Decompress and parse the package index fetched in `transport_request`.
|
||||
|
||||
For each package, we "pivot" the file list entries (Files,
|
||||
Checksums-Sha1, Checksums-Sha256), to return a files dict mapping
|
||||
filenames to their checksums.
|
||||
"""
|
||||
if self.decompressor:
|
||||
data = self.decompressor(response.raw)
|
||||
decompressor = decompressors.get(compression)
|
||||
if decompressor:
|
||||
data = decompressor(response.raw)
|
||||
else:
|
||||
data = response.raw
|
||||
|
||||
for src_pkg in Sources.iter_paragraphs(data.readlines()):
|
||||
files = defaultdict(dict)
|
||||
return Sources.iter_paragraphs(data.readlines())
|
||||
|
||||
for field in src_pkg._multivalued_fields:
|
||||
if field.startswith("checksums-"):
|
||||
sum_name = field[len("checksums-") :]
|
||||
def get_pages(self) -> Iterator[DebianPageType]:
    """Yield one parsed debian package Sources file per combination of
    configured suite and component.

    The suite and component being processed are stored on the instance
    (``current_suite`` / ``current_component``) before the page is requested.
    """
    for suite, component in product(self.suites, self.components):
        log_args = (self.instance, suite, component)
        logger.debug(
            "Processing %s %s source packages info for %s component.", *log_args
        )
        self.current_suite, self.current_component = suite, component
        yield self.page_request(suite, component)
|
||||
|
||||
def origin_url_for_package(self, package_name: PkgName) -> DebianOrigin:
    """Build the ``deb://`` origin url of a package for this lister instance"""
    instance = self.instance
    return f"deb://{instance}/packages/{package_name}"
|
||||
|
||||
def get_origins_from_page(self, page: DebianPageType) -> Iterator[ListedOrigin]:
|
||||
"""Convert a page of debian package sources into an iterator of ListedOrigin.
|
||||
|
||||
Please note that the returned origins correspond to packages only
|
||||
listed for the first time in order to get an accurate origins counter
|
||||
in the statistics returned by the run method of the lister.
|
||||
|
||||
Packages already listed in another page but with different versions will
|
||||
be put in cache by the method and updated ListedOrigin objects will
|
||||
be sent to the scheduler later in the commit_page method.
|
||||
|
||||
Indeed as multiple debian suites can be processed, a similar set of
|
||||
package names can be listed for two different package source pages,
|
||||
only their version will differ, resulting in origins counted multiple
|
||||
times in lister statistics.
|
||||
"""
|
||||
assert self.lister_obj.id is not None
|
||||
|
||||
origins_to_send = {}
|
||||
self.origins_to_update = {}
|
||||
|
||||
# iterate on each package source info
|
||||
for src_pkg in page:
|
||||
# gather package files info that will be used by the debian loader
|
||||
files: Dict[str, Dict[str, Any]] = defaultdict(dict)
|
||||
for field_ in src_pkg._multivalued_fields:
|
||||
if field_.startswith("checksums-"):
|
||||
sum_name = field_[len("checksums-") :]
|
||||
else:
|
||||
sum_name = "md5sum"
|
||||
if field in src_pkg:
|
||||
for entry in src_pkg[field]:
|
||||
if field_ in src_pkg:
|
||||
for entry in src_pkg[field_]:
|
||||
name = entry["name"]
|
||||
files[name]["name"] = entry["name"]
|
||||
files[name]["size"] = int(entry["size"], 10)
|
||||
files[name][sum_name] = entry[sum_name]
|
||||
|
||||
yield {
|
||||
"name": src_pkg["Package"],
|
||||
"version": src_pkg["Version"],
|
||||
"directory": src_pkg["Directory"],
|
||||
"files": files,
|
||||
}
|
||||
# extract package name and version
|
||||
package_name = src_pkg["Package"]
|
||||
package_version = src_pkg["Version"]
|
||||
# build origin url
|
||||
origin_url = self.origin_url_for_package(package_name)
|
||||
|
||||
def inject_repo_data_into_db(self, models_list):
|
||||
"""Generate the Package entries that didn't previously exist.
|
||||
|
||||
Contrary to ListerBase, we don't actually insert the data in
|
||||
database. `schedule_missing_tasks` does it once we have the
|
||||
origin and task identifiers.
|
||||
"""
|
||||
by_name_version = {}
|
||||
temp_packages = []
|
||||
|
||||
area_id = self.area.id
|
||||
|
||||
for model in models_list:
|
||||
name = model["name"]
|
||||
version = model["version"]
|
||||
temp_packages.append(
|
||||
{"area_id": area_id, "name": name, "version": version,}
|
||||
)
|
||||
by_name_version[name, version] = model
|
||||
|
||||
# Add all the listed packages to a temporary table
|
||||
self.db_session.execute(CreateTable(TempPackage.__table__))
|
||||
self.db_session.bulk_insert_mappings(TempPackage, temp_packages)
|
||||
|
||||
def exists_tmp_pkg(db_session, model):
|
||||
return (
|
||||
db_session.query(model)
|
||||
.filter(Package.area_id == TempPackage.area_id)
|
||||
.filter(Package.name == TempPackage.name)
|
||||
.filter(Package.version == TempPackage.version)
|
||||
.exists()
|
||||
# create package version key as expected by the debian loader
|
||||
package_version_key = (
|
||||
f"{self.current_suite}/{self.current_component}/{package_version}"
|
||||
)
|
||||
|
||||
# Filter out the packages that already exist in the main Package table
|
||||
new_packages = (
|
||||
self.db_session.query(TempPackage)
|
||||
.options(load_only("name", "version"))
|
||||
.filter(~exists_tmp_pkg(self.db_session, Package))
|
||||
.all()
|
||||
)
|
||||
# this is the first time a package is listed
|
||||
if origin_url not in self.listed_origins:
|
||||
# create a ListedOrigin object for it that can be later
|
||||
# updated with new package versions info
|
||||
self.listed_origins[origin_url] = ListedOrigin(
|
||||
lister_id=self.lister_obj.id,
|
||||
url=origin_url,
|
||||
visit_type="deb",
|
||||
extra_loader_arguments={"date": None, "packages": {}},
|
||||
)
|
||||
# origin will be yielded at the end of that method
|
||||
origins_to_send[origin_url] = self.listed_origins[origin_url]
|
||||
# init set that will contain all listed package versions
|
||||
self.package_versions[package_name] = set()
|
||||
|
||||
self.old_area_packages = (
|
||||
self.db_session.query(Package)
|
||||
.filter(exists_tmp_pkg(self.db_session, TempPackage))
|
||||
.all()
|
||||
)
|
||||
# package has already been listed in a previous page or current page
|
||||
elif origin_url not in origins_to_send:
|
||||
# if package has been listed in a previous page, its new versions
|
||||
# will be added to its ListedOrigin object but the update will
|
||||
# be sent to the scheduler in the commit_page method
|
||||
self.origins_to_update[origin_url] = self.listed_origins[origin_url]
|
||||
|
||||
self.db_session.execute(DropTable(TempPackage.__table__))
|
||||
# update package versions data in parameter that will be provided
|
||||
# to the debian loader
|
||||
self.listed_origins[origin_url].extra_loader_arguments["packages"].update(
|
||||
{
|
||||
package_version_key: {
|
||||
"name": package_name,
|
||||
"version": package_version,
|
||||
"files": files,
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
added_packages = []
|
||||
for package in new_packages:
|
||||
model = by_name_version[package.name, package.version]
|
||||
# add package version key to the set of found versions
|
||||
self.package_versions[package_name].add(package_version_key)
|
||||
|
||||
added_packages.append(Package(area=self.area, **model))
|
||||
# package has already been listed during a previous listing process
|
||||
if package_name in self.state.package_versions:
|
||||
new_versions = (
|
||||
self.package_versions[package_name]
|
||||
- self.state.package_versions[package_name]
|
||||
)
|
||||
# no new versions so far, no need to send the origin to the scheduler
|
||||
if not new_versions:
|
||||
origins_to_send.pop(origin_url, None)
|
||||
self.origins_to_update.pop(origin_url, None)
|
||||
# new versions found, ensure the origin will be sent to the scheduler
|
||||
elif origin_url not in self.sent_origins:
|
||||
self.origins_to_update.pop(origin_url, None)
|
||||
origins_to_send[origin_url] = self.listed_origins[origin_url]
|
||||
|
||||
self.db_session.add_all(added_packages)
|
||||
return added_packages
|
||||
|
||||
def schedule_missing_tasks(self, models_list, added_packages):
|
||||
"""We create tasks at the end of the full snapshot processing"""
|
||||
return
|
||||
|
||||
def create_tasks_for_snapshot(self, snapshot):
|
||||
tasks = [
|
||||
snapshot.task_for_package(name, versions)
|
||||
for name, versions in snapshot.get_packages().items()
|
||||
]
|
||||
|
||||
return self.scheduler.create_tasks(tasks)
|
||||
|
||||
def run(self):
|
||||
"""Run the lister for a given (distribution, area) tuple.
|
||||
|
||||
"""
|
||||
distribution = (
|
||||
self.db_session.query(Distribution)
|
||||
.options(joinedload(Distribution.areas))
|
||||
.filter(Distribution.name == self.distribution)
|
||||
.one_or_none()
|
||||
)
|
||||
|
||||
if not distribution:
|
||||
logger.error("Distribution %s is not registered" % self.distribution)
|
||||
return {"status": "failed"}
|
||||
|
||||
if not distribution.type == "deb":
|
||||
logger.error("Distribution %s is not a Debian derivative" % distribution)
|
||||
return {"status": "failed"}
|
||||
|
||||
date = self.date
|
||||
# update already counted origins with changes since last page
|
||||
self.sent_origins.update(origins_to_send.keys())
|
||||
|
||||
logger.debug(
|
||||
"Creating snapshot for distribution %s on date %s" % (distribution, date)
|
||||
"Found %s new packages, %s packages with new versions.",
|
||||
len(origins_to_send),
|
||||
len(self.origins_to_update),
|
||||
)
|
||||
logger.debug(
|
||||
"Current total number of listed packages is equal to %s.",
|
||||
len(self.listed_origins),
|
||||
)
|
||||
|
||||
snapshot = DistributionSnapshot(date=date, distribution=distribution)
|
||||
yield from origins_to_send.values()
|
||||
|
||||
self.db_session.add(snapshot)
|
||||
def get_origins_to_update(self) -> Iterator[ListedOrigin]:
    """Yield every ListedOrigin currently stored in ``self.origins_to_update``"""
    for origin in self.origins_to_update.values():
        yield origin
|
||||
|
||||
for area in distribution.areas:
|
||||
if not area.active:
|
||||
continue
|
||||
def commit_page(self, page: DebianPageType):
    """Send to the scheduler the already listed origins for which new
    versions have been found in the current page."""
    updated_origins = self.get_origins_to_update()
    self.send_origins(updated_origins)
|
||||
|
||||
self.area = area
|
||||
|
||||
logger.debug("Processing area %s" % area)
|
||||
|
||||
_, new_area_packages = self.ingest_data(None)
|
||||
area_snapshot = AreaSnapshot(snapshot=snapshot, area=area)
|
||||
self.db_session.add(area_snapshot)
|
||||
area_snapshot.packages.extend(new_area_packages)
|
||||
area_snapshot.packages.extend(self.old_area_packages)
|
||||
|
||||
self.create_tasks_for_snapshot(snapshot)
|
||||
|
||||
self.db_session.commit()
|
||||
|
||||
return {"status": "eventful"}
|
||||
def finalize(self):
    """Record the listed package versions as lister state and flag the
    lister as updated when at least one origin was sent."""
    # the mapping between listed package names and versions is the lister state
    self.state.package_versions = self.package_versions
    self.updated = bool(self.sent_origins)
|
||||
|
|
|
@ -1,230 +0,0 @@
|
|||
# Copyright (C) 2017-2019 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import binascii
|
||||
from collections import defaultdict
|
||||
import datetime
|
||||
from typing import Any, Mapping
|
||||
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
Column,
|
||||
DateTime,
|
||||
Enum,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
LargeBinary,
|
||||
String,
|
||||
Table,
|
||||
UniqueConstraint,
|
||||
)
|
||||
|
||||
try:
|
||||
from sqlalchemy import JSON
|
||||
except ImportError:
|
||||
# SQLAlchemy < 1.1
|
||||
from sqlalchemy.dialects.postgresql import JSONB as JSON
|
||||
|
||||
from sqlalchemy.orm import relationship
|
||||
|
||||
from swh.lister.core.models import SQLBase
|
||||
|
||||
|
||||
class Distribution(SQLBase):
    """A distribution (e.g. Debian, Ubuntu, Fedora, ...)"""

    __tablename__ = "distribution"

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True, nullable=False)
    type = Column(Enum("deb", "rpm", name="distribution_types"), nullable=False)
    mirror_uri = Column(String, nullable=False)

    areas = relationship("Area", back_populates="distribution")

    def origin_for_package(self, package_name: str) -> str:
        """Build the origin url of a package: ``<type>://<name>/packages/<package>``"""
        url_parts = (self.type, self.name, package_name)
        return "%s://%s/packages/%s" % url_parts

    def __repr__(self):
        described = (self.name, self.type, self.mirror_uri)
        return "Distribution(%s (%s) on %s)" % described
|
||||
|
||||
|
||||
class Area(SQLBase):
    """A named area of a distribution, unique per (distribution, name)."""

    __tablename__ = "area"
    __table_args__ = (UniqueConstraint("distribution_id", "name"),)

    id = Column(Integer, primary_key=True)
    distribution_id = Column(Integer, ForeignKey("distribution.id"), nullable=False)
    name = Column(String, nullable=False)
    active = Column(Boolean, nullable=False, default=True)

    distribution = relationship("Distribution", back_populates="areas")

    def index_uris(self):
        """Get possible URIs for this component's package index

        Yields:
            ``(uri, compression)`` tuples for the xz, bz2 and gz variants of
            the Sources index, then the uncompressed index with ``None`` as
            compression.

        Raises:
            NotImplementedError: on first iteration, when the distribution
                type is not ``deb``.
        """
        if self.distribution.type != "deb":
            raise NotImplementedError(
                "Do not know how to build index URI for Distribution type %s"
                % self.distribution.type
            )

        # strip trailing slashes so a mirror URI such as
        # "http://deb.debian.org/debian/" does not produce ".../debian//dists/..."
        base_uri = "%s/dists/%s/source/Sources" % (
            self.distribution.mirror_uri.rstrip("/"),
            self.name,
        )
        for ext in ("xz", "bz2", "gz"):
            yield (base_uri + "." + ext, ext)
        yield (base_uri, None)

    def __repr__(self):
        return "Area(%s of %s)" % (self.name, self.distribution.name,)
|
||||
|
||||
|
||||
class Package(SQLBase):
    """A source package version listed in an area, unique per
    (area, name, version)."""

    __tablename__ = "package"
    __table_args__ = (UniqueConstraint("area_id", "name", "version"),)

    id = Column(Integer, primary_key=True)
    area_id = Column(Integer, ForeignKey("area.id"), nullable=False)
    name = Column(String, nullable=False)
    version = Column(String, nullable=False)
    directory = Column(String, nullable=False)
    files = Column(JSON, nullable=False)

    origin_id = Column(Integer)
    task_id = Column(Integer)

    revision_id = Column(LargeBinary(20))

    area = relationship("Area")

    @property
    def distribution(self):
        """Distribution of the area this package belongs to"""
        return self.area.distribution

    def fetch_uri(self, filename):
        """Get the URI to fetch the `filename` file associated with the
        package

        Raises:
            NotImplementedError: when the distribution type is not ``deb``.
        """
        if self.distribution.type != "deb":
            raise NotImplementedError(
                "Do not know how to build fetch URI for Distribution type %s"
                % self.distribution.type
            )

        # strip trailing slashes so a mirror URI ending with "/" does not
        # produce a double slash before the package directory
        return "%s/%s/%s" % (
            self.distribution.mirror_uri.rstrip("/"),
            self.directory,
            filename,
        )

    def loader_dict(self):
        """Return the dictionary describing this package version.

        It always holds ``id``, ``name``, ``version`` and ``revision_id``.
        When no revision id is recorded, ``revision_id`` is ``None`` and a
        ``files`` entry is added in which every file carries its fetch
        ``uri``.
        """
        ret = {
            "id": self.id,
            "name": self.name,
            "version": self.version,
        }
        if self.revision_id:
            ret["revision_id"] = binascii.hexlify(self.revision_id).decode()
        else:
            # work on copies so the stored ``files`` column is left untouched
            files = {name: checksums.copy() for name, checksums in self.files.items()}
            for name in files:
                files[name]["uri"] = self.fetch_uri(name)

            ret.update(
                {"revision_id": None, "files": files,}
            )
        return ret

    def __repr__(self):
        return "Package(%s_%s of %s %s)" % (
            self.name,
            self.version,
            self.distribution.name,
            self.area.name,
        )
|
||||
|
||||
|
||||
class DistributionSnapshot(SQLBase):
    """A dated snapshot of a distribution, made of area snapshots."""

    __tablename__ = "distribution_snapshot"

    id = Column(Integer, primary_key=True)
    date = Column(DateTime, nullable=False, index=True)
    distribution_id = Column(Integer, ForeignKey("distribution.id"), nullable=False)

    distribution = relationship("Distribution")
    areas = relationship("AreaSnapshot", back_populates="snapshot")

    def task_for_package(
        self, package_name: str, package_versions: Mapping
    ) -> Mapping[str, Any]:
        """Build the oneshot task dictionary for the given package versions"""
        distribution = self.distribution
        loader_kwargs = {
            "url": distribution.origin_for_package(package_name),
            "date": self.date.isoformat(),
            "packages": package_versions,
        }
        return {
            "policy": "oneshot",
            "type": "load-%s-package" % distribution.type,
            "next_run": datetime.datetime.now(tz=datetime.timezone.utc),
            "arguments": {"args": [], "kwargs": loader_kwargs},
            "retries_left": 3,
        }

    def get_packages(self):
        """Group the snapshot packages by name.

        Returns a mapping from package name to a dictionary keyed by
        ``<area name>/<version>`` whose values are the packages' loader
        dictionaries.
        """
        by_name = defaultdict(dict)
        for area_snapshot in self.areas:
            prefix = area_snapshot.area.name
            for package in area_snapshot.packages:
                key = "%s/%s" % (prefix, package.version)
                by_name[package.name][key] = package.loader_dict()
        return by_name
|
||||
|
||||
|
||||
# association table linking area snapshots to the packages they contain
area_snapshot_package_assoc = Table(
    "area_snapshot_package",
    SQLBase.metadata,
    Column(
        "area_snapshot_id", Integer, ForeignKey("area_snapshot.id"), nullable=False
    ),
    Column("package_id", Integer, ForeignKey("package.id"), nullable=False),
)
|
||||
|
||||
|
||||
class AreaSnapshot(SQLBase):
    """The packages of one area within a distribution snapshot."""

    __tablename__ = "area_snapshot"

    id = Column(Integer, primary_key=True)
    snapshot_id = Column(
        Integer, ForeignKey("distribution_snapshot.id"), nullable=False
    )
    area_id = Column(Integer, ForeignKey("area.id"), nullable=False)

    snapshot = relationship("DistributionSnapshot", back_populates="areas")
    area = relationship("Area")
    # many-to-many link to packages through the association table
    packages = relationship("Package", secondary=area_snapshot_package_assoc)
|
||||
|
||||
|
||||
class TempPackage(SQLBase):
    """Table created with the TEMPORARY prefix, holding (area, name, version)
    triples of packages."""

    __tablename__ = "temp_package"
    __table_args__ = {"prefixes": ["TEMPORARY"]}

    id = Column(Integer, primary_key=True)
    area_id = Column(Integer)
    name = Column(String)
    version = Column(String)
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2017-2018 the Software Heritage developers
|
||||
# Copyright (C) 2017-2021 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
|
@ -8,9 +8,9 @@ from .lister import DebianLister
|
|||
|
||||
|
||||
@shared_task(name=__name__ + ".DebianListerTask")
|
||||
def list_debian_distribution(distribution, **lister_args):
|
||||
def list_debian_distribution(**lister_args):
|
||||
"""List a Debian distribution"""
|
||||
return DebianLister(distribution=distribution, **lister_args).run()
|
||||
return DebianLister.from_configfile(**lister_args).run().dict()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + ".ping")
|
||||
|
|
|
@ -1,61 +0,0 @@
|
|||
# Copyright (C) 2019-2020 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from swh.core.db.pytest_plugin import postgresql_fact
|
||||
|
||||
from swh.lister.debian import debian_init
|
||||
import swh.scheduler
|
||||
|
||||
# "sql" directory located next to the swh.scheduler package
SQL_DIR = os.path.join(os.path.dirname(swh.scheduler.__file__), "sql")

# postgresql fixture for the "scheduler-lister" database, fed with the
# *.sql files of SQL_DIR
postgresql_scheduler = postgresql_fact(
    "postgresql_proc",
    db_name="scheduler-lister",
    dump_files=os.path.join(SQL_DIR, "*.sql"),
    # do not truncate the task tables, it's required in between test
    no_truncate_tables={"dbversion", "priority_ratio", "task"},
)
|
||||
|
||||
|
||||
@pytest.fixture
def swh_scheduler_config(postgresql_scheduler):
    """Scheduler configuration pointing at the test postgresql database"""
    dsn = postgresql_scheduler.dsn
    return {"db": dsn}
|
||||
|
||||
|
||||
@pytest.fixture
def lister_under_test():
    """Name of the lister exercised by these tests"""
    name = "debian"
    return name
|
||||
|
||||
|
||||
@pytest.fixture
def lister_debian(swh_lister):
    """Return ``swh_lister`` once the debian data model is initialized and
    the ``load-deb-package`` task type is registered in its scheduler."""
    # Initialize the debian data model
    debian_init(
        swh_lister.db_engine, suites=["stretch"], components=["main", "contrib"]
    )

    # Add the load-deb-package in the scheduler backend
    load_deb_task_type = {
        "type": "load-deb-package",
        "description": "Load a Debian package",
        "backend_name": "swh.loader.packages.debian.tasks.LoaderDebianPackage",
        "default_interval": "1 day",
    }
    swh_lister.scheduler.create_task_type(load_deb_task_type)

    return swh_lister
|
||||
|
||||
|
||||
@pytest.fixture
def session(lister_db_url, engine):
    """Yield a SQLAlchemy session bound to ``engine`` and clean up afterwards.

    Args:
        lister_db_url: requested only as a fixture dependency; its value is
            not used here.
        engine: object the session is bound to; it is disposed on teardown.

    Yields:
        The session created by ``sessionmaker(bind=engine)``.
    """
    # Local name differs from the fixture name to avoid shadowing it.
    db_session = sessionmaker(bind=engine)()
    try:
        yield db_session
    finally:
        # Dispose of the engine even when closing the session fails, so the
        # connection pool is never left behind by a broken teardown.
        try:
            db_session.close()
        finally:
            engine.dispose()
|
107
swh/lister/debian/tests/data/Sources_bullseye
Normal file
107
swh/lister/debian/tests/data/Sources_bullseye
Normal file
|
@ -0,0 +1,107 @@
|
|||
Package: git
|
||||
Binary: git, git-man, git-doc, git-cvs, git-svn, git-mediawiki, git-email, git-daemon-run, git-daemon-sysvinit, git-gui, gitk, git-el, gitweb, git-all
|
||||
Version: 1:2.29.2-1
|
||||
Maintainer: Jonathan Nieder <jrnieder@gmail.com>
|
||||
Uploaders: Anders Kaseorg <andersk@mit.edu>
|
||||
Build-Depends: libz-dev, gettext, libpcre2-dev | libpcre3-dev, libcurl4-gnutls-dev, libexpat1-dev, subversion, libsvn-perl, libyaml-perl, tcl, python3, libhttp-date-perl | libtime-parsedate-perl, libcgi-pm-perl, liberror-perl, libmailtools-perl, cvs, cvsps, libdbd-sqlite3-perl, unzip, libio-pty-perl, debhelper-compat (= 10), dh-exec (>= 0.7), dh-apache2, dpkg-dev (>= 1.16.2~)
|
||||
Build-Depends-Indep: asciidoc (>= 8.6.10), xmlto, docbook-xsl
|
||||
Architecture: any all
|
||||
Standards-Version: 4.3.0.1
|
||||
Format: 3.0 (quilt)
|
||||
Files:
|
||||
ef246c390b2673819cd55085984fb6bc 2867 git_2.29.2-1.dsc
|
||||
f5f9d4e7a3c633bc7a9178cfd822045f 6187988 git_2.29.2.orig.tar.xz
|
||||
cfed1fd3dffd4fb31a0319e51471877f 663292 git_2.29.2-1.debian.tar.xz
|
||||
Vcs-Browser: https://repo.or.cz/w/git/debian.git/
|
||||
Vcs-Git: https://repo.or.cz/r/git/debian.git/
|
||||
Checksums-Sha256:
|
||||
9f2203314f0d076e24750fa29f38d1bb49d4124f3e8d8789b751c84473e57ead 2867 git_2.29.2-1.dsc
|
||||
f2fc436ebe657821a1360bcd1e5f4896049610082419143d60f6fa13c2f607c1 6187988 git_2.29.2.orig.tar.xz
|
||||
ad79671893257ca6205156c7c58d06e265d793f076c0efc8e225e832217f760a 663292 git_2.29.2-1.debian.tar.xz
|
||||
Homepage: https://git-scm.com/
|
||||
Package-List:
|
||||
git deb vcs optional arch=any
|
||||
git-all deb vcs optional arch=all
|
||||
git-cvs deb vcs optional arch=all
|
||||
git-daemon-run deb vcs optional arch=all
|
||||
git-daemon-sysvinit deb vcs optional arch=all
|
||||
git-doc deb doc optional arch=all
|
||||
git-el deb vcs optional arch=all
|
||||
git-email deb vcs optional arch=all
|
||||
git-gui deb vcs optional arch=all
|
||||
git-man deb doc optional arch=all
|
||||
git-mediawiki deb vcs optional arch=all
|
||||
git-svn deb vcs optional arch=all
|
||||
gitk deb vcs optional arch=all
|
||||
gitweb deb vcs optional arch=all
|
||||
Directory: pool/main/g/git
|
||||
Priority: source
|
||||
Section: vcs
|
||||
|
||||
Package: subversion
|
||||
Binary: subversion, libsvn1, libsvn-dev, libsvn-doc, libapache2-mod-svn, python3-subversion, subversion-tools, libsvn-java, libsvn-perl, ruby-svn
|
||||
Version: 1.14.0-3
|
||||
Maintainer: James McCoy <jamessan@debian.org>
|
||||
Build-Depends: autoconf, bash-completion, chrpath, debhelper-compat (= 12), default-jdk-headless (>= 2:1.8) [!hurd-i386 !hppa !sparc] <!pkg.subversion.nojava>, dh-apache2, dh-python, doxygen, junit4 [!hurd-i386 !hppa !sparc] <!pkg.subversion.nojava>, libapr1-dev, libaprutil1-dev, libdb5.3-dev, libdbus-1-dev, liblz4-dev (>= 0.0~r129), libkf5coreaddons-dev <!pkg.subversion.nokde>, libkf5i18n-dev <!pkg.subversion.nokde>, libkf5wallet-dev <!pkg.subversion.nokde>, libperl-dev, libsasl2-dev, libsecret-1-dev, libserf-dev (>= 1.3.9-4~), libsqlite3-dev (>= 3.8.7), libtool, libutf8proc-dev, perl, py3c-dev, python3-all-dev, rename, ruby <!pkg.subversion.noruby>, ruby-dev <!pkg.subversion.noruby>, swig (>= 3.0.10), zlib1g-dev
|
||||
Build-Conflicts: libsvn-dev (>= 1.15~), libsvn-dev (<< 1.14~), libsvn1 (>= 1.15~), libsvn1 (<< 1.14~)
|
||||
Architecture: any all
|
||||
Standards-Version: 4.5.0
|
||||
Format: 3.0 (quilt)
|
||||
Files:
|
||||
65f7c225ddbcc855b57341954268098b 3807 subversion_1.14.0-3.dsc
|
||||
0136e67d8f58731b2858b9f2dba7c536 11519871 subversion_1.14.0.orig.tar.gz
|
||||
f68b938ba71e19f333069bfd3c6ec236 3917 subversion_1.14.0.orig.tar.gz.asc
|
||||
de6248e80a7f8b6481606ff16a9e9237 427396 subversion_1.14.0-3.debian.tar.xz
|
||||
Vcs-Browser: https://salsa.debian.org/jamessan/subversion
|
||||
Vcs-Git: https://salsa.debian.org/jamessan/subversion.git
|
||||
Checksums-Sha256:
|
||||
ebe6e2417a79ad5254072d994ccf6313489a90f299304ee2ccfb6ebe1392c580 3807 subversion_1.14.0-3.dsc
|
||||
ef3d1147535e41874c304fb5b9ea32745fbf5d7faecf2ce21d4115b567e937d0 11519871 subversion_1.14.0.orig.tar.gz
|
||||
98333df38d29a64500d4ad1693741d3d087485555207289b4e53af309abac71a 3917 subversion_1.14.0.orig.tar.gz.asc
|
||||
fd5383bf82ccf89acd7caf0fd80dc01ee2f7a3e163dcab6b2646ad01b7b746d9 427396 subversion_1.14.0-3.debian.tar.xz
|
||||
Homepage: http://subversion.apache.org/
|
||||
Dgit: 6ef306f777223c0d5c2eaab0586420ada61435f3 debian archive/debian/1.14.0-3 https://git.dgit.debian.org/subversion
|
||||
Package-List:
|
||||
libapache2-mod-svn deb httpd optional arch=any
|
||||
libsvn-dev deb libdevel optional arch=any
|
||||
libsvn-doc deb doc optional arch=all
|
||||
libsvn-java deb java optional arch=any profile=!pkg.subversion.nojava
|
||||
libsvn-perl deb perl optional arch=any
|
||||
libsvn1 deb libs optional arch=any
|
||||
python3-subversion deb python optional arch=any
|
||||
ruby-svn deb ruby optional arch=any profile=!pkg.subversion.noruby
|
||||
subversion deb vcs optional arch=any
|
||||
subversion-tools deb vcs optional arch=any
|
||||
Testsuite: autopkgtest
|
||||
Testsuite-Triggers: apache2, wget
|
||||
Directory: pool/main/s/subversion
|
||||
Priority: source
|
||||
Section: vcs
|
||||
|
||||
Package: hg-git
|
||||
Binary: mercurial-git
|
||||
Version: 0.9.0-2
|
||||
Maintainer: Debian Python Team <team+python@tracker.debian.org>
|
||||
Uploaders: Tristan Seligmann <mithrandi@debian.org>
|
||||
Build-Depends: debhelper-compat (= 13), dh-python, git, python3-mercurial, openssh-client, python3, python3-dulwich (>= 0.20.6), python3-setuptools, unzip
|
||||
Architecture: all
|
||||
Standards-Version: 4.5.0
|
||||
Format: 3.0 (quilt)
|
||||
Files:
|
||||
7dee1b877cf129c1f6ee618ebf690179 2090 hg-git_0.9.0-2.dsc
|
||||
bcf30d513d8463332288aa93c1c67d3e 129138 hg-git_0.9.0.orig.tar.bz2
|
||||
5674d6e2e8271150adf68b08833e4806 6996 hg-git_0.9.0-2.debian.tar.xz
|
||||
Vcs-Browser: https://salsa.debian.org/python-team/packages/hg-git
|
||||
Vcs-Git: https://salsa.debian.org/python-team/packages/hg-git.git
|
||||
Checksums-Sha256:
|
||||
a40beaef731c00a820d89918afedc1f01580d87f6e8c29e74903b1e108e38b27 2090 hg-git_0.9.0-2.dsc
|
||||
eedd8773de76b21b47fd21a7e5c04c05c7ab0ecfc62a54bc947eb225b2c44424 129138 hg-git_0.9.0.orig.tar.bz2
|
||||
ded524f1688a248a0eefbd0cf9843daedf60001cc39bfbb9e89734742fa4a4d2 6996 hg-git_0.9.0-2.debian.tar.xz
|
||||
Homepage: https://hg-git.github.io/
|
||||
Package-List:
|
||||
mercurial-git deb vcs optional arch=all
|
||||
Testsuite: autopkgtest
|
||||
Testsuite-Triggers: git, openssh-client, unzip
|
||||
Directory: pool/main/h/hg-git
|
||||
Priority: source
|
||||
Section: vcs
|
78
swh/lister/debian/tests/data/Sources_buster
Normal file
78
swh/lister/debian/tests/data/Sources_buster
Normal file
|
@ -0,0 +1,78 @@
|
|||
Package: git
|
||||
Binary: git, git-man, git-doc, git-cvs, git-svn, git-mediawiki, git-email, git-daemon-run, git-daemon-sysvinit, git-gui, gitk, git-el, gitweb, git-all
|
||||
Version: 1:2.20.1-2+deb10u3
|
||||
Maintainer: Gerrit Pape <pape@smarden.org>
|
||||
Uploaders: Jonathan Nieder <jrnieder@gmail.com>, Anders Kaseorg <andersk@mit.edu>
|
||||
Build-Depends: libz-dev, gettext, libpcre2-dev | libpcre3-dev, libcurl4-gnutls-dev, libexpat1-dev, subversion, libsvn-perl, libyaml-perl, tcl, python, libhttp-date-perl | libtime-parsedate-perl, libcgi-pm-perl, liberror-perl, libmailtools-perl, cvs, cvsps, libdbd-sqlite3-perl, unzip, libio-pty-perl, debhelper (>= 9), dh-exec (>= 0.7), dh-apache2, dpkg-dev (>= 1.16.2~)
|
||||
Build-Depends-Indep: asciidoc (>= 8.6.10), xmlto, docbook-xsl
|
||||
Architecture: any all
|
||||
Standards-Version: 4.3.0.1
|
||||
Format: 3.0 (quilt)
|
||||
Files:
|
||||
fcfb1e01b74dfa383f8171ae7d331de9 2923 git_2.20.1-2+deb10u3.dsc
|
||||
5fb4ff92b56ce3172b99c1c74c046c1a 5359872 git_2.20.1.orig.tar.xz
|
||||
3b629f9b0d2da6fa6ce5816478a57e09 646216 git_2.20.1-2+deb10u3.debian.tar.xz
|
||||
Vcs-Browser: https://repo.or.cz/w/git/debian.git/
|
||||
Vcs-Git: https://repo.or.cz/r/git/debian.git/
|
||||
Checksums-Sha256:
|
||||
6322d0dbe9b867a6cd1cd75f95a4a20335faa2030c38688f460ddaaaacbd4d06 2923 git_2.20.1-2+deb10u3.dsc
|
||||
9d2e91e2faa2ea61ba0a70201d023b36f54d846314591a002c610ea2ab81c3e9 5359872 git_2.20.1.orig.tar.xz
|
||||
3c6e2f8495350bccd0981d579d4d1cac6b0e051e1f7ba8b1d22c842bd4cb3453 646216 git_2.20.1-2+deb10u3.debian.tar.xz
|
||||
Homepage: https://git-scm.com/
|
||||
Package-List:
|
||||
git deb vcs optional arch=any
|
||||
git-all deb vcs optional arch=all
|
||||
git-cvs deb vcs optional arch=all
|
||||
git-daemon-run deb vcs optional arch=all
|
||||
git-daemon-sysvinit deb vcs optional arch=all
|
||||
git-doc deb doc optional arch=all
|
||||
git-el deb vcs optional arch=all
|
||||
git-email deb vcs optional arch=all
|
||||
git-gui deb vcs optional arch=all
|
||||
git-man deb doc optional arch=all
|
||||
git-mediawiki deb vcs optional arch=all
|
||||
git-svn deb vcs optional arch=all
|
||||
gitk deb vcs optional arch=all
|
||||
gitweb deb vcs optional arch=all
|
||||
Directory: pool/main/g/git
|
||||
Priority: source
|
||||
Section: vcs
|
||||
|
||||
Package: subversion
|
||||
Binary: subversion, libsvn1, libsvn-dev, libsvn-doc, libapache2-mod-svn, python-subversion, subversion-tools, libsvn-java, libsvn-perl, ruby-svn
|
||||
Version: 1.10.4-1+deb10u1
|
||||
Maintainer: James McCoy <jamessan@debian.org>
|
||||
Build-Depends: apache2-dev (>= 2.4.16), autoconf, bash-completion, chrpath, debhelper (>= 11~), default-jdk-headless (>= 2:1.6) [!hurd-i386 !hppa !sparc], dh-apache2, dh-python, doxygen, junit [!hurd-i386 !hppa !sparc], libapr1-dev, libaprutil1-dev, libdb5.3-dev, libdbus-1-dev, liblz4-dev (>= 0.0~r129), libkf5coreaddons-dev, libkf5i18n-dev, libkf5wallet-dev, libperl-dev, libsasl2-dev, libsecret-1-dev, libserf-dev (>= 1.3.9-4~), libsqlite3-dev (>= 3.8.7), libtool, libutf8proc-dev, perl, python-all-dev (>= 2.7), rename, ruby, ruby-dev, swig, zlib1g-dev
|
||||
Build-Conflicts: libsvn-dev (<< 1.10~)
|
||||
Architecture: any all
|
||||
Standards-Version: 4.3.0
|
||||
Format: 3.0 (quilt)
|
||||
Files:
|
||||
70b1d3c8ae91301a3f7766b8181d09c9 3428 subversion_1.10.4-1+deb10u1.dsc
|
||||
fcfd1bcd95a8b44e6a6de3a97425aead 11347907 subversion_1.10.4.orig.tar.gz
|
||||
98e9c6902e6a18973b3d936657384a88 2107 subversion_1.10.4.orig.tar.gz.asc
|
||||
a4a14bcff3cef49d0d9388356213f3e4 438024 subversion_1.10.4-1+deb10u1.debian.tar.xz
|
||||
Vcs-Browser: https://salsa.debian.org/jamessan/subversion
|
||||
Vcs-Git: https://salsa.debian.org/jamessan/subversion.git
|
||||
Checksums-Sha256:
|
||||
c9956fd5b850924dd123048b39195b3d591f55b9cbdf18d4d2a0f496f7decc72 3428 subversion_1.10.4-1+deb10u1.dsc
|
||||
354022a837596eb1b5676639ea8d73aa326fa8b2c610d8e1b39aeb7228921f4e 11347907 subversion_1.10.4.orig.tar.gz
|
||||
bc6173c43ac837f875d9f2921e118c194455796b419769e155496cf084376428 2107 subversion_1.10.4.orig.tar.gz.asc
|
||||
1bc8900ef1b9d2af84827dab0fd0164e2058381be3bba0db6fd13cbc858c9b1e 438024 subversion_1.10.4-1+deb10u1.debian.tar.xz
|
||||
Homepage: http://subversion.apache.org/
|
||||
Package-List:
|
||||
libapache2-mod-svn deb httpd optional arch=any
|
||||
libsvn-dev deb libdevel optional arch=any
|
||||
libsvn-doc deb doc optional arch=all
|
||||
libsvn-java deb java optional arch=any
|
||||
libsvn-perl deb perl optional arch=any
|
||||
libsvn1 deb libs optional arch=any
|
||||
python-subversion deb python optional arch=any
|
||||
ruby-svn deb ruby optional arch=any
|
||||
subversion deb vcs optional arch=any
|
||||
subversion-tools deb vcs optional arch=any
|
||||
Testsuite: autopkgtest
|
||||
Testsuite-Triggers: apache2, wget
|
||||
Directory: pool/main/s/subversion
|
||||
Priority: source
|
||||
Section: vcs
|
113
swh/lister/debian/tests/data/Sources_stretch
Normal file
113
swh/lister/debian/tests/data/Sources_stretch
Normal file
|
@ -0,0 +1,113 @@
|
|||
Package: dh-elpa
|
||||
Binary: dh-elpa
|
||||
Version: 0.0.18
|
||||
Maintainer: Debian Emacs addons team <pkg-emacsen-addons@lists.alioth.debian.org>
|
||||
Uploaders: David Bremner <bremner@debian.org>
|
||||
Build-Depends: debhelper (>= 9), emacs24-nox | emacs24 (>= 24~) | emacs24-lucid (>= 24~)
|
||||
Architecture: all
|
||||
Standards-Version: 3.9.6
|
||||
Format: 1.0
|
||||
Files:
|
||||
25beb4376110fe075460f4b7776d0349 1471 dh-elpa_0.0.18.dsc
|
||||
dc0d3b42c1db80cac9817f43c171bfb3 10038 dh-elpa_0.0.18.tar.gz
|
||||
Vcs-Browser: http://anonscm.debian.org/cgit/pkg-emacsen/pkg/dh-elpa.git/
|
||||
Vcs-Git: git://anonscm.debian.org/pkg-emacsen/pkg/dh-elpa.git
|
||||
Checksums-Sha256:
|
||||
87fb2f13d4a8cdea0cec752cc9873eef1c92961655315d2f14d178f9b1b7fc43 1471 dh-elpa_0.0.18.dsc
|
||||
24e5be28cda286398db0018d9577493445c61a0602e239ca285a2981f1068b10 10038 dh-elpa_0.0.18.tar.gz
|
||||
Package-List:
|
||||
dh-elpa deb devel optional arch=all
|
||||
Extra-Source-Only: yes
|
||||
Directory: pool/main/d/dh-elpa
|
||||
Priority: extra
|
||||
Section: misc
|
||||
|
||||
Package: dh-elpa
|
||||
Binary: dh-elpa
|
||||
Version: 0.0.19
|
||||
Maintainer: Debian Emacs addons team <pkg-emacsen-addons@lists.alioth.debian.org>
|
||||
Uploaders: David Bremner <bremner@debian.org>
|
||||
Build-Depends: debhelper (>= 9), emacs24-nox | emacs24 (>= 24~) | emacs24-lucid (>= 24~)
|
||||
Architecture: all
|
||||
Standards-Version: 3.9.6
|
||||
Format: 1.0
|
||||
Files:
|
||||
e4513c0f2112ba60031777ad0a65f9dc 1471 dh-elpa_0.0.19.dsc
|
||||
ac70db483578ecac510612e1b894e53b 10291 dh-elpa_0.0.19.tar.gz
|
||||
Vcs-Browser: http://anonscm.debian.org/cgit/pkg-emacsen/pkg/dh-elpa.git/
|
||||
Vcs-Git: git://anonscm.debian.org/pkg-emacsen/pkg/dh-elpa.git
|
||||
Checksums-Sha256:
|
||||
796a96fad0b03eb589f47c44406f8d32e5b8881dce34c425f1c915650618235c 1471 dh-elpa_0.0.19.dsc
|
||||
4bb0a0ecdb75585e168a56a53c79e620b2da70584db9d29e136a3ae9f8a92a76 10291 dh-elpa_0.0.19.tar.gz
|
||||
Package-List:
|
||||
dh-elpa deb devel optional arch=all
|
||||
Extra-Source-Only: yes
|
||||
Directory: pool/main/d/dh-elpa
|
||||
Priority: extra
|
||||
Section: misc
|
||||
|
||||
Package: dh-elpa
|
||||
Binary: dh-elpa
|
||||
Version: 0.0.20
|
||||
Maintainer: Debian Emacs addons team <pkg-emacsen-addons@lists.alioth.debian.org>
|
||||
Uploaders: David Bremner <bremner@debian.org>, Sean Whitton <spwhitton@spwhitton.name>,
|
||||
Build-Depends: debhelper (>= 9.20151004), emacs24-nox | emacs24 (>= 24~) | emacs24-lucid (>= 24~)
|
||||
Architecture: all
|
||||
Standards-Version: 3.9.8
|
||||
Format: 1.0
|
||||
Files:
|
||||
82455df65ccd88896cdc083541d29236 1526 dh-elpa_0.0.20.dsc
|
||||
4a7cc13b097e44228b5635c400e33202 12884 dh-elpa_0.0.20.tar.gz
|
||||
Vcs-Browser: https://anonscm.debian.org/cgit/pkg-emacsen/pkg/dh-elpa.git/
|
||||
Vcs-Git: https://anonscm.debian.org/pkg-emacsen/pkg/dh-elpa.git
|
||||
Checksums-Sha256:
|
||||
77c9761b1359c256ad25d4c7a826a27643a0094929a4cb3ac8cdaa0fcdb02d1b 1526 dh-elpa_0.0.20.dsc
|
||||
13e4c6ffaaa6cd793d19de677af470ac0edac098779627e9f8555644a7da42f0 12884 dh-elpa_0.0.20.tar.gz
|
||||
Package-List:
|
||||
dh-elpa deb devel optional arch=all
|
||||
Extra-Source-Only: yes
|
||||
Directory: pool/main/d/dh-elpa
|
||||
Priority: extra
|
||||
Section: misc
|
||||
|
||||
Package: git
|
||||
Binary: git, git-man, git-core, git-doc, git-arch, git-cvs, git-svn, git-mediawiki, git-email, git-daemon-run, git-daemon-sysvinit, git-gui, gitk, git-el, gitweb, git-all
|
||||
Version: 1:2.11.0-3+deb9u7
|
||||
Maintainer: Gerrit Pape <pape@smarden.org>
|
||||
Uploaders: Jonathan Nieder <jrnieder@gmail.com>, Anders Kaseorg <andersk@mit.edu>
|
||||
Build-Depends: libz-dev, libpcre3-dev, gettext, libcurl4-gnutls-dev, libexpat1-dev, subversion, libsvn-perl, libyaml-perl, tcl, libhttp-date-perl | libtime-modules-perl, libcgi-pm-perl, python, cvs, cvsps, libdbd-sqlite3-perl, unzip, libio-pty-perl, debhelper (>= 9), dh-exec (>= 0.7), dh-apache2, dpkg-dev (>= 1.16.2~)
|
||||
Build-Depends-Indep: asciidoc, xmlto, docbook-xsl
|
||||
Architecture: any all
|
||||
Standards-Version: 3.9.6.0
|
||||
Format: 3.0 (quilt)
|
||||
Files:
|
||||
e594aeada05ecb15253cc5768412ce3b 2944 git_2.11.0-3+deb9u7.dsc
|
||||
dd4e3360e28aec5bb902fb34dd7fce3b 4197984 git_2.11.0.orig.tar.xz
|
||||
e8d896e5307397f0e106e6a85c1b8682 610188 git_2.11.0-3+deb9u7.debian.tar.xz
|
||||
Vcs-Browser: http://repo.or.cz/w/git/debian.git/
|
||||
Vcs-Git: https://repo.or.cz/r/git/debian.git/
|
||||
Checksums-Sha256:
|
||||
7f2be1b1709c216ad06590687cc8fc0ff6b55a6c3e0ad6ec32b2567ce10adec1 2944 git_2.11.0-3+deb9u7.dsc
|
||||
7e7e8d69d494892373b87007674be5820a4bc1ef596a0117d03ea3169119fd0b 4197984 git_2.11.0.orig.tar.xz
|
||||
3f54b7ea7b8cda477ddb559c63de063c5bd49d8ab772330c05c79ace546ce38d 610188 git_2.11.0-3+deb9u7.debian.tar.xz
|
||||
Homepage: https://git-scm.com/
|
||||
Package-List:
|
||||
git deb vcs optional arch=any
|
||||
git-all deb vcs optional arch=all
|
||||
git-arch deb vcs optional arch=all
|
||||
git-core deb vcs optional arch=all
|
||||
git-cvs deb vcs optional arch=all
|
||||
git-daemon-run deb vcs optional arch=all
|
||||
git-daemon-sysvinit deb vcs extra arch=all
|
||||
git-doc deb doc optional arch=all
|
||||
git-el deb vcs optional arch=all
|
||||
git-email deb vcs optional arch=all
|
||||
git-gui deb vcs optional arch=all
|
||||
git-man deb doc optional arch=all
|
||||
git-mediawiki deb vcs optional arch=all
|
||||
git-svn deb vcs optional arch=all
|
||||
gitk deb vcs optional arch=all
|
||||
gitweb deb vcs optional arch=all
|
||||
Directory: pool/main/g/git
|
||||
Priority: source
|
||||
Section: vcs
|
Binary file not shown.
Binary file not shown.
|
@ -1,92 +0,0 @@
|
|||
# Copyright (C) 2019 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import pytest
|
||||
|
||||
from swh.lister.debian import debian_init
|
||||
from swh.lister.debian.models import Area, Distribution
|
||||
|
||||
|
||||
@pytest.fixture
def engine(session):
    """Turn autoflush off on ``session`` and return the object it is bound to."""
    session.autoflush = False
    bound_engine = session.bind
    return bound_engine
|
||||
|
||||
|
||||
def test_debian_init_step(engine, session):
    """Check ``debian_init`` creates a distribution and its areas, is
    idempotent when called twice with the same arguments, and adds areas
    when a new suite is registered."""
    distribution_name = "KaliLinux"
    suites = ["wheezy", "jessie"]
    components = ["main", "contrib"]

    def distribution_query():
        # fresh query selecting the distribution(s) named like the one under test
        return session.query(Distribution).filter(
            Distribution.name == distribution_name
        )

    def list_areas():
        return session.query(Area).all()

    # nothing must exist before the first initialization
    assert distribution_query().one_or_none() is None
    assert list_areas() == []

    debian_init(
        engine,
        distribution_name=distribution_name,
        suites=suites,
        components=components,
    )
    distrib = distribution_query().one_or_none()

    assert distrib is not None
    assert distrib.name == distribution_name
    assert distrib.type == "deb"
    assert distrib.mirror_uri == "http://deb.debian.org/debian/"

    all_area = list_areas()
    assert len(all_area) == 2 * 2, "2 suites * 2 components per suite"

    expected_area_names = [
        f"{suite}/{component}" for suite in suites for component in components
    ]

    for area in all_area:
        area.id = None
        assert area.distribution == distrib
        assert area.name in expected_area_names

    # check idempotency (on exact same call)
    debian_init(
        engine,
        distribution_name=distribution_name,
        suites=suites,
        components=components,
    )

    distribs = distribution_query().all()
    assert len(distribs) == 1
    distrib = distribs[0]

    all_area = list_areas()
    assert len(all_area) == 2 * 2, "2 suites * 2 components per suite"

    # Add a new suite
    debian_init(
        engine,
        distribution_name=distribution_name,
        suites=["lenny"],
        components=components,
    )

    all_area = [area.name for area in list_areas()]
    assert len(all_area) == (2 + 1) * 2, "3 suites * 2 components per suite"
|
|
@ -1,35 +1,201 @@
|
|||
# Copyright (C) 2019 The Software Heritage developers
|
||||
# Copyright (C) 2019-2021 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Set, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from debian.deb822 import Sources
|
||||
import pytest
|
||||
|
||||
from swh.lister.debian.lister import (
|
||||
DebianLister,
|
||||
DebianOrigin,
|
||||
PkgName,
|
||||
PkgVersion,
|
||||
Suite,
|
||||
)
|
||||
from swh.scheduler.interface import SchedulerInterface
|
||||
|
||||
# Those tests use sample debian Sources files whose content has been extracted
|
||||
# from the real Sources files from stretch, buster and bullseye suites.
|
||||
# They contain the following package source info
|
||||
# - stretch:
|
||||
# * dh-elpa (versions: 0.0.18, 0.0.19, 0.0.20),
|
||||
# * git (version: 1:2.11.0-3+deb9u7)
|
||||
# - buster:
|
||||
# * git (version: 1:2.20.1-2+deb10u3),
|
||||
# * subversion (version: 1.10.4-1+deb10u1)
|
||||
# - bullseye:
|
||||
# * git (version: 1:2.29.2-1)
|
||||
# * subversion (version: 1.14.0-3)
|
||||
# * hg-git (version: 0.9.0-2)
|
||||
|
||||
_mirror_url = "http://deb.debian.org/debian"
|
||||
_suites = ["stretch", "buster", "bullseye"]
|
||||
_components = ["main"]
|
||||
|
||||
SourcesText = str
|
||||
|
||||
|
||||
def test_lister_debian(lister_debian, datadir, requests_mock_datadir):
|
||||
"""Simple debian listing should create scheduled tasks
|
||||
def _debian_sources_content(datadir: str, suite: Suite) -> SourcesText:
    """Return the text of the sample ``Sources_<suite>`` file found in ``datadir``.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale of the machine running the tests.
    """
    return Path(datadir, f"Sources_{suite}").read_text(encoding="utf-8")
|
||||
|
||||
|
||||
@pytest.fixture
def debian_sources(datadir: str) -> Dict[Suite, SourcesText]:
    """Map every suite of ``_suites`` to the text of its sample Sources file."""
    sources_by_suite: Dict[Suite, SourcesText] = {}
    for suite in _suites:
        sources_by_suite[suite] = _debian_sources_content(datadir, suite)
    return sources_by_suite
|
||||
|
||||
|
||||
# suite -> package name -> list of versions
|
||||
DebianSuitePkgSrcInfo = Dict[Suite, Dict[PkgName, List[Sources]]]
|
||||
|
||||
|
||||
def _init_test(
    swh_scheduler: SchedulerInterface,
    debian_sources: Dict[Suite, SourcesText],
    requests_mock,
) -> Tuple[DebianLister, DebianSuitePkgSrcInfo]:
    """Build a lister for the given suites and mock its index URLs.

    Args:
        swh_scheduler: scheduler instance handed to the lister
        debian_sources: Sources file text, keyed by suite; the keys are the
            suites the lister is configured with
        requests_mock: mocker on which the index URLs get registered

    Returns:
        The lister and, per suite, the parsed source paragraphs grouped by
        package name.
    """
    lister = DebianLister(
        scheduler=swh_scheduler,
        mirror_url=_mirror_url,
        suites=list(debian_sources),
        components=_components,
    )

    suite_pkg_info: DebianSuitePkgSrcInfo = {}

    for suite, sources in debian_sources.items():
        # group the paragraphs of the Sources text by package name
        pkg_srcs_by_name = defaultdict(list)
        for paragraph in Sources.iter_paragraphs(sources):
            pkg_srcs_by_name[paragraph["Package"]].append(paragraph)
        suite_pkg_info[suite] = pkg_srcs_by_name

        # index URLs flagged with a compression answer 404, the remaining one
        # serves the Sources text as-is
        for idx_url, compression in lister.debian_index_urls(suite, _components[0]):
            response = {"status_code": 404} if compression else {"text": sources}
            requests_mock.get(idx_url, **response)

    return lister, suite_pkg_info
|
||||
|
||||
|
||||
def _check_listed_origins(
    swh_scheduler: SchedulerInterface,
    lister: DebianLister,
    suite_pkg_info: DebianSuitePkgSrcInfo,
    lister_previous_state: Dict[PkgName, Set[PkgVersion]],
) -> Set[DebianOrigin]:
    """Check that every newly seen package version reached the scheduler.

    Args:
        swh_scheduler: scheduler queried for the origins recorded by ``lister``
        lister: lister that has just been run
        suite_pkg_info: expected package source paragraphs, per suite and
            package name
        lister_previous_state: package version keys known before the run,
            per package name; versions found there are not checked

    Returns:
        The origin URLs of the packages that had at least one version missing
        from ``lister_previous_state``.
    """
    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results

    # Index the scheduler origins by URL once instead of rescanning the whole
    # list for each package version; setdefault keeps the first origin found
    # for a URL, as a front-to-back scan would.
    origins_by_url = {}
    for scheduler_origin in scheduler_origins:
        origins_by_url.setdefault(scheduler_origin.url, scheduler_origin)

    origin_urls = set()

    # iterate on each debian suite for the main component
    for suite, pkg_info in suite_pkg_info.items():
        # iterate on each package
        for package_name, pkg_srcs in pkg_info.items():
            # iterate on each package version info
            for pkg_src in pkg_srcs:
                # build package version key
                package_version_key = f"{suite}/{_components[0]}/{pkg_src['Version']}"

                # a version already present in the previous state was not
                # necessarily sent again, nothing to verify for it
                if (
                    package_name in lister_previous_state
                    and package_version_key in lister_previous_state[package_name]
                ):
                    continue

                # package or version not previously listed: those info should
                # have been sent to the scheduler database
                origin_url = lister.origin_url_for_package(package_name)
                origin_urls.add(origin_url)

                # get ListedOrigin object from scheduler database
                listed_origin = origins_by_url.get(origin_url)
                assert listed_origin is not None

                # check the version info are available
                assert (
                    package_version_key
                    in listed_origin.extra_loader_arguments["packages"]
                )

                # check listed package version is in lister state
                assert package_name in lister.state.package_versions
                assert (
                    package_version_key
                    in lister.state.package_versions[package_name]
                )

    return origin_urls
|
||||
|
||||
|
||||
def test_lister_debian_all_suites(
|
||||
swh_scheduler: SchedulerInterface,
|
||||
debian_sources: Dict[Suite, SourcesText],
|
||||
requests_mock,
|
||||
):
|
||||
"""
|
||||
# Run the lister
|
||||
lister_debian.run()
|
||||
Simulate a full listing of main component packages for all debian suites.
|
||||
"""
|
||||
lister, suite_pkg_info = _init_test(swh_scheduler, debian_sources, requests_mock)
|
||||
|
||||
r = lister_debian.scheduler.search_tasks(task_type="load-deb-package")
|
||||
assert len(r) == 151
|
||||
stats = lister.run()
|
||||
|
||||
for row in r:
|
||||
assert row["type"] == "load-deb-package"
|
||||
# arguments check
|
||||
args = row["arguments"]["args"]
|
||||
assert len(args) == 0
|
||||
origin_urls = _check_listed_origins(
|
||||
swh_scheduler, lister, suite_pkg_info, lister_previous_state={}
|
||||
)
|
||||
|
||||
# kwargs
|
||||
kwargs = row["arguments"]["kwargs"]
|
||||
assert set(kwargs.keys()) == {"url", "date", "packages"}
|
||||
assert stats.pages == len(_suites) * len(_components)
|
||||
assert stats.origins == len(origin_urls)
|
||||
|
||||
logger.debug("kwargs: %s", kwargs)
|
||||
assert isinstance(kwargs["url"], str)
|
||||
stats = lister.run()
|
||||
|
||||
assert row["policy"] == "oneshot"
|
||||
assert row["priority"] is None
|
||||
assert stats.pages == len(_suites) * len(_components)
|
||||
assert stats.origins == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "suites_params",
    [[_suites[:1]], [_suites[:1], _suites[:2]], [_suites[:1], _suites[:2], _suites],],
)
def test_lister_debian_updated_packages(
    swh_scheduler: SchedulerInterface,
    debian_sources: Dict[Suite, SourcesText],
    requests_mock,
    suites_params: List[List[Suite]],
):
    """
    Simulate incremental listing of main component packages by adding new suite
    to process between each listing operation.

    ``suites_params`` is a sequence of listing steps; each step is the list of
    suites processed by that listing operation.
    """

    lister_previous_state: Dict[PkgName, Set[PkgVersion]] = {}

    for suites in suites_params:

        sources = {suite: debian_sources[suite] for suite in suites}

        lister, suite_pkg_info = _init_test(swh_scheduler, sources, requests_mock)

        stats = lister.run()

        origin_urls = _check_listed_origins(
            swh_scheduler,
            lister,
            suite_pkg_info,
            lister_previous_state=lister_previous_state,
        )

        # one page per (suite, component) pair, as in the full listing test
        assert stats.pages == len(sources) * len(_components)
        assert stats.origins == len(origin_urls)

        lister_previous_state = lister.state.package_versions

        # only new packages or packages with new versions should be listed;
        # each step of the parametrized inputs processes one more suite than
        # the previous one, so every listing must report at least one origin
        assert stats.origins != 0
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
# Copyright (C) 2019 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import pytest
|
||||
|
||||
from swh.lister.debian.models import Area, Distribution
|
||||
|
||||
|
||||
def test_area_index_uris_deb(session):
    """An area of a "deb" distribution must yield at least one index URI."""
    debian = Distribution(
        name="Debian", type="deb", mirror_uri="http://deb.debian.org/debian"
    )
    unstable_main = Area(distribution=debian, name="unstable/main", active=True)

    session.add_all([debian, unstable_main])
    session.commit()

    index_uris = list(unstable_main.index_uris())
    assert index_uris
|
||||
|
||||
|
||||
def test_area_index_uris_rpm(session):
    """Consuming the index URIs of an "rpm" distribution area must raise
    NotImplementedError."""
    centos = Distribution(
        name="CentOS", type="rpm", mirror_uri="http://centos.mirrors.proxad.net/"
    )
    centos_8 = Area(distribution=centos, name="8", active=True)

    session.add_all([centos, centos_8])
    session.commit()

    with pytest.raises(NotImplementedError):
        list(centos_8.index_uris())
|
|
@ -1,10 +1,12 @@
|
|||
# Copyright (C) 2019-2020 The Software Heritage developers
|
||||
# Copyright (C) 2019-2021 The Software Heritage developers
|
||||
# See the AUTHORS file at the top-level directory of this distribution
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from swh.lister.pattern import ListerStats
|
||||
|
||||
|
||||
def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
||||
res = swh_scheduler_celery_app.send_task("swh.lister.debian.tasks.ping")
|
||||
|
@ -17,15 +19,25 @@ def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
|||
@patch("swh.lister.debian.tasks.DebianLister")
|
||||
def test_lister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker):
|
||||
# setup the mocked DebianLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
lister.from_configfile.return_value = lister
|
||||
stats = ListerStats(pages=12, origins=35618)
|
||||
lister.run.return_value = stats
|
||||
|
||||
kwargs = dict(
|
||||
mirror_url="http://www-ftp.lip6.fr/pub/linux/distributions/Ubuntu/archive/",
|
||||
distribution="Ubuntu",
|
||||
suites=["xenial", "bionic", "focal"],
|
||||
components=["main", "multiverse", "restricted", "universe"],
|
||||
)
|
||||
|
||||
res = swh_scheduler_celery_app.send_task(
|
||||
"swh.lister.debian.tasks.DebianListerTask", ("stretch",)
|
||||
"swh.lister.debian.tasks.DebianListerTask", kwargs=kwargs
|
||||
)
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
|
||||
lister.assert_called_once_with(distribution="stretch")
|
||||
lister.from_configfile.assert_called_once_with(**kwargs)
|
||||
lister.run.assert_called_once_with()
|
||||
|
||||
assert res.result == stats.dict()
|
||||
|
|
|
@ -1,83 +0,0 @@
|
|||
# Copyright (C) 2017-2019 the Software Heritage developers
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import logging
|
||||
|
||||
import click
|
||||
|
||||
from swh.lister.debian.lister import DebianLister
|
||||
from swh.lister.debian.models import Area, Distribution, SQLBase
|
||||
|
||||
|
||||
@click.group()
@click.option("--verbose/--no-verbose", default=False)
@click.pass_context
def cli(ctx, verbose):
    """Debian lister command-line tools."""
    # Make sure the context carries a dict before storing the lister in it:
    # an existing object is kept untouched, a missing one is created, so the
    # group also works when no ``obj`` was supplied by the caller.
    ctx.ensure_object(dict)
    ctx.obj["lister"] = DebianLister()

    if verbose:
        loglevel = logging.DEBUG
        # also surface the statements logged by the SQLAlchemy engine
        logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)
    else:
        loglevel = logging.INFO

    logging.basicConfig(
        format="%(asctime)s %(process)d %(levelname)s %(message)s", level=loglevel,
    )
|
||||
|
||||
|
||||
@cli.command()
@click.pass_context
def create_schema(ctx):
    """Create the schema from the models"""
    # The lister stored in the context by cli() provides the engine.
    engine = ctx.obj["lister"].db_engine
    SQLBase.metadata.create_all(engine)
|
||||
|
||||
|
||||
@cli.command()
@click.option("--name", help="The name of the distribution")
@click.option("--type", help="The type of distribution")
@click.option("--mirror-uri", help="The URL to the mirror of the distribution")
@click.option("--area", help="The areas for the distribution", multiple=True)
@click.pass_context
def create_distribution(ctx, name, type, mirror_uri, area):
    # Registers a distribution and its areas, creating only the rows that
    # are not found by the queries below; everything new is added and
    # committed in one go at the end.
    # (Comment instead of docstring so the command's help text is unchanged.)
    session = ctx.obj["lister"].db_session
    pending = []

    distribution = (
        session.query(Distribution)
        .filter(Distribution.name == name)
        .filter(Distribution.type == type)
        .one_or_none()
    )
    if not distribution:
        distribution = Distribution(name=name, type=type, mirror_uri=mirror_uri)
        pending.append(distribution)

    for area_name in area:
        existing_area = None
        # The area lookup is only attempted when the distribution has an id.
        if distribution.id:
            existing_area = (
                session.query(Area)
                .filter(Area.distribution == distribution)
                .filter(Area.name == area_name)
                .one_or_none()
            )

        if existing_area:
            continue

        pending.append(Area(name=area_name, distribution=distribution))

    session.add_all(pending)
    session.commit()
|
||||
|
||||
|
||||
@cli.command()
@click.option("--name", help="The name of the distribution")
@click.pass_context
def list_distribution(ctx, name):
    """List the distribution"""
    # Delegate to the lister stored in the context by cli().
    lister = ctx.obj["lister"]
    lister.run(name)
|
||||
|
||||
|
||||
# Script entry point. An empty dict is passed as the click context object
# because cli() stores the lister in it via ctx.obj["lister"].
if __name__ == "__main__":
    cli(obj={})
|
Loading…
Add table
Add a link
Reference in a new issue