From 56d7cff6e122db75db734c16edf5fdab6133f0e9 Mon Sep 17 00:00:00 2001
From: "Antoine R. Dumont (@ardumont)"
Date: Thu, 17 Oct 2019 17:58:13 +0200
Subject: [PATCH] debian/model: Install lister model within the lister repository

This is no longer shared between the new debian loader and the lister.
The swh.storage.schemata module is still part of the swh.storage module
though. As this is still a dependency for the current swh.loader.debian
production loader. This will be cleaned up later.

Related D2135
---
 requirements-swh.txt        |   1 -
 swh/lister/debian/lister.py |   2 +-
 swh/lister/debian/models.py | 283 ++++++++++++++++++++++++++++++++++++
 3 files changed, 284 insertions(+), 2 deletions(-)
 create mode 100644 swh/lister/debian/models.py

diff --git a/requirements-swh.txt b/requirements-swh.txt
index d7661c0..01f1bd4 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,2 @@
 swh.core >= 0.0.75
-swh.storage[schemata] >= 0.0.122
 swh.scheduler >= 0.0.58
diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py
index 6763df2..6f5f487 100644
--- a/swh/lister/debian/lister.py
+++ b/swh/lister/debian/lister.py
@@ -14,7 +14,7 @@ from debian.deb822 import Sources
 from sqlalchemy.orm import joinedload, load_only
 from sqlalchemy.schema import CreateTable, DropTable
 
-from swh.storage.schemata.distribution import (
+from swh.lister.debian.models import (
     AreaSnapshot, Distribution, DistributionSnapshot, Package,
     TempPackage,
 )
diff --git a/swh/lister/debian/models.py b/swh/lister/debian/models.py
new file mode 100644
index 0000000..7ddb7a2
--- /dev/null
+++ b/swh/lister/debian/models.py
@@ -0,0 +1,283 @@
+# Copyright (C) 2017-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import binascii
+from collections import defaultdict
+import datetime
+
+from sqlalchemy import (
+    Boolean,
+    Column,
+    DateTime,
+    Enum,
+    ForeignKey,
+    Integer,
+    LargeBinary,
+    String,
+    Table,
+    UniqueConstraint,
+)
+
+try:
+    from sqlalchemy import JSON
+except ImportError:
+    # SQLAlchemy < 1.1
+    from sqlalchemy.dialects.postgresql import JSONB as JSON
+
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship
+
+SQLBase = declarative_base()
+
+
+class Distribution(SQLBase):
+    """A distribution (e.g. Debian, Ubuntu, Fedora, ...)"""
+    __tablename__ = 'distribution'
+
+    id = Column(Integer, primary_key=True)
+    name = Column(String, unique=True, nullable=False)
+    type = Column(Enum('deb', 'rpm', name='distribution_types'),
+                  nullable=False)
+    mirror_uri = Column(String, nullable=False)
+
+    areas = relationship('Area', back_populates='distribution')
+
+    def origin_for_package(self, package_name, package_versions):
+        """Return the origin dictionary for the given package"""
+        return {
+            'type': self.type,
+            'url': '%s://%s/packages/%s' % (
+                self.type, self.name, package_name
+            ),
+        }
+
+    def __repr__(self):
+        return 'Distribution(%s (%s) on %s)' % (
+            self.name,
+            self.type,
+            self.mirror_uri,
+        )
+
+
+class Area(SQLBase):
+    """An area (component) of a distribution, named uniquely within it"""
+    __tablename__ = 'area'
+    __table_args__ = (
+        UniqueConstraint('distribution_id', 'name'),
+    )
+
+    id = Column(Integer, primary_key=True)
+    distribution_id = Column(Integer, ForeignKey('distribution.id'),
+                             nullable=False)
+    name = Column(String, nullable=False)
+    active = Column(Boolean, nullable=False, default=True)
+
+    distribution = relationship('Distribution', back_populates='areas')
+
+    def index_uris(self):
+        """Get possible URIs for this component's package index
+
+        Yields:
+            tuple: (uri, compression), compression being the index's
+            compression extension, or None for the uncompressed index
+
+        Raises:
+            NotImplementedError: if the distribution type is not 'deb'
+            (raised when the generator is first iterated)
+
+        """
+        if self.distribution.type == 'deb':
+            compression_exts = ('xz', 'bz2', 'gz', None)
+            base_uri = '%s/dists/%s/source/Sources' % (
+                self.distribution.mirror_uri,
+                self.name,
+            )
+            for ext in compression_exts:
+                if ext:
+                    yield (base_uri + '.' + ext, ext)
+                else:
+                    yield (base_uri, None)
+        else:
+            # Only unsupported types may raise: without this `else`, the
+            # error also fired once all 'deb' candidates were consumed.
+            raise NotImplementedError(
+                'Do not know how to build index URI for Distribution type %s' %
+                self.distribution.type
+            )
+
+    def __repr__(self):
+        return 'Area(%s of %s)' % (
+            self.name,
+            self.distribution.name,
+        )
+
+
+class Package(SQLBase):
+    """A package version, unique per (area, name, version)"""
+    __tablename__ = 'package'
+    __table_args__ = (
+        UniqueConstraint('area_id', 'name', 'version'),
+    )
+
+    id = Column(Integer, primary_key=True)
+    area_id = Column(Integer, ForeignKey('area.id'), nullable=False)
+    name = Column(String, nullable=False)
+    version = Column(String, nullable=False)
+    directory = Column(String, nullable=False)
+    files = Column(JSON, nullable=False)
+
+    origin_id = Column(Integer)
+    task_id = Column(Integer)
+
+    revision_id = Column(LargeBinary(20))
+
+    area = relationship('Area')
+
+    @property
+    def distribution(self):
+        """The distribution of the area this package belongs to"""
+        return self.area.distribution
+
+    def fetch_uri(self, filename):
+        """Get the URI to fetch the `filename` file associated with the
+        package"""
+        if self.distribution.type == 'deb':
+            return '%s/%s/%s' % (
+                self.distribution.mirror_uri,
+                self.directory,
+                filename,
+            )
+        else:
+            raise NotImplementedError(
+                'Do not know how to build fetch URI for Distribution type %s' %
+                self.distribution.type
+            )
+
+    def loader_dict(self):
+        """Return a dict describing this package (id, name, version)
+
+        When a revision_id is recorded, it is included hex-encoded.
+        Otherwise revision_id is None and 'files' maps each file name to
+        a copy of its entry in `files`, extended with the 'uri' to fetch.
+
+        """
+        ret = {
+            'id': self.id,
+            'name': self.name,
+            'version': self.version,
+        }
+        if self.revision_id:
+            ret['revision_id'] = binascii.hexlify(self.revision_id).decode()
+        else:
+            files = {
+                name: checksums.copy()
+                for name, checksums in self.files.items()
+            }
+            for name in files:
+                files[name]['uri'] = self.fetch_uri(name)
+
+            ret.update({
+                'revision_id': None,
+                'files': files,
+            })
+        return ret
+
+    def __repr__(self):
+        return 'Package(%s_%s of %s %s)' % (
+            self.name,
+            self.version,
+            self.distribution.name,
+            self.area.name,
+        )
+
+
+class DistributionSnapshot(SQLBase):
+    """A dated snapshot of a distribution, made of area snapshots"""
+    __tablename__ = 'distribution_snapshot'
+
+    id = Column(Integer, primary_key=True)
+    date = Column(DateTime, nullable=False, index=True)
+    distribution_id = Column(Integer,
+                             ForeignKey('distribution.id'),
+                             nullable=False)
+
+    distribution = relationship('Distribution')
+    areas = relationship('AreaSnapshot', back_populates='snapshot')
+
+    def task_for_package(self, package_name, package_versions):
+        """Return the task dictionary for the given list of package versions"""
+        origin = self.distribution.origin_for_package(
+            package_name, package_versions,
+        )
+
+        return {
+            'policy': 'oneshot',
+            'type': 'load-%s-package' % self.distribution.type,
+            'next_run': datetime.datetime.now(tz=datetime.timezone.utc),
+            'arguments': {
+                'args': [],
+                'kwargs': {
+                    'origin': origin,
+                    'date': self.date.isoformat(),
+                    'packages': package_versions,
+                },
+            }
+        }
+
+    def get_packages(self):
+        """Return the packages of this snapshot, grouped by package name
+
+        Returns:
+            dict: package name -> {'<area name>/<version>': loader dict}
+
+        """
+        packages = defaultdict(dict)
+        for area_snapshot in self.areas:
+            area_name = area_snapshot.area.name
+            for package in area_snapshot.packages:
+                ref_name = '%s/%s' % (area_name, package.version)
+                packages[package.name][ref_name] = package.loader_dict()
+
+        return packages
+
+
+# Many-to-many association between area snapshots and packages
+area_snapshot_package_assoc = Table(
+    'area_snapshot_package', SQLBase.metadata,
+    Column('area_snapshot_id', Integer, ForeignKey('area_snapshot.id'),
+           nullable=False),
+    Column('package_id', Integer, ForeignKey('package.id'),
+           nullable=False),
+)
+
+
+class AreaSnapshot(SQLBase):
+    """The packages of one area within a distribution snapshot"""
+    __tablename__ = 'area_snapshot'
+
+    id = Column(Integer, primary_key=True)
+    snapshot_id = Column(Integer,
+                         ForeignKey('distribution_snapshot.id'),
+                         nullable=False)
+    area_id = Column(Integer,
+                     ForeignKey('area.id'),
+                     nullable=False)
+
+    snapshot = relationship('DistributionSnapshot', back_populates='areas')
+    area = relationship('Area')
+    packages = relationship('Package', secondary=area_snapshot_package_assoc)
+
+
+class TempPackage(SQLBase):
+    """Row of the TEMPORARY 'temp_package' table (area_id, name, version)"""
+    __tablename__ = 'temp_package'
+    __table_args__ = {
+        'prefixes': ['TEMPORARY'],
+    }
+
+    id = Column(Integer, primary_key=True)
+    area_id = Column(Integer)
+    name = Column(String)
+    version = Column(String)