From 46425917c25e4d58cff6ab8870eaddc13180bf92 Mon Sep 17 00:00:00 2001 From: Antoine Lambert Date: Thu, 2 Dec 2021 17:19:08 +0100 Subject: [PATCH] debian: Add missing file URIs in lister output For a given package, the debian lister generates a dictionary mapping distribution and version to a list of files to be processed by the debian loader. For each file to process, the debian loader expects to find a URI in order to download it and then use its content to ingest package source code into the archive. However, it turns out these URIs were not computed by the lister in its current implementation, making any debian loading task fail due to this missing information. So add the computation of these URIs and ensure they will be provided in the debian loader input parameters. Related to T2400 --- swh/lister/debian/lister.py | 6 +++++- swh/lister/debian/tests/test_lister.py | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py index 165bbc2..78470cd 100644 --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -11,6 +11,7 @@ import gzip from itertools import product import logging import lzma +import os from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple from urllib.parse import urljoin @@ -198,9 +199,12 @@ class DebianLister(Lister[DebianListerState, DebianPageType]): if field_ in src_pkg: for entry in src_pkg[field_]: name = entry["name"] - files[name]["name"] = entry["name"] + files[name]["name"] = name files[name]["size"] = int(entry["size"], 10) files[name][sum_name] = entry[sum_name] + files[name]["uri"] = os.path.join( + self.url, src_pkg["Directory"], name + ) # extract package name and version package_name = src_pkg["Package"] diff --git a/swh/lister/debian/tests/test_lister.py b/swh/lister/debian/tests/test_lister.py index 2dc5c6d..754adb5 100644 --- a/swh/lister/debian/tests/test_lister.py +++ b/swh/lister/debian/tests/test_lister.py @@ 
-4,6 +4,7 @@ # See top-level LICENSE file for more information from collections import defaultdict +import os from pathlib import Path from typing import Dict, List, Set, Tuple @@ -117,11 +118,19 @@ def _check_listed_origins( ] assert filtered_origins + packages = filtered_origins[0].extra_loader_arguments["packages"] # check the version info are available - assert ( - package_version_key - in filtered_origins[0].extra_loader_arguments["packages"] - ) + assert package_version_key in packages + + # check package files URIs are available + for file in pkg_src["files"]: + filename = file["name"] + file_uri = os.path.join( + _mirror_url, pkg_src["Directory"], filename + ) + package_files = packages[package_version_key]["files"] + assert filename in package_files + assert package_files[filename]["uri"] == file_uri # check listed package version is in lister state assert package_name in lister.state.package_versions