arch: Use tempfile module to create temporary directory

It ensures created temporary directories will be removed once they
are no longer needed.
This commit is contained in:
Antoine Lambert 2022-09-13 12:08:02 +02:00
parent 67211adb60
commit a55f171ed5

View file

@ -2,11 +2,13 @@
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import datetime
import logging
from pathlib import Path
import re
import tarfile
import tempfile
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import unquote, urljoin
@ -82,8 +84,6 @@ class ArchLister(StatelessLister[ArchListerPage]):
VISIT_TYPE = "arch"
INSTANCE = "arch"
DESTINATION_PATH = Path("/tmp/archlinux_archive")
ARCH_PACKAGE_URL_PATTERN = "{base_url}/packages/{repo}/{arch}/{pkgname}"
ARCH_PACKAGE_VERSIONS_URL_PATTERN = "{base_url}/packages/{pkgname[0]}/{pkgname}"
ARCH_PACKAGE_DOWNLOAD_URL_PATTERN = (
@ -360,109 +360,110 @@ class ArchLister(StatelessLister[ArchListerPage]):
def _get_repo_page(
self, name: str, flavour: Dict[str, Any], arch: str, repo: str
) -> ArchListerPage:
page = []
if name == "official":
prefix = urljoin(flavour["base_archive_url"], "/repos/last/")
filename = f"{repo}.files.tar.gz"
archive_url = urljoin(prefix, f"{repo}/os/{arch}/{filename}")
destination_path = Path(self.DESTINATION_PATH, arch, filename)
base_url = flavour["base_archive_url"]
dl_url_fmt = self.ARCH_PACKAGE_DOWNLOAD_URL_PATTERN
base_info_url = flavour["base_info_url"]
info_url_fmt = self.ARCH_PACKAGE_URL_PATTERN
elif name == "arm":
filename = f"{repo}.files.tar.gz"
archive_url = urljoin(
flavour["base_mirror_url"], f"{arch}/{repo}/{filename}"
)
destination_path = Path(self.DESTINATION_PATH, arch, filename)
base_url = flavour["base_mirror_url"]
dl_url_fmt = self.ARM_PACKAGE_DOWNLOAD_URL_PATTERN
base_info_url = flavour["base_info_url"]
info_url_fmt = self.ARM_PACKAGE_URL_PATTERN
archive = self.get_repo_archive(
url=archive_url, destination_path=destination_path
)
assert archive
packages_desc = list(archive.glob("**/desc"))
logger.debug(
"Processing %(instance)s source packages info from "
"%(flavour)s %(arch)s %(repo)s repository, "
"(%(qty)s packages).",
dict(
instance=self.instance,
flavour=name,
arch=arch,
repo=repo,
qty=len(packages_desc),
),
)
for package_desc in packages_desc:
data = self.parse_desc_file(
path=package_desc,
repo=repo,
base_url=base_url,
dl_url_fmt=dl_url_fmt,
)
assert data["builddate"]
last_modified = datetime.datetime.fromtimestamp(
float(data["builddate"]), tz=datetime.timezone.utc
)
assert data["name"]
assert data["filename"]
assert data["arch"]
url = info_url_fmt.format(
base_url=base_info_url,
pkgname=data["name"],
filename=data["filename"],
repo=repo,
arch=data["arch"],
)
assert data["version"]
with tempfile.TemporaryDirectory() as tmpdir:
page = []
if name == "official":
# find all versions of a package scrapping archive
versions = self.scrap_package_versions(
name=data["name"], repo=repo, base_url=base_url
)
prefix = urljoin(flavour["base_archive_url"], "/repos/last/")
filename = f"{repo}.files.tar.gz"
archive_url = urljoin(prefix, f"{repo}/os/{arch}/{filename}")
destination_path = Path(tmpdir, arch, filename)
base_url = flavour["base_archive_url"]
dl_url_fmt = self.ARCH_PACKAGE_DOWNLOAD_URL_PATTERN
base_info_url = flavour["base_info_url"]
info_url_fmt = self.ARCH_PACKAGE_URL_PATTERN
elif name == "arm":
# There is no way to get related versions of a package,
# but 'data' represents the latest released version,
# use it in this case
assert data["builddate"]
assert data["csize"]
assert data["url"]
versions = [
dict(
name=data["name"],
version=data["version"],
repo=repo,
arch=data["arch"],
filename=data["filename"],
url=data["url"],
last_modified=last_modified.replace(tzinfo=None).isoformat(
timespec="seconds"
),
length=int(data["csize"]),
)
]
filename = f"{repo}.files.tar.gz"
archive_url = urljoin(
flavour["base_mirror_url"], f"{arch}/{repo}/{filename}"
)
destination_path = Path(tmpdir, arch, filename)
base_url = flavour["base_mirror_url"]
dl_url_fmt = self.ARM_PACKAGE_DOWNLOAD_URL_PATTERN
base_info_url = flavour["base_info_url"]
info_url_fmt = self.ARM_PACKAGE_URL_PATTERN
package = {
"name": data["name"],
"version": data["version"],
"last_modified": last_modified,
"url": url,
"versions": versions,
"data": data,
}
page.append(package)
return page
archive = self.get_repo_archive(
url=archive_url, destination_path=destination_path
)
assert archive
packages_desc = list(archive.glob("**/desc"))
logger.debug(
"Processing %(instance)s source packages info from "
"%(flavour)s %(arch)s %(repo)s repository, "
"(%(qty)s packages).",
dict(
instance=self.instance,
flavour=name,
arch=arch,
repo=repo,
qty=len(packages_desc),
),
)
for package_desc in packages_desc:
data = self.parse_desc_file(
path=package_desc,
repo=repo,
base_url=base_url,
dl_url_fmt=dl_url_fmt,
)
assert data["builddate"]
last_modified = datetime.datetime.fromtimestamp(
float(data["builddate"]), tz=datetime.timezone.utc
)
assert data["name"]
assert data["filename"]
assert data["arch"]
url = info_url_fmt.format(
base_url=base_info_url,
pkgname=data["name"],
filename=data["filename"],
repo=repo,
arch=data["arch"],
)
assert data["version"]
if name == "official":
# find all versions of a package scrapping archive
versions = self.scrap_package_versions(
name=data["name"], repo=repo, base_url=base_url
)
elif name == "arm":
# There is no way to get related versions of a package,
# but 'data' represents the latest released version,
# use it in this case
assert data["builddate"]
assert data["csize"]
assert data["url"]
versions = [
dict(
name=data["name"],
version=data["version"],
repo=repo,
arch=data["arch"],
filename=data["filename"],
url=data["url"],
last_modified=last_modified.replace(tzinfo=None).isoformat(
timespec="seconds"
),
length=int(data["csize"]),
)
]
package = {
"name": data["name"],
"version": data["version"],
"last_modified": last_modified,
"url": url,
"versions": versions,
"data": data,
}
page.append(package)
return page
def get_origins_from_page(self, page: ArchListerPage) -> Iterator[ListedOrigin]:
"""Iterate on all arch pages and yield ListedOrigin instances."""