gnu.lister: Update docstrings and fix type annotations

This commit is contained in:
Antoine R. Dumont (@ardumont) 2019-10-18 03:26:59 +02:00
parent c6372eea7e
commit 89b409d30f
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
4 changed files with 36 additions and 31 deletions

View file

@ -38,10 +38,14 @@ class GNULister(SimpleLister):
'url': 'https://ftp.gnu.org/gnu/3dldf/',
'artifacts': [{
'url': 'https://...',
'time': 1071002600,
'length': 128},
'time': '2003-12-09T21:43:20+00:00',
'length': 128,
'version': '1.0.1',
'filename': 'something-1.0.1.tar.gz',
},
...
]}
]
}
"""
artifacts = self.gnu_tree.artifacts[origin_url]
@ -73,14 +77,13 @@ class GNULister(SimpleLister):
List of packages, each with its name, url and last modification time
.. code-block:: python
[
{'name': '3dldf',
'url': 'https://ftp.gnu.org/gnu/3dldf/',
'time_modified': 1071002600},
'time_modified': '2003-12-09T20:43:20+00:00'},
{'name': '8sync',
'url': 'https://ftp.gnu.org/gnu/8sync/',
'time_modified': 1480991830},
'time_modified': '2016-12-06T02:37:10+00:00'},
...
]

View file

@ -2,7 +2,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from sqlalchemy import Column, DateTime, Integer, String
from sqlalchemy import Column, DateTime, String
from ..core.models import ModelBase

View file

@ -149,7 +149,9 @@ def test_check_filename_is_archive():
def test_get_version():
"""From url to branch name should yield something relevant
"""Parsing version from url should yield some form of "sensible" version
Given the dataset, correctly extracting the version is not a simple task.
"""
for url, expected_branchname in [
@ -198,7 +200,8 @@ def test_get_version():
'2.5.3-ansi-japi-xdr.20030701_mingw32'),
('gettext-runtime-0.13.1.bin.woe32.zip', '0.13.1.bin.woe32'),
('sather-logo_images.tar.gz', 'sather-logo_images'),
('sather-specification-000328.html.tar.gz', '000328.html')
('sather-specification-000328.html.tar.gz', '000328.html'),
('something-10.1.0.7z', '10.1.0'),
]:
actual_branchname = get_version(url)

View file

@ -13,7 +13,7 @@ from datetime import datetime
from os import path
from pathlib import Path
from pytz import utc
from typing import Any, Dict, List, Mapping, Tuple
from typing import Any, List, Mapping, Sequence, Tuple
from urllib.parse import urlparse
@ -31,22 +31,22 @@ class GNUTree:
# Interesting top level directories
self.top_level_directories = ['gnu', 'old-gnu']
# internal state
self._artifacts = {} # type: Dict
self._projects = {} # type: Dict
self._artifacts = {} # type: Mapping[str, Any]
self._projects = {} # type: Mapping[str, Any]
@property
def projects(self) -> Dict:
def projects(self) -> Mapping[str, Any]:
if not self._projects:
self._projects, self._artifacts = self._load()
return self._projects
@property
def artifacts(self) -> Dict:
def artifacts(self) -> Mapping[str, Any]:
if not self._artifacts:
self._projects, self._artifacts = self._load()
return self._artifacts
def _load(self) -> Tuple[Dict, Dict]:
def _load(self) -> Tuple[Mapping[str, Any], Mapping[str, Any]]:
"""Compute projects and artifacts per project
Returns:
@ -81,8 +81,8 @@ class GNUTree:
return projects, artifacts
def find_artifacts(
filesystem: List[Dict], url: str) -> List[Mapping[str, Any]]:
def find_artifacts(filesystem: List[Mapping[str, Any]],
url: str) -> List[Mapping[str, Any]]:
"""Recursively list artifacts present in the folder and subfolders for a
particular package url.
@ -125,7 +125,7 @@ def find_artifacts(
]
"""
artifacts = []
artifacts = [] # type: List[Mapping[str, Any]]
for info_file in filesystem:
filetype = info_file['type']
filename = info_file['name']
@ -176,7 +176,6 @@ def check_filename_is_archive(filename: str) -> bool:
"""
file_suffixes = Path(filename).suffixes
logger.debug('Path(%s).suffixed: %s' % (filename, file_suffixes))
if len(file_suffixes) == 1 and file_suffixes[-1] in ('.zip', '.tar'):
return True
elif len(file_suffixes) > 1:
@ -186,17 +185,17 @@ def check_filename_is_archive(filename: str) -> bool:
# to recognize existing naming pattern
extensions = [
EXTENSIONS = [
'zip',
'tar',
'gz', 'tgz',
'bz2', 'bzip2',
'lzma', 'lz',
'xz',
'Z',
'Z', '7z',
]
version_keywords = [
VERSION_KEYWORDS = [
'cygwin_me',
'w32', 'win32', 'nt', 'cygwin', 'mingw',
'latest', 'alpha', 'beta',
@ -226,24 +225,24 @@ version_keywords = [
# greedily with +, software_name and release_number are matched lazily
# with +? and *?).
pattern = r'''
PATTERN = r'''
^
(?:
# We have a software name and a release number, separated with a
# -, _ or dot.
(?P<software_name1>.+?[-_.])
(?P<release_number>(%(vkeywords)s|[0-9][0-9a-zA-Z_.+:~-]*?)+)
(?P<release_number>({vkeywords}|[0-9][0-9a-zA-Z_.+:~-]*?)+)
|
# We couldn't match a release number, put everything in the
# software name.
(?P<software_name2>.+?)
)
(?P<extension>(?:\.(?:%(extensions)s))+)
(?P<extension>(?:\.(?:{extensions}))+)
$
''' % {
'extensions': '|'.join(extensions),
'vkeywords': '|'.join('%s[-]?' % k for k in version_keywords),
}
'''.format(
extensions='|'.join(EXTENSIONS),
vkeywords='|'.join('%s[-]?' % k for k in VERSION_KEYWORDS),
)
def get_version(uri: str) -> str:
@ -268,7 +267,7 @@ def get_version(uri: str) -> str:
"""
filename = path.split(uri)[-1]
m = re.match(pattern, filename,
m = re.match(PATTERN, filename,
flags=re.VERBOSE | re.IGNORECASE)
if m:
d = m.groupdict()
@ -280,7 +279,7 @@ def get_version(uri: str) -> str:
return ''
def load_raw_data(url: str) -> List[Dict]:
def load_raw_data(url: str) -> Sequence[Mapping]:
"""Load the raw json from the tree.json.gz
Args: