lister/gnu: Modify gnu lister's loading task creation

The loading task signature for the gnu loader is now:
- args:
  - package
  - package urls

- kwargs:
  - tarballs: List of Dict with keys 'archive' (unchanged), 'time' (was 'date'),
      'length' (new)
This commit is contained in:
Antoine R. Dumont (@ardumont) 2019-10-04 11:05:58 +02:00
parent 00bb6c7bbf
commit 3ce6c5c6ef
No known key found for this signature in database
GPG key ID: 52E2E9840D10C3B8
4 changed files with 75 additions and 37 deletions

View file

@ -103,7 +103,7 @@ class GNULister(SimpleLister):
directory['name'],
repo['name'])
package_tarballs = find_tarballs(
repo['contents'], package_url)
repo['contents'], package_url)
if package_tarballs != []:
repo_details = {
'name': repo['name'],
@ -125,7 +125,7 @@ class GNULister(SimpleLister):
'full_name': repo['name'],
'html_url': repo['url'],
'origin_url': repo['url'],
'time_last_updated': repo['time_modified'],
'time_last_updated': int(repo['time_modified']),
'origin_type': 'tar',
}
@ -137,44 +137,47 @@ class GNULister(SimpleLister):
def find_tarballs(package_file_structure, url):
'''
Recursively lists all the tarball present in the folder and
subfolders for a particular package url.
'''Recursively lists tarballs present in the folder and subfolders for a
particular package url.
Args
package_file_structure : File structure of the package root directory
url : URL of the corresponding package
package_file_structure: File structure of the package root directory
url: URL of the corresponding package
Returns
List of all the tarball urls and the last their time of update
example-
For a package called 3dldf
List of tarball urls and their associated metadata (time, length).
For example:
[
{'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz',
'date': '1071002600'}
'time': 1071002600,
'length': 543},
{'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz',
'date': '1071078759'}
'time': 1071078759,
'length': 456},
{'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.1.tar.gz',
'date': '1074278633'}
'time': 1074278633,
'length': 251},
...
]
'''
tarballs = []
for single_file in package_file_structure:
file_type = single_file['type']
file_name = single_file['name']
if file_type == 'file':
if file_extension_check(file_name):
tarballs .append({
"archive": url + file_name,
"date": single_file['time']
filetype = single_file['type']
filename = single_file['name']
if filetype == 'file':
if file_extension_check(filename):
tarballs.append({
'archive': url + filename,
'time': int(single_file['time']),
'length': int(single_file['size']),
})
# It will recursively check for tarballs in all sub-folders
elif file_type == 'directory':
elif filetype == 'directory':
tarballs_in_dir = find_tarballs(
single_file['contents'],
url + file_name + '/')
single_file['contents'],
url + filename + '/')
tarballs.extend(tarballs_in_dir)
return tarballs

View file

@ -17,20 +17,39 @@ def test_filter_directories():
assert False
def test_find_tarballs():
f = open('swh/lister/gnu/tests/find_tarballs_output.json')
expected_list_of_all_tarballs = json.load(f)
def test_find_tarballs_small_sample():
expected_tarballs = [
{
'archive': '/root/artanis/artanis-0.2.1.tar.bz2',
'time': 1495205979,
'length': 424081,
},
{
'archive': '/root/xboard/winboard/winboard-4_0_0-src.zip', # noqa
'time': 898422900,
'length': 1514448
},
{
'archive': '/root/xboard/xboard-3.6.2.tar.gz', # noqa
'time': 869814000,
'length': 450164,
},
{
'archive': '/root/xboard/xboard-4.0.0.tar.gz', # noqa
'time': 898422900,
'length': 514951,
},
]
f = open('swh/lister/gnu/tests/file_structure.json')
file_structure = json.load(f)
list_of_all_tarballs = []
list_of_all_tarballs.extend(
find_tarballs(file_structure[0]['contents'],
"https://ftp.gnu.org/gnu/artanis/"))
list_of_all_tarballs.extend(
find_tarballs(file_structure[1]['contents'],
"https://ftp.gnu.org/old-gnu/xboard/"))
assert list_of_all_tarballs == expected_list_of_all_tarballs
file_structure = json.load(open('swh/lister/gnu/tests/tree.min.json'))
actual_tarballs = find_tarballs(file_structure, '/root/')
assert actual_tarballs == expected_tarballs
def test_find_tarballs():
file_structure = json.load(open('swh/lister/gnu/tests/tree.json'))
actual_tarballs = find_tarballs(file_structure, '/root/')
assert len(actual_tarballs) == 42 + 3 # tar + zip
def test_file_extension_check():

View file

@ -70,4 +70,4 @@
{"type":"file","name":"xboard-4.2.6.tar.gz","size":1057625,"time":"1012641715"},
{"type":"file","name":"xboard-4.2.7.tar.gz","size":1318110,"time":"1070057764"}
]}
]
]

View file

@ -0,0 +1,16 @@
[
{"type":"directory","name":"artanis","size":4096,"time":"1546205705","contents":[
{"type":"file","name":"artanis-0.2.1.tar.bz2","size":424081,"time":"1495205979"},
{"type":"file","name":"artanis-0.2.1.tar.bz2.sig","size":833,"time":"1495205982"}
]},
{"type":"directory","name":"xboard","size":4096,"time":"1254860068","contents":[
{"type":"directory","name":"winboard","size":4096,"time":"1181795103","contents":[
{"type":"file","name":"README","size":107,"time":"1070058107"},
{"type":"file","name":"README.sig","size":65,"time":"1070058115"},
{"type":"file","name":"winboard-4_0_0-src.zip","size":1514448,"time":"898422900"},
{"type":"file","name":"winboard-4_0_0.exe","size":1652037,"time":"898422900"}
]},
{"type":"file","name":"xboard-3.6.2.tar.gz","size":450164,"time":"869814000"},
{"type":"file","name":"xboard-4.0.0.tar.gz","size":514951,"time":"898422900"}
]}
]