# NOTE: the three definitions below are methods of GNULister; the class
# statement itself is not part of this span.
def task_dict(self, origin_type, origin_url, **kwargs):
    """Return the scheduler task dict for one listed package.

    This is overridden from the lister base as more information (the
    list of tarballs) is needed for the ingestion task creation.

    Args:
        origin_type: origin type; the task type is 'load-<origin_type>'
        origin_url: url of the origin, forwarded unchanged
        **kwargs: extra fields; only 'name' and 'list_of_tarballs' are
            read, each defaulting to None when absent

    Returns:
        the value returned by utils.create_task_dict for a 'recurring'
        task
    """
    task_type = 'load-%s' % origin_type
    return utils.create_task_dict(
        task_type, 'recurring', kwargs.get('name'), origin_url,
        list_of_tarballs=kwargs.get('list_of_tarballs'))


def get_file(self):
    '''
    Download the tree.json.gz file, unzip it and return its content
    parsed from JSON.

    Returns
        File content in JSON format

    Raises
        requests.exceptions.HTTPError: the server answered with an error
            status, so there is no gzip payload to decode
        requests.exceptions.Timeout: the server stopped responding
    '''
    # Without a timeout requests waits forever on a stalled connection.
    response = requests.get(self.TREE_URL, allow_redirects=True, timeout=60)
    # Fail on 4xx/5xx here instead of handing an error page to gzip.
    response.raise_for_status()
    uncompressed_content = gzip.decompress(response.content)
    return json.loads(uncompressed_content.decode('utf-8'))


def safely_issue_request(self, identifier):
    '''
    Make the network request that downloads the file describing the
    file structure of the GNU website.

    Args:
        identifier: resource identifier (not used here: a single file
            holds the whole listing)
    Returns:
        server response, already parsed by get_file
    '''
    return self.get_file()
# NOTE: list_packages and transport_response_simplified are methods of
# GNULister; the class statement itself is not part of this span.
def list_packages(self, response):
    """
    List the actual gnu origins with their names, root url and tarballs
    from the response.

    Args:
        response : File structure of the website
        in JSON format

    Returns:
        a list, in random order, of all the packages that hold at least
        one tarball, with their names, url of their root directory and
        the tarballs present for the particular package.
        [
            {'name': '3dldf', 'url': 'https://ftp.gnu.org/gnu/3dldf/',
             'list_of_tarballs':
                [
                    {'archive':
                        'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz',
                     'date': '1071002600'},
                    {'archive':
                        'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz',
                     'date': '1071078759'}
                ]
             },
            {'name': '8sync', 'url': 'https://ftp.gnu.org/gnu/8sync/',
             'list_of_tarballs':
                [
                    {'archive':
                        'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
                     'date': '1461357336'},
                    {'archive':
                        'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz',
                     'date': '1480991830'}
                ]
             }
        ]
    """
    packages = []
    for directory in filter_directories(response):
        for repo in directory['contents']:
            if repo['type'] != 'directory':
                continue
            package_url = '%s/%s/%s/' % (
                self.BASE_URL, directory['name'], repo['name'])
            list_of_tarballs = find_tarballs(repo['contents'], package_url)
            # A package without any tarball has nothing to load: skip it.
            if list_of_tarballs:
                packages.append({
                    'name': repo['name'],
                    'url': package_url,
                    # The key must be exactly 'list_of_tarballs' (no
                    # trailing space): that is the name task_dict reads.
                    'list_of_tarballs': list_of_tarballs,
                })
    random.shuffle(packages)
    return packages


def transport_response_simplified(self, response):
    """Transform response to list for model manipulation

    Every entry of response is converted with get_model_from_repo.
    """
    return [self.get_model_from_repo(repo) for repo in response]


def _is_tarball(file_name):
    '''
    Tell whether file_name looks like an archive to ingest: '*.zip', or
    '*.tar.' followed by a two character compression suffix (.tar.gz,
    .tar.xz, .tar.lz, ...).

    The dots are part of the check, so that names which merely end with
    the same letters ('gzip', 'guitar.gz') are not taken for archives.
    '''
    return file_name.endswith('.zip') or file_name[-7:-2] == '.tar.'


def find_tarballs(package_file_structure, url):
    '''
    Recursively lists all the tarballs present in the folder and
    subfolders for a particular package url.

    Args
        package_file_structure : File structure of the package root directory
        url : URL of the corresponding package, ending with '/'

    Returns
        List of all the tarball urls and their time of last update
        example-
        For a package called 3dldf

        [
            {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz',
             'date': '1071002600'},
            {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz',
             'date': '1071078759'},
            {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.1.tar.gz',
             'date': '1074278633'},
            ...
        ]
    '''
    list_of_tarballs = []
    for entry in package_file_structure:
        file_type = entry['type']
        file_name = entry['name']
        if file_type == 'file':
            if _is_tarball(file_name):
                list_of_tarballs.append({
                    'archive': url + file_name,
                    'date': entry['time'],
                })
        # It will recursively check for tarballs in all sub-folders
        elif file_type == 'directory':
            list_of_tarballs.extend(
                find_tarballs(entry['contents'], url + file_name + '/'))
        # Any other entry type (e.g. 'link') is ignored.

    return list_of_tarballs


def filter_directories(response):
    '''
    Removes unnecessary directories from JSON response and
    keep only gnu/ and old-gnu/

    Only the entries of response[0]['contents'] are examined.
    '''
    return [directory for directory in response[0]['contents']
            if directory['name'] in ('gnu', 'old-gnu')]
{"type":"file","name":"README","size":2925,"time":"1554408947"}, + {"type":"file","name":"before-2003-08-01.md5sums.asc","size":405121,"time":"1066928156"}, + {"type":"file","name":"find.txt.gz","size":261428,"time":"1557684608"}, + {"type":"directory","name":"gnu","size":12288,"time":"1556742017","contents":[]}, + {"type":"directory","name":"gnu+linux-distros","size":4096,"time":"1299783002","contents":[ + {"type":"directory","name":"ututo-e","size":4096,"time":"1487780066","contents":[ + {"type":"file","name":"README","size":48,"time":"1487780066"}, + {"type":"file","name":"index.html","size":158,"time":"1487780054"} + ]} + ]}, + {"type":"file","name":"ls-lrRt.txt.gz","size":480054,"time":"1557684607"}, + {"type":"directory","name":"mirrors","size":4096,"time":"1114010630","contents":[ + {"type":"directory","name":"dynebolic","size":4096,"time":"1317827602","contents":[ + {"type":"file","name":"MOVED_TO_mirror.fsf.org_dynebolic","size":0,"time":"1317826935"}, + {"type":"file","name":"README.txt","size":41,"time":"1317827081"}, + {"type":"file","name":"index.html","size":107,"time":"1317827601"} + ]} + ]}, + {"type":"link","name":"non-gnu","target":"gnu/non-gnu","size":11,"time":"1082055542","contents":[]}, + {"type":"directory","name":"old-gnu","size":4096,"time":"1548360019","contents":[]}, + {"type":"link","name":"pub","target":".","size":1,"time":"1060090003","contents":[]}, + {"type":"directory","name":"savannah","size":4096,"time":"1194544006","contents":[ + {"type":"file","name":"README","size":473,"time":"1143758028"} + ]}, + {"type":"directory","name":"third-party","size":4096,"time":"1059825710","contents":[ + {"type":"file","name":"README","size":374,"time":"983824071"} + ]}, + {"type":"directory","name":"tmp","size":4096,"time":"1239072509","contents":[ + ]}, + {"type":"file","name":"tree.json.gz","size":0,"time":"1557684608"}, + {"type":"directory","name":"video","size":4096,"time":"1367963189","contents":[ + 
{"type":"file","name":".bash_history","size":27,"time":"1307027604"}, + {"type":"file","name":"A_Digital_Media_Primer_For_Geeks-360p.webm","size":138044199,"time":"1284892010"}, + {"type":"file","name":"A_Digital_Media_Primer_For_Geeks-480p.webm","size":365119650,"time":"1284895035"}, + {"type":"file","name":"A_Digital_Media_Primer_For_Geeks-720p.webm","size":820524785,"time":"1284903666"}, + {"type":"file","name":"Autobuild_Status_Update.ogv","size":95034503,"time":"1281781973"}, + {"type":"file","name":"FOSDEM2006-GPL.ogg","size":23901186,"time":"1299776852"}, + {"type":"file","name":"FOSDEM2006-GPL.ogg.sig","size":536,"time":"1299776852"}, + {"type":"file","name":"FSF_event.ogv","size":52056292,"time":"1251497852"}, + {"type":"file","name":"FSF_event_small.ogv","size":5237196,"time":"1251840561"}, + {"type":"file","name":"GNU-Hurd_-_Its_About_Freedom,_Or_Why_you_should_care.ogv","size":177589989,"time":"1281779528"}, + {"type":"file","name":"GNU_Bazaar_-_a_distributed_version_control_system_for_free_software_communities.ogv","size":122114984,"time":"1281783734"}, + {"type":"file","name":"GNU_Psycosynth.ogv","size":69931061,"time":"1281784291"}, + {"type":"file","name":"GNU_in_the_cloud.ogv","size":84068213,"time":"1281784003"}, + {"type":"file","name":"GNUnet_-_Transports_and_Transport_Selection.ogv","size":334973582,"time":"1281801792"}, + {"type":"file","name":"GNUnet_Distributed_Data_Storage_-_DHT_and_Distance_Vector_Transport.ogv","size":630257300,"time":"1281791152"}, + {"type":"file","name":"Introduction_to_LilyPond.ogv","size":135277165,"time":"1281787990"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-100kbit_vorbis.ogg","size":4481112,"time":"1220091688"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-12kbit_speex.spx","size":511060,"time":"1220091688"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-hq_600px_780kbit.ogv","size":34370182,"time":"1220091689"}, + 
{"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-hq_600px_780kbit_fr.ogv","size":35983054,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-lq_300px_190kbit.ogv","size":8126685,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-lq_300px_190kbit_fr.ogv","size":8978906,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit.ogv","size":22443268,"time":"1220091690"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit_en.ogv","size":23467445,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit_fr.ogv","size":23649669,"time":"1220091690"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-vlq_192px_56kbit.ogv","size":2530887,"time":"1220091690"}, + {"type":"file","name":"TIME","size":11,"time":"1557682561"}, + {"type":"file","name":"The_GCC_Compile_Farm.ogv","size":102712932,"time":"1281788256"}, + {"type":"file","name":"The_GNU_Record_Utilities.ogv","size":156667664,"time":"1281788873"}, + {"type":"file","name":"The_GNUnet_Peer-to-Peer_Framework.ogv","size":863516287,"time":"1281792840"}, + {"type":"file","name":"The_new_CPP_Standard_and_library_Cpp0x.ogv","size":406746577,"time":"1281790439"}, + {"type":"file","name":"dyne.org_hackers_network.ogv","size":154795708,"time":"1281785482"}, + {"type":"file","name":"fry720.jpg","size":141588,"time":"1219677812"}, + {"type":"file","name":"fsf-2009-hillaryrettig.ogv","size":17557909,"time":"1262115081"}, + {"type":"file","name":"fsf-2009-jeremyallison.ogv","size":20719197,"time":"1256665061"}, + {"type":"file","name":"fsf-2009-larrylessig.ogv","size":10571717,"time":"1261667503"}, + {"type":"file","name":"fsf-2009-maryloujepsen.ogv","size":4198432,"time":"1256222925"}, + {"type":"file","name":"fsf-2009-peterbrown-final.ogv","size":14283895,"time":"1259856669"}, + {"type":"file","name":"fsf-2009-robsavoye.ogv","size":10262195,"time":"1256596109"}, + 
{"type":"file","name":"fsf-2009-savoye.ogv","size":13594470,"time":"1256596105"}, + {"type":"file","name":"gNewSense.ogv","size":89891356,"time":"1281783196"}, + {"type":"file","name":"gnulib_-_Contributing_reusable_code.ogv","size":120341572,"time":"1281779839"}, + {"type":"file","name":"lp2010-eben-moglen.ogv","size":63138569,"time":"1280508649"}, + {"type":"file","name":"lp2010-fri-command-line.ogg","size":102351845,"time":"1280515516"}, + {"type":"file","name":"lp2010-fri-gimp-inkscape.ogg","size":73786977,"time":"1280515751"}, + {"type":"file","name":"lp2010-fri-inkscape-gimp.ogv","size":163618762,"time":"1279906253"}, + {"type":"file","name":"lp2010-fri-intro-to-command-line.ogv","size":166684617,"time":"1279913790"}, + {"type":"file","name":"lp2010-fri-intro.ogg","size":8830316,"time":"1280515539"}, + {"type":"file","name":"lp2010-fri-speaking-workshop.ogg","size":66985650,"time":"1280515967"}, + {"type":"file","name":"lp2010-fri-welcome.ogv","size":18634141,"time":"1279914475"}, + {"type":"file","name":"lp2010-fri-workshop-speaking.ogv","size":104913218,"time":"1279920003"}, + {"type":"file","name":"lp2010-sat-eben-moglen.ogg","size":36063678,"time":"1280516115"}, + {"type":"file","name":"lp2010-sat-eben-moglen.ogv","size":63138569,"time":"1280155656"}, + {"type":"file","name":"lp2010-sat-film-discussion.ogg","size":6212226,"time":"1280516133"}, + {"type":"file","name":"lp2010-sat-gilmore-fixed.ogv","size":315091609,"time":"1281103644"}, + {"type":"file","name":"lp2010-sat-gilmore.ogv","size":278528,"time":"1281106393"}, + {"type":"file","name":"lp2010-sat-gnome-shell.ogg","size":26624876,"time":"1280516218"}, + {"type":"file","name":"lp2010-sat-gnome-shell.ogv","size":54603867,"time":"1280158479"}, + {"type":"file","name":"lp2010-sat-gnu-generation.ogg","size":25279954,"time":"1280516298"}, + {"type":"file","name":"lp2010-sat-gnu-generation.ogv","size":34687879,"time":"1280160636"}, + 
{"type":"file","name":"lp2010-sat-intro.ogg","size":2257992,"time":"1280516304"}, + {"type":"file","name":"lp2010-sat-john-gilmore.ogg","size":39422949,"time":"1280516426"}, + {"type":"file","name":"lp2010-sat-karen-sandler.ogg","size":37913634,"time":"1280516542"}, + {"type":"file","name":"lp2010-sat-licensing-hoedown.ogv","size":81030741,"time":"1280163330"}, + {"type":"file","name":"lp2010-sat-rms+awards.ogg","size":41024302,"time":"1280516669"}, + {"type":"file","name":"lp2010-sat-rms+awards.ogv","size":138070626,"time":"1280167204"}, + {"type":"file","name":"lp2010-sat-sandler-sflc.ogv","size":48057099,"time":"1280170655"}, + {"type":"file","name":"lp2010-sat-sugar-labs.ogv","size":63820951,"time":"1280173681"}, + {"type":"file","name":"lp2010-sat-symbian.ogv","size":71807650,"time":"1280176185"}, + {"type":"file","name":"lp2010-sun-deb-nicholson-1.ogg","size":51329142,"time":"1280517315"}, + {"type":"file","name":"lp2010-sun-deb-nicholson-2.ogg","size":29574674,"time":"1280517409"}, + {"type":"file","name":"lp2010-sun-free-network-services.ogv","size":294504464,"time":"1280180824"}, + {"type":"file","name":"lp2010-sun-free-software-mentoring.ogg","size":11780923,"time":"1280517447"}, + {"type":"file","name":"lp2010-sun-fsf-campaigns-team-update.ogv","size":166676786,"time":"1280183913"}, + {"type":"file","name":"lp2010-sun-gnash.ogv","size":129681345,"time":"1280185455"}, + {"type":"file","name":"lp2010-sun-gnu-telephony.ogv","size":205191264,"time":"1280188317"}, + {"type":"file","name":"lp2010-sun-jeff-jaffe.ogv","size":98181482,"time":"1280189804"}, + {"type":"file","name":"lp2010-sun-libredwg.ogv","size":138981040,"time":"1280191724"}, + {"type":"file","name":"lp2010-sun-lightning-talks.ogg","size":47004371,"time":"1280517591"}, + {"type":"file","name":"lp2010-sun-lightning-talks.ogv","size":105266782,"time":"1280196010"}, + {"type":"file","name":"lp2010-sun-luis-villa.ogv","size":182437632,"time":"1280200006"}, + 
{"type":"file","name":"lp2010-sun-mentoring.ogv","size":47095807,"time":"1280201658"}, + {"type":"file","name":"lp2010-sun-non-coding-roles.ogv","size":109852516,"time":"1280206251"}, + {"type":"file","name":"lp2010-sun-noncoding-workshop.ogg","size":23507355,"time":"1280517664"}, + {"type":"file","name":"lp2010-sun-play-ogg.ogv","size":306319862,"time":"1280210339"}, + {"type":"file","name":"lp2010-sun-recruiting-retaining-women.ogv","size":31282146,"time":"1280212310"}, + {"type":"file","name":"lp2010-sun-recruiting-women.ogg","size":37264232,"time":"1280517788"}, + {"type":"file","name":"lp2010-sun-wrapup.ogv","size":109607244,"time":"1280215323"}, + {"type":"file","name":"mediagoblin_campaign_pitch-small.webm","size":11948337,"time":"1349289671"}, + {"type":"file","name":"mediagoblin_campaign_pitch.webm","size":27197918,"time":"1349289722"}, + {"type":"file","name":"rms-education-es.webm","size":64071545,"time":"1304351680"}, + {"type":"file","name":"rms-education.es.ogv","size":21726974,"time":"1304351770"}, + {"type":"file","name":"sf-large.ogv","size":140102085,"time":"1220111045"}, + {"type":"file","name":"stallmanupv.ogg","size":18683460,"time":"1299776853"}, + {"type":"file","name":"stallmanupv.ogg.sig","size":536,"time":"1299776853"} + ]}, + {"type":"file","name":"welcome.msg","size":2830,"time":"1545163301"} +]}, +{"type":"report","directories":2743,"files":63983} +] diff --git a/swh/lister/gnu/tests/file_structure.json b/swh/lister/gnu/tests/file_structure.json new file mode 100644 index 0000000..f120723 --- /dev/null +++ b/swh/lister/gnu/tests/file_structure.json @@ -0,0 +1,130 @@ +[ + {"type":"directory","name":"artanis","size":4096,"time":"1546205705","contents":[ + {"type":"file","name":"artanis-0.2.1.tar.bz2","size":424081,"time":"1495205979"}, + {"type":"file","name":"artanis-0.2.1.tar.bz2.sig","size":833,"time":"1495205982"}, + {"type":"file","name":"artanis-0.2.1.tar.gz","size":506599,"time":"1495205967"}, + 
{"type":"file","name":"artanis-0.2.1.tar.gz.sig","size":833,"time":"1495205970"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.bz2","size":421984,"time":"1494994239"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.bz2.sig","size":833,"time":"1494994240"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.gz","size":504759,"time":"1494994222"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.gz.sig","size":833,"time":"1494994224"}, + {"type":"file","name":"artanis-0.2.3.tar.bz2","size":439269,"time":"1520284021"}, + {"type":"file","name":"artanis-0.2.3.tar.bz2.sig","size":833,"time":"1520284023"}, + {"type":"file","name":"artanis-0.2.3.tar.gz","size":526293,"time":"1520284007"}, + {"type":"file","name":"artanis-0.2.3.tar.gz.sig","size":833,"time":"1520284009"}, + {"type":"file","name":"artanis-0.2.4.tar.bz2","size":426626,"time":"1521742071"}, + {"type":"file","name":"artanis-0.2.4.tar.bz2.sig","size":833,"time":"1521742074"}, + {"type":"file","name":"artanis-0.2.4.tar.gz","size":508420,"time":"1521742057"}, + {"type":"file","name":"artanis-0.2.4.tar.gz.sig","size":833,"time":"1521742060"}, + {"type":"file","name":"artanis-0.2.5.tar.bz2","size":440350,"time":"1525717261"}, + {"type":"file","name":"artanis-0.2.5.tar.bz2.sig","size":833,"time":"1525717263"}, + {"type":"file","name":"artanis-0.2.5.tar.gz","size":518316,"time":"1525717246"}, + {"type":"file","name":"artanis-0.2.5.tar.gz.sig","size":833,"time":"1525717249"}, + {"type":"file","name":"artanis-0.3.1.tar.bz2","size":448329,"time":"1546205569"}, + {"type":"file","name":"artanis-0.3.1.tar.bz2.sig","size":833,"time":"1546205571"}, + {"type":"file","name":"artanis-0.3.1.tar.gz","size":535098,"time":"1546205555"}, + {"type":"file","name":"artanis-0.3.1.tar.gz.sig","size":833,"time":"1546205558"}, + {"type":"file","name":"artanis-0.3.tar.bz2","size":452609,"time":"1546205025"}, + {"type":"file","name":"artanis-0.3.tar.bz2.sig","size":833,"time":"1546205027"}, + 
{"type":"file","name":"artanis-0.3.tar.gz","size":550938,"time":"1546205012"}, + {"type":"file","name":"artanis-0.3.tar.gz.sig","size":833,"time":"1546205015"}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.bz2","target":"artanis-0.2.12-f39e-dirty.tar.bz2","size":33,"time":"1494994512","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.bz2.sig","target":"artanis-0.2.12-f39e-dirty.tar.bz2.sig","size":37,"time":"1494994512","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.gz","target":"artanis-0.2.12-f39e-dirty.tar.gz","size":32,"time":"1494994519","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.gz.sig","target":"artanis-0.2.12-f39e-dirty.tar.gz.sig","size":36,"time":"1494994519","contents":[]}, + {"type":"link","name":"artanis-latest.tar.bz2","target":"artanis-0.3.1.tar.bz2","size":21,"time":"1546205705","contents":[]}, + {"type":"link","name":"artanis-latest.tar.bz2.sig","target":"artanis-0.3.1.tar.bz2.sig","size":25,"time":"1546205705","contents":[]}, + {"type":"link","name":"artanis-latest.tar.gz","target":"artanis-0.3.1.tar.gz","size":20,"time":"1546205703","contents":[]}, + {"type":"link","name":"artanis-latest.tar.gz.sig","target":"artanis-0.3.1.tar.gz.sig","size":24,"time":"1546205703","contents":[]} + ]}, + {"type":"directory","name":"xboard","size":4096,"time":"1254860068","contents":[ + {"type":"directory","name":"winboard","size":4096,"time":"1181795103","contents":[ + {"type":"file","name":"README","size":107,"time":"1070058107"}, + {"type":"file","name":"README.sig","size":65,"time":"1070058115"}, + {"type":"file","name":"winboard-4_0_0-src.zip","size":1514448,"time":"898422900"}, + {"type":"file","name":"winboard-4_0_0.README","size":4152,"time":"898422960"}, + {"type":"file","name":"winboard-4_0_0.exe","size":1652037,"time":"898422900"}, + {"type":"file","name":"winboard-4_0_2-src.zip","size":1482621,"time":"920018269"}, + 
{"type":"file","name":"winboard-4_0_2.README","size":3617,"time":"920018270"}, + {"type":"file","name":"winboard-4_0_2.exe","size":1716772,"time":"920018294"}, + {"type":"file","name":"winboard-4_0_3-src.zip","size":1499275,"time":"936750503"}, + {"type":"file","name":"winboard-4_0_3.README","size":2218,"time":"940361675"}, + {"type":"file","name":"winboard-4_0_3.exe","size":1725023,"time":"936750506"}, + {"type":"file","name":"winboard-4_0_4-src.tar.gz","size":1753506,"time":"944290190"}, + {"type":"file","name":"winboard-4_0_4.README","size":5393,"time":"944290195"}, + {"type":"file","name":"winboard-4_0_4.exe","size":2195155,"time":"944290204"}, + {"type":"file","name":"winboard-4_0_5-src.tar.gz","size":1752189,"time":"944600462"}, + {"type":"file","name":"winboard-4_0_5.README","size":2348,"time":"944600462"}, + {"type":"file","name":"winboard-4_0_5.exe","size":2195822,"time":"944600468"}, + {"type":"file","name":"winboard-4_0_6-src.tar.gz","size":1761396,"time":"952156231"}, + {"type":"file","name":"winboard-4_0_6.README","size":1592,"time":"952156231"}, + {"type":"file","name":"winboard-4_0_6.exe","size":2026273,"time":"952156230"}, + {"type":"file","name":"winboard-4_0_7-src.tar.gz","size":1764000,"time":"952313061"}, + {"type":"file","name":"winboard-4_0_7.README","size":1721,"time":"952313072"}, + {"type":"file","name":"winboard-4_0_7.exe","size":2202166,"time":"952313082"}, + {"type":"file","name":"winboard-4_1_0-src.tar.gz","size":1902251,"time":"969299378"}, + {"type":"file","name":"winboard-4_1_0.exe","size":2126431,"time":"969299361"}, + {"type":"file","name":"winboard-4_2_0beta-src.tar.gz","size":2000471,"time":"977027031"}, + {"type":"file","name":"winboard-4_2_0beta.README","size":3048,"time":"977033442"}, + {"type":"file","name":"winboard-4_2_0beta.exe","size":2292716,"time":"977027033"}, + {"type":"file","name":"winboard-4_2_1-src.tar.gz","size":2090945,"time":"981323331"}, + 
{"type":"file","name":"winboard-4_2_1.README","size":1336,"time":"981323332"}, + {"type":"file","name":"winboard-4_2_1.exe","size":2298010,"time":"981323012"}, + {"type":"file","name":"winboard-4_2_2-src.tar.gz","size":2025689,"time":"981570576"}, + {"type":"file","name":"winboard-4_2_2.README","size":2705,"time":"981562643"}, + {"type":"file","name":"winboard-4_2_2.exe","size":2298407,"time":"981570908"}, + {"type":"file","name":"winboard-4_2_3-src.tar.gz","size":2001746,"time":"982656672"}, + {"type":"file","name":"winboard-4_2_3.README","size":3014,"time":"982656842"}, + {"type":"file","name":"winboard-4_2_3.exe","size":2299250,"time":"982656841"}, + {"type":"file","name":"winboard-4_2_4-src.tar.gz","size":2388388,"time":"1007952574"}, + {"type":"file","name":"winboard-4_2_4.README","size":1863,"time":"1007952575"}, + {"type":"file","name":"winboard-4_2_4.exe","size":10020579,"time":"1007952203"}, + {"type":"file","name":"winboard-4_2_5-src.tar.gz","size":1962754,"time":"1008502483"}, + {"type":"file","name":"winboard-4_2_5.README","size":2069,"time":"1008466769"}, + {"type":"file","name":"winboard-4_2_5.exe","size":2489300,"time":"1008502215"}, + {"type":"file","name":"winboard-4_2_6-src.tar.gz","size":1982333,"time":"1012641285"}, + {"type":"file","name":"winboard-4_2_6.README","size":1765,"time":"1012640603"}, + {"type":"file","name":"winboard-4_2_6.exe","size":2490333,"time":"1012641027"}, + {"type":"file","name":"winboard-4_2_7-only.exe","size":1729532,"time":"1070149476"}, + {"type":"file","name":"winboard-4_2_7-only.exe.sig","size":65,"time":"1070149498"}, + {"type":"file","name":"winboard-4_2_7.exe","size":5629711,"time":"1070057661"}, + {"type":"file","name":"winboard-4_2_7.exe.sig","size":65,"time":"1070057687"}, + {"type":"file","name":"winboard-4_2_7a.exe","size":5629711,"time":"1070082423"}, + {"type":"file","name":"winboard-4_2_7a.exe.sig","size":65,"time":"1070082566"}, + 
{"type":"file","name":"winboard-4_2_7b.exe","size":6213290,"time":"1181794790"}, + {"type":"file","name":"winboard-4_2_7b.exe.sig","size":65,"time":"1181794954"} + ]}, + {"type":"file","name":"xboard-3.6.2.tar.gz","size":450164,"time":"869814000"}, + {"type":"file","name":"xboard-4.0.0.README","size":4152,"time":"920018090"}, + {"type":"file","name":"xboard-4.0.0.tar.gz","size":514951,"time":"898422900"}, + {"type":"file","name":"xboard-4.0.2.README","size":3617,"time":"920018199"}, + {"type":"file","name":"xboard-4.0.2.tar.gz","size":564856,"time":"920018202"}, + {"type":"file","name":"xboard-4.0.3.README","size":2218,"time":"936750507"}, + {"type":"file","name":"xboard-4.0.3.tar.gz","size":577351,"time":"936750512"}, + {"type":"file","name":"xboard-4.0.4.README","size":5393,"time":"944290145"}, + {"type":"file","name":"xboard-4.0.4.tar.gz","size":575421,"time":"944290148"}, + {"type":"file","name":"xboard-4.0.5.README","size":2348,"time":"944599460"}, + {"type":"file","name":"xboard-4.0.5.tar.gz","size":576300,"time":"944599461"}, + {"type":"file","name":"xboard-4.0.6.README","size":1592,"time":"952156235"}, + {"type":"file","name":"xboard-4.0.6.tar.gz","size":579076,"time":"952156235"}, + {"type":"file","name":"xboard-4.0.7.README","size":1721,"time":"952313082"}, + {"type":"file","name":"xboard-4.0.7.tar.gz","size":578350,"time":"952313085"}, + {"type":"file","name":"xboard-4.1.0.tar.gz","size":1069507,"time":"969299287"}, + {"type":"file","name":"xboard-4.2.0beta.README","size":3048,"time":"977027107"}, + {"type":"file","name":"xboard-4.2.0beta.tar.gz","size":1093901,"time":"977027108"}, + {"type":"file","name":"xboard-4.2.1.README","size":1336,"time":"981323502"}, + {"type":"file","name":"xboard-4.2.1.tar.gz","size":1097200,"time":"981323501"}, + {"type":"file","name":"xboard-4.2.2.README","size":2705,"time":"981562810"}, + {"type":"file","name":"xboard-4.2.2.tar.gz","size":1097682,"time":"981562809"}, + 
{"type":"file","name":"xboard-4.2.3.README","size":3014,"time":"982657007"}, + {"type":"file","name":"xboard-4.2.3.tar.gz","size":1100059,"time":"982657006"}, + {"type":"file","name":"xboard-4.2.4.README","size":1863,"time":"1007952746"}, + {"type":"file","name":"xboard-4.2.4.tar.gz","size":1034728,"time":"1007952745"}, + {"type":"file","name":"xboard-4.2.5.README","size":2069,"time":"1008466946"}, + {"type":"file","name":"xboard-4.2.5.tar.gz","size":1055502,"time":"1008466945"}, + {"type":"file","name":"xboard-4.2.6.README","size":1765,"time":"1012641715"}, + {"type":"file","name":"xboard-4.2.6.tar.gz","size":1057625,"time":"1012641715"}, + {"type":"file","name":"xboard-4.2.7.tar.gz","size":1318110,"time":"1070057764"}, + {"type":"file","name":"xboard-4.2.7.tar.gz.sig","size":65,"time":"1070057702"} + ]} +] \ No newline at end of file diff --git a/swh/lister/gnu/tests/find_tarballs_output.json b/swh/lister/gnu/tests/find_tarballs_output.json new file mode 100644 index 0000000..5cc06f4 --- /dev/null +++ b/swh/lister/gnu/tests/find_tarballs_output.json @@ -0,0 +1,158 @@ +[ + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.1.tar.gz", + "date": "1495205967" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.12-f39e-dirty.tar.gz", + "date": "1494994222" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.3.tar.gz", + "date": "1520284007" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.4.tar.gz", + "date": "1521742057" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.5.tar.gz", + "date": "1525717246" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.1.tar.gz", + "date": "1546205555" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.tar.gz", + "date": "1546205012" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_0-src.zip", + "date": "898422900" + }, + { + "archive": 
"https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_2-src.zip", + "date": "920018269" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_3-src.zip", + "date": "936750503" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_4-src.tar.gz", + "date": "944290190" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_5-src.tar.gz", + "date": "944600462" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_6-src.tar.gz", + "date": "952156231" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_7-src.tar.gz", + "date": "952313061" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_1_0-src.tar.gz", + "date": "969299378" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_0beta-src.tar.gz", + "date": "977027031" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_1-src.tar.gz", + "date": "981323331" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_2-src.tar.gz", + "date": "981570576" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_3-src.tar.gz", + "date": "982656672" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_4-src.tar.gz", + "date": "1007952574" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_5-src.tar.gz", + "date": "1008502483" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_6-src.tar.gz", + "date": "1012641285" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-3.6.2.tar.gz", + "date": "869814000" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.0.tar.gz", + "date": "898422900" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.2.tar.gz", + "date": "920018202" + }, + { + "archive": 
# Copyright (C) 2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json
import os

from swh.lister.gnu.lister import find_tarballs, filter_directories

# The fixtures live next to this module; resolving them from __file__ keeps
# the tests independent of the directory pytest is started from.
_DATA_DIR = os.path.dirname(os.path.abspath(__file__))


def _load_fixture(name):
    """Parse the JSON fixture `name` stored alongside this test module.

    The file is closed as soon as it has been read.
    """
    with open(os.path.join(_DATA_DIR, name), encoding='utf-8') as fixture:
        return json.load(fixture)


def test_filter_directories():
    """Only the gnu and old-gnu top-level directories are kept."""
    api_response = _load_fixture('api_response.json')
    kept = [directory['name']
            for directory in filter_directories(api_response)]
    # Comparing the whole list also fails when nothing is kept, which a
    # per-item membership check would silently accept.
    assert kept == ['gnu', 'old-gnu']


def test_find_tarballs():
    """Tarballs of both fixture packages are found, sub-folders included."""
    expected_list_of_all_tarballs = _load_fixture('find_tarballs_output.json')
    file_structure = _load_fixture('file_structure.json')

    list_of_all_tarballs = []
    list_of_all_tarballs.extend(
        find_tarballs(file_structure[0]['contents'],
                      "https://ftp.gnu.org/gnu/artanis/"))
    list_of_all_tarballs.extend(
        find_tarballs(file_structure[1]['contents'],
                      "https://ftp.gnu.org/old-gnu/xboard/"))

    assert list_of_all_tarballs == expected_list_of_all_tarballs