swh.lister.github: Refactor to use swh.storage instead of sqlalchemy
This commit is contained in:
parent
f13764ba36
commit
533f6fa1a3
20 changed files with 600 additions and 694 deletions
135
bin/ghlister
135
bin/ghlister
|
@ -1,135 +0,0 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
# Copyright (C) 2015 Stefano Zacchiroli <zack@upsilon.cc>
|
||||
# License: GNU General Public License version 3, or any later version
|
||||
# See top-level LICENSE file for more information
|
||||
|
||||
import argparse
|
||||
import configparser
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from swh.lister.github import lister, models
|
||||
from swh.lister.github.db_utils import session_scope
|
||||
|
||||
|
||||
# Fallback settings handed to configparser; values are kept as strings, the
# way they would appear in an .ini file.
DEFAULT_CONF = dict(
    cache_dir='./cache',
    log_dir='./log',
    cache_json='False',
)
|
||||
|
||||
|
||||
def db_connect(db_url):
    """build the SQLAlchemy engine and session factory for db_url.

    Return an (engine, session factory) pair; the factory is bound to the
    engine

    """
    db_engine = create_engine(db_url)
    return (db_engine, sessionmaker(bind=db_engine))
|
||||
|
||||
|
||||
def int_interval(s):
    """parse an "N-M" string as an interval.

    Either bound may be left empty, in which case it is reported as None.
    Strings without a dash, or with more than one dash, are rejected with
    argparse.ArgumentTypeError.

    Return an (N,M) int (or None) pair

    """
    lower, dash, upper = s.partition('-')

    # no dash at all, or a second dash left over after the first split
    if not dash or '-' in upper:
        raise argparse.ArgumentTypeError('not an interval: ' + s)

    return tuple(int(bound) if bound else None for bound in (lower, upper))
|
||||
|
||||
|
||||
def parse_args():
    """parse the command line of the GitHub lister.

    Global options are --db-url/-d and --verbose/-v; the action is one of
    createdb, dropdb, list (which takes an N-M interval) or catchup. Running
    without an action is reported as a usage error.

    Return the argparse namespace

    """
    parser = argparse.ArgumentParser(
        description='list GitHub repositories and load them into a DB')
    parser.add_argument(
        '--db-url', '-d', metavar='SQLALCHEMY_URL',
        help='SQLAlchemy DB URL (override conffile); see '
        '<http://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls>')  # NOQA
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='be verbose')

    actions = parser.add_subparsers(dest='action')
    actions.add_parser('createdb', help='initialize DB')
    actions.add_parser('dropdb', help='destroy DB')

    list_parser = actions.add_parser('list', help='list repositories')
    list_parser.add_argument(
        'interval',
        type=int_interval,
        help='interval of repository IDs to list, '
        'in N-M format; either N or M can be omitted.')

    actions.add_parser('catchup',
                       help='catchup with new repos since last time')

    parsed = parser.parse_args()
    if not parsed.action:
        parser.error('no action given')

    return parsed
|
||||
|
||||
|
||||
def read_conf(args):
    """load the lister configuration and apply command line overrides.

    Settings come from the [main] section of
    ~/.config/swh/lister-github.ini; options not set there fall back to
    DEFAULT_CONF. When the file or its [main] section is missing, the
    defaults alone are used. A --db-url given on the command line (args.db_url)
    replaces the configured 'db_url'.

    Return a dict of settings, in which 'cache_json' is a bool

    """
    config = configparser.ConfigParser(defaults=DEFAULT_CONF)
    config.read(os.path.expanduser('~/.config/swh/lister-github.ini'))

    # Use the public API instead of config._sections: that private mapping
    # only holds what the file spells out, so DEFAULT_CONF was silently
    # ignored and a missing file or section surfaced as a bare KeyError.
    # raw=True keeps values verbatim (no '%' interpolation), which matters
    # for DB URLs containing percent-escapes.
    if config.has_section('main'):
        conf = dict(config.items('main', raw=True))
    else:
        conf = dict(config.defaults())

    # overrides
    if args.db_url:
        conf['db_url'] = args.db_url

    # typing: only the (case-insensitive) string 'true' enables the cache
    conf['cache_json'] = conf.get('cache_json', '').lower() == 'true'

    return conf
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Entry point: parse the command line, load the configuration, connect
    # to the database, then run the requested action.
    logging.basicConfig(level=logging.INFO)  # XXX

    args = parse_args()
    conf = read_conf(args)
    db_engine, mk_session = db_connect(conf['db_url'])

    action = args.action
    if action == 'createdb':
        models.SQLBase.metadata.create_all(db_engine)
    elif action == 'dropdb':
        models.SQLBase.metadata.drop_all(db_engine)
    elif action == 'list':
        min_id, max_id = args.interval
        lister.fetch(conf, mk_session, min_id=min_id, max_id=max_id)
    elif action == 'catchup':
        # NOTE(review): the nesting of this branch was reconstructed; the
        # whole catchup logic is assumed to run inside the session scope.
        with session_scope(mk_session) as db_session:
            last_known_id = lister.last_repo_id(db_session)
            if last_known_id is None:
                logging.error('Cannot catchup: no last known id found. Abort.')
                sys.exit(2)

            logging.info('catching up from last known repo id: %d' %
                         last_known_id)
            # resume right after the last repository already recorded
            lister.fetch(conf,
                         mk_session,
                         min_id=last_known_id + 1,
                         max_id=None)
|
Loading…
Add table
Add a link
Reference in a new issue