lister: retrieve old task and origin id if a full_name has been recycled

If a repository changes hands, its full_name can be recycled, i.e. reused
by a different repository. In that case, we reuse the task_id and origin_id
of the old repository (and clear them on the old record) instead of
recreating them.
This commit is contained in:
Nicolas Dandrimont 2016-10-20 16:26:53 +02:00
parent ab25599cca
commit a1a6228e05

View file

@ -145,6 +145,16 @@ class GitHubLister(SWHLister):
.filter(Repository.id == repo_id) \
.first()
def lookup_full_names(self, full_names, db_session=None):
    """Return the stored repositories whose full_name is in `full_names`.

    Args:
        full_names: iterable of repository full names to look up.
        db_session: optional database session to run the query in. When
            omitted (or falsy), a session is opened through
            `session_scope(self.mk_session)` for the duration of the call.

    Returns:
        The list produced by the `Repository` query; an empty list when
        `full_names` is empty.
    """
    # Materialize once so that one-shot iterables can be both tested for
    # emptiness and handed to the query.
    full_names = list(full_names)
    if not full_names:
        # Nothing can match: skip the session and the empty IN predicate
        # altogether.
        return []

    if not db_session:
        with session_scope(self.mk_session) as db_session:
            return self.lookup_full_names(full_names,
                                          db_session=db_session)

    return db_session.query(Repository) \
        .filter(Repository.full_name.in_(full_names)) \
        .all()
def last_repo_id(self, db_session=None):
if not db_session:
with session_scope(self.mk_session) as db_session:
@ -242,6 +252,18 @@ class GitHubLister(SWHLister):
full_name = repo['full_name']
mapped_repos[full_name] = self.inject_repo(repo, db_session)
# Retrieve and reset task and origin ids from existing repos
old_repos = self.lookup_full_names(list(mapped_repos.keys()),
db_session=db_session)
for old_repo in old_repos:
full_name = old_repo.full_name
if old_repo.task_id:
tasks[full_name] = old_repo.task_id
old_repo.task_id = None
if old_repo.origin_id:
origins[full_name] = old_repo.origin_id
old_repo.origin_id = None
# Create missing origins
missing_origins = [
full_name for full_name in sorted(mapped_repos)