core.lister_base: Batch create origins (storage) & tasks (scheduler)
This commit is contained in:
parent
b272a36237
commit
8b2ee221ac
3 changed files with 32 additions and 8 deletions
3
debian/control
vendored
3
debian/control
vendored
|
@ -13,7 +13,7 @@ Build-Depends: debhelper (>= 9),
|
|||
python3-sqlalchemy (>= 1.0),
|
||||
python3-swh.core,
|
||||
python3-swh.scheduler (>= 0.0.31~),
|
||||
python3-swh.storage,
|
||||
python3-swh.storage (>= 0.0.103~),
|
||||
python3-swh.storage.schemata,
|
||||
python3-testing.postgresql,
|
||||
python3-vcversioner,
|
||||
|
@ -24,6 +24,7 @@ Homepage: https://forge.softwareheritage.org/source/swh-lister/
|
|||
Package: python3-swh.lister
|
||||
Architecture: all
|
||||
Depends: python3-swh.scheduler (>= 0.0.31~),
|
||||
python3-swh.storage (>= 0.0.103~),
|
||||
${misc:Depends},
|
||||
${python3:Depends}
|
||||
Breaks: python3-swh.lister.github
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
swh.core
|
||||
swh.storage >= 0.0.103
|
||||
swh.storage[schemata] >= 0.0.76
|
||||
swh.scheduler >= 0.0.31
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
import abc
|
||||
import datetime
|
||||
import gzip
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
@ -378,7 +379,7 @@ class SWHListerBase(abc.ABC, config.SWHConfig):
|
|||
|
||||
return sql_repo
|
||||
|
||||
def origin_dict(self, origin_type, origin_url):
|
||||
def origin_dict(self, origin_type, origin_url, **kwargs):
|
||||
"""Return special dict format for the origins list
|
||||
|
||||
Args:
|
||||
|
@ -458,16 +459,37 @@ class SWHListerBase(abc.ABC, config.SWHConfig):
|
|||
Returns:
|
||||
Nothing. Modifies injected_repos.
|
||||
"""
|
||||
origins = {}
|
||||
tasks = {}
|
||||
|
||||
def _origin_key(m):
|
||||
_type = m.get('origin_type', m.get('type'))
|
||||
_url = m.get('origin_url', m.get('url'))
|
||||
return '%s-%s' % (_type, _url)
|
||||
|
||||
def _task_key(m):
|
||||
return '%s-%s' % (m['type'], json.dumps(m['arguments']))
|
||||
|
||||
for m in models_list:
|
||||
ir = injected_repos[m['uid']]
|
||||
if not ir.origin_id:
|
||||
ir.origin_id = self.storage.origin_add_one(
|
||||
self.origin_dict(m['origin_type'], m['origin_url'])
|
||||
)
|
||||
origin_dict = self.origin_dict(**m)
|
||||
origins[_origin_key(m)] = (ir, m, origin_dict)
|
||||
if not ir.task_id:
|
||||
ir.task_id = self.scheduler.create_tasks(
|
||||
[self.task_dict(**m)]
|
||||
)[0]['id']
|
||||
task_dict = self.task_dict(**m)
|
||||
tasks[_task_key(task_dict)] = (ir, m, task_dict)
|
||||
|
||||
new_origins = self.storage.origin_add(
|
||||
(origin_dicts for (_, _, origin_dicts) in origins.values()))
|
||||
for origin in new_origins:
|
||||
ir, m, _ = origins[_origin_key(origin)]
|
||||
ir.origin_id = origin['id']
|
||||
|
||||
new_tasks = self.scheduler.create_tasks(
|
||||
(task_dicts for (_, _, task_dicts) in tasks.values()))
|
||||
for task in new_tasks:
|
||||
ir, m, _ = tasks[_task_key(task)]
|
||||
ir.task_id = task['id']
|
||||
|
||||
def ingest_data(self, identifier, checks=False):
|
||||
"""The core data fetch sequence. Request server endpoint. Simplify and
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue