Updated README for listers
This commit is contained in:
commit
d7ae2f1305
3 changed files with 123 additions and 14 deletions
70
swh/lister/.vscode/launch.json
vendored
Normal file
70
swh/lister/.vscode/launch.json
vendored
Normal file
|
@ -0,0 +1,70 @@
|
|||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Python: Current File (Integrated Terminal)",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "integratedTerminal"
|
||||
},
|
||||
{
|
||||
"name": "Python: Remote Attach",
|
||||
"type": "python",
|
||||
"request": "attach",
|
||||
"port": 5678,
|
||||
"host": "localhost",
|
||||
"pathMappings": [
|
||||
{
|
||||
"localRoot": "${workspaceFolder}",
|
||||
"remoteRoot": "."
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Python: Module",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"module": "enter-your-module-name-here",
|
||||
"console": "integratedTerminal"
|
||||
},
|
||||
{
|
||||
"name": "Python: Django",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/manage.py",
|
||||
"console": "integratedTerminal",
|
||||
"args": [
|
||||
"runserver",
|
||||
"--noreload",
|
||||
"--nothreading"
|
||||
],
|
||||
"django": true
|
||||
},
|
||||
{
|
||||
"name": "Python: Flask",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"module": "flask",
|
||||
"env": {
|
||||
"FLASK_APP": "app.py"
|
||||
},
|
||||
"args": [
|
||||
"run",
|
||||
"--no-debugger",
|
||||
"--no-reload"
|
||||
],
|
||||
"jinja": true
|
||||
},
|
||||
{
|
||||
"name": "Python: Current File (External Terminal)",
|
||||
"type": "python",
|
||||
"request": "launch",
|
||||
"program": "${file}",
|
||||
"console": "externalTerminal"
|
||||
}
|
||||
]
|
||||
}
|
9
swh/lister/.vscode/settings.json
vendored
Normal file
9
swh/lister/.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"editor.fontSize": 18,
|
||||
"python.venvPath": "~/.virtualenvs/swh/bin/python",
|
||||
"python.unitTest.unittestEnabled": true,
|
||||
"python.pythonPath": "~/.virtualenvs/swh/bin/python",
|
||||
"python.linting.pylintEnabled": false,
|
||||
"python.linting.flake8Enabled": true,
|
||||
"python.linting.enabled": true
|
||||
}
|
|
@ -6,6 +6,7 @@ import abc
|
|||
import logging
|
||||
from itertools import count
|
||||
|
||||
import dateutil
|
||||
from sqlalchemy import func
|
||||
|
||||
from .lister_transports import SWHListerHttpTransport
|
||||
|
@ -103,23 +104,52 @@ class SWHIndexingLister(SWHListerBase):
|
|||
declare approximately equal-sized ranges of existing
|
||||
repos
|
||||
"""
|
||||
|
||||
n = max(self.db_num_entries(), 10)
|
||||
|
||||
partitions = []
|
||||
partition_size = min(partition_size, n)
|
||||
prev_index = None
|
||||
for i in range(0, n-1, partition_size):
|
||||
# indexable column from the ith row
|
||||
index = self.db_session.query(self.MODEL.indexable) \
|
||||
.order_by(self.MODEL.indexable).offset(i).first()
|
||||
if index:
|
||||
index = index[0]
|
||||
if index is not None and prev_index is not None:
|
||||
partitions.append((prev_index, index))
|
||||
prev_index = index
|
||||
n_partitions = n // partition_size
|
||||
|
||||
partitions.append((prev_index, self.db_last_index()))
|
||||
return partitions
|
||||
min_index = self.db_first_index()
|
||||
max_index = self.db_last_index()
|
||||
|
||||
if not min_index or not max_index:
|
||||
raise ValueError("Can't partition an empty range")
|
||||
|
||||
if isinstance(min_index, str):
|
||||
def format_bound(bound):
|
||||
return bound.isoformat()
|
||||
min_index = dateutil.parser.parse(min_index)
|
||||
max_index = dateutil.parser.parse(max_index)
|
||||
else:
|
||||
def format_bound(bound):
|
||||
return bound
|
||||
|
||||
partition_width = (max_index - min_index) / n_partitions
|
||||
|
||||
partitions = [
|
||||
[
|
||||
format_bound(min_index + i * partition_width),
|
||||
format_bound(min_index + (i+1) * partition_width),
|
||||
] for i in range(n_partitions)
|
||||
]
|
||||
|
||||
# Remove bounds for lowest and highest partition
|
||||
partitions[0][0] = None
|
||||
partitions[-1][1] = None
|
||||
|
||||
return [tuple(partition) for partition in partitions]
|
||||
|
||||
def db_first_index(self):
    """Fetch the minimum value of the indexable column from the db.

    Returns:
        the lowest indexable value among all repos stored in the db,
        or None when the query yields no row
    """
    row = self.db_session.query(
        func.min(self.MODEL.indexable)
    ).first()
    return row[0] if row else None
|
||||
|
||||
def db_last_index(self):
|
||||
"""Look in the db for the largest indexable value
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue