Enable black
Blackify all the Python files, enable black in pre-commit, and add a black tox environment.
This commit is contained in:
parent
1ae75166c7
commit
93a4d8b784
97 changed files with 1734 additions and 1642 deletions
|
@ -7,14 +7,15 @@ def register():
|
|||
from .models import NpmVisitModel, NpmModel
|
||||
from .lister import NpmLister
|
||||
|
||||
return {'models': [NpmVisitModel, NpmModel],
|
||||
'lister': NpmLister,
|
||||
'task_modules': ['%s.tasks' % __name__],
|
||||
'task_types': {
|
||||
'list-npm-full': {
|
||||
'default_interval': '7 days',
|
||||
'min_interval': '7 days',
|
||||
'max_interval': '7 days',
|
||||
},
|
||||
},
|
||||
}
|
||||
return {
|
||||
"models": [NpmVisitModel, NpmModel],
|
||||
"lister": NpmLister,
|
||||
"task_modules": ["%s.tasks" % __name__],
|
||||
"task_types": {
|
||||
"list-npm-full": {
|
||||
"default_interval": "7 days",
|
||||
"min_interval": "7 days",
|
||||
"max_interval": "7 days",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
|
@ -14,15 +14,17 @@ class NpmListerBase(IndexingHttpLister):
|
|||
"""List packages available in the npm registry in a paginated way
|
||||
|
||||
"""
|
||||
MODEL = NpmModel
|
||||
LISTER_NAME = 'npm'
|
||||
instance = 'npm'
|
||||
|
||||
def __init__(self, url='https://replicate.npmjs.com',
|
||||
per_page=1000, override_config=None):
|
||||
MODEL = NpmModel
|
||||
LISTER_NAME = "npm"
|
||||
instance = "npm"
|
||||
|
||||
def __init__(
|
||||
self, url="https://replicate.npmjs.com", per_page=1000, override_config=None
|
||||
):
|
||||
super().__init__(url=url, override_config=override_config)
|
||||
self.per_page = per_page + 1
|
||||
self.PATH_TEMPLATE += '&limit=%s' % self.per_page
|
||||
self.PATH_TEMPLATE += "&limit=%s" % self.per_page
|
||||
|
||||
@property
|
||||
def ADDITIONAL_CONFIG(self) -> Dict[str, Any]:
|
||||
|
@ -30,22 +32,22 @@ class NpmListerBase(IndexingHttpLister):
|
|||
|
||||
"""
|
||||
default_config = super().ADDITIONAL_CONFIG
|
||||
default_config['loading_task_policy'] = ('str', 'recurring')
|
||||
default_config["loading_task_policy"] = ("str", "recurring")
|
||||
return default_config
|
||||
|
||||
def get_model_from_repo(self, repo_name: str) -> Dict[str, str]:
|
||||
"""(Override) Transform from npm package name to model
|
||||
|
||||
"""
|
||||
package_url = 'https://www.npmjs.com/package/%s' % repo_name
|
||||
package_url = "https://www.npmjs.com/package/%s" % repo_name
|
||||
return {
|
||||
'uid': repo_name,
|
||||
'indexable': repo_name,
|
||||
'name': repo_name,
|
||||
'full_name': repo_name,
|
||||
'html_url': package_url,
|
||||
'origin_url': package_url,
|
||||
'origin_type': 'npm',
|
||||
"uid": repo_name,
|
||||
"indexable": repo_name,
|
||||
"name": repo_name,
|
||||
"full_name": repo_name,
|
||||
"html_url": package_url,
|
||||
"origin_url": package_url,
|
||||
"origin_type": "npm",
|
||||
}
|
||||
|
||||
def task_dict(self, origin_type: str, origin_url: str, **kwargs):
|
||||
|
@ -56,10 +58,9 @@ class NpmListerBase(IndexingHttpLister):
|
|||
needed for the ingestion task creation.
|
||||
|
||||
"""
|
||||
task_type = 'load-%s' % origin_type
|
||||
task_policy = self.config['loading_task_policy']
|
||||
return create_task_dict(task_type, task_policy,
|
||||
url=origin_url)
|
||||
task_type = "load-%s" % origin_type
|
||||
task_policy = self.config["loading_task_policy"]
|
||||
return create_task_dict(task_type, task_policy, url=origin_url)
|
||||
|
||||
def request_headers(self) -> Dict[str, Any]:
|
||||
"""(Override) Set requests headers to send when querying the npm
|
||||
|
@ -67,7 +68,7 @@ class NpmListerBase(IndexingHttpLister):
|
|||
|
||||
"""
|
||||
headers = super().request_headers()
|
||||
headers['Accept'] = 'application/json'
|
||||
headers["Accept"] = "application/json"
|
||||
return headers
|
||||
|
||||
def string_pattern_check(self, inner: int, lower: int, upper: int = None):
|
||||
|
@ -83,25 +84,24 @@ class NpmLister(NpmListerBase):
|
|||
"""List all packages available in the npm registry in a paginated way
|
||||
|
||||
"""
|
||||
|
||||
PATH_TEMPLATE = '/_all_docs?startkey="%s"'
|
||||
|
||||
def get_next_target_from_response(
|
||||
self, response: Response) -> Optional[str]:
|
||||
def get_next_target_from_response(self, response: Response) -> Optional[str]:
|
||||
"""(Override) Get next npm package name to continue the listing
|
||||
|
||||
"""
|
||||
repos = response.json()['rows']
|
||||
return repos[-1]['id'] if len(repos) == self.per_page else None
|
||||
repos = response.json()["rows"]
|
||||
return repos[-1]["id"] if len(repos) == self.per_page else None
|
||||
|
||||
def transport_response_simplified(
|
||||
self, response: Response) -> List[Dict[str, str]]:
|
||||
def transport_response_simplified(self, response: Response) -> List[Dict[str, str]]:
|
||||
"""(Override) Transform npm registry response to list for model manipulation
|
||||
|
||||
"""
|
||||
repos = response.json()['rows']
|
||||
repos = response.json()["rows"]
|
||||
if len(repos) == self.per_page:
|
||||
repos = repos[:-1]
|
||||
return [self.get_model_from_repo(repo['id']) for repo in repos]
|
||||
return [self.get_model_from_repo(repo["id"]) for repo in repos]
|
||||
|
||||
|
||||
class NpmIncrementalLister(NpmListerBase):
|
||||
|
@ -109,30 +109,29 @@ class NpmIncrementalLister(NpmListerBase):
|
|||
update_seq value of the underlying CouchDB database, in a paginated way.
|
||||
|
||||
"""
|
||||
PATH_TEMPLATE = '/_changes?since=%s'
|
||||
|
||||
PATH_TEMPLATE = "/_changes?since=%s"
|
||||
|
||||
@property
|
||||
def CONFIG_BASE_FILENAME(self): # noqa: N802
|
||||
return 'lister_npm_incremental'
|
||||
return "lister_npm_incremental"
|
||||
|
||||
def get_next_target_from_response(
|
||||
self, response: Response) -> Optional[str]:
|
||||
def get_next_target_from_response(self, response: Response) -> Optional[str]:
|
||||
"""(Override) Get next npm package name to continue the listing.
|
||||
|
||||
"""
|
||||
repos = response.json()['results']
|
||||
return repos[-1]['seq'] if len(repos) == self.per_page else None
|
||||
repos = response.json()["results"]
|
||||
return repos[-1]["seq"] if len(repos) == self.per_page else None
|
||||
|
||||
def transport_response_simplified(
|
||||
self, response: Response) -> List[Dict[str, str]]:
|
||||
def transport_response_simplified(self, response: Response) -> List[Dict[str, str]]:
|
||||
"""(Override) Transform npm registry response to list for model
|
||||
manipulation.
|
||||
|
||||
"""
|
||||
repos = response.json()['results']
|
||||
repos = response.json()["results"]
|
||||
if len(repos) == self.per_page:
|
||||
repos = repos[:-1]
|
||||
return [self.get_model_from_repo(repo['id']) for repo in repos]
|
||||
return [self.get_model_from_repo(repo["id"]) for repo in repos]
|
||||
|
||||
def filter_before_inject(self, models_list: List[Dict[str, Any]]):
|
||||
"""(Override) Filter out documents in the CouchDB database
|
||||
|
@ -141,9 +140,9 @@ class NpmIncrementalLister(NpmListerBase):
|
|||
"""
|
||||
models_filtered = []
|
||||
for model in models_list:
|
||||
package_name = model['name']
|
||||
package_name = model["name"]
|
||||
# document related to CouchDB internals
|
||||
if package_name.startswith('_design/'):
|
||||
if package_name.startswith("_design/"):
|
||||
continue
|
||||
models_filtered.append(model)
|
||||
return models_filtered
|
||||
|
|
|
@ -11,9 +11,10 @@ class NpmVisitModel(SQLBase, metaclass=ABCSQLMeta):
|
|||
"""Table to store the npm registry state at the time of a
|
||||
content listing by Software Heritage
|
||||
"""
|
||||
__tablename__ = 'npm_visit'
|
||||
|
||||
uid = Column(Integer, Sequence('npm_visit_id_seq'), primary_key=True)
|
||||
__tablename__ = "npm_visit"
|
||||
|
||||
uid = Column(Integer, Sequence("npm_visit_id_seq"), primary_key=True)
|
||||
visit_date = Column(DateTime, nullable=False)
|
||||
doc_count = Column(BigInteger)
|
||||
doc_del_count = Column(BigInteger)
|
||||
|
@ -29,7 +30,8 @@ class NpmModel(IndexingModelBase):
|
|||
"""A npm package representation
|
||||
|
||||
"""
|
||||
__tablename__ = 'npm_repo'
|
||||
|
||||
__tablename__ = "npm_repo"
|
||||
|
||||
uid = Column(String, primary_key=True)
|
||||
indexable = Column(String, index=True)
|
||||
|
|
|
@ -13,15 +13,22 @@ from swh.lister.npm.models import NpmVisitModel
|
|||
|
||||
@contextmanager
|
||||
def save_registry_state(lister):
|
||||
params = {'headers': lister.request_headers()}
|
||||
params = {"headers": lister.request_headers()}
|
||||
registry_state = lister.session.get(lister.url, **params)
|
||||
registry_state = registry_state.json()
|
||||
keys = ('doc_count', 'doc_del_count', 'update_seq', 'purge_seq',
|
||||
'disk_size', 'data_size', 'committed_update_seq',
|
||||
'compacted_seq')
|
||||
keys = (
|
||||
"doc_count",
|
||||
"doc_del_count",
|
||||
"update_seq",
|
||||
"purge_seq",
|
||||
"disk_size",
|
||||
"data_size",
|
||||
"committed_update_seq",
|
||||
"compacted_seq",
|
||||
)
|
||||
|
||||
state = {key: registry_state[key] for key in keys}
|
||||
state['visit_date'] = datetime.now()
|
||||
state["visit_date"] = datetime.now()
|
||||
yield
|
||||
npm_visit = NpmVisitModel(**state)
|
||||
lister.db_session.add(npm_visit)
|
||||
|
@ -34,29 +41,31 @@ def get_last_update_seq(lister):
|
|||
query = lister.db_session.query(NpmVisitModel.update_seq)
|
||||
row = query.order_by(NpmVisitModel.uid.desc()).first()
|
||||
if not row:
|
||||
raise ValueError('No npm registry listing previously performed ! '
|
||||
'This is required prior to the execution of an '
|
||||
'incremental listing.')
|
||||
raise ValueError(
|
||||
"No npm registry listing previously performed ! "
|
||||
"This is required prior to the execution of an "
|
||||
"incremental listing."
|
||||
)
|
||||
return row[0]
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.NpmListerTask')
|
||||
@shared_task(name=__name__ + ".NpmListerTask")
|
||||
def list_npm_full(**lister_args):
|
||||
'Full lister for the npm (javascript) registry'
|
||||
"Full lister for the npm (javascript) registry"
|
||||
lister = NpmLister(**lister_args)
|
||||
with save_registry_state(lister):
|
||||
return lister.run()
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.NpmIncrementalListerTask')
|
||||
@shared_task(name=__name__ + ".NpmIncrementalListerTask")
|
||||
def list_npm_incremental(**lister_args):
|
||||
'Incremental lister for the npm (javascript) registry'
|
||||
"Incremental lister for the npm (javascript) registry"
|
||||
lister = NpmIncrementalLister(**lister_args)
|
||||
update_seq_start = get_last_update_seq(lister)
|
||||
with save_registry_state(lister):
|
||||
return lister.run(min_bound=update_seq_start)
|
||||
|
||||
|
||||
@shared_task(name=__name__ + '.ping')
|
||||
@shared_task(name=__name__ + ".ping")
|
||||
def _ping():
|
||||
return 'OK'
|
||||
return "OK"
|
||||
|
|
|
@ -10,14 +10,16 @@ from swh.lister.core.tests.conftest import * # noqa
|
|||
|
||||
@pytest.fixture
|
||||
def lister_npm(swh_listers):
|
||||
lister = swh_listers['npm']
|
||||
lister = swh_listers["npm"]
|
||||
|
||||
# Add the load-npm task type in the scheduler backend
|
||||
lister.scheduler.create_task_type({
|
||||
'type': 'load-npm',
|
||||
'description': 'Load npm package',
|
||||
'backend_name': 'swh.loader.package.tasks.LoadNpm',
|
||||
'default_interval': '1 day',
|
||||
})
|
||||
lister.scheduler.create_task_type(
|
||||
{
|
||||
"type": "load-npm",
|
||||
"description": "Load npm package",
|
||||
"backend_name": "swh.loader.package.tasks.LoadNpm",
|
||||
"default_interval": "1 day",
|
||||
}
|
||||
)
|
||||
|
||||
return lister
|
||||
|
|
|
@ -21,10 +21,10 @@ logger = logging.getLogger(__name__)
|
|||
class NpmListerTester(HttpListerTesterBase, unittest.TestCase):
|
||||
Lister = NpmLister
|
||||
test_re = re.compile(r'^.*/_all_docs\?startkey="(.+)".*')
|
||||
lister_subdir = 'npm'
|
||||
good_api_response_file = 'data/replicate.npmjs.com/api_response.json'
|
||||
bad_api_response_file = 'data/api_empty_response.json'
|
||||
first_index = 'jquery'
|
||||
lister_subdir = "npm"
|
||||
good_api_response_file = "data/replicate.npmjs.com/api_response.json"
|
||||
bad_api_response_file = "data/api_empty_response.json"
|
||||
first_index = "jquery"
|
||||
entries_per_page = 100
|
||||
|
||||
@requests_mock.Mocker()
|
||||
|
@ -37,11 +37,11 @@ class NpmListerTester(HttpListerTesterBase, unittest.TestCase):
|
|||
|
||||
class NpmIncrementalListerTester(HttpListerTesterBase, unittest.TestCase):
|
||||
Lister = NpmIncrementalLister
|
||||
test_re = re.compile(r'^.*/_changes\?since=([0-9]+).*')
|
||||
lister_subdir = 'npm'
|
||||
good_api_response_file = 'data/api_inc_response.json'
|
||||
bad_api_response_file = 'data/api_inc_empty_response.json'
|
||||
first_index = '6920642'
|
||||
test_re = re.compile(r"^.*/_changes\?since=([0-9]+).*")
|
||||
lister_subdir = "npm"
|
||||
good_api_response_file = "data/api_inc_response.json"
|
||||
bad_api_response_file = "data/api_inc_empty_response.json"
|
||||
first_index = "6920642"
|
||||
entries_per_page = 100
|
||||
|
||||
@requests_mock.Mocker()
|
||||
|
@ -58,27 +58,27 @@ def check_tasks(tasks: List[Any]):
|
|||
|
||||
"""
|
||||
for row in tasks:
|
||||
logger.debug('row: %s', row)
|
||||
assert row['type'] == 'load-npm'
|
||||
logger.debug("row: %s", row)
|
||||
assert row["type"] == "load-npm"
|
||||
# arguments check
|
||||
args = row['arguments']['args']
|
||||
args = row["arguments"]["args"]
|
||||
assert len(args) == 0
|
||||
|
||||
# kwargs
|
||||
kwargs = row['arguments']['kwargs']
|
||||
kwargs = row["arguments"]["kwargs"]
|
||||
assert len(kwargs) == 1
|
||||
package_url = kwargs['url']
|
||||
package_name = package_url.split('/')[-1]
|
||||
assert package_url == f'https://www.npmjs.com/package/{package_name}'
|
||||
package_url = kwargs["url"]
|
||||
package_name = package_url.split("/")[-1]
|
||||
assert package_url == f"https://www.npmjs.com/package/{package_name}"
|
||||
|
||||
assert row['policy'] == 'recurring'
|
||||
assert row['priority'] is None
|
||||
assert row["policy"] == "recurring"
|
||||
assert row["priority"] is None
|
||||
|
||||
|
||||
def test_lister_npm_basic_listing(lister_npm, requests_mock_datadir):
|
||||
lister_npm.run()
|
||||
|
||||
tasks = lister_npm.scheduler.search_tasks(task_type='load-npm')
|
||||
tasks = lister_npm.scheduler.search_tasks(task_type="load-npm")
|
||||
assert len(tasks) == 100
|
||||
|
||||
check_tasks(tasks)
|
||||
|
@ -89,10 +89,11 @@ def test_lister_npm_listing_pagination(lister_npm, requests_mock_datadir):
|
|||
# Patch per page pagination
|
||||
lister.per_page = 10 + 1
|
||||
lister.PATH_TEMPLATE = lister.PATH_TEMPLATE.replace(
|
||||
'&limit=1001', '&limit=%s' % lister.per_page)
|
||||
"&limit=1001", "&limit=%s" % lister.per_page
|
||||
)
|
||||
lister.run()
|
||||
|
||||
tasks = lister.scheduler.search_tasks(task_type='load-npm')
|
||||
tasks = lister.scheduler.search_tasks(task_type="load-npm")
|
||||
assert len(tasks) == 2 * 10 # only 2 files with 10 results each
|
||||
|
||||
check_tasks(tasks)
|
||||
|
|
|
@ -8,23 +8,22 @@ def mock_save(lister):
|
|||
|
||||
|
||||
def test_ping(swh_app, celery_session_worker):
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.npm.tasks.ping')
|
||||
res = swh_app.send_task("swh.lister.npm.tasks.ping")
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
assert res.result == 'OK'
|
||||
assert res.result == "OK"
|
||||
|
||||
|
||||
@patch('swh.lister.npm.tasks.save_registry_state')
|
||||
@patch('swh.lister.npm.tasks.NpmLister')
|
||||
@patch("swh.lister.npm.tasks.save_registry_state")
|
||||
@patch("swh.lister.npm.tasks.NpmLister")
|
||||
def test_lister(lister, save, swh_app, celery_session_worker):
|
||||
# setup the mocked NpmLister
|
||||
lister.return_value = lister
|
||||
lister.run.return_value = None
|
||||
save.side_effect = mock_save
|
||||
|
||||
res = swh_app.send_task('swh.lister.npm.tasks.NpmListerTask')
|
||||
res = swh_app.send_task("swh.lister.npm.tasks.NpmListerTask")
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
|
@ -33,9 +32,9 @@ def test_lister(lister, save, swh_app, celery_session_worker):
|
|||
lister.run.assert_called_once_with()
|
||||
|
||||
|
||||
@patch('swh.lister.npm.tasks.save_registry_state')
|
||||
@patch('swh.lister.npm.tasks.get_last_update_seq')
|
||||
@patch('swh.lister.npm.tasks.NpmIncrementalLister')
|
||||
@patch("swh.lister.npm.tasks.save_registry_state")
|
||||
@patch("swh.lister.npm.tasks.get_last_update_seq")
|
||||
@patch("swh.lister.npm.tasks.NpmIncrementalLister")
|
||||
def test_incremental(lister, seq, save, swh_app, celery_session_worker):
|
||||
# set up the mocked NpmIncrementalLister
|
||||
lister.return_value = lister
|
||||
|
@ -43,8 +42,7 @@ def test_incremental(lister, seq, save, swh_app, celery_session_worker):
|
|||
seq.return_value = 42
|
||||
save.side_effect = mock_save
|
||||
|
||||
res = swh_app.send_task(
|
||||
'swh.lister.npm.tasks.NpmIncrementalListerTask')
|
||||
res = swh_app.send_task("swh.lister.npm.tasks.NpmIncrementalListerTask")
|
||||
assert res
|
||||
res.wait()
|
||||
assert res.successful()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue