Skip to content

Commit

Permalink
agents: improve the CLI
Browse files Browse the repository at this point in the history
* Splits the large `bulk_load` function into smaller functions.
* Restricts jsonschema to < 4.0.0 because of an incompatibility with invenio.
* Updates security dependencies.

Co-Authored-by: Peter Weber <[email protected]>
  • Loading branch information
rerowep and rerowep committed Oct 7, 2021
1 parent 04e8075 commit 7934c3e
Show file tree
Hide file tree
Showing 56 changed files with 1,042 additions and 988 deletions.
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ recursive-include rero_mef *.py
recursive-include tests *.py
recursive-include tests *.xml

# Mef
# MEF
recursive-include rero_mef *.html
recursive-include rero_mef *.png
recursive-include rero_mef *.scss
199 changes: 108 additions & 91 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ jedi = "<0.18.0"
coverage = {version = ">=6.0b1", allow-prereleases = true}
# TODO celery 5.0.6 cannot start a foreground worker
celery = "<5.0.6"
# TODO jsonschema 4.0.0 is not working with invenio because there is a new parameter type!
jsonschema = "<4.0.0"

## RERO ILS specific python modules
PyYAML = ">=5.3.1"
Expand Down
4 changes: 2 additions & 2 deletions rero_mef/agents/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@


class AgentRecord(ReroMefRecord):
"""Authority Record class."""
"""Agent Record class."""

name = None
agent = None
Expand Down Expand Up @@ -186,7 +186,7 @@ def create_or_update_agent_mef_viaf(cls, data, id_=None, delete_pid=True,
dbcommit=False, reindex=False,
test_md5=False, online=False,
verbose=False):
"""Create or update agent, Mef and Viaf record."""
"""Create or update agent, MEF and VIAF record."""
from rero_mef.agents.mef.api import AgentMefRecord
from rero_mef.agents.viaf.api import AgentViafRecord

Expand Down
182 changes: 89 additions & 93 deletions rero_mef/agents/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,65 +19,25 @@

from __future__ import absolute_import, print_function

import itertools
from time import sleep
import os

import click
from celery.bin.control import inspect
from flask.cli import with_appcontext

from .mef.api import AgentMefRecord
from .tasks import \
create_mef_and_agents_from_viaf as task_mef_and_agents_from_viaf
from .tasks import create_mef_from_agent as task_mef_from_agent
from .tasks import create_from_viaf as task_mef_and_agents_from_viaf
from .tasks import create_mef as task_mef_from_agent
from .utils import create_mef_files, create_viaf_files
from .viaf.api import AgentViafRecord
from ..monitoring import Monitoring
from ..utils import get_agent_class, get_agent_classes, progressbar
from ..utils import get_entity_class, get_entity_classes, progressbar


@click.group()
def utils():
"""Misc management commands."""
def agents():
"""Agent management commands."""


def queue_count():
    """Count tasks in celery.

    Adds the number of reserved tasks and the number of active tasks
    reported by the celery inspector.

    :returns: Total number of reserved plus active tasks (0 when the
        inspector reports nothing).
    """
    inspector = inspect()
    task_count = 0
    # A false value (None or empty mapping) means nothing was reported.
    reserved = inspector.reserved()
    if reserved:
        task_count += sum(len(tasks) for tasks in reserved.values())
    active = inspector.active()
    if active:
        # Accumulate instead of overwriting the reserved count, and count
        # the entries of each value the same way the reserved branch does
        # (assumes each value is a collection of tasks, like for reserved).
        task_count += sum(len(tasks) for tasks in active.values())
    return task_count


def wait_empty_tasks(delay, verbose=False):
    """Wait for tasks to be empty.

    The queue is sampled twice, five seconds apart; waiting ends once the
    sum of both samples is zero.

    :param delay: Seconds to sleep between two polling rounds.
    :param verbose: Echo a rotating spinner while waiting.
    """
    # The spinner is only needed (and only created) in verbose mode.
    spinner = itertools.cycle(['-', '\\', '|', '/']) if verbose else None

    def show_progress():
        """Echo the next spinner frame when verbose is set."""
        if verbose:
            click.echo(
                f'Waiting: {next(spinner)}\r',
                nl=False
            )

    def pending_tasks():
        """Return the sum of two queue counts taken five seconds apart."""
        first_sample = queue_count()
        sleep(5)
        return first_sample + queue_count()

    show_progress()
    while pending_tasks():
        show_progress()
        sleep(delay)


@utils.command('create_mef_and_agents_from_viaf')
@agents.command()
@click.option('-5', '--md5', 'test_md5', is_flag=True, default=False,
help='Compaire md5 to find out if we have to update')
@click.option('-k', '--enqueue', 'enqueue', is_flag=True, default=False,
Expand All @@ -88,15 +48,15 @@ def wait_empty_tasks(delay, verbose=False):
@click.option('-w', '--wait', 'wait', is_flag=True, default=False)
@click.option('-m', '--missing', 'missing', is_flag=True, default=False)
@with_appcontext
def create_mef_and_agents_from_viaf(test_md5, enqueue, online, verbose,
progress, wait, missing):
"""Create Mef and agents from viaf."""
def create_from_viaf(test_md5, enqueue, online, verbose, progress, wait,
missing):
"""Create MEF and agents from viaf."""
click.secho(
'Create MEF and Agency from VIAF.',
fg='green'
)
counts = {}
agent_classes = get_agent_classes(without_mef_viaf=False)
agent_classes = get_entity_classes(without_mef_viaf=False)
for name, agent_class in agent_classes.items():
counts[name] = {}
counts[name]['old'] = agent_class.count()
Expand Down Expand Up @@ -135,8 +95,9 @@ def create_mef_and_agents_from_viaf(test_md5, enqueue, online, verbose,
verbose=verbose
)
if wait:
from ..cli import wait_empty_tasks
wait_empty_tasks(delay=3, verbose=True)
for name, agent_class in get_agent_classes(
for name, agent_class in get_entity_classes(
without_mef_viaf=False
).items():
counts[name]['new'] = agent_class.count()
Expand All @@ -153,7 +114,7 @@ def create_mef_and_agents_from_viaf(test_md5, enqueue, online, verbose,
)


@utils.command('create_mef_from_agent')
@agents.command()
@click.option('-t', '--pid_type', 'pid_type', multiple=True,
default=['aidref', 'aggnd', 'agrero'])
@click.option('-k', '--enqueue', 'enqueue', is_flag=True, default=False,
Expand All @@ -164,14 +125,14 @@ def create_mef_and_agents_from_viaf(test_md5, enqueue, online, verbose,
@click.option('-w', '--wait', 'wait', is_flag=True, default=False)
@click.option('-m', '--missing', 'missing', is_flag=True, default=False)
@with_appcontext
def create_mef_from_agent(pid_type, enqueue, online, verbose, progress, wait,
missing):
"""Create Mef from agents."""
def create_mef(pid_type, enqueue, online, verbose, progress, wait, missing):
"""Create MEF from agents."""
if missing:
missing_pids, to_much_pids = AgentMefRecord.get_all_missing_agents_pids(
agents=pid_type,
verbose=progress
)
missing_pids, to_much_pids = \
AgentMefRecord.get_all_missing_agents_pids(
agents=pid_type,
verbose=progress
)
for agent in pid_type:
if agent not in ['aidref', 'aggnd', 'agrero']:
click.secho(
Expand All @@ -183,7 +144,7 @@ def create_mef_from_agent(pid_type, enqueue, online, verbose, progress, wait,
f'Create MEF from {agent}.',
fg='green'
)
agent_class = get_agent_class(agent)
agent_class = get_entity_class(agent)
counts = {}
counts[agent] = agent_class.count()
counts['mef'] = AgentMefRecord.count()
Expand Down Expand Up @@ -221,6 +182,7 @@ def create_mef_from_agent(pid_type, enqueue, online, verbose, progress, wait,
if verbose:
click.echo(msg)
if wait:
from ..cli import wait_empty_tasks
wait_empty_tasks(delay=3, verbose=True)
click.secho(
f'COUNTS: mef: {counts["mef"]}|{AgentMefRecord.count()}'
Expand All @@ -229,37 +191,71 @@ def create_mef_from_agent(pid_type, enqueue, online, verbose, progress, wait,
)


@utils.command('reindex_missing')
@click.option('-a', '--agents', 'agents', multiple=True,
default=['aggnd', 'aidref', 'agrero', 'mef', 'viaf', 'corero'])
@agents.command()
@click.argument('viaf_file')
@click.argument('output_directory')
@click.option('-v', '--verbose', 'verbose', is_flag=True, default=False)
@with_appcontext
def reindex_missing(agents, verbose):
"""Reindex agents missing in ES."""
for agent in agents:
click.secho(
f'Reindex missing {agent} from ES.',
fg='green'
def create_csv_viaf(viaf_file, output_directory, verbose):
"""Create VIAF CSV from VIAF source text file.
:param viaf_file: VIAF source text file.
:param output_directory: Output directory.
:param verbose: Verbose.
"""
click.secho(f' Create VIAF CSV files.', err=True)
pidstore = os.path.join(output_directory, 'viaf_pidstore.csv')
metadata = os.path.join(output_directory, 'viaf_metadata.csv')
click.secho(
f' VIAF input file: {viaf_file} ',
err=True
)

count = create_viaf_files(
viaf_input_file=viaf_file,
viaf_pidstore_file_name=pidstore,
viaf_metadata_file_name=metadata,
verbose=verbose
)
click.secho(
f' Number of VIAF records created: {count}.',
fg='green',
err=True)


@agents.command()
@click.argument('viaf_metadata_file')
@click.argument('output_directory')
@click.option('-v', '--verbose', 'verbose', is_flag=True, default=False)
@with_appcontext
def create_csv_mef(viaf_metadata_file, output_directory, verbose):
"""Create MEF CSV from INVENIO metadata file.
:param viaf_metadata_file: VIAF metadata CSV file.
:param output_directory: Output directory.
:param verbose: Verbose.
"""
click.secho(f' Create MEF CSV files from JSON.', err=True)
pidstore = os.path.join(output_directory, 'mef_pidstore.csv')
metadata = os.path.join(output_directory, 'mef_metadata.csv')
ids = os.path.join(output_directory, 'mef_id.csv')

click.secho(
f' VIAF input file: {viaf_metadata_file} ',
err=True
)
message = f' CSV output files: {pidstore}, {metadata}'

count = create_mef_files(
viaf_pidstore_file=viaf_metadata_file,
input_directory=output_directory,
mef_pidstore_file_name=pidstore,
mef_metadata_file_name=metadata,
mef_ids_file_name=ids,
verbose=verbose
)
agent_class = get_agent_class(agent)
pids_es, pids_db, pids_es_double, index = \
Monitoring.get_es_db_missing_pids(doc_type=agent, verbose=verbose)
if verbose:
click.secho(
f' {agent} ES: {len(pids_es)} DB: {len(pids_db)} '
f'Double:{len(pids_es_double)}'
)
progress_bar = progressbar(
items=pids_db,
length=len(pids_db),
verbose=verbose
)
for pid in progress_bar:
rec = agent_class.get_record_by_pid(pid)
if rec:
rec.reindex()
else:
click.secho(
f' {agent} record not found: {pid}',
fg='red'
)

click.secho(
f' Number of MEF records created: {count}.',
fg='green',
err=True)
4 changes: 2 additions & 2 deletions rero_mef/agents/gnd/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""API for manipulating gnd agent."""
"""API for manipulating GND agent."""

from invenio_search.api import RecordsSearch

Expand All @@ -41,7 +41,7 @@ class Meta:


class AgentGndRecord(AgentRecord):
"""Gnd Authority class."""
"""Gnd agent class."""

minter = gnd_id_minter
fetcher = gnd_id_fetcher
Expand Down
2 changes: 1 addition & 1 deletion rero_mef/agents/gnd/jsonresolvers/gnd_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@

@jsonresolver.route('/api/gnd/<path:path>', host=get_host())
def resolve_gnd(path):
"""Resolve Gnd records."""
"""Resolve GND records."""
return resolve_record(path, AgentGndRecord)
2 changes: 1 addition & 1 deletion rero_mef/agents/gnd/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


class AgentGndIdentifier(RecordIdentifier):
"""Sequence generator for gnd Authority identifiers."""
"""Sequence generator for GND Authority identifiers."""

__tablename__ = 'agent_gnd_id'
__mapper_args__ = {'concrete': True}
Expand Down
2 changes: 1 addition & 1 deletion rero_mef/agents/gnd/views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# RERO MEF
# Copyright (C) 2020 RERO
# Copyright (C) 2021 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand Down
4 changes: 2 additions & 2 deletions rero_mef/agents/idref/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""API for manipulating idref agent."""
"""API for manipulating IDREF agent."""

from invenio_search.api import RecordsSearch

Expand All @@ -41,7 +41,7 @@ class Meta:


class AgentIdrefRecord(AgentRecord):
"""Idref Authority class."""
"""Idref agent class."""

minter = idref_id_minter
fetcher = idref_id_fetcher
Expand Down
2 changes: 1 addition & 1 deletion rero_mef/agents/idref/jsonresolvers/idref_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@

@jsonresolver.route('/api/idref/<path:path>', host=get_host())
def resolve_idref(path):
"""Resolve Idref records."""
"""Resolve IDREF records."""
return resolve_record(path, AgentIdrefRecord)
2 changes: 1 addition & 1 deletion rero_mef/agents/idref/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


class AgentIdrefIdentifier(RecordIdentifier):
"""Sequence generator for Idref Authority identifiers."""
"""Sequence generator for IDREF agent identifiers."""

__tablename__ = 'agent_idref_id'
__mapper_args__ = {'concrete': True}
Expand Down
4 changes: 2 additions & 2 deletions rero_mef/agents/idref/views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# RERO MEF
# Copyright (C) 2020 RERO
# Copyright (C) 2021 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -15,7 +15,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Agent IdRef views."""
"""Agent IDREF views."""

from flask import Blueprint, redirect, request, url_for

Expand Down
Loading

0 comments on commit 7934c3e

Please sign in to comment.