diff --git a/rero_mef/agents/api.py b/rero_mef/agents/api.py index b4ec24fa..d7b0fb0e 100644 --- a/rero_mef/agents/api.py +++ b/rero_mef/agents/api.py @@ -91,7 +91,13 @@ def replace_test_md5(self, data, dbcommit=False, reindex=False): def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False, online=False): - """Create or update MEF and VIAF record.""" + """Create or update MEF and VIAF record. + + :param dbcommit: Commit changes to DB. + :param reindex: Reindex record. + :param online: Try to get VIAF record online. + :returns: MEF record, MEF action, VIAF record, VIAF + """ from .viaf.api import AgentViafRecord AgentViafRecord.update_indexes() viaf_record, got_online = AgentViafRecord.get_viaf_by_agent( @@ -104,10 +110,7 @@ def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False, agent_pid=self.pid ) mef_data = {self.agent: {'$ref': ref_string}} - mef_record = AgentMefRecord.get_mef_by_agent_pid( - agent_pid=self.pid, - agent_name=self.name - ) + mef_record = AgentMefRecord.get_mef_by_entity_pid(self.pid, self.name) if viaf_record: mef_data['viaf_pid'] = viaf_record.pid if not mef_record: @@ -142,10 +145,7 @@ def delete_from_mef(self, dbcommit=False, reindex=False, verbose=False): from .mef.api import AgentMefRecord mef_action = Action.DISCARD old_mef_pid = 'None' - mef_record = AgentMefRecord.get_mef_by_agent_pid( - agent_pid=self.pid, - agent_name=self.name - ) + mef_record = AgentMefRecord.get_mef_by_entity_pid(self.pid, self.name) if mef_record: old_mef_pid = mef_record.pid if not mef_record.deleted: @@ -225,10 +225,8 @@ def create_or_update_agent_mef_viaf(cls, data, id_=None, delete_pid=True, online = False else: if action == Action.UPTODATE: - mef_record = AgentMefRecord.get_mef_by_agent_pid( - agent_pid=record.pid, - agent_name=record.name - ) + mef_record = AgentMefRecord.get_mef_by_entity_pid( + record.pid, record.name) mef_action = Action.UPTODATE viaf_record, online = AgentViafRecord.get_viaf_by_agent(record) else: diff 
--git a/rero_mef/agents/cli.py b/rero_mef/agents/cli.py index 5e3250de..ccb6812b 100644 --- a/rero_mef/agents/cli.py +++ b/rero_mef/agents/cli.py @@ -22,11 +22,11 @@ import os import click +from flask import current_app from flask.cli import with_appcontext from .mef.api import AgentMefRecord -from .tasks import create_from_viaf as task_mef_and_agents_from_viaf -from .tasks import create_mef as task_mef_from_agent +from .tasks import task_create_mef_for_agent, task_create_mef_from_viaf_agent from .utils import create_mef_files, create_viaf_files from .viaf.api import AgentViafRecord from ..utils import get_entity_class, get_entity_classes, progressbar @@ -77,7 +77,7 @@ def create_from_viaf(test_md5, enqueue, online, verbose, progress, wait, ) for pid in progress_bar: if enqueue: - task = task_mef_and_agents_from_viaf.delay( + task = task_create_mef_from_viaf_agent.delay( pid=pid, dbcommit=True, reindex=True, @@ -86,7 +86,7 @@ def create_from_viaf(test_md5, enqueue, online, verbose, progress, wait, ) click.echo(f'viaf pid: {pid} task:{task}') else: - task_mef_and_agents_from_viaf( + task_create_mef_from_viaf_agent( pid=pid, dbcommit=True, reindex=True, @@ -127,14 +127,12 @@ def create_from_viaf(test_md5, enqueue, online, verbose, progress, wait, @with_appcontext def create_mef(pid_type, enqueue, online, verbose, progress, wait, missing): """Create MEF from agents.""" + AGENTS = current_app.config.get('AGENTS', []) if missing: missing_pids, to_much_pids = \ - AgentMefRecord.get_all_missing_agents_pids( - agents=pid_type, - verbose=progress - ) + AgentMefRecord.get_all_missing_pids(pid_type, verbose=progress) for agent in pid_type: - if agent not in ['aidref', 'aggnd', 'agrero']: + if agent not in AGENTS: click.secho( f'Error create MEF from {agent}. 
Wrong agent!', fg='red' @@ -162,7 +160,7 @@ def create_mef(pid_type, enqueue, online, verbose, progress, wait, missing): ) for pid in progress_bar: if enqueue: - task = task_mef_from_agent.delay( + task = task_create_mef_for_agent.delay( pid=pid, agent=agent, dbcommit=True, @@ -172,7 +170,7 @@ def create_mef(pid_type, enqueue, online, verbose, progress, wait, missing): if verbose: click.echo(f'{agent} pid: {pid} task:{task}') else: - msg = task_mef_from_agent( + msg = task_create_mef_for_agent( pid=pid, agent=agent, dbcommit=True, diff --git a/rero_mef/agents/gnd/jsonresolvers/gnd_resolver.py b/rero_mef/agents/gnd/jsonresolvers/gnd_resolver.py index 6acf4d05..8fde3d9c 100644 --- a/rero_mef/agents/gnd/jsonresolvers/gnd_resolver.py +++ b/rero_mef/agents/gnd/jsonresolvers/gnd_resolver.py @@ -27,7 +27,7 @@ from ..api import AgentGndRecord -@jsonresolver.route('/api/gnd/', host=get_host()) +@jsonresolver.route('/api/agents/gnd/', host=get_host()) def resolve_gnd(path): """Resolve GND records.""" return resolve_record(path, AgentGndRecord) diff --git a/rero_mef/agents/gnd/views.py b/rero_mef/agents/gnd/views.py index 591b500a..051c5f1b 100644 --- a/rero_mef/agents/gnd/views.py +++ b/rero_mef/agents/gnd/views.py @@ -27,19 +27,19 @@ @api_blueprint.route('') -def redirect_idref_list(): +def redirect_gnd_list(): """Redirect list to new address.""" return redirect( - url_for('invenio_records_rest.aidref_list', **request.args), + url_for('invenio_records_rest.aggnd_list', **request.args), code=308 ) @api_blueprint.route('/') -def redirect_idref_item(pid): +def redirect_gnd_item(pid): """Redirect item to new address.""" return redirect( url_for( - 'invenio_records_rest.aidref_item', pid_value=pid, **request.args), + 'invenio_records_rest.aggnd_item', pid_value=pid, **request.args), code=308 ) diff --git a/rero_mef/agents/idref/jsonresolvers/idref_resolver.py b/rero_mef/agents/idref/jsonresolvers/idref_resolver.py index de37b43d..9c686b8a 100644 --- 
a/rero_mef/agents/idref/jsonresolvers/idref_resolver.py +++ b/rero_mef/agents/idref/jsonresolvers/idref_resolver.py @@ -26,7 +26,7 @@ from ....utils import get_host, resolve_record -@jsonresolver.route('/api/idref/', host=get_host()) +@jsonresolver.route('/api/agents/idref/', host=get_host()) def resolve_idref(path): """Resolve IDREF records.""" return resolve_record(path, AgentIdrefRecord) diff --git a/rero_mef/agents/idref/views.py b/rero_mef/agents/idref/views.py index e0040c12..ce3175ca 100644 --- a/rero_mef/agents/idref/views.py +++ b/rero_mef/agents/idref/views.py @@ -30,7 +30,7 @@ def redirect_idref_list(): """Redirect list to new address.""" return redirect( - url_for('invenio_records_rest.aggnd_list', **request.args), + url_for('invenio_records_rest.aidref_item', **request.args), code=308 ) @@ -40,6 +40,6 @@ def redirect_idref_item(pid): """Redirect item to new address.""" return redirect( url_for( - 'invenio_records_rest.aggnd_item', pid_value=pid, **request.args), + 'invenio_records_rest.aidref_item', pid_value=pid, **request.args), code=308 ) diff --git a/rero_mef/agents/mef/api.py b/rero_mef/agents/mef/api.py index 12d27adb..f1fd720e 100644 --- a/rero_mef/agents/mef/api.py +++ b/rero_mef/agents/mef/api.py @@ -17,11 +17,7 @@ """API for manipulating MEF records.""" -from datetime import datetime - import click -import pytz -from elasticsearch_dsl import Q from flask import current_app from invenio_search import current_search from invenio_search.api import RecordsSearch @@ -30,8 +26,9 @@ from .minters import mef_id_minter from .models import AgentMefMetadata from .providers import MefProvider -from ..api import Action, ReroIndexer, ReroMefRecord -from ...utils import get_entity_class, get_entity_classes, progressbar +from ...api import Action, ReroIndexer +from ...api_mef import EntityMefRecord +from ...utils import progressbar class AgentMefSearch(RecordsSearch): @@ -48,242 +45,45 @@ class Meta: default_filter = None -class 
AgentMefRecord(ReroMefRecord): +class AgentMefRecord(EntityMefRecord): """Mef agent class.""" minter = mef_id_minter fetcher = mef_id_fetcher provider = MefProvider model_cls = AgentMefMetadata + search = AgentMefSearch + mef_type = 'AGENTS' @classmethod def build_ref_string(cls, agent_pid, agent): """Build url for agent's api.""" with current_app.app_context(): ref_string = (f'{current_app.config.get("RERO_MEF_APP_BASE_URL")}' - f'/api/{agent}/{agent_pid}') + f'/api/agents/{agent}/{agent_pid}') return ref_string - def reindex(self, forceindex=False): - """Reindex record.""" - if forceindex: - result = AgentMefIndexer(version_type='external_gte').index(self) - else: - result = AgentMefIndexer().index(self) - return result - - @classmethod - def get_mef_by_agent_pid(cls, agent_pid, agent_name, pid_only=False): - """Get MEF record by agent pid value.""" - key = f'{agent_name}.pid' - search = AgentMefSearch() \ - .filter('term', **{key: agent_pid}) \ - .source(['pid']) - if search.count() > 1: - current_app.logger.error( - f'MULTIPLE MEF FOUND FOR: {agent_name} {agent_pid}' - ) - try: - mef_pid = next(search.scan()).pid - if pid_only: - return mef_pid - else: - return cls.get_record_by_pid(mef_pid) - except StopIteration: - return None - @classmethod - def get_all_mef_pids_by_agent(cls, agent): - """Get all MEF pids for agent. - - :param agent: Agent to search pid for. - :returns: Generator of agent pids. - """ - key = f'{agent}.pid' - search = AgentMefSearch() - results = search.filter( - 'exists', - field=key - ).source(['pid', key]).scan() - for result in results: - result_dict = result.to_dict() - yield result_dict.get(agent, {}).get('pid'),\ - result_dict.get('pid') - - @classmethod - def get_mef_by_viaf_pid(cls, viaf_pid): - """Get MEF record by agent pid value. - - :param viaf_pid: VIAF pid. - :returns: Associated MEF record. 
- """ - search = AgentMefSearch() - result = search.filter( - 'term', viaf_pid=viaf_pid).source(['pid']).scan() + def update_indexes(cls): + """Update indexes.""" try: - mef_pid = next(result).pid - return cls.get_record_by_pid(mef_pid) - except StopIteration: - return None - - @classmethod - def get_all_pids_without_agents_viaf(cls): - """Get all pids for records without agents and VIAF pids. - - :returns: Generator of MEF pids without agent links and without VIAF. - """ - query = AgentMefSearch()\ - .filter('bool', must_not=[Q('exists', field="viaf_pid")]) \ - .filter('bool', must_not=[Q('exists', field="gnd")]) \ - .filter('bool', must_not=[Q('exists', field="idref")]) \ - .filter('bool', must_not=[Q('exists', field="rero")]) \ - .source('pid')\ - .scan() - for hit in query: - yield hit.pid - - @classmethod - def get_all_pids_without_viaf(cls): - """Get all pids for records without VIAF pid. - - :returns: Generator of MEF pids without VIAF pid. - """ - query = AgentMefSearch()\ - .filter('bool', must_not=[Q('exists', field="viaf_pid")])\ - .filter('bool', should=[Q('exists', field="gnd")]) \ - .filter('bool', should=[Q('exists', field="idref")]) \ - .filter('bool', should=[Q('exists', field="rero")]) \ - .source('pid')\ - .scan() - for hit in query: - yield hit.pid - - @classmethod - def get_agent_pids_with_multiple_mef( - cls, - agents=['aggnd', 'aidref', 'agrero'], - verbose=False - ): - """Get agent pids with multiple MEF records. - - :params agents: Agents default=['aggnd', 'aidref', 'agrero']. - :param verbose: Verbose. - :returns: pids, multiple pids, missing pids. 
- """ - pids = {} - multiple_pids = {} - missing_pids = {} - for agent in agents: - if verbose: - click.echo(f'Calculating {agent}:') - pids[agent] = {} - multiple_pids[agent] = {} - missing_pids[agent] = [] - - agent_class = get_entity_class(agent) - agent_name = agent_class.name - search = AgentMefSearch().filter('exists', field=agent_name) - progress = progressbar( - items=search.scan(), - length=search.count(), - verbose=verbose - ) - for hit in progress: - data = hit.to_dict() - mef_pid = data['pid'] - agent_pid = data[agent_name]['pid'] - pids[agent].setdefault(agent_pid, []) - pids[agent][agent_pid].append(mef_pid) - if len(pids[agent][agent_pid]) > 1: - multiple_pids[agent][agent_pid] = pids[agent][agent_pid] - if len(pids[agent]) < agent_class.count(): - progress = progressbar( - items=agent_class.get_all_pids(), - length=agent_class.count(), - verbose=verbose - ) - for pid in progress: - if not pids[agent].pop(pid, None): - missing_pids[agent].append(pid) - else: - pids[agent] = {} - return pids, multiple_pids, missing_pids - - # multiple_pids = {} - # for agent in agents: - # multiple_pids[agent] = {} - # agent_class = get_entity_class(agent) - # if agent_class: - # agent_name = agent_class.name - # search = AgentMefSearch() - # search.aggs.bucket( - # 'MULTIPLE', - # 'terms', - # field=f'{agent_name}.pid', - # min_doc_count=2, - # size=size - # ) - # res = search.execute() - # for values in res.aggregations.MULTIPLE.buckets: - # agent_pid = values.key - # field = f'{agent_name}.pid' - # search = AgentMefSearch().filter( - # Q('term', **{field: agent_pid})) - # mef_pids = [] - # for hit in search: - # mef_pids.append(hit.pid) - # mef_pids = sorted(mef_pids) - # multiple_pids[agent][agent_pid] = mef_pids - # return multiple_pids + current_search.flush_and_refresh(index='mef') + except Exception as err: + current_app.logger.error(f'ERROR flush and refresh: {err}') - @classmethod - def get_all_missing_agents_pids( - cls, - agents=['aggnd', 'aidref', 
'agrero'], - verbose=False - ): - """Get all missing agents. - - :params agents: Agents default=['aggnd', 'aidref', 'agrero']. - :param verbose: Verbose. - :returns: missing pids, to much pids. - """ - missing_pids = {} - to_much_pids = {} - used_classes = {} - agent_classes = get_entity_classes() - for agent_classe in agent_classes: - if agent_classe in agents: - used_classes[agent_classe] = agent_classes[agent_classe] - for agent, agent_class in used_classes.items(): - if verbose: - click.echo(f'Get pids from {agent} ...') - missing_pids[agent] = {} - progress = progressbar( - items=agent_class.get_all_pids(), - length=agent_class.count(), - verbose=verbose + def delete_agent(self, agent_record, dbcommit=False, reindex=False): + """Delete Agency from record.""" + action = Action.DISCARD + if self.pop(agent_record.agent, None): + action = Action.UPDATE + self.replace( + data=self, + dbcommit=dbcommit, + reindex=reindex ) - for pid in progress: - missing_pids[agent][pid] = 1 - if verbose: - click.echo('Get pids from MEF and calculate missing ...') - progress = progressbar( - items=AgentMefSearch().filter('match_all').source().scan(), - length=AgentMefSearch().filter('match_all').source().count(), - verbose=verbose - ) - for hit in progress: - pid = hit.pid - data = hit.to_dict() - for agent, agent_class in used_classes.items(): - agent_data = data.get(agent_class.name) - if agent_data: - agent_pid = agent_data.get('pid') - if not missing_pids[agent].pop(agent_pid, None): - to_much_pids.setdefault(pid, {}) - to_much_pids[pid][agent] = agent_pid - return missing_pids, to_much_pids + if reindex: + AgentMefRecord.update_indexes() + return self, action @classmethod def get_all_missing_viaf_pids(cls, verbose=False): @@ -302,8 +102,8 @@ def get_all_missing_viaf_pids(cls, verbose=False): if verbose: click.echo('Get pids from MEF and calculate missing ...') progress = progressbar( - items=AgentMefSearch().filter('match_all').source().scan(), - 
length=AgentMefSearch().filter('match_all').source().count(), + items=cls.search().filter('match_all').source().scan(), + length=cls.search().filter('match_all').source().count(), verbose=True ) for hit in progress: @@ -313,71 +113,6 @@ def get_all_missing_viaf_pids(cls, verbose=False): missing_pids.pop(viaf_pid, None) return missing_pids - def mark_as_deleted(self, dbcommit=False, reindex=False): - """Mark record as deleted.""" - # if current_app.config['INDEXER_REPLACE_REFS']: - # data = deepcopy(self.replace_refs()) - # else: - # data = self.dumps() - # data['_deleted'] = pytz.utc.localize(self.created).isoformat() - # - # indexer = AgentMefIndexer() - # index, doc_type = indexer.record_to_index(self) - # print('---->', index, doc_type) - # body = indexer._prepare_record(data, index, doc_type) - # index, doc_type = indexer._prepare_index(index, doc_type) - # print('---->', index, doc_type) - # - # return indexer.client.index( - # id=str(self.id), - # version=self.revision_id, - # version_type=indexer._version_type, - # index=index, - # doc_type=doc_type, - # body=body - # ) - self['deleted'] = pytz.utc.localize(datetime.now()).isoformat() - self.update(data=self, dbcommit=dbcommit, reindex=reindex) - return self - - @property - def deleted(self): - """Get record deleted value.""" - return self.get('deleted') - - @classmethod - def create_deleted(cls, agent, dbcommit=False, reindex=False): - """Create a deleted record for an agent.""" - data = {} - data[agent.name] = {'$ref': cls.build_ref_string( - agent_pid=agent.pid, - agent=agent.name - )} - data['deleted'] = pytz.utc.localize(datetime.now()).isoformat() - return cls.create(data=data, dbcommit=dbcommit, reindex=reindex) - - @classmethod - def update_indexes(cls): - """Update indexes.""" - try: - current_search.flush_and_refresh(index='mef') - except Exception as err: - current_app.logger.error(f'ERROR flush and refresh: {err}') - - def delete_agent(self, agent_record, dbcommit=False, reindex=False): - 
"""Delete Agency from record.""" - action = Action.DISCARD - if self.pop(agent_record.agent, None): - action = Action.UPDATE - self.replace( - data=self, - dbcommit=dbcommit, - reindex=reindex - ) - if reindex: - AgentMefRecord.update_indexes() - return self, action - class AgentMefIndexer(ReroIndexer): """AgentMefIndexer.""" diff --git a/rero_mef/agents/mef/jsonschemas/mef/mef-v0.0.1.json b/rero_mef/agents/mef/jsonschemas/mef/mef-v0.0.1.json index de0453ab..1a27850f 100644 --- a/rero_mef/agents/mef/jsonschemas/mef/mef-v0.0.1.json +++ b/rero_mef/agents/mef/jsonschemas/mef/mef-v0.0.1.json @@ -20,20 +20,20 @@ "minLength": 1 }, "viaf_pid": { - "title": "VIAF authority ID", + "title": "VIAF agent ID", "type": "string", "minLength": 1 }, "gnd": { - "title": "GND authority ID", + "title": "GND agent ID", "type": "object" }, "rero": { - "title": "RERO authority ID", + "title": "RERO agent ID", "type": "object" }, "idref": { - "title": "IDREF authority ID", + "title": "IDREF agent ID", "type": "object" }, "deleted": { diff --git a/rero_mef/agents/mef/listner.py b/rero_mef/agents/mef/listner.py index 4d2a7488..4361dbbc 100644 --- a/rero_mef/agents/mef/listner.py +++ b/rero_mef/agents/mef/listner.py @@ -20,8 +20,8 @@ from .api import AgentMefSearch -def enrich_mef_data(sender, json=None, record=None, index=None, doc_type=None, - arguments=None, **kwargs): +def enrich_agent_mef_data(sender, json=None, record=None, index=None, + doc_type=None, arguments=None, **kwargs): """Signal sent before a record is indexed. :param json: The dumped record dictionary which can be modified. 
diff --git a/rero_mef/serializers.py b/rero_mef/agents/mef/serializers.py similarity index 65% rename from rero_mef/serializers.py rename to rero_mef/agents/mef/serializers.py index affe012f..66095dc3 100644 --- a/rero_mef/serializers.py +++ b/rero_mef/agents/mef/serializers.py @@ -23,50 +23,17 @@ from invenio_records_rest.serializers.json import JSONSerializer from invenio_records_rest.serializers.response import record_responsify -from .agents.mef.api import AgentMefRecord, AgentMefSearch -from .agents.viaf.api import AgentViafSearch -from .utils import get_entity_classes +from ...utils import get_entity_classes def add_links(pid, record): - """Add MEF link to agents.""" + """Add VIAF links to MEF.""" links = {} - if pid.pid_type == 'mef': - viaf_pid = record.get('viaf_pid') - if viaf_pid: - links['viaf'] = '{scheme}://{host}/api/agents/viaf/' \ - + str(viaf_pid) - links['viaf.org'] = 'http://www.viaf.org/viaf/' + str(viaf_pid) - elif pid.pid_type == "viaf": - viaf_pid = record.get('pid') - mef_pid_search = AgentMefSearch() \ - .filter('term', viaf_pid=viaf_pid) \ - .source(['pid']).scan() - try: - mef_pid = next(mef_pid_search).pid - links['mef'] = '{scheme}://{host}/api/agents/mef/' + str(mef_pid) - except Exception: - pass - links['viaf.org'] = 'http://www.viaf.org/viaf/' + str(viaf_pid) - else: - mef_pid = AgentMefRecord.get_mef_by_agent_pid( - record.pid, - record.name, - pid_only=True - ) - if mef_pid: - links['mef'] = '{scheme}://{host}/api/agents/mef/' + str(mef_pid) - try: - viaf_pid_name = record.viaf_pid_name - query = AgentViafSearch(). \ - filter({'term': {viaf_pid_name: pid.pid_value}}). 
\ - source('pid') - viaf_pid = next(query.scan()).pid - links['viaf'] = '{scheme}://{host}/api/agents/viaf/' \ + viaf_pid = record.get('viaf_pid') + if viaf_pid: + links['viaf'] = '{scheme}://{host}/api/agents/viaf/' \ + str(viaf_pid) - links['viaf.org'] = 'http://www.viaf.org/viaf/' + str(viaf_pid) - except Exception: - pass + links['viaf.org'] = 'http://www.viaf.org/viaf/' + str(viaf_pid) link_factory = default_links_factory_with_additional(links) return link_factory(pid) @@ -123,4 +90,5 @@ def serialize(self, pid, record, links_factory=None, **kwargs): json_v1 = ReroMefSerializer(RecordSchemaJSONV1) """JSON v1 serializer.""" -json_v1_response = record_responsify(json_v1, 'application/rero+json') +json_v1_agent_mef_response = record_responsify( + json_v1, 'application/rero+json') diff --git a/rero_mef/agents/mef/views.py b/rero_mef/agents/mef/views.py index 46fdc633..93032b7b 100644 --- a/rero_mef/agents/mef/views.py +++ b/rero_mef/agents/mef/views.py @@ -27,7 +27,7 @@ @api_blueprint.route('') -def redirect_idref_list(): +def redirect_mef_list(): """Redirect list to new address.""" return redirect( url_for('invenio_records_rest.mef_list', **request.args), @@ -36,7 +36,7 @@ def redirect_idref_list(): @api_blueprint.route('/') -def redirect_idref_item(pid): +def redirect_mef_item(pid): """Redirect item to new address.""" return redirect( url_for( diff --git a/rero_mef/agents/rero/jsonresolvers/rero_resolver.py b/rero_mef/agents/rero/jsonresolvers/rero_resolver.py index 91cfcbdd..bbb3e533 100644 --- a/rero_mef/agents/rero/jsonresolvers/rero_resolver.py +++ b/rero_mef/agents/rero/jsonresolvers/rero_resolver.py @@ -26,7 +26,7 @@ from ....utils import get_host, resolve_record -@jsonresolver.route('/api/rero/', host=get_host()) +@jsonresolver.route('/api/agents/rero/', host=get_host()) def resolve_rero(path): """Resolve RERO records.""" return resolve_record(path, AgentReroRecord) diff --git a/rero_mef/agents/rero/views.py b/rero_mef/agents/rero/views.py index 
8db83258..fff7c72c 100644 --- a/rero_mef/agents/rero/views.py +++ b/rero_mef/agents/rero/views.py @@ -27,7 +27,7 @@ @api_blueprint.route('') -def redirect_idref_list(): +def redirect_rero_list(): """Redirect list to new address.""" return redirect( url_for('invenio_records_rest.agrero_list', **request.args), @@ -36,7 +36,7 @@ def redirect_idref_list(): @api_blueprint.route('/') -def redirect_idref_item(pid): +def redirect_rero_item(pid): """Redirect item to new address.""" return redirect( url_for( diff --git a/rero_mef/agents/serializers.py b/rero_mef/agents/serializers.py new file mode 100644 index 00000000..d4c682a7 --- /dev/null +++ b/rero_mef/agents/serializers.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +"""Record serialization.""" + +from flask import request +from invenio_records_rest.links import default_links_factory_with_additional +from invenio_records_rest.schemas import RecordSchemaJSONV1 +from invenio_records_rest.serializers.json import JSONSerializer +from invenio_records_rest.serializers.response import record_responsify + +from .mef.api import AgentMefRecord +from .viaf.api import AgentViafSearch + + +def add_links(pid, record): + """Add MEF link to agents.""" + links = {} + mef_pid = AgentMefRecord.get_mef_by_entity_pid( + record.pid, record.name, pid_only=True) + if mef_pid: + links['mef'] = '{scheme}://{host}/api/agents/mef/' + str(mef_pid) + try: + viaf_pid_name = record.viaf_pid_name + query = AgentViafSearch(). \ + filter({'term': {viaf_pid_name: pid.pid_value}}). \ + source('pid') + viaf_pid = next(query.scan()).pid + links['viaf'] = '{scheme}://{host}/api/agents/viaf/' \ + + str(viaf_pid) + links['viaf.org'] = 'http://www.viaf.org/viaf/' + str(viaf_pid) + except Exception: + pass + + link_factory = default_links_factory_with_additional(links) + return link_factory(pid) + + +class ReroMefSerializer(JSONSerializer): + """Mixin serializing records as JSON.""" + + def serialize(self, pid, record, links_factory=None, **kwargs): + """Serialize a single record and persistent identifier. + + :param pid: Persistent identifier instance. + :param record: Record instance. + :param links_factory: Factory function for record links. 
+ """ + if request and request.args.get('resolve'): + record = record.replace_refs() + + return super(ReroMefSerializer, self).serialize( + pid, record, links_factory=add_links, **kwargs + ) + + +json_v1 = ReroMefSerializer(RecordSchemaJSONV1) +"""JSON v1 serializer.""" + +json_v1_agent_response = record_responsify(json_v1, 'application/rero+json') diff --git a/rero_mef/agents/tasks.py b/rero_mef/agents/tasks.py index e63b6681..8df99ffc 100644 --- a/rero_mef/agents/tasks.py +++ b/rero_mef/agents/tasks.py @@ -24,7 +24,7 @@ @shared_task -def create_from_viaf(pid, dbcommit=True, reindex=True, +def task_create_mef_from_viaf_agent(pid, dbcommit=True, reindex=True, test_md5=False, online=False, verbose=False): """Create MEF and agents from VIAF task. @@ -49,15 +49,14 @@ def create_from_viaf(pid, dbcommit=True, reindex=True, @shared_task -def create_mef(pid, agent, dbcommit=True, reindex=True, - online=False): +def task_create_mef_for_agent(pid, agent, dbcommit=True, reindex=True, + online=False): """Create MEF from agent task. 
:param pid: pid for agent to use :param agent: agent :param dbcommit: db commit or not :param reindex: reindex or not - :param test_md5: test md5 or not :param online: get VIAF online if not exist :returns: no return """ @@ -70,12 +69,15 @@ def create_mef(pid, agent, dbcommit=True, reindex=True, reindex=reindex, online=online ) - mef_pid = 'Non' - if mef_record: - mef_pid = mef_record.pid - viaf_pid = 'Non' - if viaf_record: - viaf_pid = viaf_record.pid + mef_pid = 'Non' + if mef_record: + mef_pid = mef_record.pid + viaf_pid = 'Non' + if viaf_record: + viaf_pid = viaf_record.pid - actions = f'mef: {mef_pid} {mef_action.value} viaf: {viaf_pid} {online}' - return f'Create MEF from {agent} pid: {pid} | {actions}' + actions = f'mef: {mef_pid} {mef_action.value} ' \ + 'viaf: {viaf_pid} {online}' + return f'Create MEF from {agent} pid: {pid} | {actions}' + else: + return f'Not found agent {agent}:{pid}' diff --git a/rero_mef/agents/viaf/api.py b/rero_mef/agents/viaf/api.py index 0253f11f..007bd39f 100644 --- a/rero_mef/agents/viaf/api.py +++ b/rero_mef/agents/viaf/api.py @@ -222,7 +222,6 @@ def delete(self, dbcommit=False, delindex=False, online=False): """Delete record and persistent identifier.""" agents_records = self.get_agents_records() # delete viaf_pid from MEF record - from ..mef.api import AgentMefRecord mef_record = AgentMefRecord.get_mef_by_viaf_pid(self.pid) if mef_record: mef_record.pop('viaf_pid', None) diff --git a/rero_mef/agents/viaf/jsonresolvers/__init__.py b/rero_mef/agents/viaf/jsonresolvers/__init__.py new file mode 100644 index 00000000..d751db1d --- /dev/null +++ b/rero_mef/agents/viaf/jsonresolvers/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""JSON resolvers.""" diff --git a/rero_mef/agents/viaf/jsonresolvers/viaf_resolver.py b/rero_mef/agents/viaf/jsonresolvers/viaf_resolver.py new file mode 100644 index 00000000..25a3d28b --- /dev/null +++ b/rero_mef/agents/viaf/jsonresolvers/viaf_resolver.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ + +"""Json resolvers.""" + +from __future__ import absolute_import, print_function, unicode_literals + +import jsonresolver + +from ..api import AgentViafRecord +from ....utils import get_host, resolve_record + + +@jsonresolver.route('/api/agents/viaf/', host=get_host()) +def resolve_rero(path): + """Resolve VIAF records.""" + return resolve_record(path, AgentViafRecord) diff --git a/rero_mef/agents/viaf/serializers.py b/rero_mef/agents/viaf/serializers.py new file mode 100644 index 00000000..d369f7ee --- /dev/null +++ b/rero_mef/agents/viaf/serializers.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +"""Record serialization.""" + +from flask import request, url_for +from invenio_records_rest.links import default_links_factory_with_additional +from invenio_records_rest.schemas import RecordSchemaJSONV1 +from invenio_records_rest.serializers.json import JSONSerializer +from invenio_records_rest.serializers.response import record_responsify + +from ..mef.api import AgentMefSearch + + +def add_links(pid, record): + """Add MEF link to VIAF.""" + links = {} + viaf_pid = record.get('pid') + mef_pid_search = AgentMefSearch() \ + .filter('term', viaf_pid=viaf_pid) \ + .source(['pid']).scan() + try: + for idx, search in enumerate(mef_pid_search): + url = '{scheme}://{host}/api/agents/mef/' + str(search.pid) + if idx: + links[f'mef {idx}'] = url + else: + links['mef'] = url + except Exception: + pass + links['viaf.org'] = 'http://www.viaf.org/viaf/' + str(viaf_pid) + + link_factory = default_links_factory_with_additional(links) + return link_factory(pid) + + +# Nice to have direct working links in test server! +def local_link(agent, name, record): + """Change links to actual links.""" + if name in record: + ref = record[name].get('$ref') + if ref: + my_pid = ref.split('/')[-1] + url = url_for( + f'invenio_records_rest.{agent}_item', + pid_value=my_pid, + _external=True + ) + record[name].update({'$ref': url}) + + +class ReroMefSerializer(JSONSerializer): + """Mixin serializing records as JSON.""" + + def serialize(self, pid, record, links_factory=None, **kwargs): + """Serialize a single record and persistent identifier. + + :param pid: Persistent identifier instance. + :param record: Record instance. + :param links_factory: Factory function for record links. 
@classmethod
def get_indexer_class(cls):
    """Return the indexer class configured for this record type.

    The class is read from ``RECORDS_REST_ENDPOINTS`` using the pid type of
    the provider; ``ReroIndexer`` is used whenever that lookup fails.

    :returns: Indexer class.
    """
    try:
        endpoints = current_app.config['RECORDS_REST_ENDPOINTS']
        indexer_path = endpoints[cls.provider.pid_type]['indexer_class']
        return obj_or_import_string(indexer_path)
    except Exception:
        # No usable indexer in the configuration: use the default one.
        return ReroIndexer


def reindex(self, forceindex=False):
    """Index the record with the configured indexer.

    :param forceindex: Index with ``version_type='external_gte'``.
    :returns: Result of the indexer's ``index`` call.
    """
    indexer_class = self.get_indexer_class()
    options = {'version_type': 'external_gte'} if forceindex else {}
    return indexer_class(**options).index(self)


def delete_from_index(self):
    """Delete the record from the index.

    A record missing in the index is logged as a warning, not raised.
    """
    indexer_class = self.get_indexer_class()
    try:
        indexer_class().delete(self)
    except NotFoundError:
        message = 'Can not delete from index {class_name}: {pid}'.format(
            class_name=self.__class__.__name__,
            pid=self.pid
        )
        current_app.logger.warning(message)
class EntityMefRecord(ReroMefRecord):
    """Base class for MEF records linking entity records.

    Subclasses set the minter, fetcher, provider, model and search classes
    and ``mef_type``, the configuration key under which the entity types
    are looked up.
    """

    minter = None
    fetcher = None
    provider = None
    model_cls = None
    viaf_cls = None
    search = None
    mef_type = ''

    @classmethod
    def get_mef_by_entity_pid(cls, entity_pid, entity_name, pid_only=False):
        """Get MEF record by entity pid value.

        An error is logged if more than one MEF record references the
        entity; the first hit is used in that case.

        :param entity_pid: Pid of entity.
        :param entity_name: Name of entity (pid_type).
        :param pid_only: Return the pid only instead of the record.
        :returns: MEF pid or MEF record, None if no MEF record is found.
        """
        key = f'{entity_name}.pid'
        search = cls.search() \
            .filter('term', **{key: entity_pid}) \
            .source(['pid'])
        if search.count() > 1:
            current_app.logger.error(
                f'MULTIPLE MEF FOUND FOR: {entity_name} {entity_pid}'
            )
        try:
            mef_pid = next(search.scan()).pid
        except StopIteration:
            return None
        if pid_only:
            return mef_pid
        return cls.get_record_by_pid(mef_pid)

    @classmethod
    def get_all_mef_pids(cls, entitiy):
        """Get the pids of an entity together with their MEF pids.

        :param entitiy: Entity name to search pids for.
        :returns: Generator of (entity pid, MEF pid) tuples.
        """
        key = f'{entitiy}.pid'
        results = cls.search() \
            .filter('exists', field=key) \
            .source(['pid', key]) \
            .scan()
        for result in results:
            result_dict = result.to_dict()
            yield result_dict.get(entitiy, {}).get('pid'), \
                result_dict.get('pid')

    @classmethod
    def get_mef_by_viaf_pid(cls, viaf_pid):
        """Get MEF record by VIAF pid value.

        :param viaf_pid: VIAF pid.
        :returns: Associated MEF record, None if no MEF record is found.
        """
        result = cls.search() \
            .filter('term', viaf_pid=viaf_pid) \
            .source(['pid']) \
            .scan()
        try:
            mef_pid = next(result).pid
        except StopIteration:
            return None
        return cls.get_record_by_pid(mef_pid)

    @classmethod
    def get_all_pids_without_agents_and_viaf(cls):
        """Get all pids for records without entity links and VIAF pid.

        This method used to be defined as ``get_all_pids_without_viaf`` and
        was shadowed by the second definition of that name; it has its own
        name now so both queries are reachable.

        :returns: Generator of MEF pids without entity links and VIAF pid.
        """
        query = cls.search() \
            .filter('bool', must_not=[Q('exists', field='viaf_pid')])
        for pid_type in current_app.config.get(cls.mef_type, []):
            query = query.filter(
                'bool', must_not=[Q('exists', field=pid_type)])
        for hit in query.source('pid').scan():
            yield hit.pid

    @classmethod
    def get_all_pids_without_viaf(cls):
        """Get all pids for records without VIAF pid.

        :returns: Generator of MEF pids without VIAF pid.
        """
        query = cls.search() \
            .filter('bool', must_not=[Q('exists', field='viaf_pid')])
        for pid_type in current_app.config.get(cls.mef_type, []):
            query = query.filter(
                'bool', should=[Q('exists', field=pid_type)])
        for hit in query.source('pid').scan():
            yield hit.pid

    @classmethod
    def get_pids_with_multiple_mef(cls, record_types=None, verbose=False):
        """Get entity pids with multiple MEF records.

        :param record_types: Record types (pid_types).
        :param verbose: Verbose.
        :returns: pids, multiple pids, missing pids (dicts keyed by
            record type).
        """
        pids = {}
        multiple_pids = {}
        missing_pids = {}
        for record_type in record_types or []:
            if verbose:
                click.echo(f'Calculating {record_type}:')
            # The single record type is the key; using the whole list of
            # record types here raised "unhashable type: 'list'".
            pids[record_type] = {}
            multiple_pids[record_type] = {}
            missing_pids[record_type] = []

            entity_class = get_entity_class(record_type)
            entity_name = entity_class.name
            search = cls.search().filter('exists', field=entity_name)
            progress = progressbar(
                items=search.scan(),
                length=search.count(),
                verbose=verbose
            )
            for hit in progress:
                data = hit.to_dict()
                mef_pid = data['pid']
                entity_pid = data[entity_name]['pid']
                mef_pids = pids[record_type].setdefault(entity_pid, [])
                mef_pids.append(mef_pid)
                if len(mef_pids) > 1:
                    multiple_pids[record_type][entity_pid] = mef_pids
            # NOTE(review): the pairing of this if/else was reconstructed
            # from code without indentation - confirm.
            if len(pids[record_type]) < entity_class.count():
                progress = progressbar(
                    items=entity_class.get_all_pids(),
                    length=entity_class.count(),
                    verbose=verbose
                )
                for pid in progress:
                    if not pids[record_type].pop(pid, None):
                        missing_pids[record_type].append(pid)
            else:
                pids[record_type] = {}
        return pids, multiple_pids, missing_pids

    @classmethod
    def get_all_missing_pids(cls, record_types=None, verbose=False):
        """Get all entity pids missing in MEF.

        :param record_types: Record types (pid_type).
        :param verbose: Verbose.
        :returns: missing pids, to much pids.
        """
        record_types = record_types or []
        missing_pids = {}
        to_much_pids = {}
        used_classes = {
            entity: entity_class
            for entity, entity_class in get_entity_classes().items()
            if entity in record_types
        }
        # Start with every entity pid marked as missing ...
        for entity, entity_class in used_classes.items():
            if verbose:
                click.echo(f'Get pids from {entity} ...')
            missing_pids[entity] = {}
            progress = progressbar(
                items=entity_class.get_all_pids(),
                length=entity_class.count(),
                verbose=verbose
            )
            for pid in progress:
                missing_pids[entity][pid] = 1
        # ... and remove those referenced by a MEF record.
        if verbose:
            click.echo('Get pids from MEF and calculate missing ...')
        query = cls.search().filter('match_all').source()
        progress = progressbar(
            items=query.scan(),
            length=query.count(),
            verbose=verbose
        )
        for hit in progress:
            pid = hit.pid
            data = hit.to_dict()
            for entity, entity_class in used_classes.items():
                entity_data = data.get(entity_class.name)
                if entity_data:
                    entity_pid = entity_data.get('pid')
                    # A pid which can not be removed is referenced by MEF
                    # more often than it exists as entity.
                    if not missing_pids[entity].pop(entity_pid, None):
                        to_much_pids.setdefault(pid, {})
                        to_much_pids[pid][entity] = entity_pid
        return missing_pids, to_much_pids

    def mark_as_deleted(self, dbcommit=False, reindex=False):
        """Mark record as deleted.

        :param dbcommit: Commit changes to DB.
        :param reindex: Reindex record.
        :returns: Modified record.
        """
        # datetime.now(pytz.utc) is the current UTC time; localizing the
        # naive local time as UTC is wrong on servers not running in UTC.
        self['deleted'] = datetime.now(pytz.utc).isoformat()
        self.update(data=self, dbcommit=dbcommit, reindex=reindex)
        return self

    @property
    def deleted(self):
        """Get record deleted value."""
        return self.get('deleted')

    @classmethod
    def create_deleted(cls, record, dbcommit=False, reindex=False):
        """Create a deleted MEF record for a record.

        :param record: Record to create the MEF record for.
        :param dbcommit: Commit changes to DB.
        :param reindex: Reindex record.
        :returns: Created record.
        """
        data = {
            record.name: {
                '$ref': cls.build_ref_string(record.pid, record.name)
            },
            # Current UTC time, see mark_as_deleted.
            'deleted': datetime.now(pytz.utc).isoformat()
        }
        return cls.create(data=data, dbcommit=dbcommit, reindex=reindex)

    @classmethod
    def update_indexes(cls):
        """Flush and refresh the ``mef`` index; errors are only logged."""
        try:
            current_search.flush_and_refresh(index='mef')
        except Exception as err:
            current_app.logger.error(f'ERROR flush and refresh: {err}')

    def delete_ref(self, record, dbcommit=False, reindex=False):
        """Delete $ref from record.

        :param record: Record to delete the $ref.
        :param dbcommit: Commit changes to DB.
        :param reindex: Reindex record.
        :returns: Modified record and executed action.
        """
        action = Action.DISCARD
        if self.pop(record.name, None):
            action = Action.UPDATE
            self.replace(data=self, dbcommit=dbcommit, reindex=reindex)
            if reindex:
                self.update_indexes()
        return self, action
def create_or_update_mef_viaf_record(self, dbcommit=False, reindex=False,
                                     online=False):
    """Create or update the MEF record of this concept.

    VIAF is not handled here: the returned VIAF record is always ``None``
    and the online flag always ``False``.

    :param dbcommit: Commit changes to DB.
    :param reindex: Reindex record.
    :param online: Try to get VIAF record online (not used by this method).
    :returns: MEF record, MEF action, VIAF record, VIAF online flag.
    """
    from .mef.api import ConceptMefRecord

    viaf_record = None
    got_online = False
    if self.deleted:
        # delete_from_mef looks up the MEF record itself, so nothing needs
        # to be prepared for a deleted concept.
        mef_record, mef_action = self.delete_from_mef(
            dbcommit=dbcommit,
            reindex=reindex
        )
    else:
        ref_string = ConceptMefRecord.build_ref_string(
            concept=self.concept,
            concept_pid=self.pid
        )
        mef_data = {self.concept: {'$ref': ref_string}}
        mef_record = ConceptMefRecord.get_mef_by_entity_pid(
            self.pid, self.name)
        if mef_record:
            mef_action = Action.UPDATE
            mef_record = mef_record.update(
                data=mef_data,
                dbcommit=dbcommit,
                reindex=reindex
            )
        else:
            mef_action = Action.CREATE
            mef_record = ConceptMefRecord.create(
                data=mef_data,
                dbcommit=dbcommit,
                reindex=reindex,
            )
    if reindex:
        ConceptMefRecord.update_indexes()
    return mef_record, mef_action, viaf_record, got_online
@click.group()
def concepts():
    """Concept management commands."""


@concepts.command()
@click.option('-t', '--pid_type', 'pid_type', multiple=True,
              default=['corero'])
@click.option('-k', '--enqueue', 'enqueue', is_flag=True, default=False,
              help="Enqueue record creation.")
@click.option('-o', '--online', 'online', is_flag=True, default=False)
@click.option('-v', '--verbose', 'verbose', is_flag=True, default=False)
@click.option('-p', '--progress', 'progress', is_flag=True, default=False)
@click.option('-w', '--wait', 'wait', is_flag=True, default=False)
@click.option('-m', '--missing', 'missing', is_flag=True, default=False)
@with_appcontext
def create_mef(pid_type, enqueue, online, verbose, progress, wait, missing):
    """Create MEF from concepts."""
    # The docstring doubles as CLI help text, details are kept in comments:
    # pid_type - concept types to process, each must be listed in the
    #            CONCEPTS configuration
    # enqueue  - create the records through the task queue
    # missing  - only process concept pids missing in MEF
    # wait     - wait for the tasks to finish and print the counts
    configured_concepts = current_app.config.get('CONCEPTS', [])
    missing_pids = {}
    if missing:
        # Only the missing pids are needed here.
        missing_pids, _ = ConceptMefRecord.get_all_missing_pids(
            pid_type, verbose=progress)
    for concept in pid_type:
        if concept not in configured_concepts:
            click.secho(
                f'Error create MEF from {concept}. Wrong concept!',
                fg='red'
            )
            continue
        click.secho(
            f'Create MEF from {concept}.',
            fg='green'
        )
        concept_class = get_entity_class(concept)
        # Counts before the run, compared with the counts after waiting.
        counts = {}
        counts[concept] = concept_class.count()
        counts['mef'] = ConceptMefRecord.count()
        if missing:
            progress_bar = progressbar(
                items=missing_pids[concept],
                length=len(missing_pids[concept]),
                verbose=progress
            )
        else:
            progress_bar = progressbar(
                items=concept_class.get_all_pids(),
                length=counts[concept],
                verbose=progress
            )
        for pid in progress_bar:
            if enqueue:
                task = task_create_mef_for_concept.delay(
                    pid=pid,
                    concept=concept,
                    dbcommit=True,
                    reindex=True,
                    online=online
                )
                if verbose:
                    click.echo(f'{concept} pid: {pid} task:{task}')
            else:
                msg = task_create_mef_for_concept(
                    pid=pid,
                    concept=concept,
                    dbcommit=True,
                    reindex=True,
                    online=online
                )
                if verbose:
                    click.echo(msg)
        # NOTE(review): the nesting of the count output inside "if wait"
        # was reconstructed from code without indentation - confirm.
        if wait:
            from ..cli import wait_empty_tasks
            wait_empty_tasks(delay=3, verbose=True)
            click.secho(
                f'COUNTS: mef: {counts["mef"]}|{ConceptMefRecord.count()}'
                f', {concept}: {counts[concept]}|{concept_class.count()}',
                fg='blue'
            )
class ConceptMefRecord(EntityMefRecord):
    """MEF record class for concepts."""

    minter = mef_id_minter
    fetcher = mef_id_fetcher
    provider = ConceptMefProvider
    model_cls = ConceptMefMetadata
    search = ConceptMefSearch
    mef_type = 'CONCEPTS'

    @classmethod
    def build_ref_string(cls, concept_pid, concept):
        """Build the URL of a concept in the concepts API.

        :param concept_pid: Pid of the concept.
        :param concept: Concept type used as path segment.
        :returns: ``RERO_MEF_APP_BASE_URL`` followed by
            ``/api/concepts/<concept>/<concept_pid>``.
        """
        with current_app.app_context():
            base_url = current_app.config.get("RERO_MEF_APP_BASE_URL")
            return f'{base_url}/api/concepts/{concept}/{concept_pid}'

    @classmethod
    def update_indexes(cls):
        """Flush and refresh the ``concepts_mef`` index.

        Errors are logged, not raised.
        """
        try:
            current_search.flush_and_refresh(index='concepts_mef')
        except Exception as err:
            message = 'ERROR flush and refresh: {err}'.format(err=err)
            current_app.logger.error(message)
+ +"""Persistent identifier fetchers.""" + + +from __future__ import absolute_import, print_function + +from functools import partial + +from .providers import ConceptMefProvider +from ...fetchers import id_fetcher + +mef_id_fetcher = partial(id_fetcher, provider=ConceptMefProvider) diff --git a/rero_mef/concepts/mef/jsonresolvers/__init__.py b/rero_mef/concepts/mef/jsonresolvers/__init__.py new file mode 100644 index 00000000..d751db1d --- /dev/null +++ b/rero_mef/concepts/mef/jsonresolvers/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""JSON resolvers.""" diff --git a/rero_mef/concepts/mef/jsonresolvers/mef_resolver.py b/rero_mef/concepts/mef/jsonresolvers/mef_resolver.py new file mode 100644 index 00000000..df7b5008 --- /dev/null +++ b/rero_mef/concepts/mef/jsonresolvers/mef_resolver.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
@jsonresolver.route('/api/concepts/mef/', host=get_host())
def resolve_mef(path):
    """Resolve a reference to a concept MEF record.

    :param path: Value handed over by the JSON resolver for this route.
    :returns: Result of ``resolve_record`` for ``ConceptMefRecord``.
    """
    # NOTE(review): the route rule shows no URL variable that could supply
    # ``path``; presumably a converter was lost in this view - confirm.
    record_class = ConceptMefRecord
    return resolve_record(path, record_class)
+ +"""JSON schemas.""" + +from __future__ import absolute_import, print_function diff --git a/rero_mef/concepts/mef/jsonschemas/concepts_mef/mef-concept-v0.0.1.json b/rero_mef/concepts/mef/jsonschemas/concepts_mef/mef-concept-v0.0.1.json new file mode 100644 index 00000000..3be82c02 --- /dev/null +++ b/rero_mef/concepts/mef/jsonschemas/concepts_mef/mef-concept-v0.0.1.json @@ -0,0 +1,31 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "Schema for MEF concepts", + "type": "object", + "required": [ + "$schema", + "pid" + ], + "additionalProperties": false, + "properties": { + "$schema": { + "title": "Schema", + "description": "Schema to MEF concept record.", + "type": "string", + "minLength": 7 + }, + "pid": { + "title": "Mef concept ID", + "type": "string", + "minLength": 1 + }, + "rero": { + "title": "RERO concept ID", + "type": "object" + }, + "deleted": { + "title": "Deletion date", + "type": "string" + } + } +} diff --git a/rero_mef/concepts/mef/listner.py b/rero_mef/concepts/mef/listner.py new file mode 100644 index 00000000..6fd409bf --- /dev/null +++ b/rero_mef/concepts/mef/listner.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
def enrich_concept_mef_data(sender, json=None, record=None, index=None,
                            doc_type=None, arguments=None, **kwargs):
    """Add the ``sources`` list to a concept MEF record before indexing.

    Only documents going to the concept MEF index are changed; ``sources``
    is ``['rero']`` if the document has a ``rero`` entry, else empty.

    :param sender: Sender of the signal (not used).
    :param json: The dumped record dictionary which can be modified.
    :param record: The record being indexed (not used).
    :param index: The index in which the record will be indexed.
    :param doc_type: The doc_type for the record (not used).
    :param arguments: Not used.
    """
    # Only the part before the first '-' of the index name is compared.
    index_name = index.split('-')[0]
    if index_name != ConceptMefSearch.Meta.index:
        return
    json['sources'] = ['rero'] if 'rero' in json else []
+ +"""Elasticsearch mappings.""" diff --git a/rero_mef/concepts/mef/mappings/v7/__init__.py b/rero_mef/concepts/mef/mappings/v7/__init__.py new file mode 100644 index 00000000..bfa39c0f --- /dev/null +++ b/rero_mef/concepts/mef/mappings/v7/__init__.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Elasticsearch mappings.""" diff --git a/rero_mef/concepts/mef/mappings/v7/concepts_mef/mef-concept-v0.0.1.json b/rero_mef/concepts/mef/mappings/v7/concepts_mef/mef-concept-v0.0.1.json new file mode 100644 index 00000000..09843761 --- /dev/null +++ b/rero_mef/concepts/mef/mappings/v7/concepts_mef/mef-concept-v0.0.1.json @@ -0,0 +1,154 @@ +{ + "settings": { + "number_of_shards": 8, + "number_of_replicas": 2, + "max_result_window": 100000000, + "analysis": { + "filter": { + "autocomplete_filter": { + "type": "edge_ngram", + "min_gram": 1, + "max_gram": 20 + }, + "my_ascii_folding": { + "type": "asciifolding", + "preserve_original": true + } + }, + "analyzer": { + "autocomplete": { + "type": "custom", + "tokenizer": "standard", + "filter": [ + "lowercase", + "autocomplete_filter", + "my_ascii_folding" + ] + } + } + } + }, + "mappings": { + "date_detection": false, + "numeric_detection": false, + "properties": { + "$schema": { + "type": "keyword" + }, + "pid": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "deleted": { + "type": "date" + 
}, + "rero": { + "type": "object", + "properties": { + "$schema": { + "type": "keyword" + }, + "pid": { + "type": "keyword" + }, + "bf:Agent": { + "type": "keyword" + }, + "md5": { + "type": "keyword" + }, + "language": { + "type": "keyword" + }, + "gender": { + "type": "keyword" + }, + "identifier": { + "type": "keyword" + }, + "date_of_birth": { + "type": "text" + }, + "date_of_death": { + "type": "text" + }, + "biographical_information": { + "type": "text" + }, + "preferred_name": { + "type": "text" + }, + "authorized_access_point": { + "type": "text", + "copy_to": "autocomplete_name" + }, + "qualifier": { + "type": "keyword" + }, + "numeration": { + "type": "keyword" + }, + "variant_name": { + "type": "text" + }, + "date_of_establishment": { + "type": "text" + }, + "date_of_termination": { + "type": "text" + }, + "conference": { + "type": "keyword" + }, + "variant_access_point": { + "type": "text", + "copy_to": "autocomplete_name" + }, + "parallel_access_point": { + "type": "text", + "copy_to": "autocomplete_name" + }, + "country_associated": { + "type": "keyword" + }, + "deleted": { + "type": "date" + }, + "relation_pid": { + "type": "object", + "properties": { + "value": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" + } + } + }, + "sources": { + "type": "keyword" + }, + "autocomplete_name": { + "type": "text", + "analyzer": "autocomplete", + "search_analyzer": "standard" + }, + "_created": { + "type": "date" + }, + "_updated": { + "type": "date" + } + } + } +} diff --git a/rero_mef/concepts/mef/minters.py b/rero_mef/concepts/mef/minters.py new file mode 100644 index 00000000..0e2f788f --- /dev/null +++ b/rero_mef/concepts/mef/minters.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as 
published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Identifier minters.""" + +from __future__ import absolute_import, print_function, unicode_literals + +from .providers import ConceptMefProvider + + +def mef_id_minter(record_uuid, data, provider=ConceptMefProvider, + pid_key='pid', object_type='rec'): + """RERO MEF concept id minter.""" + assert pid_key not in data + provider = provider.create( + object_type=object_type, + object_uuid=record_uuid + ) + pid = provider.pid + data[pid_key] = pid.pid_value + + return pid diff --git a/rero_mef/concepts/mef/models.py b/rero_mef/concepts/mef/models.py new file mode 100644 index 00000000..26456cd6 --- /dev/null +++ b/rero_mef/concepts/mef/models.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +"""Define relation between records and buckets.""" + +from __future__ import absolute_import + +from invenio_db import db +from invenio_pidstore.models import RecordIdentifier +from invenio_records.models import RecordMetadataBase + + +class ConceptMefIdentifier(RecordIdentifier): + """Sequence generator for concept MEF identifiers.""" + + __tablename__ = 'concept_mef_id' + __mapper_args__ = {'concrete': True} + + recid = db.Column( + db.BigInteger().with_variant(db.Integer, 'sqlite'), + primary_key=True, + autoincrement=True, + ) + + +class ConceptMefMetadata(db.Model, RecordMetadataBase): + """Represent a record metadata.""" + + __tablename__ = 'concept_mef_metadata' diff --git a/rero_mef/concepts/mef/providers.py b/rero_mef/concepts/mef/providers.py new file mode 100644 index 00000000..fc5042ba --- /dev/null +++ b/rero_mef/concepts/mef/providers.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +"""Identifier provider.""" + +from __future__ import absolute_import, print_function + +from invenio_pidstore.models import PIDStatus +from invenio_pidstore.providers.base import BaseProvider + +from .models import ConceptMefIdentifier + + +class ConceptMefProvider(BaseProvider): + """Mef identifier provider.""" + + pid_type = 'comef' + """Type of persistent identifier.""" + + pid_identifier = ConceptMefIdentifier.__tablename__ + """Identifier for table name""" + + pid_provider = None + """Provider name. + + The provider name is not recorded in the PID since the provider does not + provide any additional features besides creation of concept MEF ids. + """ + + default_status = PIDStatus.REGISTERED + """Mef IDs are by default registered immediately.""" + + @classmethod + def create(cls, object_type=None, object_uuid=None, **kwargs): + """Create a new concept MEF identifier.""" + assert 'pid_value' not in kwargs + kwargs['pid_value'] = str(ConceptMefIdentifier.next()) + kwargs.setdefault('status', cls.default_status) + if object_type and object_uuid: + kwargs['status'] = PIDStatus.REGISTERED + return super().create( + object_type=object_type, object_uuid=object_uuid, **kwargs) diff --git a/rero_mef/concepts/mef/serializers.py b/rero_mef/concepts/mef/serializers.py new file mode 100644 index 00000000..9f792f05 --- /dev/null +++ b/rero_mef/concepts/mef/serializers.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Record serialization.""" + +from flask import request, url_for +from invenio_records_rest.links import default_links_factory_with_additional +from invenio_records_rest.schemas import RecordSchemaJSONV1 +from invenio_records_rest.serializers.json import JSONSerializer +from invenio_records_rest.serializers.response import record_responsify + +from ...utils import get_entity_classes + + +def add_links(pid, record): + """Add VIAF links to MEF.""" + links = {} + # viaf_pid = record.get('viaf_pid') + # if viaf_pid: + # links['viaf'] = '{scheme}://{host}/api/agents/viaf/' \ + # + str(viaf_pid) + # links['viaf.org'] = 'http://www.viaf.org/viaf/' + str(viaf_pid) + + link_factory = default_links_factory_with_additional(links) + return link_factory(pid) + + +# Nice to have direct working links in test server! +def local_link(agent, name, record): + """Change links to actual links.""" + if name in record: + ref = record[name].get('$ref') + if ref: + my_pid = ref.split('/')[-1] + url = url_for( + f'invenio_records_rest.{agent}_item', + pid_value=my_pid, + _external=True + ) + record[name].update({'$ref': url}) + + +class ReroMefSerializer(JSONSerializer): + """Mixin serializing records as JSON.""" + + def serialize(self, pid, record, links_factory=None, **kwargs): + """Serialize a single record and persistent identifier. + + :param pid: Persistent identifier instance. + :param record: Record instance. + :param links_factory: Factory function for record links. 
+ """ + if request and request.args.get('resolve'): + record = record.replace_refs() + if request and request.args.get('sources'): + sources = [] + # TODO: add the list of sources into the current_app.config + if 'rero' in record: + sources.append('rero') + record['sources'] = sources + + concept_classes = get_entity_classes() + for concept, concept_classe in concept_classes.items(): + if concept in ['corero']: + local_link(concept, concept_classe.name, record) + + return super(ReroMefSerializer, self).serialize( + pid, record, links_factory=add_links, **kwargs + ) + + +json_v1 = ReroMefSerializer(RecordSchemaJSONV1) +"""JSON v1 serializer.""" + +json_v1_concept_mef_response = record_responsify( + json_v1, 'application/rero+json') diff --git a/rero_mef/concepts/rero/api.py b/rero_mef/concepts/rero/api.py index e1308ce6..60d91c76 100644 --- a/rero_mef/concepts/rero/api.py +++ b/rero_mef/concepts/rero/api.py @@ -51,20 +51,6 @@ class ConceptReroRecord(ConceptRecord): model_cls = ConceptReroMetadata name = 'rero' - # @classmethod - # def get_online_record(cls, id, verbose=False): - # """Get online record.""" - # from .tasks import concepts_get_record - # return concepts_get_record(id=id, verbose=verbose) - - def reindex(self, forceindex=False): - """Reindex record.""" - if forceindex: - result = ConceptReroIndexer(version_type='external_gte').index(self) - else: - result = ConceptReroIndexer().index(self) - return result - class ConceptReroIndexer(ConceptIndexer): """ConceptsIndexer.""" diff --git a/rero_mef/concepts/serializers.py b/rero_mef/concepts/serializers.py new file mode 100644 index 00000000..1b062ce3 --- /dev/null +++ b/rero_mef/concepts/serializers.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Record serialization.""" + +from flask import request +from invenio_records_rest.links import default_links_factory_with_additional +from invenio_records_rest.schemas import RecordSchemaJSONV1 +from invenio_records_rest.serializers.json import JSONSerializer +from invenio_records_rest.serializers.response import record_responsify + +from .mef.api import ConceptMefRecord + + +def add_links(pid, record): + """Add MEF link to concepts.""" + links = {} + mef_pid = ConceptMefRecord.get_mef_by_entity_pid( + record.pid, record.name, pid_only=True) + if mef_pid: + links['mef'] = '{scheme}://{host}/api/concepts/mef/' + str(mef_pid) + + link_factory = default_links_factory_with_additional(links) + return link_factory(pid) + + +class ReroMefSerializer(JSONSerializer): + """Mixin serializing records as JSON.""" + + def serialize(self, pid, record, links_factory=None, **kwargs): + """Serialize a single record and persistent identifier. + + :param pid: Persistent identifier instance. + :param record: Record instance. + :param links_factory: Factory function for record links. 
+ """ + if request and request.args.get('resolve'): + record = record.replace_refs() + + return super(ReroMefSerializer, self).serialize( + pid, record, links_factory=add_links, **kwargs + ) + + +json_v1 = ReroMefSerializer(RecordSchemaJSONV1) +"""JSON v1 serializer.""" + +json_v1_concept_response = record_responsify(json_v1, 'application/rero+json') diff --git a/rero_mef/concepts/tasks.py b/rero_mef/concepts/tasks.py new file mode 100644 index 00000000..8aadde70 --- /dev/null +++ b/rero_mef/concepts/tasks.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# +# RERO MEF +# Copyright (C) 2020 RERO +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +"""Tasks used by RERO-MEF.""" + +from celery import shared_task + +from ..utils import get_entity_class + + +@shared_task +def task_create_mef_for_concept(pid, concept, dbcommit=True, reindex=True, + online=False): + """Create MEF from concept task. 
+ + :param pid: pid for concept to use + :param concept: concept + :param dbcommit: db commit or not + :param reindex: reindex or not + :param online: get missing records from internet + :returns: no return + """ + concept_class = get_entity_class(concept) + concept_record = concept_class.get_record_by_pid(pid) + if concept_record: + mef_record, mef_action, viaf_record, online = \ + concept_record.create_or_update_mef_viaf_record( + dbcommit=dbcommit, + reindex=reindex, + online=online + ) + mef_pid = 'Non' + if mef_record: + mef_pid = mef_record.pid + msg = f'Create MEF from {concept} pid: {pid} ' \ + f'| mef: {mef_pid} {mef_action.value}' + return msg + else: + return f'Not found concept {concept}:{pid}' diff --git a/rero_mef/config.py b/rero_mef/config.py index daa2148c..a303822c 100644 --- a/rero_mef/config.py +++ b/rero_mef/config.py @@ -34,6 +34,7 @@ from .agents.mef.models import AgentMefIdentifier from .agents.rero.models import AgentReroIdentifier from .agents.viaf.models import ViafIdentifier +from .concepts.mef.models import ConceptMefIdentifier from .concepts.rero.models import ConceptReroIdentifier from .filter import exists_filter from .marctojson.do_gnd_agent import Transformation as AgentGndTransformation @@ -190,19 +191,20 @@ BULK_CHUNK_COUNT = 100000 TRANSFORMATION = { - 'corero': ConceptReroTransformation, 'aggnd': AgentGndTransformation, 'aidref': AgentIdrefTransformation, - 'agrero': AgentReroTransformation + 'agrero': AgentReroTransformation, + 'corero': ConceptReroTransformation } IDENTIFIERS = { 'mef': AgentMefIdentifier, 'viaf': ViafIdentifier, - 'corero': ConceptReroIdentifier, 'aggnd': AgentGndIdentifier, 'aidref': AgentIdrefIdentifier, - 'agrero': AgentReroIdentifier + 'agrero': AgentReroIdentifier, + 'comef': ConceptMefIdentifier, + 'corero': ConceptReroIdentifier } AGENTS = [ @@ -212,7 +214,7 @@ ] CONCEPTS = [ - 'corero', + 'corero' ] RERO_MEF_APP_BASE_URL = 'https://mef.rero.ch' @@ -236,8 +238,8 @@ search_index='mef', 
search_type=None, record_serializers={ - 'application/json': ('rero_mef.serializers' - ':json_v1_response'), + 'application/json': ('rero_mef.agents.mef.serializers' + ':json_v1_agent_mef_response'), }, search_serializers={ 'application/json': ('invenio_records_rest.serializers' @@ -261,8 +263,8 @@ search_index='viaf', search_type=None, record_serializers={ - 'application/json': ('rero_mef.serializers' - ':json_v1_response'), + 'application/json': ('rero_mef.agents.viaf.serializers' + ':json_v1_agent_viaf_response'), }, search_serializers={ 'application/json': ('invenio_records_rest.serializers' @@ -286,8 +288,8 @@ search_index='agents_gnd', search_type=None, record_serializers={ - 'application/json': ('rero_mef.serializers' - ':json_v1_response'), + 'application/json': ('rero_mef.agents.serializers' + ':json_v1_agent_response'), }, search_serializers={ 'application/json': ('invenio_records_rest.serializers' @@ -311,8 +313,8 @@ search_index='agents_idref', search_type=None, record_serializers={ - 'application/json': ('rero_mef.serializers' - ':json_v1_response'), + 'application/json': ('rero_mef.agents.serializers' + ':json_v1_agent_response'), }, search_serializers={ 'application/json': ('invenio_records_rest.serializers' @@ -337,8 +339,8 @@ search_index='agents_rero', search_type=None, record_serializers={ - 'application/json': ('rero_mef.serializers' - ':json_v1_response'), + 'application/json': ('rero_mef.agents.serializers' + ':json_v1_agent_response'), }, search_serializers={ 'application/json': ('invenio_records_rest.serializers' @@ -352,6 +354,31 @@ max_result_window=MAX_RESULT_WINDOW, error_handlers=dict(), ), + comef=dict( + pid_type='comef', + pid_minter='concept_mef_id', + pid_fetcher='concept_mef_id', + search_class="rero_mef.concepts.mef.api:ConceptMefSearch", + indexer_class="rero_mef.concepts.mef.api:ConceptMefIndexer", + record_class="rero_mef.concepts.mef.api:ConceptMefRecord", + search_index='concepts_mef', + search_type=None, + 
record_serializers={ + 'application/json': ('rero_mef.concepts.mef.serializers' + ':json_v1_concept_mef_response'), + }, + search_serializers={ + 'application/json': ('invenio_records_rest.serializers' + ':json_v1_search'), + }, + search_factory_imp='rero_mef.query:and_search_factory', + list_route='/concepts/mef/', + item_route=('/concepts/mef/'), + default_media_type='application/json', + max_result_window=MAX_RESULT_WINDOW, + error_handlers=dict(), + ), corero=dict( pid_type='corero', pid_minter='concept_rero_id', @@ -362,8 +389,8 @@ search_index='concepts_rero', search_type=None, record_serializers={ - 'application/json': ('rero_mef.serializers' - ':json_v1_response'), + 'application/json': ('rero_mef.concepts.serializers' + ':json_v1_concept_response'), }, search_serializers={ 'application/json': ('invenio_records_rest.serializers' @@ -380,8 +407,19 @@ ) ) +RERO_AGENTS = [ + 'aggnd', + 'aidref', + 'agrero' +] + +RERO_CONCEPTS = [ + 'corero' +] + RECORDS_JSON_SCHEMA = { 'corero': '/concepts_rero/rero-concept-v0.0.1.json', + 'comef': '/concepts_mef/mef-concept-v0.0.1.json', 'aggnd': '/agents_gnd/gnd-agent-v0.0.1.json', 'agrero': '/agents_rero/rero-agent-v0.0.1.json', 'aidref': '/agents_idref/idref-agent-v0.0.1.json', @@ -470,6 +508,22 @@ 'deleted': exists_filter('deleted'), } ), + concepts_mef=dict( + aggs=dict( + sources=dict( + terms=dict(field='sources', size=30) + ), + deleted=dict( + filter=dict(exists=dict(field="deleted")) + ), + ), + filters={ + 'agent_type': terms_filter('type'), + 'agent_sources': terms_filter('sources'), + 'deleted': exists_filter('deleted'), + 'rero_double': terms_filter('rero.pid') + } + ), concepts_rero=dict( aggs=dict( classification=dict( diff --git a/rero_mef/ext.py b/rero_mef/ext.py index 220190f6..ac671e49 100644 --- a/rero_mef/ext.py +++ b/rero_mef/ext.py @@ -21,7 +21,8 @@ from invenio_indexer.signals import before_record_index -from .agents.mef.listner import enrich_mef_data +from .agents.mef.listner import 
enrich_agent_mef_data +from .concepts.mef.listner import enrich_concept_mef_data class REROMEFAPP(object): @@ -39,4 +40,5 @@ def init_app(self, app): def register_signals(self, app): """Register signals.""" - before_record_index.connect(enrich_mef_data, sender=app) + before_record_index.connect(enrich_agent_mef_data, sender=app) + before_record_index.connect(enrich_concept_mef_data, sender=app) diff --git a/rero_mef/utils.py b/rero_mef/utils.py index 45461512..8f969b7c 100644 --- a/rero_mef/utils.py +++ b/rero_mef/utils.py @@ -625,15 +625,6 @@ def pidstore_csv_line(agent, agent_pid, record_uuid, date): return pidstore_line + os.linesep -def add_agent_to_json(mef_record, agent, agent_pid): - """Add agent ref to MEF record.""" - from .agents.mef.api import AgentMefRecord - ref_string = AgentMefRecord.build_ref_string( - agent=agent, agent_pid=agent_pid - ) - mef_record[agent] = {'$ref': ref_string} - - def raw_connection(): """Return a raw connection to the database.""" with current_app.app_context(): @@ -989,27 +980,27 @@ def get_entity_classes(without_mef_viaf=True): return agents -def get_endpoint_class(agent, class_name): - """Get agent class from config.""" +def get_endpoint_class(entity, class_name): + """Get entity class from config.""" endpoints = current_app.config.get('RECORDS_REST_ENDPOINTS', {}) - endpoint = endpoints.get(agent, {}) + endpoint = endpoints.get(entity, {}) endpoint_class = obj_or_import_string(endpoint.get(class_name)) return endpoint_class -def get_entity_class(agent): - """Get agent record class from config.""" - return get_endpoint_class(agent=agent, class_name='record_class') +def get_entity_class(entity): + """Get entity record class from config.""" + return get_endpoint_class(entity=entity, class_name='record_class') -def get_entity_search_class(agent): - """Get agent search class from config.""" - return get_endpoint_class(agent=agent, class_name='search_class') +def get_entity_search_class(entity): + """Get entity search class from 
config.""" + return get_endpoint_class(entity=entity, class_name='search_class') -def get_entity_indexer_class(agent): - """Get agent indexer class from config.""" - return get_endpoint_class(agent=agent, class_name='indexer_class') +def get_entity_indexer_class(entity): + """Get entity indexer class from config.""" + return get_endpoint_class(entity=entity, class_name='indexer_class') def write_link_json( diff --git a/scripts/setup b/scripts/setup index 090aebc5..11feb069 100755 --- a/scripts/setup +++ b/scripts/setup @@ -65,7 +65,7 @@ function db_init () { # Purge celery celery -A rero_mef.celery purge -f # Clean redis - invenio utils flush_cache --yes-i-know || true + invenio utils flush-cache --yes-i-know || true invenio db destroy --yes-i-know || true invenio db init create invenio index destroy --force --yes-i-know || true @@ -113,6 +113,8 @@ function sample_load () { info_msg "Importing ${ENTITY} concept records:" dojson -i ./data/${ENTITY}.json | invenio fixtures create-or-update ${ENTITY} done + info_msg "Creating MEF records:" + invenio concepts create-mef oai_init } @@ -129,7 +131,7 @@ function deploy () { oai_init invenio utils runindex --raise-on-error --concurrency 8 --delayed invenio utils runindex --raise-on-error - invenio utils reindex-missing -v + # invenio utils reindex-missing -v invenio agents-counts } diff --git a/setup.py b/setup.py index d191ae4e..75446525 100644 --- a/setup.py +++ b/setup.py @@ -83,6 +83,7 @@ 'agents_gnd = rero_mef.agents.gnd.models', 'agents_idref = rero_mef.agents.idref.models', 'agents_rero = rero_mef.agents.rero.models', + 'concepts_mef = rero_mef.concepts.mef.models', 'concepts_rero = rero_mef.concepts.rero.models', ], 'invenio_pidstore.minters': [ @@ -91,7 +92,8 @@ 'agent_gnd_id = rero_mef.agents.gnd.minters:gnd_id_minter', 'agent_idref_id = rero_mef.agents.idref.minters:idref_id_minter', 'agent_rero_id = rero_mef.agents.rero.minters:rero_id_minter', - 'concept_rero_id = rero_mef.concepts.rero.minters:rero_id_minter' 
+ 'concept_mef_id = rero_mef.concepts.mef.minters:mef_id_minter', + 'concept_rero_id = rero_mef.concepts.rero.minters:rero_id_minter', ], 'invenio_pidstore.fetchers': [ 'mef_id = rero_mef.agents.mef.fetchers:mef_id_fetcher', @@ -99,7 +101,8 @@ 'agent_gnd_id = rero_mef.agents.gnd.fetchers:gnd_id_fetcher', 'agent_idref_id = rero_mef.agents.idref.fetchers:idref_id_fetcher', 'agent_rero_id = rero_mef.agents.rero.fetchers:rero_id_fetcher', - 'concept_rero_id = rero_mef.concepts.rero.fetchers:rero_id_fetcher' + 'concept_mef_id = rero_mef.concepts.mef.fetchers:mef_id_fetcher', + 'concept_rero_id = rero_mef.concepts.rero.fetchers:rero_id_fetcher', ], 'invenio_jsonschemas.schemas': [ 'common = rero_mef.jsonschemas', @@ -108,7 +111,8 @@ 'agents_gnd = rero_mef.agents.gnd.jsonschemas', 'agents_idref = rero_mef.agents.idref.jsonschemas', 'agents_rero = rero_mef.agents.rero.jsonschemas', - 'cocepts_rero = rero_mef.concepts.rero.jsonschemas' + 'cocepts_mef = rero_mef.concepts.mef.jsonschemas', + 'cocepts_rero = rero_mef.concepts.rero.jsonschemas', ], 'invenio_search.mappings': [ 'mef = rero_mef.agents.mef.mappings', @@ -116,13 +120,16 @@ 'agents_gnd = rero_mef.agents.gnd.mappings', 'agents_idref = rero_mef.agents.idref.mappings', 'agents_rero = rero_mef.agents.rero.mappings', - 'concepts_rero = rero_mef.concepts.rero.mappings' + 'concepts_mef = rero_mef.concepts.mef.mappings', + 'concepts_rero = rero_mef.concepts.rero.mappings', ], 'invenio_records.jsonresolver': [ 'mef = rero_mef.agents.mef.jsonresolvers.mef_resolver', + 'viaf = rero_mef.agents.viaf.jsonresolvers.viaf_resolver', 'agents_gnd = rero_mef.agents.gnd.jsonresolvers.gnd_resolver', 'agents_idref = rero_mef.agents.idref.jsonresolvers.idref_resolver', 'agents_rero = rero_mef.agents.rero.jsonresolvers.rero_resolver', + 'concepts_mef = rero_mef.concepts.mef.jsonresolvers.mef_resolver', 'concepts_rero = rero_mef.concepts.rero.jsonresolvers.rero_resolver', ], 'invenio_base.api_blueprints': [ @@ -130,20 +137,21 @@ 
'api_agents_viaf = rero_mef.agents.viaf.views:api_blueprint', 'api_agents_gnd = rero_mef.agents.gnd.views:api_blueprint', 'api_agents_idref = rero_mef.agents.idref.views:api_blueprint', - 'api_agents_rero = rero_mef.agents.rero.views:api_blueprint' + 'api_agents_rero = rero_mef.agents.rero.views:api_blueprint', ], 'flask.commands': [ 'fixtures = rero_mef.cli:fixtures', 'utils = rero_mef.cli:utils', 'celery = rero_mef.cli:celery', 'agents = rero_mef.agents.cli:agents', + 'concepts = rero_mef.concepts.cli:concepts', ], 'dojson.cli.rule': [ 'tomarc = dojson.contrib.to_marc21:to_marc21', - 'idrefjson = rero_ebooks.dojson.from_unimarc.model:from_unimarc' + 'idrefjson = rero_ebooks.dojson.from_unimarc.model:from_unimarc', ], 'dojson.cli.dump': [ - 'pjson = rero_mef.dojson.utils:dump' + 'pjson = rero_mef.dojson.utils:dump', ] }, classifiers=[ @@ -154,7 +162,7 @@ 'Programming Language :: Python', 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Development Status :: 3 - Alpha', ], ) diff --git a/tests/agents/test_agents_api.py b/tests/agents/test_agents_api.py index c5762d2a..f9880638 100644 --- a/tests/agents/test_agents_api.py +++ b/tests/agents/test_agents_api.py @@ -50,7 +50,7 @@ def test_create_agent_record_with_viaf_links( assert m_record == { '$schema': 'https://mef.rero.ch/schemas/mef/mef-v0.0.1.json', - 'gnd': {'$ref': 'https://mef.rero.ch/api/gnd/12391664X'}, + 'gnd': {'$ref': 'https://mef.rero.ch/api/agents/gnd/12391664X'}, 'pid': '1', 'viaf_pid': '66739143' } @@ -68,9 +68,9 @@ def test_create_agent_record_with_viaf_links( assert m_record == { '$schema': 'https://mef.rero.ch/schemas/mef/mef-v0.0.1.json', - 'gnd': {'$ref': 'https://mef.rero.ch/api/gnd/12391664X'}, + 'gnd': {'$ref': 'https://mef.rero.ch/api/agents/gnd/12391664X'}, 'pid': '1', - 'rero': {'$ref': 'https://mef.rero.ch/api/rero/A023655346'}, + 'rero': {'$ref': 
'https://mef.rero.ch/api/agents/rero/A023655346'}, 'viaf_pid': '66739143' } @@ -87,10 +87,10 @@ def test_create_agent_record_with_viaf_links( assert m_record == { '$schema': 'https://mef.rero.ch/schemas/mef/mef-v0.0.1.json', - 'gnd': {'$ref': 'https://mef.rero.ch/api/gnd/12391664X'}, - 'idref': {'$ref': 'https://mef.rero.ch/api/idref/069774331'}, + 'gnd': {'$ref': 'https://mef.rero.ch/api/agents/gnd/12391664X'}, + 'idref': {'$ref': 'https://mef.rero.ch/api/agents/idref/069774331'}, 'pid': '1', - 'rero': {'$ref': 'https://mef.rero.ch/api/rero/A023655346'}, + 'rero': {'$ref': 'https://mef.rero.ch/api/agents/rero/A023655346'}, 'viaf_pid': '66739143' }