From 63066c11a4b2ec71993e26d476a1953f48ad4576 Mon Sep 17 00:00:00 2001 From: Peter Weber Date: Thu, 9 Jun 2022 23:04:44 +0200 Subject: [PATCH] MEF: fix deleted agents * Deleted entities could be accessed by MEF. * Adds `with_deleted` to search. * Update dependencies. * Improved code from Sourcery suggestions. * Fixes missing `$schema` in updated records. Co-Authored-by: Peter Weber --- rero_mef/agents/api.py | 53 ++++++++++++++----------------- rero_mef/agents/mef/api.py | 6 +--- rero_mef/agents/tasks.py | 4 +-- rero_mef/api.py | 34 ++++++++++---------- rero_mef/api_mef.py | 4 ++- rero_mef/config.py | 9 +++++- rero_mef/filter.py | 11 +++++++ rero_mef/query.py | 9 ++++-- rero_mef/tasks.py | 7 ++-- rero_mef/utils.py | 16 ++++------ tests/api/test_view_agents_mef.py | 5 +-- tests/unit/test_api.py | 4 +-- 12 files changed, 84 insertions(+), 78 deletions(-) diff --git a/rero_mef/agents/api.py b/rero_mef/agents/api.py index 2a038e7f..900a99a7 100644 --- a/rero_mef/agents/api.py +++ b/rero_mef/agents/api.py @@ -16,6 +16,9 @@ # along with this program. If not, see . """API for manipulating records.""" + +import contextlib + import click from flask import current_app from invenio_pidstore.models import PersistentIdentifier, PIDStatus @@ -161,7 +164,7 @@ def create_or_update_agent_mef_viaf(cls, data, id_=None, delete_pid=True, from rero_mef.agents.mef.api import AgentMefRecord from rero_mef.agents.viaf.api import AgentViafRecord - try: + with contextlib.suppress(Exception): persistent_id = PersistentIdentifier.query.filter_by( pid_type=cls.provider.pid_type, pid_value=data.get('pid') @@ -169,9 +172,6 @@ def create_or_update_agent_mef_viaf(cls, data, id_=None, delete_pid=True, if persistent_id.status == PIDStatus.DELETED: return None, Action.ALREADYDELETED, None, Action.DISCARD, \ None, False - except Exception: - pass - record, action = cls.create_or_update( data=data, id_=id_, @@ -182,36 +182,29 @@ def create_or_update_agent_mef_viaf(cls, data, id_=None, delete_pid=True, ) if action == Action.ERROR: return None, action, None, Action.ERROR, None, False + if record.deleted: + mef_record, mef_action = record.delete_from_mef( + dbcommit=dbcommit, + reindex=reindex, + verbose=verbose + ) + viaf_record = None + action = Action.DELETE + online = False + elif action == Action.UPTODATE: + mef_record = AgentMefRecord.get_mef_by_entity_pid( + record.pid, record.name) + mef_action = Action.UPTODATE + viaf_record, online = AgentViafRecord.get_viaf_by_agent( + record) else: - if record.deleted: - mef_record, mef_action = record.delete_from_mef( + mef_record, mef_action, viaf_record, online = \ + record.create_or_update_mef_viaf_record( dbcommit=dbcommit, reindex=reindex, - verbose=verbose + online=online ) - # record.delete( - # dbcommit=dbcommit, - # delindex=True, - # ) - # record = None - action = Action.DELETE - viaf_record = None - online = False - else: - if action == Action.UPTODATE: - mef_record = AgentMefRecord.get_mef_by_entity_pid( - record.pid, record.name) - mef_action = Action.UPTODATE - viaf_record, online = AgentViafRecord.get_viaf_by_agent( - record) - else: - mef_record, mef_action, viaf_record, online = \ - record.create_or_update_mef_viaf_record( - dbcommit=dbcommit, - reindex=reindex, - online=online - ) - return record, action, mef_record, mef_action, viaf_record, online + return record, action, mef_record, mef_action, viaf_record, online @classmethod def get_online_record(cls, id, verbose=False): diff --git a/rero_mef/agents/mef/api.py b/rero_mef/agents/mef/api.py index 98b7f56d..b47b80bd 100644 --- a/rero_mef/agents/mef/api.py +++ b/rero_mef/agents/mef/api.py @@ -123,11 +123,7 @@ def replace_refs(self): sources = [] for agent in ['rero', 'gnd', 'idref']: if agent_data := data.get(agent): - if agent_data.get('deleted'): - data.pop(agent) - current_app.logger.info( - f'MEF replace refs {data.get("pid")} {agent} deleted') - elif agent_data.get('status'): + if agent_data.get('status'): data.pop(agent) current_app.logger.error( f'MEF replace refs {data.get("pid")} {agent}' diff --git a/rero_mef/agents/tasks.py b/rero_mef/agents/tasks.py index 34d38003..58383d1c 100644 --- a/rero_mef/agents/tasks.py +++ b/rero_mef/agents/tasks.py @@ -72,8 +72,8 @@ def task_create_mef_for_agent(pid, agent, dbcommit=True, reindex=True, reindex=reindex, online=online ) - mef_pid = mef_record.pid if mef_record else 'Non' - viaf_pid = viaf_record.pid if viaf_record else 'Non' + mef_pid = mef_record.pid if mef_record else 'None' + viaf_pid = viaf_record.pid if viaf_record else 'None' actions = f'mef: {mef_pid} {mef_action.value} ' \ f'viaf: {viaf_pid} {online}' return f'Create MEF from {agent} pid: {pid} | {actions}' diff --git a/rero_mef/api.py b/rero_mef/api.py index d5ee9913..4c6d610d 100644 --- a/rero_mef/api.py +++ b/rero_mef/api.py @@ -99,8 +99,7 @@ def create(cls, data, id_=None, delete_pid=False, dbcommit=False, """Create a new agent record.""" assert cls.minter if '$schema' not in data: - type = cls.provider.pid_type - data = add_schema(data, type) + data = add_schema(data, cls.provider.pid_type) if delete_pid: data.pop('pid', None) if not id_: @@ -130,7 +129,6 @@ def update_test_md5(self, data, dbcommit=False, reindex=False): if data.get('md5', 'data') == self.get('md5', 'agent'): # record has no changes return self, Action.UPTODATE - data = add_schema(data, self.name) return_record = self.replace( data=data, dbcommit=dbcommit, reindex=reindex) return return_record, Action.UPDATE @@ -145,6 +143,7 @@ def create_or_update(cls, data, id_=None, delete_pid=True, dbcommit=False, pid = data.get('pid') if agent_record := cls.get_record_by_pid(pid): # record exist + data = add_schema(data, agent_record.provider.pid_type) if test_md5: return_record, agent_action = agent_record.update_test_md5( data=data, @@ -185,9 +184,8 @@ def get_record_by_pid(cls, pid, with_deleted=False): pid ) get_record_ok = True - return super().get_record( - persistent_identifier.object_uuid, - with_deleted=with_deleted) + return super().get_record(persistent_identifier.object_uuid, + with_deleted=with_deleted) except PIDDoesNotExistError: return None @@ -358,8 +356,9 @@ def get_indexer_class(cls): def reindex(self, forceindex=False): """Reindex record.""" indexer = self.get_indexer_class() - return indexer(version_type='external_gte').index(self) \ - if forceindex else indexer().index(self) + if forceindex: + return indexer(version_type='external_gte').index(self) + return indexer().index(self) def delete_from_index(self): """Delete record from index.""" @@ -408,9 +407,9 @@ def _get_indexer_class(self, payload): """Get the record class from payload.""" # take the first defined doc type for finding the class pid_type = payload.get('doc_type', 'rec') - return obj_or_import_string(current_app.config.get( - 'RECORDS_REST_ENDPOINTS' - ).get(pid_type).get('indexer_class', RecordIndexer)) + endpoints = current_app.config.get('RECORDS_REST_ENDPOINTS') + return obj_or_import_string( + endpoints.get(pid_type).get('indexer_class', RecordIndexer)) def process_bulk_queue(self, es_bulk_kwargs=None, stats_only=True): """Process bulk indexing queue. @@ -495,14 +494,12 @@ def _index_action(self, payload): f'{get_record_error_count} {payload["id"]}') current_app.logger.error(msg) db.session.rollback() - except Exception as err: - raise Exception(err) - - index, doc_type = self.record_to_index(record) arguments = {} body = self._prepare_record(record, index, doc_type, arguments) - return { + index, doc_type = self.record_to_index(record) + + action = { '_op_type': 'index', '_index': index, '_type': doc_type, @@ -510,4 +507,7 @@ def _index_action(self, payload): '_version': record.revision_id, '_version_type': self._version_type, '_source': body - } | arguments + } + action.update(arguments) + + return action diff --git a/rero_mef/api_mef.py b/rero_mef/api_mef.py index d59e2e3c..7e5ec503 100644 --- a/rero_mef/api_mef.py +++ b/rero_mef/api_mef.py @@ -161,13 +161,15 @@ def get_pids_with_multiple_mef(cls, record_types=[], verbose=False): return pids, multiple_pids, missing_pids @classmethod - def get_all_missing_pids(cls, record_types=[], verbose=False): + def get_all_missing_pids(cls, record_types=None, verbose=False): """Get all missing agents. :params record_types: Record types (pid_type). :param verbose: Verbose. :returns: missing pids, to much pids. """ + if record_types is None: + record_types = [] missing_pids = {} to_much_pids = {} entity_classes = get_entity_classes() diff --git a/rero_mef/config.py b/rero_mef/config.py index f5283fc5..9dc277af 100644 --- a/rero_mef/config.py +++ b/rero_mef/config.py @@ -440,11 +440,15 @@ deleted=dict( filter=dict(exists=dict(field="deleted")) ), + deleted_entities=dict( + filter=dict(exists=dict(field="*.deleted")) + ), ), filters={ 'agent_type': terms_filter('type'), 'agent_sources': terms_filter('sources'), 'deleted': exists_filter('deleted'), + 'deleted_entities': exists_filter('*.deleted'), 'rero_double': terms_filter('rero.pid') } ), @@ -459,7 +463,6 @@ rero=dict( filter=dict(exists=dict(field="rero_pid")) ), - ), filters={ 'gnd': exists_filter('gnd_pid'), @@ -517,11 +520,15 @@ deleted=dict( filter=dict(exists=dict(field="deleted")) ), + deleted_entities=dict( + filter=dict(exists=dict(field="*.deleted")) + ), ), filters={ 'agent_type': terms_filter('type'), 'agent_sources': terms_filter('sources'), 'deleted': exists_filter('deleted'), + 'deleted_entities': exists_filter('*.deleted'), 'rero_double': terms_filter('rero.pid') } ), diff --git a/rero_mef/filter.py b/rero_mef/filter.py index 63796661..808f65ac 100644 --- a/rero_mef/filter.py +++ b/rero_mef/filter.py @@ -29,3 +29,14 @@ def exists_filter(field): def inner(values): return Q('exists', field=field) return inner + + +def not_exists_filter(field): + """Create a term filter. + + :param field: Field name. + :returns: Function that returns the Terms query. + """ + def inner(values): + return Q('bool', must_not=[Q('exists', field=field)]) + return inner diff --git a/rero_mef/query.py b/rero_mef/query.py index 47cf2694..67a2578b 100644 --- a/rero_mef/query.py +++ b/rero_mef/query.py @@ -60,8 +60,11 @@ def _default_parser(qstr=None): urlkwargs.add('q', query_string) # include deleted - deleted = request.args.get('deleted') - if not deleted: - search = search.filter('bool', must_not=[Q('exists', field='deleted')]) + with_deleted = request.args.get('with_deleted') + if not with_deleted: + search = search.filter('bool', must_not=[ + Q('exists', field='deleted'), # no deleted MEF's + Q('exists', field='*.deleted') # no deleted entities + ]) return search, urlkwargs diff --git a/rero_mef/tasks.py b/rero_mef/tasks.py index dcb0b7c9..82cc6b95 100644 --- a/rero_mef/tasks.py +++ b/rero_mef/tasks.py @@ -101,7 +101,7 @@ def create_or_update(index, record, entity, dbcommit=True, reindex=True, reindex=reindex, online=online ) - rec_id = returned_record.get('pid') + pid = returned_record.get('pid') id_type = 'pid :' if not rec_id: id_type = 'uuid:' @@ -114,7 +114,7 @@ def create_or_update(index, record, entity, dbcommit=True, reindex=True, msg += (f' mef: {mef_record.pid} {mef_action.name} |' f' viaf: {v_pid} {got_online}') click.echo(msg) - return id_type, str(id), str(agent_action) + return id_type, str(pid), str(agent_action) @shared_task @@ -133,8 +133,7 @@ def delete(index, pid, entity, dbcommit=True, delindex=True, verbose=False): agent_record = agent_class.get_record_by_pid(pid) action = None if agent_record: - result, action = agent_record.delete(dbcommit=dbcommit, - delindex=delindex) + _, action = agent_record.delete(dbcommit=dbcommit, delindex=delindex) if verbose: click.echo(f'{index:<10} Deleted {entity} {pid:<38} {action}') else: diff --git a/rero_mef/utils.py b/rero_mef/utils.py index d65e3030..63f92158 100644 --- a/rero_mef/utils.py +++ b/rero_mef/utils.py @@ -307,17 +307,15 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator, f' | mef: {m_pid} {m_action}' f' | viaf: {v_pid} online: {v_online}' ) - else: - if verbose: - click.echo( - f'NO TRANSFORMATION: {name} {count} ' - f'{records[0]}' - ) + elif verbose: + click.echo( + f'NO TRANSFORMATION: {name} {count} ' + f'{records[0]}' + ) except Exception as err: msg = f'Creating {name} {count}: {err}' if rec: msg += f'\n{rec}' - current_app.logger.error(msg) if debug: traceback.print_exc() @@ -329,7 +327,6 @@ def oai_process_records_from_dates(name, sickle, oai_item_iterator, if debug: traceback.print_exc() count = -1 - my_from_date = my_from_date + timedelta(days=days_spann + 1) if verbose: from_date = my_from_date.strftime("%Y-%m-%d") @@ -587,8 +584,7 @@ def resolve_record(path, object_class): :returns: record for pid or {} """ try: - record = object_class.get_record_by_pid(path) - return record + return object_class.get_record_by_pid(path) except PIDDoesNotExistError: return {} diff --git a/tests/api/test_view_agents_mef.py b/tests/api/test_view_agents_mef.py index d4526051..8485f540 100644 --- a/tests/api/test_view_agents_mef.py +++ b/tests/api/test_view_agents_mef.py @@ -47,7 +47,6 @@ def test_view_agents_mef(client, agent_mef_record): 'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0 }, - 'deleted': {'doc_count': 0}, 'sources': { 'buckets': [ {'doc_count': 1, 'key': 'gnd'}, @@ -56,7 +55,9 @@ def test_view_agents_mef(client, agent_mef_record): ], 'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0 - } + }, + 'deleted': {'doc_count': 0}, + 'deleted_entities': {'doc_count': 0}, } url = url_for('api_agents_mef.redirect_item', pid=pid) res = client.get(url) diff --git a/tests/unit/test_api.py b/tests/unit/test_api.py index 61e35cf8..a9028de9 100644 --- a/tests/unit/test_api.py +++ b/tests/unit/test_api.py @@ -42,9 +42,7 @@ def test_reromefrecord_api(app, agent_idref_record): assert AgentIdrefRecord.get_metadata_identifier_names() == ( 'agent_idref_metadata', 'agent_idref_id') - count = 0 - for _ in AgentIdrefRecord.get_all_records(): - count += 1 + count = sum(1 for _ in AgentIdrefRecord.get_all_records()) assert count == 1 _, agent_action = idref.update_test_md5(