Skip to content

Commit

Permalink
import: fix import SLSP
Browse files Browse the repository at this point in the history
* Make the SLSP import more robust against malformed MARC21 data, for
  example when field 019 has no subfields.
* Adds $ref creation for different entity types.
* Closes rero#3468.

Co-Authored-by: Peter Weber <[email protected]>
  • Loading branch information
rerowep committed Oct 3, 2023
1 parent 33053ce commit 084bf40
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 160 deletions.
159 changes: 77 additions & 82 deletions rero_ils/dojson/utils.py

Large diffs are not rendered by default.

27 changes: 13 additions & 14 deletions rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
from flask import current_app

from rero_ils.dojson.utils import ReroIlsMarc21Overdo, TitlePartList, \
build_identifier, build_string_from_subfields, get_contribution_link, \
get_field_items, remove_trailing_punctuation
build_identifier, build_string_from_subfields, get_field_items, \
get_mef_link, remove_trailing_punctuation
from rero_ils.modules.entities.models import EntityType

from ..utils import do_abbreviated_title, \
Expand Down Expand Up @@ -603,18 +603,17 @@ def marc21_to_subjects_6XX(self, key, value):
value, subfield_code_per_tag[creator_tag_key]), '.', '.')
field_key = 'genreForm' if tag_key == '655' else config_field_key

if data_type in [EntityType.PERSON,
EntityType.ORGANISATION]:
if ref := get_contribution_link(
bibid=marc21.bib_id,
reroid=marc21.bib_id,
ids=utils.force_list(value.get('0')),
key=key
):
subject = {
'$ref': ref
}
if not subject.get('$ref'):
if field_key != 'subjects_imported' and (ref := get_mef_link(
bibid=marc21.bib_id,
reroid=marc21.bib_id,
entity_type=data_type,
ids=utils.force_list(value.get('0')),
key=key
)):
subject = {
'$ref': ref
}
else:
identifier = build_identifier(value)
if identifier:
subject['identifiedBy'] = identifier
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,8 @@
from dojson.utils import GroupableOrderedDict

from rero_ils.dojson.utils import ReroIlsMarc21Overdo, build_identifier, \
build_string_from_subfields, error_print, get_contribution_link, \
get_field_items, not_repetitive, re_identified, \
remove_trailing_punctuation
build_string_from_subfields, error_print, get_field_items, get_mef_link, \
not_repetitive, re_identified, remove_trailing_punctuation
from rero_ils.modules.documents.utils import create_authorized_access_point
from rero_ils.modules.entities.models import EntityType

Expand Down Expand Up @@ -123,9 +122,10 @@ def marc21_to_contribution(self, key, value):
self['work_access_point'].append(work_access_point)
return None
agent = {}
if ref := get_contribution_link(
if ref := get_mef_link(
bibid=marc21.bib_id,
reroid=marc21.rero_id,
entity_type=EntityType.PERSON,
ids=utils.force_list(value.get('0')),
key=key
):
Expand Down Expand Up @@ -573,18 +573,17 @@ def marc21_to_subjects(self, key, value):
) + '. ' + subject['authorized_access_point']
field_key = 'genreForm' if tag_key == '655' else 'subjects'
subfields_0 = utils.force_list(value.get('0'))
if (data_type in [EntityType.PERSON, EntityType.ORGANISATION]
and subfields_0):
if ref := get_contribution_link(
bibid=marc21.bib_id,
reroid=marc21.rero_id,
ids=subfields_0,
key=key
):
subject = {
'$ref': ref,
}
if not subject.get('$ref'):
if field_key != 'subjects_imported' and (ref := get_mef_link(
bibid=marc21.bib_id,
reroid=marc21.rero_id,
entity_type=data_type,
ids=utils.force_list(subfields_0),
key=key
)):
subject = {
'$ref': ref,
}
else:
if identifier := build_identifier(value):
subject['identifiedBy'] = identifier
if field_key != 'genreForm':
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
from flask import current_app

from rero_ils.dojson.utils import ReroIlsMarc21Overdo, build_identifier, \
build_string_from_subfields, get_contribution_link, \
remove_trailing_punctuation
build_string_from_subfields, get_mef_link, remove_trailing_punctuation
from rero_ils.modules.entities.models import EntityType

from ..utils import do_abbreviated_title, \
Expand Down Expand Up @@ -394,21 +393,24 @@ def marc21_to_subjects_6XX(self, key, value):
subject['conference'] = conference_per_tag[tag_key]
elif tag_key in ['600t', '610t', '611t']:
creator_tag_key = tag_key[:3] # to keep only tag: 600, 610, 611
subject['creator'] = remove_trailing_punctuation(
creator = remove_trailing_punctuation(
build_string_from_subfields(
value, subfield_code_per_tag[creator_tag_key]), '.', '.')
if creator:
subject['authorized_access_point'] = \
f'{creator}. {subject["authorized_access_point"]}'
field_key = 'genreForm' if tag_key == '655' else config_field_key
if data_type in [EntityType.PERSON, EntityType.ORGANISATION]:
if ref := get_contribution_link(
bibid=marc21.bib_id,
reroid=marc21.rero_id,
ids=utils.force_list(value.get('0')),
key=key
):
subject = {
'$ref': ref
}
if not subject.get('$ref'):
if field_key != 'subjects_imported' and (ref := get_mef_link(
bibid=marc21.bib_id,
reroid=marc21.rero_id,
entity_type=data_type,
ids=utils.force_list(value.get('0')),
key=key
)):
subject = {
'$ref': ref
}
else:
if identifier := build_identifier(value):
sub_2 = next(iter(utils.force_list(value.get('2') or [])), '')
if data_type == EntityType.TOPIC and sub_2.lower() == 'rero':
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
from rero_ils.dojson.utils import _LANGUAGES, TitlePartList, add_note, \
build_identifier, build_responsibility_data, build_string_from_subfields, \
error_print, extract_subtitle_and_parallel_titles_from_field_245_b, \
get_contribution_link, get_field_items, get_field_link_data, \
not_repetitive, re_identified, remove_trailing_punctuation
get_field_items, get_field_link_data, get_mef_link, not_repetitive, \
re_identified, remove_trailing_punctuation
from rero_ils.modules.documents.utils import create_authorized_access_point
from rero_ils.modules.entities.models import EntityType

Expand Down Expand Up @@ -600,9 +600,10 @@ def do_contribution(data, marc21, key, value):
return None

agent = {}
if ref := get_contribution_link(
if ref := get_mef_link(
bibid=marc21.bib_id,
reroid=marc21.rero_id,
entity_type=EntityType.PERSON,
ids=utils.force_list(value.get('0')),
key=key
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
remove_trailing_punctuation
from rero_ils.modules.documents.api import Document
from rero_ils.modules.documents.dojson.contrib.marc21tojson.utils import \
get_contribution_link
get_mef_link
from rero_ils.modules.documents.utils import create_authorized_access_point
from rero_ils.modules.entities.models import EntityType

Expand Down Expand Up @@ -655,9 +655,10 @@ def unimarc_to_contribution(self, key, value):

ids = utils.force_list(value.get('3')) or []
ids = [f'(idref){id_}' for id_ in ids]
if ids and (ref := get_contribution_link(
if ids and (ref := get_mef_link(
bibid=unimarc.bib_id,
reroid=unimarc.rero_id,
entity_type=EntityType.PERSON,
ids=ids,
key=key
)):
Expand Down
35 changes: 18 additions & 17 deletions rero_ils/modules/imports/serializers/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,23 +122,24 @@ def post_process(self, metadata):
titles, responsibility, with_subtitle=False)
if text_title:
metadata['ui_title_text_responsibility'] = text_title
contributions = metadata.get('contribution', [])
new_contributions = []
for contribution in contributions:
agent = contribution['entity']
# convert a MEF link into a local entity
if agent_data := JsonRef.replace_refs(agent, loader=None).get(
'metadata'
):
agent = {
local_value: agent_data[local_key]
for local_key, local_value in self.entity_mapping.items()
if agent_data.get(local_key)
}
new_contributions.append({'entity': agent})
if new_contributions:
metadata['contribution'] = \
process_i18n_literal_fields(new_contributions)
for entity_type in ['contribution', 'subjects', 'genreForm']:
entities = metadata.get(entity_type, [])
new_entities = []
for entity in entities:
ent = entity['entity']
# convert a MEF link into a local entity
if entity_data := JsonRef.replace_refs(ent, loader=None).get(
'metadata'
):
ent = {
local_value: entity_data[local_key]
for local_key, local_value in self.entity_mapping.items()
if entity_data.get(local_key)
}
new_entities.append({'entity': ent})
if new_entities:
metadata[entity_type] = \
process_i18n_literal_fields(new_entities)
return metadata


Expand Down
23 changes: 11 additions & 12 deletions tests/unit/documents/test_documents_dojson.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,17 @@

from __future__ import absolute_import, print_function

import os

import mock
from dojson.contrib.marc21.utils import create_record
from utils import mock_response

from rero_ils.dojson.utils import not_repetitive
from rero_ils.modules.documents.dojson.contrib.marc21tojson.rero import marc21
from rero_ils.modules.documents.dojson.contrib.marc21tojson.rero.model import \
get_contribution_link
get_mef_link
from rero_ils.modules.documents.views import create_publication_statement, \
get_cover_art, get_other_accesses
from rero_ils.modules.entities.models import EntityType


def test_not_repetetive(capsys):
Expand Down Expand Up @@ -5598,43 +5597,43 @@ def test_marc21_to_identified_by_from_930():


@mock.patch('requests.Session.get')
def test_get_contribution_link(mock_get, capsys):
def test_get_mef_link(mock_get, capsys):
"""Test get mef contribution link"""

os.environ[
'RERO_ILS_MEF_AGENTS_URL'] = 'https://mef.xxx.rero.ch/api/agents'

mock_get.return_value = mock_response(json_data={
'pid': 'test',
'idref': {'pid': '003945843'}
})
mef_url = get_contribution_link(
mef_url = get_mef_link(
bibid='1',
reroid='1',
entity_type=EntityType.PERSON,
ids=['(IdRef)003945843'],
key='100..'
)
assert mef_url == 'https://mef.xxx.rero.ch/api/agents/idref/003945843'
assert mef_url == 'https://mef.rero.ch/api/agents/idref/003945843'

mock_get.return_value = mock_response(status=404)
mef_url = get_contribution_link(
mef_url = get_mef_link(
bibid='1',
reroid='1',
entity_type=EntityType.PERSON,
ids=['(IdRef)123456789'],
key='100..'
)
assert not mef_url
out, err = capsys.readouterr()
assert out == (
'WARNING GET MEF CONTRIBUTION:\t1\t1\t100..\t(IdRef)123456789\t'
'https://mef.xxx.rero.ch/api/agents/mef/latest/'
'https://mef.rero.ch/api/agents/mef/latest/'
'idref:123456789\t404\t0\t\n'
)

mock_get.return_value = mock_response(status=400)
mef_url = get_contribution_link(
mef_url = get_mef_link(
bibid='1',
reroid='1',
entity_type=EntityType.PERSON,
ids=['X123456789'],
key='100..'
)
Expand Down

0 comments on commit 084bf40

Please sign in to comment.