From ad36b25793ff5768920f33c148f90457cc5cacfa Mon Sep 17 00:00:00 2001 From: Peter Weber Date: Thu, 31 Oct 2024 10:53:28 +0100 Subject: [PATCH 1/2] places, concepts: GND closeMatch corrections Co-Authored-by: Peter Weber --- poetry.lock | 23 ++++--- .../alembic/d8536341fc5e_delete_identifier.py | 4 +- rero_mef/marctojson/do_gnd_concepts.py | 33 ++++++---- rero_mef/marctojson/do_gnd_places.py | 25 ++++++-- .../concepts/examples/xml_minimal_record.xml | 61 ++++++++++++++----- .../test_concepts_gnd_transformation.py | 23 ++++--- .../places/test_places_gnd_transformation.py | 10 +-- 7 files changed, 117 insertions(+), 62 deletions(-) diff --git a/poetry.lock b/poetry.lock index dd1a8c6d..f228d346 100644 --- a/poetry.lock +++ b/poetry.lock @@ -229,21 +229,20 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "bleach" -version = "6.1.0" +version = "6.2.0" description = "An easy safelist-based HTML-sanitizing tool." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "bleach-6.1.0-py3-none-any.whl", hash = "sha256:3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6"}, - {file = "bleach-6.1.0.tar.gz", hash = "sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe"}, + {file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"}, + {file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"}, ] [package.dependencies] -six = ">=1.9.0" webencodings = "*" [package.extras] -css = ["tinycss2 (>=1.1.0,<1.3)"] +css = ["tinycss2 (>=1.1.0,<1.5)"] [[package]] name = "blinker" @@ -4238,23 +4237,23 @@ tornado = ["tornado (>=5)"] [[package]] name = "setuptools" -version = "75.2.0" +version = "75.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"}, - {file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"}, + {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, + {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] [[package]] name = "sickle" diff --git a/rero_mef/alembic/d8536341fc5e_delete_identifier.py b/rero_mef/alembic/d8536341fc5e_delete_identifier.py index f0952382..6aafb378 100644 --- a/rero_mef/alembic/d8536341fc5e_delete_identifier.py +++ b/rero_mef/alembic/d8536341fc5e_delete_identifier.py @@ -64,7 +64,7 @@ def upgrade(): ids.append(id_) rec = agent_cls.get_record(id_) rec.pop("identifier", None) - rec.update(data=rec, bcommit=False, reindex=True) + rec.update(data=rec, dbcommit=False, reindex=True) if idx % 1000 == 0: print(f" {idx} commit", end=" | ", flush=True) db.session.commit() @@ -102,7 +102,7 @@ def downgrade(): ids.append(id_) rec = agent_cls.get_record(id_) rec["identifier"] = f'"{url}{rec.pid}"' - rec.update(data=rec, bcommit=False, reindex=True) + rec.update(data=rec, dbcommit=False, reindex=True) if idx % 1000 == 0: print(f" {idx} commit", end=" | ", flush=True) db.session.commit() diff --git a/rero_mef/marctojson/do_gnd_concepts.py b/rero_mef/marctojson/do_gnd_concepts.py index cfb7d355..d53e17b9 100644 --- a/rero_mef/marctojson/do_gnd_concepts.py +++ b/rero_mef/marctojson/do_gnd_concepts.py @@ -215,14 +215,8 @@ def trans_gnd_relation(self): if value: self.json_dict[relation] = value - def trans_gnd_classification(self): - """Transformation classification from field 686.""" - if self.logger and self.verbose: - self.logger.info("Call Function", "trans_gnd_classification") - # TODO: find classification - def trans_gnd_match(self): - """Transformation closeMatch and exactfrom field 750.""" + """Transformation closeMatch and exactMatch from field 750.""" if self.logger and self.verbose: self.logger.info("Call Function", "trans_gnd_match") for field_750 in self.marc.get_fields("750"): @@ -253,29 +247,42 @@ def trans_gnd_match(self): if authorized_ap := build_string_from_field( field=field_750, subfields=subfields, tag_grouping=tag_grouping ): - match = { + match_data = { "authorized_access_point": authorized_ap, "source": "GND", } + identified_by = [] + other_source = None for subfield_0 in field_750.get_subfields("0"): if subfield_0.startswith("http"): - match.setdefault("identifiedBy", []).append( + identified_by.insert( + 0, { "type": "uri", "value": subfield_0, - } + }, ) + if other_source: + identified_by[0]["source"] = other_source else: source, id_ = get_source_and_id(subfield_0) if source: - match.setdefault("identifiedBy", []).append( + insert_pos = -1 + if source != "GND": + other_source = source + match_data["source"] = other_source + insert_pos = 0 + identified_by.insert( + insert_pos, { "source": source, "type": "bf:Nbn", "value": id_, - } + }, ) - self.json_dict.setdefault(match_type, []).append(match) + if identified_by: + match_data["identifiedBy"] = identified_by + self.json_dict.setdefault(match_type, []).append(match_data) def trans_gnd_note(self): """Transformation notes from field. diff --git a/rero_mef/marctojson/do_gnd_places.py b/rero_mef/marctojson/do_gnd_places.py index 23e81f46..c84cc316 100644 --- a/rero_mef/marctojson/do_gnd_places.py +++ b/rero_mef/marctojson/do_gnd_places.py @@ -270,29 +270,42 @@ def trans_gnd_match(self): if authorized_ap := build_string_from_field( field=field_751, subfields=subfields, tag_grouping=tag_grouping ): - match = { + match_data = { "authorized_access_point": authorized_ap, "source": "GND", } + identified_by = [] + other_source = None for subfield_0 in field_751.get_subfields("0"): if subfield_0.startswith("http"): - match.setdefault("identifiedBy", []).append( + identified_by.insert( + 0, { "type": "uri", "value": subfield_0, - } + }, ) + if other_source: + identified_by[0]["source"] = other_source else: source, id_ = get_source_and_id(subfield_0) if source: - match.setdefault("identifiedBy", []).append( + insert_pos = -1 + if source != "GND": + other_source = source + match_data["source"] = other_source + insert_pos = 0 + identified_by.insert( + insert_pos, { "source": source, "type": "bf:Nbn", "value": id_, - } + }, ) - self.json_dict.setdefault(match_type, []).append(match) + if identified_by: + match_data["identifiedBy"] = identified_by + self.json_dict.setdefault(match_type, []).append(match_data) def trans_gnd_note(self): """Transformation notes from field. diff --git a/tests/unit/concepts/examples/xml_minimal_record.xml b/tests/unit/concepts/examples/xml_minimal_record.xml index 32257621..a24d3104 100644 --- a/tests/unit/concepts/examples/xml_minimal_record.xml +++ b/tests/unit/concepts/examples/xml_minimal_record.xml @@ -1,24 +1,55 @@ 00589nx a2200193 45 - - 027630501 - sudoc + + + Grand Larousse universel (art. : Livre) + - - frBN001940328 + + + Laval RVM (en ligne), 2004-11-23 + - - frBN000000089 + + + Mers profondément engagées dans la masse des continents + - - FRBNF118620892 - FRBNF11862089 + + Note interne - - http://viaf.org/viaf/124265140 - VIAF - VIAF - 20200302 + + + Voir le descripteur Opposition (science politique) + + + + + Combiner un des descripteurs Mouvements contestataires + + + + + Voir les vedettes : Mouvements contestataires ; Opposition + + + + + Voir les vedettes du type : Antifascisme ; Mouvements + + + + + Voir aux mouvements d'opposition particuliers, par ex. : Combat + + + + + Voir aussi aux mers et océans particuliers + + + + VF3, NC3, NC30 \ No newline at end of file diff --git a/tests/unit/concepts/test_concepts_gnd_transformation.py b/tests/unit/concepts/test_concepts_gnd_transformation.py index b7040ced..6dbf5d0b 100644 --- a/tests/unit/concepts/test_concepts_gnd_transformation.py +++ b/tests/unit/concepts/test_concepts_gnd_transformation.py @@ -174,12 +174,12 @@ def test_gnd_close_match(): "closeMatch": [ { "authorized_access_point": "Atlases", - "source": "GND", + "source": "DLC", "identifiedBy": [ { - "source": "GND", - "type": "bf:Nbn", - "value": "(DE-101)1134384173", + "source": "DLC", + "type": "uri", + "value": "http://id.loc.gov/authorities/subjects/sh85009231", }, { "source": "DLC", @@ -187,8 +187,9 @@ def test_gnd_close_match(): "value": "sh85009231", }, { - "type": "uri", - "value": "http://id.loc.gov/authorities/subjects/sh85009231", + "source": "GND", + "type": "bf:Nbn", + "value": "(DE-101)1134384173", }, ], } @@ -196,11 +197,15 @@ def test_gnd_close_match(): "exactMatch": [ { "authorized_access_point": "Atlas", - "source": "GND", + "source": "DNLM", "identifiedBy": [ - {"source": "GND", "type": "bf:Nbn", "value": "(DE-101)125348144X"}, + { + "source": "DNLM", + "type": "uri", + "value": "http://id.nlm.nih.gov/mesh/D020466", + }, {"source": "DNLM", "type": "bf:Nbn", "value": "D020466"}, - {"type": "uri", "value": "http://id.nlm.nih.gov/mesh/D020466"}, + {"source": "GND", "type": "bf:Nbn", "value": "(DE-101)125348144X"}, ], }, ], diff --git a/tests/unit/places/test_places_gnd_transformation.py b/tests/unit/places/test_places_gnd_transformation.py index c3144e81..e2af23e4 100644 --- a/tests/unit/places/test_places_gnd_transformation.py +++ b/tests/unit/places/test_places_gnd_transformation.py @@ -153,17 +153,17 @@ def test_gnd_close_match(): "exactMatch": [ { "authorized_access_point": "Venedig", - "source": "GND", + "source": "ZBW", "identifiedBy": [ { - "source": "GND", + "source": "ZBW", "type": "bf:Nbn", - "value": "(DE-101)997977663", + "value": "091419204", }, { - "source": "ZBW", + "source": "GND", "type": "bf:Nbn", - "value": "091419204", + "value": "(DE-101)997977663", }, ], } From f14bdacb4553276ccef0388e579c9be867322502 Mon Sep 17 00:00:00 2001 From: Peter Weber Date: Thu, 7 Nov 2024 13:04:13 +0100 Subject: [PATCH 2/2] places, concepts: correct MEF creations/updates Co-Authored-by: Peter Weber --- rero_mef/api.py | 30 +++++++++++++++------ tests/ui/places/test_places_api.py | 42 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/rero_mef/api.py b/rero_mef/api.py index 2540698f..8795a096 100644 --- a/rero_mef/api.py +++ b/rero_mef/api.py @@ -613,11 +613,11 @@ def get_mef_record(mef_cls, name, pid): ) } if ( - not mef_self_association_pid - and not mef_other_association_pid - and mef_other_pid + not bool(mef_self_association_pid) + and not bool(mef_other_association_pid) + and bool(mef_other_pid) ): - # Delete associated ref from MEF an create a new one + # Delete associated ref from MEF and create a new one new_mef_record.pop(association_name) if association_record := association_info[ "record_cls" @@ -631,10 +631,10 @@ def get_mef_record(mef_cls, name, pid): ) actions |= action if ( - mef_self_pid - and not mef_self_association_pid - and not mef_other_pid - and mef_other_association_pid + bool(mef_self_pid) + and not bool(mef_self_association_pid) + and not bool(mef_other_pid) + and bool(mef_other_association_pid) ): # Delete entity from old MEF and add it to new MEF ref = mef_associated_record.pop(association_name) @@ -643,6 +643,20 @@ def get_mef_record(mef_cls, name, pid): ) actions[associated_mef_record.pid] = Action.DELETE_ENTITY new_mef_record[association_name] = ref + if ( + bool(mef_self_pid) + and not bool(mef_self_association_pid) + and bool(mef_other_pid) + and bool(mef_other_association_pid) + ): + # Delete entity from new MEF and add it to old MEF + ref = new_mef_record.pop(self.name) + new_mef_record.replace( + data=new_mef_record, dbcommit=dbcommit, reindex=reindex + ) + actions[new_mef_record.pid] = Action.DELETE_ENTITY + mef_associated_record[self.name] = ref + new_mef_record = mef_associated_record mef_record = new_mef_record.replace( data=new_mef_record, dbcommit=dbcommit, reindex=reindex diff --git a/tests/ui/places/test_places_api.py b/tests/ui/places/test_places_api.py index fc288a85..73e0baef 100644 --- a/tests/ui/places/test_places_api.py +++ b/tests/ui/places/test_places_api.py @@ -205,3 +205,45 @@ def test_create_place_record(app, place_idref_data, place_gnd_data, tmpdir): "pid": "1", "type": "bf:Place", } + + # test idref changes to other gnd + place_gnd_data["pid"] = "TEST2" + gnd_record_2 = PlaceGndRecord.create( + data=place_gnd_data, dbcommit=True, reindex=True, delete_pid=False + ) + assert gnd_record_2.pid == "TEST2" + m_gnd_record_2, m_action = gnd_record_2.create_or_update_mef( + dbcommit=True, reindex=True + ) + assert m_action == {"4": Action.CREATE} + assert m_gnd_record_2 == { + "$schema": "https://mef.rero.ch/schemas/places_mef/mef-place-v0.0.1.json", + "gnd": {"$ref": "https://mef.rero.ch/api/places/gnd/TEST2"}, + "pid": "4", + "type": "bf:Place", + } + + for identified_by in idref_record["identifiedBy"]: + if identified_by.get("source") == "GND": + identified_by["value"] = "(DE-101)TEST2" + idref_record = idref_record.update(data=idref_record, dbcommit=True, reindex=True) + + PlaceMefRecord.flush_indexes() + m_idref_record, m_action = idref_record.create_or_update_mef( + dbcommit=True, reindex=True + ) + assert m_action == {"1": Action.DELETE_ENTITY, "4": Action.UPDATE} + assert m_idref_record == { + "$schema": f"{SCHEMA_URL}/mef-place-v0.0.1.json", + "idref": {"$ref": "https://mef.rero.ch/api/places/idref/271330163"}, + "gnd": {"$ref": "https://mef.rero.ch/api/places/gnd/TEST2"}, + "pid": "4", + "type": "bf:Place", + } + m_gnd_record = PlaceMefRecord.get_record_by_pid(m_gnd_record.pid) + assert m_gnd_record == { + "$schema": f"{SCHEMA_URL}/mef-place-v0.0.1.json", + "gnd": {"$ref": "https://mef.rero.ch/api/places/gnd/TEST"}, + "pid": "1", + "type": "bf:Place", + }