Skip to content

Commit

Permalink
feat(xmllib): add create_label_to_name_list_node_mapping() to lib (D…
Browse files Browse the repository at this point in the history
…EV-4293) (#1245)

Co-authored-by: Johannes Nussbaum <[email protected]>
  • Loading branch information
Nora-Olivia-Ammann and jnussbaum authored Oct 30, 2024
1 parent 84df21c commit c0cdbeb
Showing 1 changed file with 58 additions and 0 deletions.
58 changes: 58 additions & 0 deletions src/dsp_tools/xmllib/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import json
from typing import Any
from typing import Iterable


def create_label_to_name_list_node_mapping(
    project_json_path: str,
    list_name: str,
    language_of_label: str,
) -> dict[str, str]:
    """
    Build a lookup table that translates list node labels into list node names.

    Data sources frequently refer to list values by the "label" of the JSON project list node,
    whereas `dsp-tools xmlupload` needs the node's "name".
    The dictionary returned here bridges that gap.

    Every label is registered twice: once verbatim, and once stripped of surrounding whitespace and lowercased.
    Entries whose name equals `list_name` (i.e. the list itself) are left out.
    If no list called `list_name` exists in the project, the result is empty.
    If two nodes produce the same key, the one yielded later wins.

    Args:
        project_json_path: path to a JSON project file (a.k.a. ontology)
        list_name: name of a list in the JSON project
        language_of_label: which language of the label to choose

    Returns:
        a dictionary of the form {label: name}
    """
    with open(project_json_path, encoding="utf-8") as project_file:
        project_definition = json.load(project_file)
    all_lists = project_definition["project"]["lists"]
    # Keep only the list(s) with the requested name -- normally exactly one JSON object holding the entire list.
    matching_lists = [candidate for candidate in all_lists if candidate["name"] == list_name]
    label_to_name: dict[str, str] = {}
    for label, name in _name_label_mapper_iterator(matching_lists, language_of_label):
        if name == list_name:
            # This pair belongs to the list itself, not to one of its entries.
            continue
        label_to_name[label] = name
        # Also offer a normalised key, so that lookups can tolerate case and whitespace differences.
        label_to_name[label.strip().lower()] = name
    return label_to_name


def _name_label_mapper_iterator(
    json_subset: list[dict[str, Any]],
    language_of_label: str,
) -> Iterable[tuple[str, str]]:
    """
    Walk recursively through list nodes of a JSON project and yield one (label, name) pair per named node.

    The descendants of a node are yielded before the node itself.
    Nodes without a "name" key yield nothing themselves, but their "nodes" are still visited.

    Args:
        json_subset: list of DSP lists (a DSP list being a dictionary with the keys "name", "labels" and "nodes")
        language_of_label: which language of the label to choose

    Yields:
        (label, name) pairs
    """
    for entry in json_subset:
        # "nodes" holds the children of this entry; a leaf has no such key.
        children = entry["nodes"] if "nodes" in entry else []
        # Every pair coming out of the recursion describes a single descendant of this entry.
        yield from _name_label_mapper_iterator(children, language_of_label)
        if "name" not in entry:
            continue
        # Finally, the label (in the requested language) and the name of the entry itself.
        yield entry["labels"][language_of_label], entry["name"]

0 comments on commit c0cdbeb

Please sign in to comment.