Skip to content

Commit

Permalink
Merge pull request #10 from sharkdata/4-remove-hardcoded-specifics/if…
Browse files Browse the repository at this point in the history
…cb-ml

IFCB unit fix
  • Loading branch information
jonatanmindroad authored Dec 6, 2024
2 parents 889bc67 + 21b26b4 commit 66413cc
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 17 deletions.
3 changes: 2 additions & 1 deletion dwca_config/dwca_phytoplankton_ifcb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ fieldMapping:
directory:
dwca_config/field_mapping
files:
- dwca_terms_event.yaml
- - dwca_terms_event.yaml
- _IFCB
- dwca_terms_occurrence.yaml
- dwca_terms_emof.yaml

Expand Down
2 changes: 2 additions & 0 deletions dwca_config/field_mapping/dwca_terms_event.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ dwcaEventContent:
- sampled_volume_ml
sampleSizeUnit:
text: Litres
sampleSizeUnit_IFCB:
text: Millilitres
samplingEffort:
sourceKey:
eventDate:
Expand Down
6 changes: 1 addition & 5 deletions dwca_generator/dwca_format_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,7 @@ def create_dwca_event(self):
if not event_dict.get("sampleSizeValue"):
event_dict["sampleSizeUnit"] = ""

# IFCB sample vol unit fix litres to millilitres
if "IFCB" in event_dict.get("dynamicProperties", "") and event_dict.get("sampleSizeValue"):
event_dict["sampleSizeUnit"] = "Millilitres"

# Seal Pathology area fix using obis.org/maptool moving position from county capital to position in water and with individual uncertainty radius m
# Seal Pathology area fix using obis.org/maptool moving position from county capital to position in water and with individual uncertainty radius m
if event_dict.get("verbatimLocality") == "BD" and "SHARK_SealPathology" in event_dict.get("dynamicProperties"):
event_dict["locality"] = "BD Norrbotten County"
event_dict["latitude"] = "65.45"
Expand Down
43 changes: 32 additions & 11 deletions dwca_generator/dwca_generator_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,16 @@
# License: MIT License (see LICENSE.txt or http://opensource.org/licenses/mit).

import pathlib
from collections import namedtuple

import yaml
import dict2xml
import collections.abc

from dwca_generator import dwca_data_shark
from dwca_generator.dwca_utils import config_with_suffix


# Pairs a config file path ("file") with the optional key suffix that was
# declared next to it in the config's "files" list.
# NOTE: despite the field name, "prefix" carries a key *suffix* such as
# "_IFCB" -- get_config_files() stores the suffix in this slot.
FileWithPrefix = namedtuple("FileWithPrefix", ("file", "prefix"))


class DwcaGeneratorConfig:
Expand Down Expand Up @@ -58,7 +63,7 @@ def load_config(self):
file_list = self.get_config_files("dwcaKeys")
self.dwca_keys = self.merge_config_yaml_files(file_list)
# "fieldMapping"
file_list = self.get_config_files("fieldMapping")
file_list = self.get_config_files("fieldMapping", include_prefix=True)
self.field_mapping = self.merge_config_yaml_files(file_list)
# "taxaWorms"
file_list = self.get_config_files("taxaWorms")
Expand Down Expand Up @@ -125,29 +130,44 @@ def load_source_files(self):
# print("\n".join(sorted(source_file_list)))
return sorted(source_file_list)

def get_config_files(self, config_key, include_prefix=False) -> list[str] | list[FileWithPrefix]:
    """
    Collect the config file paths declared under ``config_key``.

    The section ``self.dwca_config[config_key]`` may contain a "directory"
    entry and a "files" list. Each item in "files" is either a plain file
    name or a two-item sequence ``[file_name, suffix]``.

    Args:
        config_key: Top-level key in ``self.dwca_config`` to read.
        include_prefix: When true, every result is a ``FileWithPrefix``
            holding the path and the declared suffix (``None`` for plain
            file names). When false, only the path strings are returned
            and any declared suffix is dropped.

    Returns:
        The file paths (joined with the section's directory, if any) in
        the order they are listed. Empty if the key or its "files" list
        is missing.

    Raises:
        ValueError: If a non-string "files" item does not consist of
            exactly two elements.
    """
    file_list = []
    if config_key not in self.dwca_config:
        return file_list

    section = self.dwca_config[config_key]
    if not section:
        # Key is present but left empty in the config: nothing to collect.
        return file_list

    dir_path = pathlib.Path()
    if "directory" in section:
        dir_path /= section["directory"]

    if "files" in section:
        for entry in section["files"] or []:
            if isinstance(entry, str):
                file_name, suffix = entry, None
            else:
                try:
                    file_name, suffix = entry
                except ValueError as err:
                    raise ValueError(
                        f"Invalid entry {entry!r} under '{config_key}': expected "
                        "a file name or a [file name, suffix] pair."
                    ) from err

            path = str(dir_path / file_name)
            file_list.append(
                FileWithPrefix(path, suffix) if include_prefix else path
            )
    return file_list

def merge_config_yaml_files(self, yaml_file_list: list[str] | list[FileWithPrefix]):
    """
    Merge configurations as defined in the yaml file list order.

    Args:
        yaml_file_list: Paths of the YAML files to load. An item may be a
            ``FileWithPrefix``; its second field is then passed to
            ``config_with_suffix`` as the key suffix to apply to that
            file's content before merging.

    Returns:
        One dict holding the top-level keys of all files. Later files
        override earlier ones.
    """
    result_dict = {}
    for entry in yaml_file_list:
        if isinstance(entry, FileWithPrefix):
            file_name, suffix = entry
        else:
            file_name, suffix = entry, None

        # NOTE(review): FullLoader is fine for project-controlled config
        # files; switch to yaml.safe_load if these can come from users.
        with open(pathlib.Path(file_name), encoding="utf8") as file:
            new_data = yaml.load(file, Loader=yaml.FullLoader)

        new_data = config_with_suffix(new_data, suffix)
        if not new_data:
            # An empty YAML document loads as None; nothing to merge.
            continue

        # NOTE(review): this is a shallow, top-level merge. Files sharing a
        # top-level key replace each other's content instead of being
        # combined -- confirm that self.dict_deep_update() is not needed.
        result_dict |= new_data

    return result_dict

def dict_deep_update(self, target, updates):
Expand Down Expand Up @@ -185,3 +205,4 @@ def stripValues(self, data):
return data.strip()
else:
return data

26 changes: 26 additions & 0 deletions dwca_generator/dwca_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,29 @@ def appendFileAsZipEntry(self, zip_entry_name, file_path):
finally:
if ziparchive:
ziparchive.close()


def config_with_suffix(config, suffix):
"""
Replace values with values of prefixed key
Finds keys with given prefix on any level and replaces same key without prefix on same
level.
"""
if not suffix:
return config
match config:
case [*element] if all(isinstance(element, str) for element in config):
return config
case list():
return [config_with_suffix(element, suffix) for element in config]
case dict():
for suffix_key in [key for key in config.keys() if key.endswith(suffix)]:
key = suffix_key.replace(suffix, "")
config[key] = config.pop(suffix_key)
return {
key: config_with_suffix(value, suffix)
for key, value in config.items()
}
case _:
return config
86 changes: 86 additions & 0 deletions tests/test_swca_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import pytest

from dwca_generator.dwca_utils import config_with_suffix


@pytest.mark.parametrize(
    "given_config, given_suffix, expected_config",
    (
        # Empty containers, with and without a suffix.
        ({}, None, {}),
        ({}, "Something", {}),
        ([], None, []),
        ([], "Something", []),
        # No suffix given: config is returned untouched.
        ({"key": "value"}, None, {"key": "value"}),
        # A suffixed key is renamed to its plain name ...
        ({"key_SUFFIX": 42}, "_SUFFIX", {"key": 42}),
        # ... and overrides an existing plain key.
        (
            {"key": "original_value", "key_SUFFIX": "suffix_value"},
            "_SUFFIX",
            {"key": "suffix_value"},
        ),
        # Suffixed keys are found on nested dict levels.
        (
            {"nested": {"dict": {"key_SUFFIX": "deep_value"}}},
            "_SUFFIX",
            {"nested": {"dict": {"key": "deep_value"}}},
        ),
        # Lists of plain strings pass through; nested lists are traversed.
        (["a", "b", "c", "d"], "_SUFFIX", ["a", "b", "c", "d"]),
        ([[{"key_SUFFIX": "suffix_value"}]], "_SUFFIX", [[{"key": "suffix_value"}]]),
        # A promoted value is itself processed, so overrides inside the
        # overriding branch are applied too.
        (
            [
                {
                    "complex": [
                        {
                            "config": [
                                {
                                    "with": {
                                        "many_levels": [
                                            {"key": "value_A"},
                                            {
                                                "key1": "value_A",
                                                "key2": "initial_value_A",
                                                "key2_SUFFIX": "override_value_B",
                                            },
                                        ],
                                        "many_levels_SUFFIX": [
                                            {"key": "value_B"},
                                            {
                                                "key1": "value_B",
                                                "key2": "initial_value_B",
                                                "key2_SUFFIX": "override_value_B",
                                            },
                                        ],
                                    }
                                }
                            ]
                        }
                    ]
                }
            ],
            "_SUFFIX",
            [
                {
                    "complex": [
                        {
                            "config": [
                                {
                                    "with": {
                                        "many_levels": [
                                            {"key": "value_B"},
                                            {
                                                "key1": "value_B",
                                                "key2": "override_value_B",
                                            },
                                        ]
                                    }
                                }
                            ]
                        }
                    ]
                }
            ],
        ),
    ),
)
def test_config_with_suffix(given_config, given_suffix, expected_config):
    """config_with_suffix promotes suffixed keys on every nesting level."""
    filtered_config = config_with_suffix(given_config, given_suffix)

    assert filtered_config == expected_config

0 comments on commit 66413cc

Please sign in to comment.