diff --git a/docs/src/further_topics/metadata.rst b/docs/src/further_topics/metadata.rst index bba879a707..10efcdf7fe 100644 --- a/docs/src/further_topics/metadata.rst +++ b/docs/src/further_topics/metadata.rst @@ -736,7 +736,7 @@ Let's reinforce this behaviour, but this time by combining metadata where the >>> metadata != cube.metadata True >>> metadata.combine(cube.metadata).attributes - {'Model scenario': 'A1B'} + CubeAttrsDict(globals={}, locals={'Model scenario': 'A1B'}) The combined result for the ``attributes`` member only contains those **common keys** with **common values**. diff --git a/lib/iris/common/_split_attribute_dicts.py b/lib/iris/common/_split_attribute_dicts.py new file mode 100644 index 0000000000..41ce321a8f --- /dev/null +++ b/lib/iris/common/_split_attribute_dicts.py @@ -0,0 +1,125 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Dictionary operations for dealing with the CubeAttrsDict "split"-style attribute +dictionaries. + +The idea here is to convert a split-dictionary into a "plain" one for calculations, +whose keys are all pairs of the form ('global', name) or ('local', name). +And to convert back again after the operation, if the result is a dictionary. + +For "strict" operations this clearly does all that is needed. For lenient ones, +we _might_ want for local+global attributes of the same name to interact. +However, on careful consideration, it seems that this is not actually desirable for +any of the common-metadata operations. +So, we simply treat "global" and "local" attributes of the same name as entirely +independent. Which happily is also the easiest to code, and to explain. +""" + +from collections.abc import Mapping, Sequence +from functools import wraps + + +def _convert_splitattrs_to_pairedkeys_dict(dic): + """ + Convert a split-attributes dictionary to a "normal" dict.
+ + Transform a :class:`~iris.cube.CubeAttrsDict` "split" attributes dictionary + into a 'normal' :class:`dict`, with paired keys of the form ('global', name) or + ('local', name). + """ + + def _global_then_local_items(dic): + # Routine to produce global, then local 'items' in order, and with all keys + # "labelled" as local or global type, to ensure they are all unique. + for key, value in dic.globals.items(): + yield ("global", key), value + for key, value in dic.locals.items(): + yield ("local", key), value + + return dict(_global_then_local_items(dic)) + + +def _convert_pairedkeys_dict_to_splitattrs(dic): + """ + Convert an input with global/local paired keys back into a split-attrs dict. + + For now, this is always and only a :class:`iris.cube.CubeAttrsDict`. + """ + from iris.cube import CubeAttrsDict + + result = CubeAttrsDict() + for key, value in dic.items(): + keytype, keyname = key + if keytype == "global": + result.globals[keyname] = value + else: + assert keytype == "local" + result.locals[keyname] = value + return result + + +def adjust_for_split_attribute_dictionaries(operation): + """ + Decorator to make a function of attribute-dictionaries work with split attributes. + + The wrapped function of attribute-dictionaries is currently always one of "equals", + "combine" or "difference", with signatures like : + equals(left: dict, right: dict) -> bool + combine(left: dict, right: dict) -> dict + difference(left: dict, right: dict) -> None | (dict, dict) + + The results of the wrapped operation are either : + * for "equals" (or "__eq__") : a boolean + * for "combine" : a (converted) attributes-dictionary + * for "difference" : either None or a "pair", where a pair contains two + dictionaries + + Before calling the wrapped operation, its inputs (left, right) are modified by + converting any "split" dictionaries to a form where the keys are pairs + of the form ("global", name) or ("local", name).
+ + After calling the wrapped operation, for "combine" or "difference", the result can + contain a dictionary or dictionaries. These are then transformed back from the + 'converted' form to split-attribute dictionaries, before returning. + + "Split" dictionaries are all of class :class:`~iris.cube.CubeAttrsDict`, since + the only usage of 'split' attribute dictionaries is in Cubes (i.e. they are not + used for cube components). + """ + + @wraps(operation) + def _inner_function(*args, **kwargs): + from iris.cube import CubeAttrsDict + + # First make all inputs into CubeAttrsDict, if not already. + args = [ + arg if isinstance(arg, CubeAttrsDict) else CubeAttrsDict(arg) + for arg in args + ] + # Convert all inputs into 'pairedkeys' type dicts + args = [_convert_splitattrs_to_pairedkeys_dict(arg) for arg in args] + + result = operation(*args, **kwargs) + + # Convert known specific cases of 'pairedkeys' dicts in the result, and convert + # those back into split-attribute dictionaries. + if isinstance(result, Mapping): + # Fix a result which is a single dictionary -- for "combine" + result = _convert_pairedkeys_dict_to_splitattrs(result) + elif isinstance(result, Sequence) and len(result) == 2: + # Fix a result which is a pair of dictionaries -- for "difference" + left, right = result + left, right = ( + _convert_pairedkeys_dict_to_splitattrs(left), + _convert_pairedkeys_dict_to_splitattrs(right), + ) + result = result.__class__([left, right]) + # ELSE: leave other types of result unchanged. E.G. 
None, bool + + return result + + return _inner_function diff --git a/lib/iris/common/metadata.py b/lib/iris/common/metadata.py index 136e7a4a1d..131029ccee 100644 --- a/lib/iris/common/metadata.py +++ b/lib/iris/common/metadata.py @@ -21,6 +21,7 @@ from xxhash import xxh64_hexdigest from ..config import get_logger +from ._split_attribute_dicts import adjust_for_split_attribute_dictionaries from .lenient import _LENIENT from .lenient import _lenient_service as lenient_service from .lenient import _qualname as qualname @@ -1255,6 +1256,46 @@ def _check(item): return result + # + # Override each of the attribute-dict operations in BaseMetadata, to enable + # them to deal with split-attribute dictionaries correctly. + # There are 6 of these, for (equals/combine/difference) * (lenient/strict). + # Each is overridden with a *wrapped* version of the parent method, using the + # "@adjust_for_split_attribute_dictionaries" decorator, which converts any + # split-attribute dictionaries in the inputs to ordinary dicts, and likewise + # re-converts any dictionaries in the return value. 
+ # + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _combine_lenient_attributes(left, right): + return BaseMetadata._combine_lenient_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _combine_strict_attributes(left, right): + return BaseMetadata._combine_strict_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _compare_lenient_attributes(left, right): + return BaseMetadata._compare_lenient_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _compare_strict_attributes(left, right): + return BaseMetadata._compare_strict_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _difference_lenient_attributes(left, right): + return BaseMetadata._difference_lenient_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _difference_strict_attributes(left, right): + return BaseMetadata._difference_strict_attributes(left, right) + class DimCoordMetadata(CoordMetadata): """ diff --git a/lib/iris/tests/integration/test_netcdf__loadsaveattrs.py b/lib/iris/tests/integration/test_netcdf__loadsaveattrs.py index 9bd996312c..2b1c5db93d 100644 --- a/lib/iris/tests/integration/test_netcdf__loadsaveattrs.py +++ b/lib/iris/tests/integration/test_netcdf__loadsaveattrs.py @@ -565,7 +565,8 @@ def encode_matrix_result(results: List[List[str]]) -> List[str]: if not isinstance(results[0], list): results = [results] assert all( - all(val is None or len(val) == 1 for val in vals) for vals in results + all(val is None or isinstance(val, str) for val in vals) + for vals in results ) # Translate "None" values to "-" diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index ac47735393..7e1f64262e 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ 
b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -20,6 +20,7 @@ from iris.common.lenient import _LENIENT, _qualname from iris.common.metadata import BaseMetadata, CubeMetadata +from iris.cube import CubeAttrsDict def _make_metadata( @@ -95,15 +96,355 @@ def test_bases(self): @pytest.fixture(params=CubeMetadata._fields) def fieldname(request): + """Parametrize testing over all CubeMetadata field names.""" return request.param @pytest.fixture(params=["strict", "lenient"]) def op_leniency(request): + """Parametrize testing over strict or lenient operation.""" return request.param -class Test___eq__: +@pytest.fixture(params=["primaryAA", "primaryAX", "primaryAB"]) +def primary_values(request): + """ + Parametrize over the possible non-trivial pairs of operation values. + + The parameters all provide two attribute values which are the left- and right-hand + arguments to the tested operation. The attribute values are single characters from + the end of the parameter name -- except that "X" denotes a "missing" attribute. + + The possible cases are: + + * one side has a value and the other is missing + * left and right have the same non-missing value + * left and right have different non-missing values + """ + return request.param + + +@pytest.fixture(params=[False, True], ids=["primaryLocal", "primaryGlobal"]) +def primary_is_global_not_local(request): + """Parametrize split-attribute testing over "global" or "local" attribute types.""" + return request.param + + +@pytest.fixture(params=[False, True], ids=["leftrightL2R", "leftrightR2L"]) +def order_reversed(request): + """Parametrize split-attribute testing over "left OP right" or "right OP left".""" + return request.param + + +# Define the expected results for split-attribute testing. +# This dictionary records the expected results for the various possible arrangements of +# values of a single attribute in the "left" and "right" inputs of a CubeMetadata +# operation. 
+# The possible operations are "equal", "combine" or "difference", and may all be +# performed "strict" or "lenient". +# N.B. the *same* results should also apply when left+right are swapped, with a suitable +# adjustment to the result value. Likewise, results should be the same for either +# global- or local-style attributes. +_ALL_RESULTS = { + "equal": { + "primaryAA": {"lenient": True, "strict": True}, + "primaryAX": {"lenient": True, "strict": False}, + "primaryAB": {"lenient": False, "strict": False}, + }, + "combine": { + "primaryAA": {"lenient": "A", "strict": "A"}, + "primaryAX": {"lenient": "A", "strict": None}, + "primaryAB": {"lenient": None, "strict": None}, + }, + "difference": { + "primaryAA": {"lenient": None, "strict": None}, + "primaryAX": {"lenient": None, "strict": ("A", None)}, + "primaryAB": {"lenient": ("A", "B"), "strict": ("A", "B")}, + }, +} +# A fixed attribute name used for all the split-attribute testing. +_TEST_ATTRNAME = "_test_attr_" + + +def extract_attribute_value(split_dict, extract_global): + """ + Extract a test-attribute value from a split-attribute dictionary. + + Parameters + ---------- + split_dict : CubeAttrsDict + a split dictionary from an operation result + extract_global : bool + whether to extract values of the global, or local, `_TEST_ATTRNAME` attribute + + Returns + ------- + str | None + """ + if extract_global: + result = split_dict.globals.get(_TEST_ATTRNAME, None) + else: + result = split_dict.locals.get(_TEST_ATTRNAME, None) + return result + + +def extract_result_value(input, extract_global): + """ + Extract the value(s) of the main test attribute from an operation result. + + Parameters + ---------- + input : None | bool | CubeMetadata + an operation result : the structure varies for the three different operations. + extract_global : bool + whether to return values of a global, or local, `_TEST_ATTRNAME` attribute.
+ + Returns + ------- + None | bool | str | tuple[None | str] + result value(s) + """ + if not isinstance(input, CubeMetadata): + # Result is either boolean (for "equals") or a None (for "difference"). + result = input + else: + # Result is a CubeMetadata. Get the value(s) of the required attribute. + result = input.attributes + + if isinstance(result, CubeAttrsDict): + result = extract_attribute_value(result, extract_global) + else: + # For "difference", input.attributes is a *pair* of dictionaries. + assert isinstance(result, tuple) + result = tuple( + [ + extract_attribute_value(dic, extract_global) + for dic in result + ] + ) + if result == (None, None): + # This value occurs when the desired attribute is *missing* from a + # difference result, but other (secondary) attributes were *different*. + # We want only differences of the *target* attribute, so convert these + # to a plain 'no difference', for expected-result testing purposes. + result = None + + return result + + +def make_attrsdict(value): + """ + Return a dictionary containing a test attribute with the given value. + + If the value is "X", the attribute is absent (result is empty dict). + """ + if value == "X": + # Translate an "X" input as "missing". + result = {} + else: + result = {_TEST_ATTRNAME: value} + return result + + +def check_splitattrs_testcase( + operation_name: str, + check_is_lenient: bool, + primary_inputs: str = "AA", # two character values + secondary_inputs: str = "XX", # two character values + check_global_not_local: bool = True, + check_reversed: bool = False, +): + """ + Test a metadata operation with split-attributes against known expected results. + + Parameters + ---------- + operation_name : str + One of "equal", "combine" or "difference". + check_is_lenient : bool + Whether the tested operation is performed 'lenient' or 'strict'. + primary_inputs : str + A pair of characters defining left + right attribute values for the operands of + the operation.
+ secondary_inputs : str + A further pair of values for an attribute of the same name but "other" type + ( i.e. global/local when the main test is local/global ). + check_global_not_local : bool + If `True` then the primary operands, and the tested result values, are *global* + attributes, and the secondary ones are local. + Otherwise, the other way around. + check_reversed : bool + If True, the left and right operands are exchanged, and the expected value + modified accordingly. + + Notes + ----- + The expected result of an operation is mostly defined by : the operation applied; + the main "primary" inputs; and the lenient/strict mode. + + In the case of the "equals" operation, however, the expected result is simply + set to `False` if the secondary inputs do not match. + + Calling with different values for the keywords aims to show that the main operation + has the expected value, from _ALL_RESULTS, the ***same in essentially all cases*** + ( though modified in specific ways for some factors ). + + This regularity also demonstrates the required independence over the other + test-factors, i.e. global/local attribute type, and right-left order. + """ + # Just for comfort, check that inputs are all one of a few single characters. + assert all( + (item in list("ABCDX")) for item in (primary_inputs + secondary_inputs) + ) + # Interpret "primary" and "secondary" inputs as "global" and "local" attributes. + if check_global_not_local: + global_values, local_values = primary_inputs, secondary_inputs + else: + local_values, global_values = primary_inputs, secondary_inputs + + # Form 2 inputs to the operation : Make left+right split-attribute input + # dictionaries, with both the primary and secondary attribute value settings.
+ input_dicts = [ + CubeAttrsDict( + globals=make_attrsdict(global_value), + locals=make_attrsdict(local_value), + ) + for global_value, local_value in zip(global_values, local_values) + ] + # Make left+right CubeMetadata with those attributes, other fields all blank. + input_l, input_r = [ + CubeMetadata( + **{ + field: attrs if field == "attributes" else None + for field in CubeMetadata._fields + } + ) + for attrs in input_dicts + ] + + if check_reversed: + # Swap the inputs to perform a 'reversed' calculation. + input_l, input_r = input_r, input_l + + # Run the actual operation + result = getattr(input_l, operation_name)( + input_r, lenient=check_is_lenient + ) + + if operation_name == "difference" and check_reversed: + # Adjust the result of a "reversed" operation to the 'normal' way round. + # ( N.B. only "difference" results are affected by reversal. ) + if isinstance(result, CubeMetadata): + result = result._replace(attributes=result.attributes[::-1]) + + # Extract, from the operation result, the value to be tested against "expected". + result = extract_result_value(result, check_global_not_local) + + # Get the *expected* result for this operation. + which = "lenient" if check_is_lenient else "strict" + primary_key = "primary" + primary_inputs + expected = _ALL_RESULTS[operation_name][primary_key][which] + if operation_name == "equal" and expected: + # Account for the equality cases made `False` by mismatched secondary values. + left, right = secondary_inputs + secondaries_same = left == right or ( + check_is_lenient and "X" in (left, right) + ) + if not secondaries_same: + expected = False + + # Check that actual extracted operation result matches the "expected" one. + assert result == expected + + +class MixinSplitattrsMatrixTests: + """ + Define split-attributes tests to perform on all the metadata operations. + + This is inherited by the testclass for each operation : + i.e. 
Test___eq__, Test_combine and Test_difference + """ + + # Define the operation name : set in each inheritor + operation_name = None + + def test_splitattrs_cases( + self, + op_leniency, + primary_values, + primary_is_global_not_local, + order_reversed, + ): + """ + Check the basic operation against the expected result from _ALL_RESULTS. + + Parametrisation checks this for all combinations of various factors : + + * possible arrangements of the primary values + * strict and lenient + * global- and local-type attributes + * left-to-right or right-to-left operation order. + """ + primary_inputs = primary_values[-2:] + check_is_lenient = {"strict": False, "lenient": True}[op_leniency] + check_splitattrs_testcase( + operation_name=self.operation_name, + check_is_lenient=check_is_lenient, + primary_inputs=primary_inputs, + secondary_inputs="XX", + check_global_not_local=primary_is_global_not_local, + check_reversed=order_reversed, + ) + + @pytest.mark.parametrize( + "secondary_values", + [ + "secondaryXX", + "secondaryCX", + "secondaryXC", + "secondaryCC", + "secondaryCD", + ] + # NOTE: test CX as well as XC, since primary choices has "AX" but not "XA". + ) + def test_splitattrs_global_local_independence( + self, + op_leniency, + primary_values, + secondary_values, + ): + """ + Check that results are (mostly) independent of the "other" type attributes. + + The operation on attributes of the 'primary' type (global/local) should be + basically unaffected by those of the 'secondary' type (--> local/global). + + This is not really true for equality, so we adjust those results to compensate. + See :func:`check_splitattrs_testcase` for explanations. + + Notes + ----- + We provide this *separate* test for global/local attribute independence, + parametrized over selected relevant arrangements of the 'secondary' values. + We *don't* test with reversed order or "local" primary inputs, because matrix + testing over *all* relevant factors produces too many possible combinations. 
+ """ + primary_inputs = primary_values[-2:] + secondary_inputs = secondary_values[-2:] + check_is_lenient = {"strict": False, "lenient": True}[op_leniency] + check_splitattrs_testcase( + operation_name=self.operation_name, + check_is_lenient=check_is_lenient, + primary_inputs=primary_inputs, + secondary_inputs=secondary_inputs, + check_global_not_local=True, + check_reversed=False, + ) + + +class Test___eq__(MixinSplitattrsMatrixTests): + operation_name = "equal" + @pytest.fixture(autouse=True) def setup(self): self.lvalues = dict( @@ -279,7 +620,9 @@ def test__ignore_attributes_cell_methods(self): self.assertFalse(result) -class Test_combine: +class Test_combine(MixinSplitattrsMatrixTests): + operation_name = "combine" + @pytest.fixture(autouse=True) def setup(self): self.lvalues = dict( @@ -457,7 +800,9 @@ def test_op_different__attribute_value(self, op_leniency): assert rmetadata.combine(lmetadata)._asdict() == expected -class Test_difference: +class Test_difference(MixinSplitattrsMatrixTests): + operation_name = "difference" + @pytest.fixture(autouse=True) def setup(self): self.lvalues = dict(