From d168a89b33184e0ab2879a292e87775a037bccc5 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sun, 13 Aug 2023 17:37:53 +0100 Subject: [PATCH 01/11] Define common-metadata operartions on split attribute dictionaries. --- lib/iris/common/metadata.py | 134 ++++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 5 deletions(-) diff --git a/lib/iris/common/metadata.py b/lib/iris/common/metadata.py index 136e7a4a1d..b533dec17c 100644 --- a/lib/iris/common/metadata.py +++ b/lib/iris/common/metadata.py @@ -140,6 +140,97 @@ def __new__(mcs, name, bases, namespace): return super().__new__(mcs, name, bases, namespace) +# +# "Extended" dictionary access, for dict-like operations which work with regular dicts, +# but in specific extended ways for 'CubeAttrsDict' style split dictionaries +# +# The idea here is to convert a split-dictionary into a "plain" one for calculations, +# whose keys are all pairs of the form ('global', ) or ('local', ). +# And to convert back again after the operation, if the result is a dictionary. +# For "strict" operations this probably does all that is needed. For lenient ones, +# it's not so clear whether local+global keys with the same attribute name "should", +# in some cases, affect one another in some ways +# +def xd_is_split(dic): + """Detect whether a dictionary is a "split-attribute" type.""" + return hasattr(dic, "globals") and hasattr(dic, "locals") + + +def _global_local_items(dic): + for key, value in dic.globals.items(): + yield ("global", key), value + for key, value in dic.locals.items(): + yield ("local", key), value + + +def xd_to_normal(dic): + """ + Convert the input to a 'normal' dict with paired keys, if it is split-attrs type + """ + if xd_is_split(dic): + result = dict(_global_local_items(dic)) + else: + result = dic + return result + + +def xd_from_normal(dic): + """ + Convert the input to a split-attrs dict, if it has global//local paired keys. + """ + result = dic + is_first_key = True + for key, value in dic.items(): + if is_first_key: + if ( + isinstance(key, tuple) + and len(key) == 2 + and key[0] in ("global", "local") + ): + # Input passes a "duck type" test for being a split dictionary. + # For now at least, this can *only* be a CubeAttrsDict. + from iris.cube import CubeAttrsDict + + # produce a split result + result = CubeAttrsDict() + # expect all keys to be 'split type' + is_first_key = False + else: + # Input is a "normal" dict : return it unchanged + break + + # From here on, we are converting items to a 'split' dict. + # Assign the items with paired keys into global+local parts. + keytype, keyname = key + if keytype == "global": + result.globals[keyname] = value + else: + assert keytype == "local" + result.locals[keyname] = value + + return result + + +def xd_normalise_input_pair(left, right): + """Work out whether inputs are "split" type, and convert if so.""" + is_split = xd_is_split(left) + if is_split: + assert xd_is_split(right) + left = xd_to_normal(left) + right = xd_to_normal(right) + else: + assert not xd_is_split(right) + + return is_split, left, right + + +def xd_reconvert_output(is_split, result): + """Re-form a 'split dict' result from a dict with paired keys, if needed.""" + if is_split: + result = xd_from_normal(result) + return result + + class BaseMetadata(metaclass=_NamedTupleMeta): """ Container for common metadata. @@ -370,9 +461,10 @@ def func(field): @staticmethod def _combine_lenient_attributes(left, right): """Leniently combine the dictionary members together.""" - # Copy the dictionaries. + # Copy the dictionaries, convert from split form if required left = deepcopy(left) right = deepcopy(right) + is_split, left, right = xd_normalise_input_pair(left, right) # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. @@ -393,15 +485,17 @@ def _combine_lenient_attributes(left, right): result = {k: left[k] for k, _ in common} result.update({k: left[k] for k in dsleft.keys()}) result.update({k: right[k] for k in dsright.keys()}) - + # Convert result back to split-attrs dict, if original inputs were + result = xd_reconvert_output(is_split, result) return result @staticmethod def _combine_strict_attributes(left, right): """Perform strict combination of the dictionary members.""" - # Copy the dictionaries. + # Copy the dictionaries, convert from split form if required left = deepcopy(left) right = deepcopy(right) + is_split, left, right = xd_normalise_input_pair(left, right) # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. @@ -411,7 +505,8 @@ def _combine_strict_attributes(left, right): common = sleft & sright # Now bring the result together. result = {k: left[k] for k, _ in common} - + # Convert result back to split-attrs dict, if original inputs were + result = xd_reconvert_output(is_split, result) return result def _compare_lenient(self, other): @@ -464,6 +559,12 @@ def _compare_lenient_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. + + # Convert from split if required --> i.e. all distinct keys (global+local) + _, left, right = xd_normalise_input_pair(left, right) + # TODO: ?maybe? global + local versions of an attr SHOULD conflict + # -- this way treats them as entirely separate entries, for now. + sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. @@ -472,7 +573,6 @@ def _compare_lenient_attributes(left, right): dsright = dict(sright - sleft) # Intersection of common item keys with different values. keys = set(dsleft.keys()) & set(dsright.keys()) - return not bool(keys) @staticmethod @@ -481,6 +581,10 @@ def _compare_strict_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. + + # Convert from split if required --> i.e. all distinct keys (global+local) + _, left, right = xd_normalise_input_pair(left, right) + sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} @@ -550,6 +654,12 @@ def _difference_lenient_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. + + # Convert from split if required --> i.e. all distinct keys (global+local) + is_split, left, right = xd_normalise_input_pair(left, right) + # TODO: ?maybe? consider if we flag different global+local values of a + # given attr (name). BUT not clear how we would report that, anyway. + sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. @@ -568,6 +678,11 @@ def _difference_lenient_attributes(left, right): # Replace hash-rvalue with original rvalue. dsleft = {k: left[k] for k in dsleft.keys()} dsright = {k: right[k] for k in dsright.keys()} + if is_split: + # Convert results back to split-attrs dicts, if originals were. + dsleft, dsright = ( + xd_from_normal(dic) for dic in (dsleft, dsright) + ) result = (dsleft, dsright) return result @@ -578,6 +693,10 @@ def _difference_strict_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. + + # Convert from split if required --> i.e. all distinct keys (global+local) + is_split, left, right = xd_normalise_input_pair(left, right) + sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. @@ -591,6 +710,11 @@ def _difference_strict_attributes(left, right): # Replace hash-rvalue with original rvalue. dsleft = {k: left[k] for k in dsleft.keys()} dsright = {k: right[k] for k in dsright.keys()} + if is_split: + # Convert results back to split-attrs dicts, if originals were. + dsleft, dsright = ( + xd_from_normal(dic) for dic in (dsleft, dsright) + ) result = (dsleft, dsright) return result From c6c3d719f3fc3242baa50a91540e0d73505a0de6 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 23 Aug 2023 10:37:14 +0100 Subject: [PATCH 02/11] Tests for split-attributes handling in CubeMetadata operations. --- lib/iris/common/metadata.py | 28 +- .../integration/test_netcdf__loadsaveattrs.py | 3 +- .../unit/common/metadata/test_CubeMetadata.py | 309 ++++++++++++++++++ 3 files changed, 315 insertions(+), 25 deletions(-) diff --git a/lib/iris/common/metadata.py b/lib/iris/common/metadata.py index b533dec17c..7c05684b8d 100644 --- a/lib/iris/common/metadata.py +++ b/lib/iris/common/metadata.py @@ -176,38 +176,18 @@ def xd_to_normal(dic): def xd_from_normal(dic): """ - Convert the input to a split-attrs dict, if it has global//local paired keys. + Convert an input with global//local paired keys back into a split-attrs dict. """ - result = dic - is_first_key = True - for key, value in dic.items(): - if is_first_key: - if ( - isinstance(key, tuple) - and len(key) == 2 - and key[0] in ("global", "local") - ): - # Input passes a "duck type" test for being a split dictionary. - # For now at least, this can *only* be a CubeAttrsDict. - from iris.cube import CubeAttrsDict - - # produce a split result - result = CubeAttrsDict() - # expect all keys to be 'split type' - is_first_key = False - else: - # Input is a "normal" dict : return it unchanged - break + from iris.cube import CubeAttrsDict - # From here on, we are converting items to a 'split' dict. - # Assign the items with paired keys into global+local parts. + result = CubeAttrsDict() + for key, value in dic.items(): keytype, keyname = key if keytype == "global": result.globals[keyname] = value else: assert keytype == "local" result.locals[keyname] = value - return result diff --git a/lib/iris/tests/integration/test_netcdf__loadsaveattrs.py b/lib/iris/tests/integration/test_netcdf__loadsaveattrs.py index 9bd996312c..2b1c5db93d 100644 --- a/lib/iris/tests/integration/test_netcdf__loadsaveattrs.py +++ b/lib/iris/tests/integration/test_netcdf__loadsaveattrs.py @@ -565,7 +565,8 @@ def encode_matrix_result(results: List[List[str]]) -> List[str]: if not isinstance(results[0], list): results = [results] assert all( - all(val is None or len(val) == 1 for val in vals) for vals in results + all(val is None or isinstance(val, str) for val in vals) + for vals in results ) # Translate "None" values to "-" diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index ac47735393..75f2bf477e 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -20,6 +20,11 @@ from iris.common.lenient import _LENIENT, _qualname from iris.common.metadata import BaseMetadata, CubeMetadata +from iris.cube import CubeAttrsDict +from iris.tests.integration.test_netcdf__loadsaveattrs import ( + decode_matrix_input, + encode_matrix_result, +) def _make_metadata( @@ -103,6 +108,163 @@ def op_leniency(request): return request.param +_ATTRS_TESTCASE_INPUTS = { + "same": "GaLb:GaLb", + "extra_global": "GaL-:G-L-", + "extra_local": "G-La:G-L-", + "same_global_local": "GaL-:G-La", + "diff_global_local": "GaL-:G-Lb", + "diffglobal_nolocal": "GaL-:GbL-", + "diffglobal_samelocal": "GaLc:GbLc", + "difflocal_noglobal": "G-La:G-Lb", + "difflocal_sameglobal": "GaLc:GaLd", + "diff_local_and_global": "GaLc:GbLd", +} +_ATTRS_TESTCASE_NAMES = list(_ATTRS_TESTCASE_INPUTS) + + +def attrs_check( + check_testcase: str, check_lenient: bool, op: str, cases: dict +): + """ + Check the attributes handling of a metadata operation. + + Testcases are the ones already listed in _ATTRS_TESTCASE_INPUTS, where they are + coded strings, as used in iris.tests.integration.test_netcdf_loadsaveattrs. + + * construct the 2 inputs from _ATTRS_TESTCASE_INPUTS[check_testcase], + * then perform + result = op(*inputs, lenient=check_lenient). + * convert the result to a result-code string, again like test_netcdf_loadsaveattrs. + * assert that the (encoded) results match the expected + + The 'cases' args specifies the "expected" result-code answers for each testcase : + either two result-codes for 'strict' and 'lenient' cases, when those are different, + or a single result-code if strict and lenient results are the same. + + """ + # cases.keys() are the testcase names -- should match the master table + assert cases.keys() == _ATTRS_TESTCASE_INPUTS.keys() + # Each case is recorded as testcase: (, [*output-codes]) + # The 'input' is just for readability: it should match that in the master table. + assert all( + cases[key][0] == _ATTRS_TESTCASE_INPUTS[key] + for key in _ATTRS_TESTCASE_INPUTS + ) + # Perform the configured check, and check that the results are as expected. + testcase = cases[check_testcase] + input_spec, result_specs = testcase + input_spec = input_spec.split( + ":" + ) # make a list from the two sides of the ":" + assert len(input_spec) == 2 + # convert to a list of (global, *locals) value sets + input_values = decode_matrix_input(input_spec) + + # get the expected result, select strict/lenient if required + if len(result_specs) == 1: + expected_spec = result_specs[0] + else: + expected_spec = result_specs[1 if check_lenient else 0] + + # form 2 inputs to the operation + def attrsdict(value): + if value is None: + result = {} + else: + result = {"_testattr_": value} + return result + + input_attributes = ( + CubeAttrsDict( + globals=attrsdict(values[0]), locals=attrsdict(values[1]) + ) + for values in input_values + ) + input_l, input_r = [ + CubeMetadata( + **{ + field: attrs if field == "attributes" else None + for field in CubeMetadata._fields + } + ) + for attrs in input_attributes + ] + + # Run the actual operation + result = getattr(input_l, op)(input_r, lenient=check_lenient) + + # Convert the result to the form of the recorded "expected" output. + # This depends on the test operation... + assert op in ("combine", "equal", "difference") + if op == "combine": + # "combine" result is CubeMetadata + # convert global+local values to a result-code string + values = [ + result.attributes.globals.get("_testattr_", None), + result.attributes.locals.get("_testattr_", None), + ] + (result,) = encode_matrix_result( + values + ) # NB always a list of 1 spec (string) + + elif op == "difference": + # "difference" op result is a CubeMetadata, its values are difference-pairs. + if result is None: + # Use a unique string to indicate a null result + result = "-" + else: + # result is a CubeMetadata whose .attributes is a pair of CubeAttrsDict + assert isinstance(result, CubeMetadata) + assert isinstance(result.attributes, tuple) + assert len(result.attributes) == 2 + assert all( + isinstance(dic, CubeAttrsDict) for dic in result.attributes + ) + + # calculate value-pairs in each section, where present + global_local_valuepairs = [ + [ + val.globals.get("_testattr_", None) + for val in result.attributes + ], + [ + val.locals.get("_testattr_", None) + for val in result.attributes + ], + ] + # E.G. [[None, "a"], [None, None]], or [["a", "b"], [None, "c"]] + + # convert these difference-value-pairs to coded strings, which we will + # treat as "global and local values" for conversion into a spec string + # E.G. ["a", "b"] --> "ab"" + # E.G. [None, "a"] --> "-a" + # E.G. [None, None] --> "--" + def valrep_single(val): + return "-" if val is None else val + + def valrep_pair(val): + assert len(val) == 2 + return valrep_single(val[0]) + valrep_single(val[1]) + + global_local_valuecodes = [ + valrep_pair(val) for val in global_local_valuepairs + ] + + # Encode those "value-codes" as a result-code string + # E.G. converting + # (value-pairs) == [[None, "a"], [None, None]] + # --> (value-codes) ["-a", "--"] + # --> (result) "G-aL--" + (result,) = encode_matrix_result(global_local_valuecodes) + + else: + # "equal" op result is a boolean : needs no further conversion + assert op == "equal" + + assert result == expected_spec + + class Test___eq__: @pytest.fixture(autouse=True) def setup(self): @@ -247,6 +409,26 @@ def test_op_different__attribute_value(self, op_leniency): assert not lmetadata.__eq__(rmetadata) assert not rmetadata.__eq__(lmetadata) + @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) + def test_op__attributes_cases(self, op_leniency, testcase): + attrs_check( + check_testcase=testcase, + check_lenient=op_leniency == "lenient", + op="equal", + cases={ + "same": ("GaLb:GaLb", [True]), + "extra_global": ("GaL-:G-L-", [False, True]), + "extra_local": ("G-La:G-L-", [False, True]), + "same_global_local": ("GaL-:G-La", [False, True]), + "diff_global_local": ("GaL-:G-Lb", [False, True]), + "diffglobal_nolocal": ("GaL-:GbL-", [False]), + "diffglobal_samelocal": ("GaLc:GbLc", [False]), + "difflocal_noglobal": ("G-La:G-Lb", [False]), + "difflocal_sameglobal": ("GaLc:GaLd", [False]), + "diff_local_and_global": ("GaLc:GbLd", [False]), + }, + ) + class Test___lt__(tests.IrisTest): def setUp(self): @@ -456,6 +638,113 @@ def test_op_different__attribute_value(self, op_leniency): assert lmetadata.combine(rmetadata)._asdict() == expected assert rmetadata.combine(lmetadata)._asdict() == expected + def test_op_different__attribute_extra_global(self, op_leniency): + # One field has an extra attribute, both strict + lenient. + is_lenient = op_leniency == "lenient" + + self.lvalues["attributes"] = CubeAttrsDict( + globals={"_a_common_": mock.sentinel.dummy_a}, + locals={"_b_common_": mock.sentinel.dummy_b}, + ) + self.rvalues["attributes"] = self.lvalues["attributes"].copy() + self.rvalues["attributes"].globals["_extra_"] = mock.sentinel.testvalue + lmetadata = self.cls(**self.lvalues) + rmetadata = self.cls(**self.rvalues) + + if is_lenient: + # the extra attribute should appear in the result .. + expected = self.rvalues + else: + # .. it should not + expected = self.lvalues + + with mock.patch( + "iris.common.metadata._LENIENT", return_value=is_lenient + ): + # Check both l+r and r+l + assert lmetadata.combine(rmetadata)._asdict() == expected + assert rmetadata.combine(lmetadata)._asdict() == expected + + def test_op_different__attribute_extra_local(self, op_leniency): + # One field has an extra attribute, both strict + lenient. + is_lenient = op_leniency == "lenient" + + self.lvalues["attributes"] = CubeAttrsDict( + globals={"_a_common_": mock.sentinel.dummy_a}, + locals={"_b_common_": mock.sentinel.dummy_b}, + ) + self.rvalues["attributes"] = self.lvalues["attributes"].copy() + self.rvalues["attributes"].locals["_extra_"] = mock.sentinel.testvalue + lmetadata = self.cls(**self.lvalues) + rmetadata = self.cls(**self.rvalues) + + if is_lenient: + # the extra attribute should appear in the result .. + expected = self.rvalues + else: + # .. it should not + expected = self.lvalues + + with mock.patch( + "iris.common.metadata._LENIENT", return_value=is_lenient + ): + # Check both l+r and r+l + assert lmetadata.combine(rmetadata)._asdict() == expected + assert rmetadata.combine(lmetadata)._asdict() == expected + + def test_op_different__attribute_same_global_local(self, op_leniency): + # One field has an extra attribute, both strict + lenient. + is_lenient = op_leniency == "lenient" + + common_attrs = CubeAttrsDict( + globals={"_a_common_": mock.sentinel.dummy_a}, + locals={"_b_common_": mock.sentinel.dummy_b}, + ) + self.lvalues["attributes"] = deepcopy(common_attrs) + self.rvalues["attributes"] = deepcopy(common_attrs) + basis_metadata = self.cls(**deepcopy(self.lvalues)) + self.lvalues["attributes"].globals["_extra_"] = mock.sentinel.v1 + self.rvalues["attributes"].locals["_extra_"] = mock.sentinel.v2 + lmetadata = self.cls(**self.lvalues) + rmetadata = self.cls(**self.rvalues) + + expected = basis_metadata._asdict() + if is_lenient: + # BOTH extra attributes should appear in the result .. + expected["attributes"].globals.update( + self.lvalues["attributes"].globals + ) + expected["attributes"].locals.update( + self.rvalues["attributes"].locals + ) + + with mock.patch( + "iris.common.metadata._LENIENT", return_value=is_lenient + ): + # Check both l+r and r+l + assert lmetadata.combine(rmetadata)._asdict() == expected + assert rmetadata.combine(lmetadata)._asdict() == expected + + @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) + def test_op__attributes_cases(self, op_leniency, testcase): + attrs_check( + check_testcase=testcase, + check_lenient=op_leniency == "lenient", + op="combine", + cases={ + "same": ("GaLb:GaLb", ["GaLb"]), + "extra_global": ("GaL-:G-L-", ["G-L-", "GaL-"]), + "extra_local": ("G-La:G-L-", ["G-L-", "G-La"]), + "same_global_local": ("GaL-:G-La", ["G-L-", "GaLa"]), + "diff_global_local": ("GaL-:G-Lb", ["G-L-", "GaLb"]), + "diffglobal_nolocal": ("GaL-:GbL-", ["G-L-"]), + "diffglobal_samelocal": ("GaLc:GbLc", ["G-Lc"]), + "difflocal_noglobal": ("G-La:G-Lb", ["G-L-"]), + "difflocal_sameglobal": ("GaLc:GaLd", ["GaL-"]), + "diff_local_and_global": ("GaLc:GbLd", ["G-L-"]), + }, + ) + class Test_difference: @pytest.fixture(autouse=True) @@ -647,6 +936,26 @@ def test_op_different__attribute_value(self, op_leniency): assert lmetadata.difference(rmetadata)._asdict() == lexpected assert rmetadata.difference(lmetadata)._asdict() == rexpected + @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) + def test_op__attributes_cases(self, op_leniency, testcase): + attrs_check( + check_testcase=testcase, + check_lenient=op_leniency == "lenient", + op="difference", + cases={ + "same": ("GaLb:GaLb", ["-"]), + "extra_global": ("GaL-:G-L-", ["Ga-L--", "-"]), + "extra_local": ("G-La:G-L-", ["G--La-", "-"]), + "same_global_local": ("GaL-:G-La", ["Ga-L-a", "-"]), + "diff_global_local": ("GaL-:G-Lb", ["Ga-L-b", "-"]), + "diffglobal_nolocal": ("GaL-:GbL-", ["GabL--"]), + "diffglobal_samelocal": ("GaLc:GbLc", ["GabL--"]), + "difflocal_noglobal": ("G-La:G-Lb", ["G--Lab"]), + "difflocal_sameglobal": ("GaLc:GaLd", ["G--Lcd"]), + "diff_local_and_global": ("GaLc:GbLd", ["GabLcd"]), + }, + ) + class Test_equal(tests.IrisTest): def setUp(self): From 0d9a4e4479f4f2f13e8af92ddca16c4036cd50e4 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 23 Aug 2023 18:37:47 +0100 Subject: [PATCH 03/11] Small tidy and clarify. --- lib/iris/common/metadata.py | 35 ++++++++++--------- .../unit/common/metadata/test_CubeMetadata.py | 23 ++++++------ 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/lib/iris/common/metadata.py b/lib/iris/common/metadata.py index 7c05684b8d..8fcd7d051f 100644 --- a/lib/iris/common/metadata.py +++ b/lib/iris/common/metadata.py @@ -141,15 +141,19 @@ def __new__(mcs, name, bases, namespace): # -# "Extended" dictionary access, for dict-like operations which work with regular dicts, -# but in specific extended ways for 'CubeAttrsDict' style split dictionaries +# Dictionary operations for dealing with the CubeAttrsDict "split"-style attribute +# dictionaries. # # The idea here is to convert a split-dictionary into a "plain" one for calculations, -# whose keys are all pairs of the form ('global', ) or ('local', ). +# whose keys are all pairs of the form ('global', ) or ('local', ). # And to convert back again after the operation, if the result is a dictionary. -# For "strict" operations this probably does all that is needed. For lenient ones, -# it's not so clear whether local+global keys with the same attribute name "should", -# in some cases, affect one another in some ways +# +# For "strict" operations this clearly does all that is needed. For lenient ones, +# we _might_ want for local+global attributes of the same name to interact. +# However, on careful consideration, it seems that this is not actually desirable for +# any of the common-metadata operations. +# So, we simply treat "global" and "local" attributes of the same name as entirely +# independent. Which happily is also the easiest to code, and to explain. # def xd_is_split(dic): """Detect whether a dictionary is a "split-attribute" type.""" @@ -167,16 +171,14 @@ def xd_to_normal(dic): """ Convert the input to a 'normal' dict with paired keys, if it is split-attrs type """ - if xd_is_split(dic): - result = dict(_global_local_items(dic)) - else: - result = dic - return result + return dict(_global_local_items(dic)) def xd_from_normal(dic): """ - Convert an input with global//local paired keys back into a split-attrs dict. + Convert an input with global/local paired keys back into a split-attrs dict. + + For now, this is always+only a CubeAttrsDict. """ from iris.cube import CubeAttrsDict @@ -441,9 +443,10 @@ def func(field): @staticmethod def _combine_lenient_attributes(left, right): """Leniently combine the dictionary members together.""" - # Copy the dictionaries, convert from split form if required + # Copy the dictionaries. left = deepcopy(left) right = deepcopy(right) + # convert from split form if required is_split, left, right = xd_normalise_input_pair(left, right) # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is @@ -472,9 +475,10 @@ def _combine_lenient_attributes(left, right): @staticmethod def _combine_strict_attributes(left, right): """Perform strict combination of the dictionary members.""" - # Copy the dictionaries, convert from split form if required + # Copy the dictionaries. left = deepcopy(left) right = deepcopy(right) + # convert from split form if required is_split, left, right = xd_normalise_input_pair(left, right) # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is @@ -542,8 +546,6 @@ def _compare_lenient_attributes(left, right): # Convert from split if required --> i.e. all distinct keys (global+local) _, left, right = xd_normalise_input_pair(left, right) - # TODO: ?maybe? global + local versions of an attr SHOULD conflict - # -- this way treats them as entirely separate entries, for now. sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} @@ -553,6 +555,7 @@ def _compare_lenient_attributes(left, right): dsright = dict(sright - sleft) # Intersection of common item keys with different values. keys = set(dsleft.keys()) & set(dsright.keys()) + return not bool(keys) @staticmethod diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index 75f2bf477e..d61767e7e9 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -127,7 +127,7 @@ def attrs_check( check_testcase: str, check_lenient: bool, op: str, cases: dict ): """ - Check the attributes handling of a metadata operation. + Check the split-attributes handling of a metadata operation. Testcases are the ones already listed in _ATTRS_TESTCASE_INPUTS, where they are coded strings, as used in iris.tests.integration.test_netcdf_loadsaveattrs. @@ -135,29 +135,30 @@ def attrs_check( * construct the 2 inputs from _ATTRS_TESTCASE_INPUTS[check_testcase], * then perform result = op(*inputs, lenient=check_lenient). - * convert the result to a result-code string, again like test_netcdf_loadsaveattrs. + * (except for equality) convert the result to a "result-code string", + again like in test_netcdf_loadsaveattrs. * assert that the (encoded) results match the expected The 'cases' args specifies the "expected" result-code answers for each testcase : - either two result-codes for 'strict' and 'lenient' cases, when those are different, - or a single result-code if strict and lenient results are the same. + either two results for 'strict' and 'lenient' cases, when those are different, + or a single result if strict and lenient results are the same. """ # cases.keys() are the testcase names -- should match the master table assert cases.keys() == _ATTRS_TESTCASE_INPUTS.keys() # Each case is recorded as testcase: (, [*output-codes]) - # The 'input' is just for readability: it should match that in the master table. + # The "input"s are only for readability, and should match those in the master table. assert all( cases[key][0] == _ATTRS_TESTCASE_INPUTS[key] for key in _ATTRS_TESTCASE_INPUTS ) # Perform the configured check, and check that the results are as expected. - testcase = cases[check_testcase] - input_spec, result_specs = testcase + input_spec, result_specs = cases[check_testcase] input_spec = input_spec.split( ":" ) # make a list from the two sides of the ":" assert len(input_spec) == 2 + # convert to a list of (global, *locals) value sets input_values = decode_matrix_input(input_spec) @@ -195,7 +196,7 @@ def attrsdict(value): result = getattr(input_l, op)(input_r, lenient=check_lenient) # Convert the result to the form of the recorded "expected" output. - # This depends on the test operation... + # The expected-result format depends on the operation under test. assert op in ("combine", "equal", "difference") if op == "combine": # "combine" result is CubeMetadata @@ -204,9 +205,8 @@ def attrsdict(value): result.attributes.globals.get("_testattr_", None), result.attributes.locals.get("_testattr_", None), ] - (result,) = encode_matrix_result( - values - ) # NB always a list of 1 spec (string) + # N.B. encode_matrix_result returns a list of results (always 1 in this case). + (result,) = encode_matrix_result(values) elif op == "difference": # "difference" op result is a CubeMetadata, its values are difference-pairs. @@ -256,6 +256,7 @@ def valrep_pair(val): # (value-pairs) == [[None, "a"], [None, None]] # --> (value-codes) ["-a", "--"] # --> (result) "G-aL--" + # N.B. encode_matrix_result returns a list of results (1 in this case). (result,) = encode_matrix_result(global_local_valuecodes) else: From 9d17da4dc7caca0f910ffa9c7bd3763dfb5fc19c Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 24 Aug 2023 11:09:21 +0100 Subject: [PATCH 04/11] Common metadata ops support mixed split/unsplit attribute dicts. --- docs/src/further_topics/metadata.rst | 2 +- lib/iris/common/metadata.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docs/src/further_topics/metadata.rst b/docs/src/further_topics/metadata.rst index bba879a707..10efcdf7fe 100644 --- a/docs/src/further_topics/metadata.rst +++ b/docs/src/further_topics/metadata.rst @@ -736,7 +736,7 @@ Let's reinforce this behaviour, but this time by combining metadata where the >>> metadata != cube.metadata True >>> metadata.combine(cube.metadata).attributes - {'Model scenario': 'A1B'} + CubeAttrsDict(globals={}, locals={'Model scenario': 'A1B'}) The combined result for the ``attributes`` member only contains those **common keys** with **common values**. diff --git a/lib/iris/common/metadata.py b/lib/iris/common/metadata.py index 8fcd7d051f..abf940d655 100644 --- a/lib/iris/common/metadata.py +++ b/lib/iris/common/metadata.py @@ -195,13 +195,20 @@ def xd_from_normal(dic): def xd_normalise_input_pair(left, right): """Work out whether inputs are "split" type, and convert if so.""" - is_split = xd_is_split(left) + from iris.cube import CubeAttrsDict + + left_split, right_split = xd_is_split(left), xd_is_split(right) + is_split = left_split or right_split if is_split: - assert xd_is_split(right) + # Convert any "normal" dicts to split equivalents first + # - this divides contents into global+local according to default rules + if not left_split: + left = CubeAttrsDict(left) + if not right_split: + right = CubeAttrsDict(right) + # convert both to paired-key form for calculations left = xd_to_normal(left) right = xd_to_normal(right) - else: - assert not xd_is_split(right) return is_split, left, right From 28ebcbb4ffb922ef45a59cddfde425119d335ff9 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 10 Oct 2023 15:18:05 +0100 Subject: [PATCH 05/11] Clarify with better naming, comments, docstrings. --- lib/iris/common/metadata.py | 18 +-- .../unit/common/metadata/test_CubeMetadata.py | 106 +++++++++++------- 2 files changed, 76 insertions(+), 48 deletions(-) diff --git a/lib/iris/common/metadata.py b/lib/iris/common/metadata.py index abf940d655..1f72bcf1aa 100644 --- a/lib/iris/common/metadata.py +++ b/lib/iris/common/metadata.py @@ -160,18 +160,20 @@ def xd_is_split(dic): return hasattr(dic, "globals") and hasattr(dic, "locals") -def _global_local_items(dic): - for key, value in dic.globals.items(): - yield ("global", key), value - for key, value in dic.locals.items(): - yield ("local", key), value - - def xd_to_normal(dic): """ Convert the input to a 'normal' dict with paired keys, if it is split-attrs type """ - return dict(_global_local_items(dic)) + + def _global_then_local_items(dic): + # Routine to produce global, then local 'items' in order, and with all keys + # "labelled" as local or global type, to ensure they are all unique. + for key, value in dic.globals.items(): + yield ("global", key), value + for key, value in dic.locals.items(): + yield ("local", key), value + + return dict(_global_then_local_items(dic)) def xd_from_normal(dic): diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index d61767e7e9..41876571b8 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -108,6 +108,10 @@ def op_leniency(request): return request.param +# Global data defining the individual split attributes "testcases". +# Each testcase specifies two inputs, with different global+local attribute settings. +# The same cases are tested for 3 different metadata operations : 'combine', +# 'difference' and 'equal'. _ATTRS_TESTCASE_INPUTS = { "same": "GaLb:GaLb", "extra_global": "GaL-:G-L-", @@ -123,28 +127,39 @@ def op_leniency(request): _ATTRS_TESTCASE_NAMES = list(_ATTRS_TESTCASE_INPUTS) -def attrs_check( +def check_splitattrs_op( check_testcase: str, check_lenient: bool, op: str, cases: dict ): """ - Check the split-attributes handling of a metadata operation. - - Testcases are the ones already listed in _ATTRS_TESTCASE_INPUTS, where they are - coded strings, as used in iris.tests.integration.test_netcdf_loadsaveattrs. - - * construct the 2 inputs from _ATTRS_TESTCASE_INPUTS[check_testcase], - * then perform - result = op(*inputs, lenient=check_lenient). - * (except for equality) convert the result to a "result-code string", - again like in test_netcdf_loadsaveattrs. - * assert that the (encoded) results match the expected - - The 'cases' args specifies the "expected" result-code answers for each testcase : - either two results for 'strict' and 'lenient' cases, when those are different, - or a single result if strict and lenient results are the same. - + Test a common metadata operation, specifically the split-attributes handling. + + Parameters + ---------- + check_testcase : str + One of those listed in _ATTRS_TESTCASE_INPUTS. These keys are coded strings, + as used in `iris.tests.integration.test_netcdf_loadsaveattrs`. + check_lenient : bool + Whether the test operation is performed 'lenient' or 'strict'. + op : {'combine', 'difference', 'equal'} + The operation under test + cases : dict + The "expected" result-code values for each testcase. Values are either two + results for 'strict' and 'lenient' cases, when those are different, or a single + result if strict and lenient results are the same. + NOTE: this arg defines expected results for *all* testcases, even though each + call only tests a single testcase. This just makes parameterisation easier. + + Notes + ----- + Sequence of operation : + + 1. construct 2 inputs from _ATTRS_TESTCASE_INPUTS[check_testcase] + 2. perform ``result = op(*inputs, lenient=check_lenient)`` + 3. (except for equality) convert the result to a "result-code string", + again as in test_netcdf_loadsaveattrs. + 4 assert that the (encoded) results match the expected """ - # cases.keys() are the testcase names -- should match the master table + # cases.keys() are the testcase names -- these should always match the master table assert cases.keys() == _ATTRS_TESTCASE_INPUTS.keys() # Each case is recorded as testcase: (, [*output-codes]) # The "input"s are only for readability, and should match those in the master table. @@ -152,22 +167,16 @@ def attrs_check( cases[key][0] == _ATTRS_TESTCASE_INPUTS[key] for key in _ATTRS_TESTCASE_INPUTS ) - # Perform the configured check, and check that the results are as expected. + + # Fetch input test-values from the common dictionary. input_spec, result_specs = cases[check_testcase] input_spec = input_spec.split( ":" ) # make a list from the two sides of the ":" assert len(input_spec) == 2 - # convert to a list of (global, *locals) value sets input_values = decode_matrix_input(input_spec) - # get the expected result, select strict/lenient if required - if len(result_specs) == 1: - expected_spec = result_specs[0] - else: - expected_spec = result_specs[1 if check_lenient else 0] - # form 2 inputs to the operation def attrsdict(value): if value is None: @@ -195,11 +204,17 @@ def attrsdict(value): # Run the actual operation result = getattr(input_l, op)(input_r, lenient=check_lenient) - # Convert the result to the form of the recorded "expected" output. - # The expected-result format depends on the operation under test. - assert op in ("combine", "equal", "difference") + # Get the expected result, the strict/lenient one as required + if len(result_specs) == 1: + expected_spec = result_specs[0] + else: + expected_spec = result_specs[1 if check_lenient else 0] + + # Convert the operation result to the form of the recorded "expected" output. + # N.B. the expected-result format depends on the operation under test. + assert op in ("combine", "difference", "equal") if op == "combine": - # "combine" result is CubeMetadata + # "combine" results are CubeMetadata objects # convert global+local values to a result-code string values = [ result.attributes.globals.get("_testattr_", None), @@ -209,7 +224,7 @@ def attrsdict(value): (result,) = encode_matrix_result(values) elif op == "difference": - # "difference" op result is a CubeMetadata, its values are difference-pairs. + # "difference" op results are CubeMetadata : its values are difference-pairs. if result is None: # Use a unique string to indicate a null result result = "-" @@ -263,6 +278,7 @@ def valrep_pair(val): # "equal" op result is a boolean : needs no further conversion assert op == "equal" + # Check that the coded result matches the expectation. assert result == expected_spec @@ -411,8 +427,10 @@ def test_op_different__attribute_value(self, op_leniency): assert not rmetadata.__eq__(lmetadata) @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) - def test_op__attributes_cases(self, op_leniency, testcase): - attrs_check( + def test_op__splitattributes_cases(self, op_leniency, testcase): + # Check results for various global/local values of the same attribute. + # N.B. 'cases' dict specifies the expected results for each testcase. + check_splitattrs_op( check_testcase=testcase, check_lenient=op_leniency == "lenient", op="equal", @@ -640,7 +658,7 @@ def test_op_different__attribute_value(self, op_leniency): assert rmetadata.combine(lmetadata)._asdict() == expected def test_op_different__attribute_extra_global(self, op_leniency): - # One field has an extra attribute, both strict + lenient. + # One input has an additional attribute, specifically set as a *GLOBAL* one. is_lenient = op_leniency == "lenient" self.lvalues["attributes"] = CubeAttrsDict( @@ -667,7 +685,7 @@ def test_op_different__attribute_extra_global(self, op_leniency): assert rmetadata.combine(lmetadata)._asdict() == expected def test_op_different__attribute_extra_local(self, op_leniency): - # One field has an extra attribute, both strict + lenient. + # One input has an additional attribute, specifically set as a *LOCAL* one. is_lenient = op_leniency == "lenient" self.lvalues["attributes"] = CubeAttrsDict( @@ -694,7 +712,8 @@ def test_op_different__attribute_extra_local(self, op_leniency): assert rmetadata.combine(lmetadata)._asdict() == expected def test_op_different__attribute_same_global_local(self, op_leniency): - # One field has an extra attribute, both strict + lenient. + # One field has an extra specifically *LOCAL* attribute, and the other a + # corresponding *GLOBAL* one -- both with distinct values. is_lenient = op_leniency == "lenient" common_attrs = CubeAttrsDict( @@ -718,6 +737,9 @@ def test_op_different__attribute_same_global_local(self, op_leniency): expected["attributes"].locals.update( self.rvalues["attributes"].locals ) + else: + # strict operation : neither of the "extras" appears + pass with mock.patch( "iris.common.metadata._LENIENT", return_value=is_lenient @@ -727,8 +749,10 @@ def test_op_different__attribute_same_global_local(self, op_leniency): assert rmetadata.combine(lmetadata)._asdict() == expected @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) - def test_op__attributes_cases(self, op_leniency, testcase): - attrs_check( + def test_op__splitattributes_cases(self, op_leniency, testcase): + # Check results for various global/local values of the same attribute. + # N.B. 'cases' dict specifies the expected results for each testcase. + check_splitattrs_op( check_testcase=testcase, check_lenient=op_leniency == "lenient", op="combine", @@ -938,8 +962,10 @@ def test_op_different__attribute_value(self, op_leniency): assert rmetadata.difference(lmetadata)._asdict() == rexpected @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) - def test_op__attributes_cases(self, op_leniency, testcase): - attrs_check( + def test_op__splitattributes_cases(self, op_leniency, testcase): + # Check results for various global/local values of the same attribute. + # N.B. 'cases' dict specifies the expected results for each testcase. + check_splitattrs_op( check_testcase=testcase, check_lenient=op_leniency == "lenient", op="difference", From 8d61d9b42c208c7a81b8332f961bb9b3fff1a1b7 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 9 Nov 2023 16:22:47 +0000 Subject: [PATCH 06/11] Remove split-attrs handling to own sourcefile, and implement as a decorator. --- lib/iris/common/_split_attribute_dicts.py | 123 +++++++++++++ lib/iris/common/metadata.py | 161 +++++------------- .../unit/common/metadata/test_CubeMetadata.py | 3 + 3 files changed, 169 insertions(+), 118 deletions(-) create mode 100644 lib/iris/common/_split_attribute_dicts.py diff --git a/lib/iris/common/_split_attribute_dicts.py b/lib/iris/common/_split_attribute_dicts.py new file mode 100644 index 0000000000..8826ef833c --- /dev/null +++ b/lib/iris/common/_split_attribute_dicts.py @@ -0,0 +1,123 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Dictionary operations for dealing with the CubeAttrsDict "split"-style attribute +dictionaries. + +The idea here is to convert a split-dictionary into a "plain" one for calculations, +whose keys are all pairs of the form ('global', ) or ('local', ). +And to convert back again after the operation, if the result is a dictionary. + +For "strict" operations this clearly does all that is needed. For lenient ones, +we _might_ want for local+global attributes of the same name to interact. +However, on careful consideration, it seems that this is not actually desirable for +any of the common-metadata operations. +So, we simply treat "global" and "local" attributes of the same name as entirely +independent. Which happily is also the easiest to code, and to explain. +""" + +from collections.abc import Mapping, Sequence +from functools import wraps + + +def _convert_splitattrs_to_pairedkeys_dict(dic): + """ + Convert a split-attributes dictionary to "normal" dict. + + Transform a :class:`~iris.cube.CubeAttributesDict` "split" attributes dictionary + into a 'normal' :class:`dict`, with paired keys of the form ('global', name) or + ('local', name). + """ + + def _global_then_local_items(dic): + # Routine to produce global, then local 'items' in order, and with all keys + # "labelled" as local or global type, to ensure they are all unique. + for key, value in dic.globals.items(): + yield ("global", key), value + for key, value in dic.locals.items(): + yield ("local", key), value + + return dict(_global_then_local_items(dic)) + + +def _convert_pairedkeys_dict_to_splitattrs(dic): + """ + Convert an input with global/local paired keys back into a split-attrs dict. + + For now, this is always and only a :class:`iris.cube.CubeAttrsDict`. + """ + from iris.cube import CubeAttrsDict + + result = CubeAttrsDict() + for key, value in dic.items(): + keytype, keyname = key + if keytype == "global": + result.globals[keyname] = value + else: + assert keytype == "local" + result.locals[keyname] = value + return result + + +def adjust_for_split_attribute_dictionaries(operation): + """ + Decorator to make a function of attribute-dictionaries work with split attributes. + + The wrapped function of attribute-dictionaries is currently always one of "equals", + "combine" or "difference", with signatures like : + equals(left: dict, right: dict) -> bool + combine(left: dict, right: dict) -> dict + difference(left: dict, right: dict) -> None | (dict, dict) + + The results of the wrapped operation are either : + * for "equals" (or "__eq__") : a boolean + * for "combine" : a (converted) attributes-dictionary + * for "difference" : a list of (None or "pair"), where a pair contains two + dictionaries + + Before calling the wrapped operation, its inputs (left, right) are modified by + converting any "split" dictionaries to a form where the keys are pairs + of the form ("global", name) or ("local", name). + + After calling the wrapped operation, for "combine" or "difference", the result can + contain a dictionary or dictionaries. These are then transformed back from the + 'converted' form to split-attribute dictionaries, before returning. + + "Split" dictionaries are all of class :class:`~iris.cube.CubeAttrsDict`, since + the only usage of 'split' attribute dictionaries is in Cubes (i.e. they are not + used for cube components). + """ + + @wraps(operation) + def _inner_function(*args, **kwargs): + from iris.cube import CubeAttrsDict + + # First make all inputs into CubeAttrsDict, if not already. + args = [ + arg if isinstance(arg, CubeAttrsDict) else CubeAttrsDict(arg) + for arg in args + ] + # Convert all inputs into 'pairedkeys' type dicts + args = [_convert_splitattrs_to_pairedkeys_dict(arg) for arg in args] + + result = operation(*args, **kwargs) + + # Convert 'pairedkeys' dicts in the result back to split-attributes form. + if isinstance(result, Mapping): + # Fix a result which is a single dictionary -- for "combine" + result = _convert_pairedkeys_dict_to_splitattrs(result) + elif isinstance(result, Sequence) and len(result) == 2: + # Fix a result which is a pair of dictionaries -- for "difference" + left, right = result + left, right = ( + _convert_pairedkeys_dict_to_splitattrs(left), + _convert_pairedkeys_dict_to_splitattrs(right), + ) + result = result.__class__([left, right]) + + return result + + return _inner_function diff --git a/lib/iris/common/metadata.py b/lib/iris/common/metadata.py index 1f72bcf1aa..131029ccee 100644 --- a/lib/iris/common/metadata.py +++ b/lib/iris/common/metadata.py @@ -21,6 +21,7 @@ from xxhash import xxh64_hexdigest from ..config import get_logger +from ._split_attribute_dicts import adjust_for_split_attribute_dictionaries from .lenient import _LENIENT from .lenient import _lenient_service as lenient_service from .lenient import _qualname as qualname @@ -140,88 +141,6 @@ def __new__(mcs, name, bases, namespace): return super().__new__(mcs, name, bases, namespace) -# -# Dictionary operations for dealing with the CubeAttrsDict "split"-style attribute -# dictionaries. -# -# The idea here is to convert a split-dictionary into a "plain" one for calculations, -# whose keys are all pairs of the form ('global', ) or ('local', ). -# And to convert back again after the operation, if the result is a dictionary. -# -# For "strict" operations this clearly does all that is needed. For lenient ones, -# we _might_ want for local+global attributes of the same name to interact. -# However, on careful consideration, it seems that this is not actually desirable for -# any of the common-metadata operations. -# So, we simply treat "global" and "local" attributes of the same name as entirely -# independent. Which happily is also the easiest to code, and to explain. -# -def xd_is_split(dic): - """Detect whether a dictionary is a "split-attribute" type.""" - return hasattr(dic, "globals") and hasattr(dic, "locals") - - -def xd_to_normal(dic): - """ - Convert the input to a 'normal' dict with paired keys, if it is split-attrs type - """ - - def _global_then_local_items(dic): - # Routine to produce global, then local 'items' in order, and with all keys - # "labelled" as local or global type, to ensure they are all unique. - for key, value in dic.globals.items(): - yield ("global", key), value - for key, value in dic.locals.items(): - yield ("local", key), value - - return dict(_global_then_local_items(dic)) - - -def xd_from_normal(dic): - """ - Convert an input with global/local paired keys back into a split-attrs dict. - - For now, this is always+only a CubeAttrsDict. - """ - from iris.cube import CubeAttrsDict - - result = CubeAttrsDict() - for key, value in dic.items(): - keytype, keyname = key - if keytype == "global": - result.globals[keyname] = value - else: - assert keytype == "local" - result.locals[keyname] = value - return result - - -def xd_normalise_input_pair(left, right): - """Work out whether inputs are "split" type, and convert if so.""" - from iris.cube import CubeAttrsDict - - left_split, right_split = xd_is_split(left), xd_is_split(right) - is_split = left_split or right_split - if is_split: - # Convert any "normal" dicts to split equivalents first - # - this divides contents into global+local according to default rules - if not left_split: - left = CubeAttrsDict(left) - if not right_split: - right = CubeAttrsDict(right) - # convert both to paired-key form for calculations - left = xd_to_normal(left) - right = xd_to_normal(right) - - return is_split, left, right - - -def xd_reconvert_output(is_split, result): - """Re-form a 'split dict' result from a dict with paired keys, if needed.""" - if is_split: - result = xd_from_normal(result) - return result - - class BaseMetadata(metaclass=_NamedTupleMeta): """ Container for common metadata. @@ -455,8 +374,6 @@ def _combine_lenient_attributes(left, right): # Copy the dictionaries. left = deepcopy(left) right = deepcopy(right) - # convert from split form if required - is_split, left, right = xd_normalise_input_pair(left, right) # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. @@ -477,8 +394,7 @@ def _combine_lenient_attributes(left, right): result = {k: left[k] for k, _ in common} result.update({k: left[k] for k in dsleft.keys()}) result.update({k: right[k] for k in dsright.keys()}) - # Convert result back to split-attrs dict, if original inputs were - result = xd_reconvert_output(is_split, result) + return result @staticmethod @@ -487,8 +403,6 @@ def _combine_strict_attributes(left, right): # Copy the dictionaries. left = deepcopy(left) right = deepcopy(right) - # convert from split form if required - is_split, left, right = xd_normalise_input_pair(left, right) # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. @@ -498,8 +412,7 @@ def _combine_strict_attributes(left, right): common = sleft & sright # Now bring the result together. result = {k: left[k] for k, _ in common} - # Convert result back to split-attrs dict, if original inputs were - result = xd_reconvert_output(is_split, result) + return result def _compare_lenient(self, other): @@ -552,10 +465,6 @@ def _compare_lenient_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - - # Convert from split if required --> i.e. all distinct keys (global+local) - _, left, right = xd_normalise_input_pair(left, right) - sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. @@ -573,10 +482,6 @@ def _compare_strict_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - - # Convert from split if required --> i.e. all distinct keys (global+local) - _, left, right = xd_normalise_input_pair(left, right) - sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} @@ -646,12 +551,6 @@ def _difference_lenient_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - - # Convert from split if required --> i.e. all distinct keys (global+local) - is_split, left, right = xd_normalise_input_pair(left, right) - # TODO: ?maybe? consider if we flag different global+local values of a - # given attr (name). BUT not clear how we would report that, anyway. - sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. @@ -670,11 +569,6 @@ def _difference_lenient_attributes(left, right): # Replace hash-rvalue with original rvalue. dsleft = {k: left[k] for k in dsleft.keys()} dsright = {k: right[k] for k in dsright.keys()} - if is_split: - # Convert results back to split-attrs dicts, if originals were. - dsleft, dsright = ( - xd_from_normal(dic) for dic in (dsleft, dsright) - ) result = (dsleft, dsright) return result @@ -685,10 +579,6 @@ def _difference_strict_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - - # Convert from split if required --> i.e. all distinct keys (global+local) - is_split, left, right = xd_normalise_input_pair(left, right) - sleft = {(k, hexdigest(v)) for k, v in left.items()} sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. @@ -702,11 +592,6 @@ def _difference_strict_attributes(left, right): # Replace hash-rvalue with original rvalue. dsleft = {k: left[k] for k in dsleft.keys()} dsright = {k: right[k] for k in dsright.keys()} - if is_split: - # Convert results back to split-attrs dicts, if originals were. - dsleft, dsright = ( - xd_from_normal(dic) for dic in (dsleft, dsright) - ) result = (dsleft, dsright) return result @@ -1371,6 +1256,46 @@ def _check(item): return result + # + # Override each of the attribute-dict operations in BaseMetadata, to enable + # them to deal with split-attribute dictionaries correctly. + # There are 6 of these, for (equals/combine/difference) * (lenient/strict). + # Each is overridden with a *wrapped* version of the parent method, using the + # "@adjust_for_split_attribute_dictionaries" decorator, which converts any + # split-attribute dictionaries in the inputs to ordinary dicts, and likewise + # re-converts any dictionaries in the return value. + # + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _combine_lenient_attributes(left, right): + return BaseMetadata._combine_lenient_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _combine_strict_attributes(left, right): + return BaseMetadata._combine_strict_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _compare_lenient_attributes(left, right): + return BaseMetadata._compare_lenient_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _compare_strict_attributes(left, right): + return BaseMetadata._compare_strict_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _difference_lenient_attributes(left, right): + return BaseMetadata._difference_lenient_attributes(left, right) + + @staticmethod + @adjust_for_split_attribute_dictionaries + def _difference_strict_attributes(left, right): + return BaseMetadata._difference_strict_attributes(left, right) + class DimCoordMetadata(CoordMetadata): """ diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index 41876571b8..9e1185c21b 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -659,6 +659,7 @@ def test_op_different__attribute_value(self, op_leniency): def test_op_different__attribute_extra_global(self, op_leniency): # One input has an additional attribute, specifically set as a *GLOBAL* one. + # ?OBSOLETE : ~equivalent to case "extra_global" --> "GaL-:G-L-" is_lenient = op_leniency == "lenient" self.lvalues["attributes"] = CubeAttrsDict( @@ -686,6 +687,7 @@ def test_op_different__attribute_extra_global(self, op_leniency): def test_op_different__attribute_extra_local(self, op_leniency): # One input has an additional attribute, specifically set as a *LOCAL* one. + # ?OBSOLETE : ~equivalent to case "extra_local" --> "G-La:G-L-" is_lenient = op_leniency == "lenient" self.lvalues["attributes"] = CubeAttrsDict( @@ -714,6 +716,7 @@ def test_op_different__attribute_extra_local(self, op_leniency): def test_op_different__attribute_same_global_local(self, op_leniency): # One field has an extra specifically *LOCAL* attribute, and the other a # corresponding *GLOBAL* one -- both with distinct values. + # ?OBSOLETE : ~equivalent to case "extra_local" --> "G-La:G-L-" is_lenient = op_leniency == "lenient" common_attrs = CubeAttrsDict( From fb526243e84883c709d0671c8f31a42ea19016d9 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 13 Nov 2023 12:04:40 +0000 Subject: [PATCH 07/11] Remove redundant tests duplicated by matrix testcases. --- .../unit/common/metadata/test_CubeMetadata.py | 94 ------------------- 1 file changed, 94 deletions(-) diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index 9e1185c21b..61380612fc 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -657,100 +657,6 @@ def test_op_different__attribute_value(self, op_leniency): assert lmetadata.combine(rmetadata)._asdict() == expected assert rmetadata.combine(lmetadata)._asdict() == expected - def test_op_different__attribute_extra_global(self, op_leniency): - # One input has an additional attribute, specifically set as a *GLOBAL* one. - # ?OBSOLETE : ~equivalent to case "extra_global" --> "GaL-:G-L-" - is_lenient = op_leniency == "lenient" - - self.lvalues["attributes"] = CubeAttrsDict( - globals={"_a_common_": mock.sentinel.dummy_a}, - locals={"_b_common_": mock.sentinel.dummy_b}, - ) - self.rvalues["attributes"] = self.lvalues["attributes"].copy() - self.rvalues["attributes"].globals["_extra_"] = mock.sentinel.testvalue - lmetadata = self.cls(**self.lvalues) - rmetadata = self.cls(**self.rvalues) - - if is_lenient: - # the extra attribute should appear in the result .. - expected = self.rvalues - else: - # .. it should not - expected = self.lvalues - - with mock.patch( - "iris.common.metadata._LENIENT", return_value=is_lenient - ): - # Check both l+r and r+l - assert lmetadata.combine(rmetadata)._asdict() == expected - assert rmetadata.combine(lmetadata)._asdict() == expected - - def test_op_different__attribute_extra_local(self, op_leniency): - # One input has an additional attribute, specifically set as a *LOCAL* one. - # ?OBSOLETE : ~equivalent to case "extra_local" --> "G-La:G-L-" - is_lenient = op_leniency == "lenient" - - self.lvalues["attributes"] = CubeAttrsDict( - globals={"_a_common_": mock.sentinel.dummy_a}, - locals={"_b_common_": mock.sentinel.dummy_b}, - ) - self.rvalues["attributes"] = self.lvalues["attributes"].copy() - self.rvalues["attributes"].locals["_extra_"] = mock.sentinel.testvalue - lmetadata = self.cls(**self.lvalues) - rmetadata = self.cls(**self.rvalues) - - if is_lenient: - # the extra attribute should appear in the result .. - expected = self.rvalues - else: - # .. it should not - expected = self.lvalues - - with mock.patch( - "iris.common.metadata._LENIENT", return_value=is_lenient - ): - # Check both l+r and r+l - assert lmetadata.combine(rmetadata)._asdict() == expected - assert rmetadata.combine(lmetadata)._asdict() == expected - - def test_op_different__attribute_same_global_local(self, op_leniency): - # One field has an extra specifically *LOCAL* attribute, and the other a - # corresponding *GLOBAL* one -- both with distinct values. - # ?OBSOLETE : ~equivalent to case "extra_local" --> "G-La:G-L-" - is_lenient = op_leniency == "lenient" - - common_attrs = CubeAttrsDict( - globals={"_a_common_": mock.sentinel.dummy_a}, - locals={"_b_common_": mock.sentinel.dummy_b}, - ) - self.lvalues["attributes"] = deepcopy(common_attrs) - self.rvalues["attributes"] = deepcopy(common_attrs) - basis_metadata = self.cls(**deepcopy(self.lvalues)) - self.lvalues["attributes"].globals["_extra_"] = mock.sentinel.v1 - self.rvalues["attributes"].locals["_extra_"] = mock.sentinel.v2 - lmetadata = self.cls(**self.lvalues) - rmetadata = self.cls(**self.rvalues) - - expected = basis_metadata._asdict() - if is_lenient: - # BOTH extra attributes should appear in the result .. - expected["attributes"].globals.update( - self.lvalues["attributes"].globals - ) - expected["attributes"].locals.update( - self.rvalues["attributes"].locals - ) - else: - # strict operation : neither of the "extras" appears - pass - - with mock.patch( - "iris.common.metadata._LENIENT", return_value=is_lenient - ): - # Check both l+r and r+l - assert lmetadata.combine(rmetadata)._asdict() == expected - assert rmetadata.combine(lmetadata)._asdict() == expected - @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) def test_op__splitattributes_cases(self, op_leniency, testcase): # Check results for various global/local values of the same attribute. From 2cd51cda1b743dba65e2619ecd0acbe3a8579bcd Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 13 Nov 2023 11:55:59 +0000 Subject: [PATCH 08/11] Newstyle split-attrs matrix testing, with fewer testcases. --- .../unit/common/metadata/test_CubeMetadata.py | 500 ++++++++++-------- 1 file changed, 289 insertions(+), 211 deletions(-) diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index 61380612fc..f85f783bd6 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -21,10 +21,6 @@ from iris.common.lenient import _LENIENT, _qualname from iris.common.metadata import BaseMetadata, CubeMetadata from iris.cube import CubeAttrsDict -from iris.tests.integration.test_netcdf__loadsaveattrs import ( - decode_matrix_input, - encode_matrix_result, -) def _make_metadata( @@ -100,97 +96,209 @@ def test_bases(self): @pytest.fixture(params=CubeMetadata._fields) def fieldname(request): + """Parametrize testing over all CubeMetadata field names.""" return request.param @pytest.fixture(params=["strict", "lenient"]) def op_leniency(request): + """Parametrize testing over strict or lenient operation.""" return request.param -# Global data defining the individual split attributes "testcases". -# Each testcase specifies two inputs, with different global+local attribute settings. -# The same cases are tested for 3 different metadata operations : 'combine', -# 'difference' and 'equal'. -_ATTRS_TESTCASE_INPUTS = { - "same": "GaLb:GaLb", - "extra_global": "GaL-:G-L-", - "extra_local": "G-La:G-L-", - "same_global_local": "GaL-:G-La", - "diff_global_local": "GaL-:G-Lb", - "diffglobal_nolocal": "GaL-:GbL-", - "diffglobal_samelocal": "GaLc:GbLc", - "difflocal_noglobal": "G-La:G-Lb", - "difflocal_sameglobal": "GaLc:GaLd", - "diff_local_and_global": "GaLc:GbLd", +@pytest.fixture(params=["primaryAA", "primaryAX", "primaryAB"]) +def primary_values(request): + """ + Parametrize over the possible non-trivial pairs of operation values. + + The possible cases are : where one side has a missing value; where both sides + have the same non-missing value; and where they have different non-missing values. + """ + return request.param + + +@pytest.fixture(params=[False, True], ids=["primaryLocal", "primaryGlobal"]) +def primary_is_global_not_local(request): + """Parametrize split-attribute testing over "global" or "local" attribute types.""" + return request.param + + +@pytest.fixture(params=[False, True], ids=["leftrightL2R", "leftrightR2L"]) +def order_reversed(request): + """Parametrize split-attribute testing over "left OP right" or "right OP left".""" + return request.param + + +# Define the expected results for split-attribute testing. +# This dictionary records the expected for the various possible arrangements of values +# of a single attribute in the "left" and "right" inputs of a CubeMetadata operation. +# The possible operations are "equal", "combine" or "difference", and may all be +# performed "strict" or "lenient". +_ALL_RESULTS = { + "equal": { + "primaryAA": {"lenient": True, "strict": True}, + "primaryAX": {"lenient": True, "strict": False}, + "primaryAB": {"lenient": False, "strict": False}, + }, + "combine": { + "primaryAA": {"lenient": "A", "strict": "A"}, + "primaryAX": {"lenient": "A", "strict": None}, + "primaryAB": {"lenient": None, "strict": None}, + }, + "difference": { + "primaryAA": {"lenient": None, "strict": None}, + "primaryAX": {"lenient": None, "strict": ("A", None)}, + "primaryAB": {"lenient": ("A", "B"), "strict": ("A", "B")}, + }, } -_ATTRS_TESTCASE_NAMES = list(_ATTRS_TESTCASE_INPUTS) +_TEST_ATTRNAME = "_test_attr_" + + +def extract_attribute_value(split_dict, extract_global): + """ + Extract a test-attribute value from a split-attribute dictionary. + + Parameters + ---------- + split_dict : CubeAttrsDict + a split dictionary from an operation result + extract_global : bool + whether to extract values of the global, or local, `_TEST_ATTRNAME` attribute + + Returns + ------- + str | None + """ + if extract_global: + result = split_dict.globals.get(_TEST_ATTRNAME, None) + else: + result = split_dict.locals.get(_TEST_ATTRNAME, None) + return result + + +def extract_result_value(input, extract_global): + """ + Extract the values(s) of the main test attribute from an operation result. + + Parameters + ---------- + input : bool | CubeMetadata + an operation result : the structure varies for the three different operations. + extract_global : bool + whether to return values of a global, or local, `_TEST_ATTRNAME` attribute. + + Returns + ------- + None | bool | str or tuple of (None | str) + result value(s) + """ + if not isinstance(input, CubeMetadata): + # Result is either boolean (for "equals") or a None (for "difference"). + result = input + else: + # Result is a CubeMetadata. Get the value(s) of the required attribute. + result = input.attributes + + if isinstance(result, CubeAttrsDict): + result = extract_attribute_value(result, extract_global) + else: + # For "difference", input.attributes is a *pair* of dictionaries. + assert isinstance(result, tuple) + result = tuple( + [ + extract_attribute_value(dic, extract_global) + for dic in result + ] + ) + if result == (None, None): + # This value occurs when the desired attribute is *missing* from a + # difference result, but other (secondary) attributes were *different*. + # We want only differences of the *target* attribute, so convert these + # to a plain 'no difference', for expected-result testing purposes. + result = None + return result -def check_splitattrs_op( - check_testcase: str, check_lenient: bool, op: str, cases: dict + +def make_attrsdict(value): + """ + Return a dictionary containing a test attribute of the given value. + + If the value is "X", the attribute is absent (result is empty dict). + """ + if value == "X": + # Translate an "X" input as "missing". + result = {} + else: + result = {_TEST_ATTRNAME: value} + return result + + +def check_splitattrs_testcase( + operation_name: str, + check_is_lenient: bool, + primary_inputs: str = "AA", # character values + secondary_inputs: str = "XX", # character values + check_global_not_local: bool = True, + check_reversed: bool = False, ): """ - Test a common metadata operation, specifically the split-attributes handling. + Test a metadata operation with split-attributes against known expected results. Parameters ---------- - check_testcase : str - One of those listed in _ATTRS_TESTCASE_INPUTS. These keys are coded strings, - as used in `iris.tests.integration.test_netcdf_loadsaveattrs`. - check_lenient : bool - Whether the test operation is performed 'lenient' or 'strict'. - op : {'combine', 'difference', 'equal'} - The operation under test - cases : dict - The "expected" result-code values for each testcase. Values are either two - results for 'strict' and 'lenient' cases, when those are different, or a single - result if strict and lenient results are the same. - NOTE: this arg defines expected results for *all* testcases, even though each - call only tests a single testcase. This just makes parameterisation easier. + operation_name : str + One of "equal", "combine" or "difference. + check_is_lenient : bool + Whether the tested operation is performed 'lenient' or 'strict'. + primary_inputs : str + A pair of characters defining left + right attribute values for the operands of + the operation. + secondary_inputs : str + A further pair of values for an attribute of the same name but "other" type + ( i.e. global/local when the main test is local/global ). + check_global_not_local : bool + If True, the main operands are the global ones, and secondary are local. + Otherwise, the other way around. + check_reversed : bool + If True, the left and right operands are exchanged, and the expected value + modified according. Notes ----- - Sequence of operation : + The expected result of an operation is mostly defined by : the operation applied; + the main "primary" inputs; and the `check_is_lenient` state. + + In the case of the "equals" operation, however, the expected result is simply + set to `False` if the secondary inputs do not match. + + Calling with different values for the keywords aims to show that the main operation + has the expected value, from _ALL_RESULTS, the ***same in essentially all cases*** + ( though modified in specific ways for some factors ). - 1. construct 2 inputs from _ATTRS_TESTCASE_INPUTS[check_testcase] - 2. perform ``result = op(*inputs, lenient=check_lenient)`` - 3. (except for equality) convert the result to a "result-code string", - again as in test_netcdf_loadsaveattrs. - 4 assert that the (encoded) results match the expected + This regularity also demonstrates the required independence over the other + test-factors, i.e. global/local attribute type, and right-left order. """ - # cases.keys() are the testcase names -- these should always match the master table - assert cases.keys() == _ATTRS_TESTCASE_INPUTS.keys() - # Each case is recorded as testcase: (, [*output-codes]) - # The "input"s are only for readability, and should match those in the master table. + # Just for comfort, check that inputs are all one of a few single characters. assert all( - cases[key][0] == _ATTRS_TESTCASE_INPUTS[key] - for key in _ATTRS_TESTCASE_INPUTS + (item in list("ABCDX")) for item in (primary_inputs + secondary_inputs) ) + # Interpret "primary" and "secondary" inputs as "global" and "local" attributes. + if check_global_not_local: + global_values, local_values = primary_inputs, secondary_inputs + else: + local_values, global_values = primary_inputs, secondary_inputs - # Fetch input test-values from the common dictionary. - input_spec, result_specs = cases[check_testcase] - input_spec = input_spec.split( - ":" - ) # make a list from the two sides of the ":" - assert len(input_spec) == 2 - # convert to a list of (global, *locals) value sets - input_values = decode_matrix_input(input_spec) - - # form 2 inputs to the operation - def attrsdict(value): - if value is None: - result = {} - else: - result = {"_testattr_": value} - return result - - input_attributes = ( + # Form 2 inputs to the operation : Make left+right split-attribute input + # dictionaries, with both the primary and secondary attribute value settings. + input_dicts = [ CubeAttrsDict( - globals=attrsdict(values[0]), locals=attrsdict(values[1]) + globals=make_attrsdict(global_value), + locals=make_attrsdict(local_value), ) - for values in input_values - ) + for global_value, local_value in zip(global_values, local_values) + ] + # Make left+right CubeMetadata with just those attributes, other fields all blank. input_l, input_r = [ CubeMetadata( **{ @@ -198,91 +306,123 @@ def attrsdict(value): for field in CubeMetadata._fields } ) - for attrs in input_attributes + for attrs in input_dicts ] + if check_reversed: + # Swap the inputs to perform a 'reversed' calculation. + input_l, input_r = input_r, input_l + # Run the actual operation - result = getattr(input_l, op)(input_r, lenient=check_lenient) + result = getattr(input_l, operation_name)( + input_r, lenient=check_is_lenient + ) - # Get the expected result, the strict/lenient one as required - if len(result_specs) == 1: - expected_spec = result_specs[0] - else: - expected_spec = result_specs[1 if check_lenient else 0] - - # Convert the operation result to the form of the recorded "expected" output. - # N.B. the expected-result format depends on the operation under test. - assert op in ("combine", "difference", "equal") - if op == "combine": - # "combine" results are CubeMetadata objects - # convert global+local values to a result-code string - values = [ - result.attributes.globals.get("_testattr_", None), - result.attributes.locals.get("_testattr_", None), - ] - # N.B. encode_matrix_result returns a list of results (always 1 in this case). - (result,) = encode_matrix_result(values) - - elif op == "difference": - # "difference" op results are CubeMetadata : its values are difference-pairs. - if result is None: - # Use a unique string to indicate a null result - result = "-" - else: - # result is a CubeMetadata whose .attributes is a pair of CubeAttrsDict - assert isinstance(result, CubeMetadata) - assert isinstance(result.attributes, tuple) - assert len(result.attributes) == 2 - assert all( - isinstance(dic, CubeAttrsDict) for dic in result.attributes - ) + if operation_name == "difference" and check_reversed: + # Adjust the expected result of a "reversed" operation + # ( N.B. only "difference" results are affected by reversal. ) + if isinstance(result, CubeMetadata): + result = result._replace(attributes=result.attributes[::-1]) + + # Extract, from the operation result, the value to be tested against "expected". + result = extract_result_value(result, check_global_not_local) + + # Get the *expected* result for this operation. + which = "lenient" if check_is_lenient else "strict" + primary_key = "primary" + primary_inputs + expected = _ALL_RESULTS[operation_name][primary_key][which] + if operation_name == "equal" and expected: + # Account for the equality cases made `False` by mismatched secondary values. + secondary_1, secondary_2 = secondary_inputs + if ( + secondary_1 != "X" + and secondary_2 != "X" + and secondary_1 != secondary_2 + ): + expected = False - # calculate value-pairs in each section, where present - global_local_valuepairs = [ - [ - val.globals.get("_testattr_", None) - for val in result.attributes - ], - [ - val.locals.get("_testattr_", None) - for val in result.attributes - ], - ] - # E.G. [[None, "a"], [None, None]], or [["a", "b"], [None, "c"]] - - # convert these difference-value-pairs to coded strings, which we will - # treat as "global and local values" for conversion into a spec string - # E.G. ["a", "b"] --> "ab"" - # E.G. [None, "a"] --> "-a" - # E.G. [None, None] --> "--" - def valrep_single(val): - return "-" if val is None else val - - def valrep_pair(val): - assert len(val) == 2 - return valrep_single(val[0]) + valrep_single(val[1]) - - global_local_valuecodes = [ - valrep_pair(val) for val in global_local_valuepairs - ] + # Check that actual extracted operation result matches the "expected" one. + assert result == expected - # Encode those "value-codes" as a result-code string - # E.G. converting - # (value-pairs) == [[None, "a"], [None, None]] - # --> (value-codes) ["-a", "--"] - # --> (result) "G-aL--" - # N.B. encode_matrix_result returns a list of results (1 in this case). - (result,) = encode_matrix_result(global_local_valuecodes) - else: - # "equal" op result is a boolean : needs no further conversion - assert op == "equal" +class MixinSplitattrsMatrixTests: + """ + Define split-attributes tests to perform on all the metadata operations. - # Check that the coded result matches the expectation. - assert result == expected_spec + This is inherited by the testclass for each operation : + i.e. Test__eq__, Test_combine" and Test_difference + """ + # Define the operation name : set in each inheritor + operation_name = None + + def test_splitattrs_cases( + self, + op_leniency, + primary_values, + primary_is_global_not_local, + order_reversed, + ): + """ + Check the basic operaation against the expected result from _ALL_RESULTS. + + Parametrisation checks over various factors : strict and lenient ; global and + local type attributes ; possible arrangements of the primary values ; and + left-to-right or right-to-left operation order. + """ + primary_inputs = primary_values[-2:] + check_is_lenient = {"strict": False, "lenient": True}[op_leniency] + check_splitattrs_testcase( + operation_name=self.operation_name, + check_is_lenient=check_is_lenient, + primary_inputs=primary_inputs, + secondary_inputs="XX", + check_global_not_local=primary_is_global_not_local, + check_reversed=order_reversed, + ) + + @pytest.mark.parametrize( + "secondary_values", ["secondaryXX", "secondaryCC", "secondaryCD"] + ) + def test_splitattrs_global_local_independence( + self, + op_leniency, + primary_values, + secondary_values, + ): + """ + Check that results are (mostly) independent of the "other" type attributes. + + The operation on attributes of the 'primary' type (global/local) should be + basically unaffected by those of the 'secondary' type (--> local/global). + + This is not really true for equality, so we adjust those results to compensate. + See :func:`check_splitattrs_testcase` for explanations. + + Notes + ----- + We provide this *separate* test for global/local attribute independence, + parametrized over selected relevant arrangements of the 'secondary' values, and + do not test with reversed order or "local" primary inputs. + This is because matrix testing over *all* relevant factors produces over 1000 + possible combinations (!) + """ + primary_inputs = primary_values[-2:] + secondary_inputs = secondary_values[-2:] + check_is_lenient = {"strict": False, "lenient": True}[op_leniency] + check_splitattrs_testcase( + operation_name=self.operation_name, + check_is_lenient=check_is_lenient, + primary_inputs=primary_inputs, + secondary_inputs=secondary_inputs, + check_global_not_local=True, + check_reversed=False, + ) + + +class Test___eq__(MixinSplitattrsMatrixTests): + operation_name = "equal" -class Test___eq__: @pytest.fixture(autouse=True) def setup(self): self.lvalues = dict( @@ -426,28 +566,6 @@ def test_op_different__attribute_value(self, op_leniency): assert not lmetadata.__eq__(rmetadata) assert not rmetadata.__eq__(lmetadata) - @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) - def test_op__splitattributes_cases(self, op_leniency, testcase): - # Check results for various global/local values of the same attribute. - # N.B. 'cases' dict specifies the expected results for each testcase. - check_splitattrs_op( - check_testcase=testcase, - check_lenient=op_leniency == "lenient", - op="equal", - cases={ - "same": ("GaLb:GaLb", [True]), - "extra_global": ("GaL-:G-L-", [False, True]), - "extra_local": ("G-La:G-L-", [False, True]), - "same_global_local": ("GaL-:G-La", [False, True]), - "diff_global_local": ("GaL-:G-Lb", [False, True]), - "diffglobal_nolocal": ("GaL-:GbL-", [False]), - "diffglobal_samelocal": ("GaLc:GbLc", [False]), - "difflocal_noglobal": ("G-La:G-Lb", [False]), - "difflocal_sameglobal": ("GaLc:GaLd", [False]), - "diff_local_and_global": ("GaLc:GbLd", [False]), - }, - ) - class Test___lt__(tests.IrisTest): def setUp(self): @@ -480,7 +598,9 @@ def test__ignore_attributes_cell_methods(self): self.assertFalse(result) -class Test_combine: +class Test_combine(MixinSplitattrsMatrixTests): + operation_name = "combine" + @pytest.fixture(autouse=True) def setup(self): self.lvalues = dict( @@ -657,30 +777,10 @@ def test_op_different__attribute_value(self, op_leniency): assert lmetadata.combine(rmetadata)._asdict() == expected assert rmetadata.combine(lmetadata)._asdict() == expected - @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) - def test_op__splitattributes_cases(self, op_leniency, testcase): - # Check results for various global/local values of the same attribute. - # N.B. 'cases' dict specifies the expected results for each testcase. - check_splitattrs_op( - check_testcase=testcase, - check_lenient=op_leniency == "lenient", - op="combine", - cases={ - "same": ("GaLb:GaLb", ["GaLb"]), - "extra_global": ("GaL-:G-L-", ["G-L-", "GaL-"]), - "extra_local": ("G-La:G-L-", ["G-L-", "G-La"]), - "same_global_local": ("GaL-:G-La", ["G-L-", "GaLa"]), - "diff_global_local": ("GaL-:G-Lb", ["G-L-", "GaLb"]), - "diffglobal_nolocal": ("GaL-:GbL-", ["G-L-"]), - "diffglobal_samelocal": ("GaLc:GbLc", ["G-Lc"]), - "difflocal_noglobal": ("G-La:G-Lb", ["G-L-"]), - "difflocal_sameglobal": ("GaLc:GaLd", ["GaL-"]), - "diff_local_and_global": ("GaLc:GbLd", ["G-L-"]), - }, - ) +class Test_difference(MixinSplitattrsMatrixTests): + operation_name = "difference" -class Test_difference: @pytest.fixture(autouse=True) def setup(self): self.lvalues = dict( @@ -870,28 +970,6 @@ def test_op_different__attribute_value(self, op_leniency): assert lmetadata.difference(rmetadata)._asdict() == lexpected assert rmetadata.difference(lmetadata)._asdict() == rexpected - @pytest.mark.parametrize("testcase", _ATTRS_TESTCASE_NAMES) - def test_op__splitattributes_cases(self, op_leniency, testcase): - # Check results for various global/local values of the same attribute. - # N.B. 'cases' dict specifies the expected results for each testcase. - check_splitattrs_op( - check_testcase=testcase, - check_lenient=op_leniency == "lenient", - op="difference", - cases={ - "same": ("GaLb:GaLb", ["-"]), - "extra_global": ("GaL-:G-L-", ["Ga-L--", "-"]), - "extra_local": ("G-La:G-L-", ["G--La-", "-"]), - "same_global_local": ("GaL-:G-La", ["Ga-L-a", "-"]), - "diff_global_local": ("GaL-:G-Lb", ["Ga-L-b", "-"]), - "diffglobal_nolocal": ("GaL-:GbL-", ["GabL--"]), - "diffglobal_samelocal": ("GaLc:GbLc", ["GabL--"]), - "difflocal_noglobal": ("G-La:G-Lb", ["G--Lab"]), - "difflocal_sameglobal": ("GaLc:GaLd", ["G--Lcd"]), - "diff_local_and_global": ("GaLc:GbLd", ["GabLcd"]), - }, - ) - class Test_equal(tests.IrisTest): def setUp(self): From 07ed579d723879bd03a6db3b8d2dd83672278e4a Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 14 Nov 2023 13:22:13 +0000 Subject: [PATCH 09/11] Small improvements to comments + docstrings. --- lib/iris/common/_split_attribute_dicts.py | 2 +- .../unit/common/metadata/test_CubeMetadata.py | 54 ++++++++++++------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/lib/iris/common/_split_attribute_dicts.py b/lib/iris/common/_split_attribute_dicts.py index 8826ef833c..875ee3102f 100644 --- a/lib/iris/common/_split_attribute_dicts.py +++ b/lib/iris/common/_split_attribute_dicts.py @@ -25,7 +25,7 @@ def _convert_splitattrs_to_pairedkeys_dict(dic): """ - Convert a split-attributes dictionary to "normal" dict. + Convert a split-attributes dictionary to a "normal" dict. Transform a :class:`~iris.cube.CubeAttributesDict` "split" attributes dictionary into a 'normal' :class:`dict`, with paired keys of the form ('global', name) or diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index f85f783bd6..b8d9620766 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -111,8 +111,15 @@ def primary_values(request): """ Parametrize over the possible non-trivial pairs of operation values. - The possible cases are : where one side has a missing value; where both sides - have the same non-missing value; and where they have different non-missing values. + The parameters all provide two attribute values which are the left- and right-hand + arguments to the tested operation. The attribute values are single characters from + the end of the parameter name -- except that "X" denotes a "missing" attribute. + + The possible cases are: + + * one side has a value and the other is missing + * left and right have the same non-missing value + * left and right have different non-missing values """ return request.param @@ -130,10 +137,14 @@ def order_reversed(request): # Define the expected results for split-attribute testing. -# This dictionary records the expected for the various possible arrangements of values -# of a single attribute in the "left" and "right" inputs of a CubeMetadata operation. +# This dictionary records the expected results for the various possible arrangements of +# values of a single attribute in the "left" and "right" inputs of a CubeMetadata +# operation. # The possible operations are "equal", "combine" or "difference", and may all be # performed "strict" or "lenient". +# N.B. the *same* results should also apply when left+right are swapped, with a suitable +# adjustment to the result value. Likewise, results should be the same for either +# global- or local-style attributes. _ALL_RESULTS = { "equal": { "primaryAA": {"lenient": True, "strict": True}, @@ -151,6 +162,7 @@ def order_reversed(request): "primaryAB": {"lenient": ("A", "B"), "strict": ("A", "B")}, }, } +# A fixed attribute name used for all the split-attribute testing. _TEST_ATTRNAME = "_test_attr_" @@ -189,7 +201,7 @@ def extract_result_value(input, extract_global): Returns ------- - None | bool | str or tuple of (None | str) + None | bool | str | tuple[None | str] result value(s) """ if not isinstance(input, CubeMetadata): @@ -222,7 +234,7 @@ def extract_result_value(input, extract_global): def make_attrsdict(value): """ - Return a dictionary containing a test attribute of the given value. + Return a dictionary containing a test attribute with the given value. If the value is "X", the attribute is absent (result is empty dict). """ @@ -237,8 +249,8 @@ def make_attrsdict(value): def check_splitattrs_testcase( operation_name: str, check_is_lenient: bool, - primary_inputs: str = "AA", # character values - secondary_inputs: str = "XX", # character values + primary_inputs: str = "AA", # two character values + secondary_inputs: str = "XX", # two character values check_global_not_local: bool = True, check_reversed: bool = False, ): @@ -258,7 +270,8 @@ def check_splitattrs_testcase( A further pair of values for an attribute of the same name but "other" type ( i.e. global/local when the main test is local/global ). check_global_not_local : bool - If True, the main operands are the global ones, and secondary are local. + If `True` then the primary operands, and the tested result values, are *global* + attributes, and the secondary ones are local. Otherwise, the other way around. check_reversed : bool If True, the left and right operands are exchanged, and the expected value @@ -267,7 +280,7 @@ def check_splitattrs_testcase( Notes ----- The expected result of an operation is mostly defined by : the operation applied; - the main "primary" inputs; and the `check_is_lenient` state. + the main "primary" inputs; and the lenient/strict mode. In the case of the "equals" operation, however, the expected result is simply set to `False` if the secondary inputs do not match. @@ -298,7 +311,7 @@ def check_splitattrs_testcase( ) for global_value, local_value in zip(global_values, local_values) ] - # Make left+right CubeMetadata with just those attributes, other fields all blank. + # Make left+right CubeMetadata with those attributes, other fields all blank. input_l, input_r = [ CubeMetadata( **{ @@ -319,7 +332,7 @@ def check_splitattrs_testcase( ) if operation_name == "difference" and check_reversed: - # Adjust the expected result of a "reversed" operation + # Adjust the result of a "reversed" operation to the 'normal' way round. # ( N.B. only "difference" results are affected by reversal. ) if isinstance(result, CubeMetadata): result = result._replace(attributes=result.attributes[::-1]) @@ -350,7 +363,7 @@ class MixinSplitattrsMatrixTests: Define split-attributes tests to perform on all the metadata operations. This is inherited by the testclass for each operation : - i.e. Test__eq__, Test_combine" and Test_difference + i.e. Test___eq__, Test_combine and Test_difference """ # Define the operation name : set in each inheritor @@ -364,11 +377,14 @@ def test_splitattrs_cases( order_reversed, ): """ - Check the basic operaation against the expected result from _ALL_RESULTS. + Check the basic operation against the expected result from _ALL_RESULTS. + + Parametrisation checks this for all combinations of various factors : - Parametrisation checks over various factors : strict and lenient ; global and - local type attributes ; possible arrangements of the primary values ; and - left-to-right or right-to-left operation order. + * possible arrangements of the primary values + * strict and lenient + * global- and local-type attributes + * left-to-right or right-to-left operation order. """ primary_inputs = primary_values[-2:] check_is_lenient = {"strict": False, "lenient": True}[op_leniency] @@ -404,8 +420,8 @@ def test_splitattrs_global_local_independence( We provide this *separate* test for global/local attribute independence, parametrized over selected relevant arrangements of the 'secondary' values, and do not test with reversed order or "local" primary inputs. - This is because matrix testing over *all* relevant factors produces over 1000 - possible combinations (!) + This is because matrix testing over *all* relevant factors simply produces too + many possible combinations. """ primary_inputs = primary_values[-2:] secondary_inputs = secondary_values[-2:] From dca5dce1d0e3d795329b819fb3d40d965752bb66 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 15 Nov 2023 12:53:04 +0000 Subject: [PATCH 10/11] Fix logic for equals expectation; expand primary/secondary independence test. --- .../unit/common/metadata/test_CubeMetadata.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py index b8d9620766..7e1f64262e 100644 --- a/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py +++ b/lib/iris/tests/unit/common/metadata/test_CubeMetadata.py @@ -346,12 +346,11 @@ def check_splitattrs_testcase( expected = _ALL_RESULTS[operation_name][primary_key][which] if operation_name == "equal" and expected: # Account for the equality cases made `False` by mismatched secondary values. - secondary_1, secondary_2 = secondary_inputs - if ( - secondary_1 != "X" - and secondary_2 != "X" - and secondary_1 != secondary_2 - ): + left, right = secondary_inputs + secondaries_same = left == right or ( + check_is_lenient and "X" in (left, right) + ) + if not secondaries_same: expected = False # Check that actual extracted operation result matches the "expected" one. @@ -398,7 +397,15 @@ def test_splitattrs_cases( ) @pytest.mark.parametrize( - "secondary_values", ["secondaryXX", "secondaryCC", "secondaryCD"] + "secondary_values", + [ + "secondaryXX", + "secondaryCX", + "secondaryXC", + "secondaryCC", + "secondaryCD", + ] + # NOTE: test CX as well as XC, since primary choices has "AX" but not "XA". ) def test_splitattrs_global_local_independence( self, @@ -418,10 +425,9 @@ def test_splitattrs_global_local_independence( Notes ----- We provide this *separate* test for global/local attribute independence, - parametrized over selected relevant arrangements of the 'secondary' values, and - do not test with reversed order or "local" primary inputs. - This is because matrix testing over *all* relevant factors simply produces too - many possible combinations. + parametrized over selected relevant arrangements of the 'secondary' values. + We *don't* test with reversed order or "local" primary inputs, because matrix + testing over *all* relevant factors produces too many possible combinations. """ primary_inputs = primary_values[-2:] secondary_inputs = secondary_values[-2:] From b00b37fb0c70a3761ac39c3459876c1d37ed6569 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 15 Nov 2023 13:50:33 +0000 Subject: [PATCH 11/11] Clarify result testing in metadata operations decorator. --- lib/iris/common/_split_attribute_dicts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/iris/common/_split_attribute_dicts.py b/lib/iris/common/_split_attribute_dicts.py index 875ee3102f..41ce321a8f 100644 --- a/lib/iris/common/_split_attribute_dicts.py +++ b/lib/iris/common/_split_attribute_dicts.py @@ -105,7 +105,8 @@ def _inner_function(*args, **kwargs): result = operation(*args, **kwargs) - # Convert 'pairedkeys' dicts in the result back to split-attributes form. + # Convert known specific cases of 'pairedkeys' dicts in the result, and convert + # those back into split-attribute dictionaries. if isinstance(result, Mapping): # Fix a result which is a single dictionary -- for "combine" result = _convert_pairedkeys_dict_to_splitattrs(result) @@ -117,6 +118,7 @@ def _inner_function(*args, **kwargs): _convert_pairedkeys_dict_to_splitattrs(right), ) result = result.__class__([left, right]) + # ELSE: leave other types of result unchanged. E.G. None, bool return result