From 3cb375e7da4a6c993ca696a1f89ae1cd6e49d183 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 15 Nov 2023 20:26:21 +0000 Subject: [PATCH 1/6] Add tests in advance for split-attributes handling cases. --- .../unit/util/test_equalise_attributes.py | 117 +++++++++++++++++- 1 file changed, 116 insertions(+), 1 deletion(-) diff --git a/lib/iris/tests/unit/util/test_equalise_attributes.py b/lib/iris/tests/unit/util/test_equalise_attributes.py index 13aa1e2af4..72ec994829 100644 --- a/lib/iris/tests/unit/util/test_equalise_attributes.py +++ b/lib/iris/tests/unit/util/test_equalise_attributes.py @@ -13,9 +13,15 @@ import iris.tests as tests # isort:skip import numpy as np +import pytest -from iris.cube import Cube +from iris.coords import AuxCoord +from iris.cube import Cube, CubeAttrsDict import iris.tests.stock +from iris.tests.unit.common.metadata.test_CubeMetadata import ( + _TEST_ATTRNAME, + make_attrsdict, +) from iris.util import equalise_attributes @@ -153,5 +159,114 @@ def test_complex_somecommon(self): ) +class TestSplitattributes: + """ + Extra testing for cases where attributes differ specifically by type + + That is, where there is a new possibility of 'mismatch' due to the newer "typing" + of attributes as global or local. + + Specifically, it is now possible that although + "cube1.attributes.keys == cube2.attributes.keys()", + AND "cube1.attributes[k] == cube2.attributes[k]" for all keys, + YET STILL (possibly) "cube1.attributes != cube2.attributes" + """ + + @staticmethod + def _sample_splitattrs_cube(attr_global_local): + attrs = CubeAttrsDict( + globals=make_attrsdict(attr_global_local[0]), + locals=make_attrsdict(attr_global_local[1]), + ) + return Cube([0], attributes=attrs) + + @staticmethod + def check_equalised_result(cube1, cube2): + equalise_attributes([cube1, cube2]) + # Note: "X" represents a missing attribute, as in test_CubeMetadata + return [ + ( + cube1.attributes.globals.get(_TEST_ATTRNAME, "X") + + cube1.attributes.locals.get(_TEST_ATTRNAME, "X") + ), + ( + cube2.attributes.globals.get(_TEST_ATTRNAME, "X") + + cube2.attributes.locals.get(_TEST_ATTRNAME, "X") + ), + ] + + def test__global_and_local__bothsame(self): + # A trivial case showing that the original globals+locals are both preserved. + cube1 = self._sample_splitattrs_cube("AB") + cube2 = self._sample_splitattrs_cube("AB") + result = self.check_equalised_result(cube1, cube2) + assert result == ["AB", "AB"] + + def test__globals_different(self): + cube1 = self._sample_splitattrs_cube("AX") + cube2 = self._sample_splitattrs_cube("BX") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + def test__locals_different(self): + cube1 = self._sample_splitattrs_cube("XA") + cube2 = self._sample_splitattrs_cube("XB") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + def test__oneglobal_onelocal__different(self): + cube1 = self._sample_splitattrs_cube("AX") + cube2 = self._sample_splitattrs_cube("XB") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + @pytest.mark.xfail + # This case fails without the split-attributes fix. + def test__oneglobal_onelocal__same(self): + cube1 = self._sample_splitattrs_cube("AX") + cube2 = self._sample_splitattrs_cube("XA") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + def test__sameglobals_onelocal__different(self): + cube1 = self._sample_splitattrs_cube("AB") + cube2 = self._sample_splitattrs_cube("AX") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + @pytest.mark.xfail + # This case fails without the split-attributes fix. + def test__sameglobals_onelocal__same(self): + cube1 = self._sample_splitattrs_cube("AA") + cube2 = self._sample_splitattrs_cube("AX") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + @pytest.mark.xfail + # This case fails without the split-attributes fix. + def test__differentglobals_samelocals(self): + cube1 = self._sample_splitattrs_cube("AC") + cube2 = self._sample_splitattrs_cube("BC") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + +class TestNonCube: + # Just to assert that we can do operations on non-cube components (like Coords), + # in fact effectively, anything with a ".attributes". + # Even though the docstring does not admit this, we test it because we put in + # special code to preserve it when adding the split-attribute handling. + def test(self): + attrs = [1, 1, 2] + coords = [ + AuxCoord([0], attributes={"a": attr, "b": "all_the_same"}) + for attr in attrs + ] + equalise_attributes(coords) + assert all( + coord.attributes == {"b": "all_the_same"} for coord in coords + ) + + if __name__ == "__main__": tests.main() From 5517f3fa54f78d103ed1cecbc80f07455c761d86 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 15 Nov 2023 20:32:35 +0000 Subject: [PATCH 2/6] Move dict conversion inside utility, for use elsewhere. --- lib/iris/common/_split_attribute_dicts.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/lib/iris/common/_split_attribute_dicts.py b/lib/iris/common/_split_attribute_dicts.py index 41ce321a8f..92fd883dd7 100644 --- a/lib/iris/common/_split_attribute_dicts.py +++ b/lib/iris/common/_split_attribute_dicts.py @@ -18,7 +18,6 @@ So, we simply treat "global" and "local" attributes of the same name as entirely independent. Which happily is also the easiest to code, and to explain. """ - from collections.abc import Mapping, Sequence from functools import wraps @@ -30,7 +29,16 @@ def _convert_splitattrs_to_pairedkeys_dict(dic): Transform a :class:`~iris.cube.CubeAttributesDict` "split" attributes dictionary into a 'normal' :class:`dict`, with paired keys of the form ('global', name) or ('local', name). + + If the input is *not* a split-attrs dict, it is converted to one before + transforming it. This will assign its keys to global/local depending on a standard + set of choices (see :class:`~iris.cube.CubeAttributesDict`). """ + from iris.cube import CubeAttrsDict + + # Convert input to CubeAttrsDict + if not hasattr(dic, "globals") or not hasattr(dic, "locals"): + dic = CubeAttrsDict(dic) def _global_then_local_items(dic): # Routine to produce global, then local 'items' in order, and with all keys @@ -93,13 +101,6 @@ def adjust_for_split_attribute_dictionaries(operation): @wraps(operation) def _inner_function(*args, **kwargs): - from iris.cube import CubeAttrsDict - - # First make all inputs into CubeAttrsDict, if not already. - args = [ - arg if isinstance(arg, CubeAttrsDict) else CubeAttrsDict(arg) - for arg in args - ] # Convert all inputs into 'pairedkeys' type dicts args = [_convert_splitattrs_to_pairedkeys_dict(arg) for arg in args] From 35ec095a6293394c599c71a8238db5f10e2f9e13 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 15 Nov 2023 20:36:04 +0000 Subject: [PATCH 3/6] Add support for split-attributes to equalise_attributes. --- .../unit/util/test_equalise_attributes.py | 4 -- lib/iris/util.py | 38 +++++++++++++++---- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/lib/iris/tests/unit/util/test_equalise_attributes.py b/lib/iris/tests/unit/util/test_equalise_attributes.py index 72ec994829..7348553fa2 100644 --- a/lib/iris/tests/unit/util/test_equalise_attributes.py +++ b/lib/iris/tests/unit/util/test_equalise_attributes.py @@ -13,7 +13,6 @@ import iris.tests as tests # isort:skip import numpy as np -import pytest from iris.coords import AuxCoord from iris.cube import Cube, CubeAttrsDict @@ -220,7 +219,6 @@ def test__oneglobal_onelocal__different(self): result = self.check_equalised_result(cube1, cube2) assert result == ["XX", "XX"] - @pytest.mark.xfail # This case fails without the split-attributes fix. def test__oneglobal_onelocal__same(self): cube1 = self._sample_splitattrs_cube("AX") @@ -234,7 +232,6 @@ def test__sameglobals_onelocal__different(self): result = self.check_equalised_result(cube1, cube2) assert result == ["XX", "XX"] - @pytest.mark.xfail # This case fails without the split-attributes fix. def test__sameglobals_onelocal__same(self): cube1 = self._sample_splitattrs_cube("AA") @@ -242,7 +239,6 @@ def test__sameglobals_onelocal__same(self): result = self.check_equalised_result(cube1, cube2) assert result == ["XX", "XX"] - @pytest.mark.xfail # This case fails without the split-attributes fix. def test__differentglobals_samelocals(self): cube1 = self._sample_splitattrs_cube("AC") diff --git a/lib/iris/util.py b/lib/iris/util.py index 0b31ebdafc..fa818f7837 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2059,24 +2059,45 @@ def equalise_attributes(cubes): See more at :doc:`/userguide/real_and_lazy_data`. """ + # deferred imports to avoid circularity problem + from iris.common._split_attribute_dicts import ( + _convert_splitattrs_to_pairedkeys_dict, + ) + from iris.cube import CubeAttrsDict + + cube_attrs = [cube.attributes for cube in cubes] removed = [] + + # *IF* they are cube attributes, then convert to "paired-keys" form. + is_split_dicts = any( + isinstance(attrs, CubeAttrsDict) for attrs in cube_attrs + ) + if is_split_dicts: + # Note: we only test this in case someone was passing some *other* objects for + # attributes processing (though the docstring does not admit this is possible). + cube_attrs = [ + _convert_splitattrs_to_pairedkeys_dict(dic) for dic in cube_attrs + ] + # Work out which attributes are identical across all the cubes. - common_keys = list(cubes[0].attributes.keys()) + common_keys = list(cube_attrs[0].keys()) keys_to_remove = set(common_keys) - for cube in cubes[1:]: - cube_keys = list(cube.attributes.keys()) + for attrs in cube_attrs[1:]: + cube_keys = list(attrs.keys()) keys_to_remove.update(cube_keys) common_keys = [ key for key in common_keys - if ( - key in cube_keys - and np.all(cube.attributes[key] == cubes[0].attributes[key]) - ) + if (key in cube_keys and np.all(attrs[key] == cube_attrs[0][key])) ] keys_to_remove.difference_update(common_keys) - # Remove all the other attributes. + # rework the keys list if we were handling split-attributes. + if is_split_dicts: + # Collect all the ('local'/'global, name) keys + retain just the names. + keys_to_remove = set(key_pair[1] for key_pair in keys_to_remove) + + # Remove all the non-matching attributes. for cube in cubes: deleted_attributes = { key: cube.attributes.pop(key) @@ -2084,6 +2105,7 @@ def equalise_attributes(cubes): if key in cube.attributes } removed.append(deleted_attributes) + return removed From a6ddf4337007a9cb7f6eef38510aea811066856a Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 17 Nov 2023 12:07:33 +0000 Subject: [PATCH 4/6] Update lib/iris/util.py Co-authored-by: Martin Yeo <40734014+trexfeathers@users.noreply.github.com> --- lib/iris/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index fa818f7837..cbe7c25428 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2094,7 +2094,7 @@ def equalise_attributes(cubes): # rework the keys list if we were handling split-attributes. if is_split_dicts: - # Collect all the ('local'/'global, name) keys + retain just the names. + # Collect all the ('local'/'global', name) keys + retain just the names. keys_to_remove = set(key_pair[1] for key_pair in keys_to_remove) # Remove all the non-matching attributes. From 0005cc366c6346588b63a23fbc07e03473b44783 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 17 Nov 2023 12:24:00 +0000 Subject: [PATCH 5/6] Update lib/iris/tests/unit/util/test_equalise_attributes.py Co-authored-by: Martin Yeo <40734014+trexfeathers@users.noreply.github.com> --- lib/iris/tests/unit/util/test_equalise_attributes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/tests/unit/util/test_equalise_attributes.py b/lib/iris/tests/unit/util/test_equalise_attributes.py index 7348553fa2..b410b5522c 100644 --- a/lib/iris/tests/unit/util/test_equalise_attributes.py +++ b/lib/iris/tests/unit/util/test_equalise_attributes.py @@ -166,7 +166,7 @@ class TestSplitattributes: of attributes as global or local. Specifically, it is now possible that although - "cube1.attributes.keys == cube2.attributes.keys()", + "cube1.attributes.keys() == cube2.attributes.keys()", AND "cube1.attributes[k] == cube2.attributes[k]" for all keys, YET STILL (possibly) "cube1.attributes != cube2.attributes" """ From c3e2175b7ea7735ed8ab2b49d926d70922a4afde Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 17 Nov 2023 12:31:38 +0000 Subject: [PATCH 6/6] Simplify and clarify equalise_attributes code. --- lib/iris/util.py | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/lib/iris/util.py b/lib/iris/util.py index cbe7c25428..258be7d318 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2059,25 +2059,28 @@ def equalise_attributes(cubes): See more at :doc:`/userguide/real_and_lazy_data`. """ - # deferred imports to avoid circularity problem + # deferred import to avoid circularity problem from iris.common._split_attribute_dicts import ( _convert_splitattrs_to_pairedkeys_dict, ) - from iris.cube import CubeAttrsDict cube_attrs = [cube.attributes for cube in cubes] - removed = [] - # *IF* they are cube attributes, then convert to "paired-keys" form. - is_split_dicts = any( - isinstance(attrs, CubeAttrsDict) for attrs in cube_attrs - ) - if is_split_dicts: - # Note: we only test this in case someone was passing some *other* objects for - # attributes processing (though the docstring does not admit this is possible). - cube_attrs = [ - _convert_splitattrs_to_pairedkeys_dict(dic) for dic in cube_attrs - ] + # Convert all the input dictionaries to ones with 'paired' keys, so each key + # becomes a pair, ('local'/'global', attribute-name), making them specific to each + # "type", i.e. global or local. + # This is needed to ensure that afterwards all cubes will have identical + # attributes, E.G. it treats an attribute which is global on one cube and local + # on another as *not* the same. This is essential to its use in making merges work. + # + # This approach does also still function with "ordinary" dictionaries, or + # :class:`iris.common.mixin.LimitedAttributeDict`, though somewhat inefficiently, + # so the routine works on *other* objects bearing attributes, i.e. not just Cubes. + # That is also important since the original code allows that (though the docstring + # does not admit it). + cube_attrs = [ + _convert_splitattrs_to_pairedkeys_dict(dic) for dic in cube_attrs + ] # Work out which attributes are identical across all the cubes. common_keys = list(cube_attrs[0].keys()) @@ -2092,12 +2095,14 @@ def equalise_attributes(cubes): ] keys_to_remove.difference_update(common_keys) - # rework the keys list if we were handling split-attributes. - if is_split_dicts: - # Collect all the ('local'/'global', name) keys + retain just the names. - keys_to_remove = set(key_pair[1] for key_pair in keys_to_remove) + # Convert back from the resulting 'paired' keys set, extracting just the + # attribute-name parts, as a set of names to be discarded. + # Note: we don't care any more what type (global/local) these were : we will + # simply remove *all* attributes with those names. + keys_to_remove = set(key_pair[1] for key_pair in keys_to_remove) # Remove all the non-matching attributes. + removed = [] for cube in cubes: deleted_attributes = { key: cube.attributes.pop(key)