Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Splitattrs equalise #5586

Merged
merged 6 commits into from
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions lib/iris/common/_split_attribute_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
So, we simply treat "global" and "local" attributes of the same name as entirely
independent. Which happily is also the easiest to code, and to explain.
"""

from collections.abc import Mapping, Sequence
from functools import wraps

Expand All @@ -30,7 +29,16 @@ def _convert_splitattrs_to_pairedkeys_dict(dic):
Transform a :class:`~iris.cube.CubeAttrsDict` "split" attributes dictionary
into a 'normal' :class:`dict`, with paired keys of the form ('global', name) or
('local', name).

If the input is *not* a split-attrs dict, it is converted to one before
transforming it. This will assign its keys to global/local depending on a standard
set of choices (see :class:`~iris.cube.CubeAttrsDict`).
"""
from iris.cube import CubeAttrsDict

# Convert input to CubeAttrsDict
if not hasattr(dic, "globals") or not hasattr(dic, "locals"):
dic = CubeAttrsDict(dic)

def _global_then_local_items(dic):
# Routine to produce global, then local 'items' in order, and with all keys
Expand Down Expand Up @@ -93,13 +101,6 @@ def adjust_for_split_attribute_dictionaries(operation):

@wraps(operation)
def _inner_function(*args, **kwargs):
from iris.cube import CubeAttrsDict

# First make all inputs into CubeAttrsDict, if not already.
args = [
arg if isinstance(arg, CubeAttrsDict) else CubeAttrsDict(arg)
for arg in args
]
# Convert all inputs into 'pairedkeys' type dicts
args = [_convert_splitattrs_to_pairedkeys_dict(arg) for arg in args]

Expand Down
113 changes: 112 additions & 1 deletion lib/iris/tests/unit/util/test_equalise_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@

import numpy as np

from iris.cube import Cube
from iris.coords import AuxCoord
from iris.cube import Cube, CubeAttrsDict
import iris.tests.stock
from iris.tests.unit.common.metadata.test_CubeMetadata import (
_TEST_ATTRNAME,
make_attrsdict,
)
from iris.util import equalise_attributes


Expand Down Expand Up @@ -153,5 +158,111 @@ def test_complex_somecommon(self):
)


class TestSplitattributes:
    """
    Extra tests for attributes which differ specifically by their "type".

    The newer "typing" of attributes as either global or local introduces a new
    way for attributes to 'mismatch'.

    Specifically, it is now possible that
    "cube1.attributes.keys() == cube2.attributes.keys()",
    AND "cube1.attributes[k] == cube2.attributes[k]" for all keys,
    YET STILL (possibly) "cube1.attributes != cube2.attributes".
    """

    @staticmethod
    def _sample_splitattrs_cube(attr_global_local):
        # Build a cube whose split attributes are made from a two-item code :
        # item [0] is passed to make_attrsdict for the globals, item [1] for the
        # locals.
        global_code = attr_global_local[0]
        local_code = attr_global_local[1]
        split_attrs = CubeAttrsDict(
            globals=make_attrsdict(global_code),
            locals=make_attrsdict(local_code),
        )
        return Cube([0], attributes=split_attrs)

    @staticmethod
    def check_equalised_result(cube1, cube2):
        # Equalise the attributes of the two cubes (in-place), then summarise what
        # remains of the test attribute on each, as a "<global><local>" string.
        equalise_attributes([cube1, cube2])

        def _summarise(cube):
            # Note: "X" represents a missing attribute, as in test_CubeMetadata
            attrs = cube.attributes
            global_part = attrs.globals.get(_TEST_ATTRNAME, "X")
            local_part = attrs.locals.get(_TEST_ATTRNAME, "X")
            return global_part + local_part

        return [_summarise(cube1), _summarise(cube2)]

    def test__global_and_local__bothsame(self):
        # A trivial case showing that the original globals+locals are both preserved.
        first, second = (
            self._sample_splitattrs_cube("AB"),
            self._sample_splitattrs_cube("AB"),
        )
        assert self.check_equalised_result(first, second) == ["AB", "AB"]

    def test__globals_different(self):
        first, second = (
            self._sample_splitattrs_cube("AX"),
            self._sample_splitattrs_cube("BX"),
        )
        assert self.check_equalised_result(first, second) == ["XX", "XX"]

    def test__locals_different(self):
        first, second = (
            self._sample_splitattrs_cube("XA"),
            self._sample_splitattrs_cube("XB"),
        )
        assert self.check_equalised_result(first, second) == ["XX", "XX"]

    def test__oneglobal_onelocal__different(self):
        first, second = (
            self._sample_splitattrs_cube("AX"),
            self._sample_splitattrs_cube("XB"),
        )
        assert self.check_equalised_result(first, second) == ["XX", "XX"]

    # This case fails without the split-attributes fix.
    def test__oneglobal_onelocal__same(self):
        first, second = (
            self._sample_splitattrs_cube("AX"),
            self._sample_splitattrs_cube("XA"),
        )
        assert self.check_equalised_result(first, second) == ["XX", "XX"]

    def test__sameglobals_onelocal__different(self):
        first, second = (
            self._sample_splitattrs_cube("AB"),
            self._sample_splitattrs_cube("AX"),
        )
        assert self.check_equalised_result(first, second) == ["XX", "XX"]

    # This case fails without the split-attributes fix.
    def test__sameglobals_onelocal__same(self):
        first, second = (
            self._sample_splitattrs_cube("AA"),
            self._sample_splitattrs_cube("AX"),
        )
        assert self.check_equalised_result(first, second) == ["XX", "XX"]

    # This case fails without the split-attributes fix.
    def test__differentglobals_samelocals(self):
        first, second = (
            self._sample_splitattrs_cube("AC"),
            self._sample_splitattrs_cube("BC"),
        )
        assert self.check_equalised_result(first, second) == ["XX", "XX"]


class TestNonCube:
    # Check that the operation also works on non-cube components (like Coords) :
    # effectively, on anything with a ".attributes".
    # The docstring does not admit this, but it is tested here because special code
    # was put in to preserve it when the split-attribute handling was added.
    def test(self):
        coords = [
            AuxCoord([0], attributes={"a": value, "b": "all_the_same"})
            for value in (1, 1, 2)
        ]
        equalise_attributes(coords)
        # Only the attribute which is the same on every coord should remain.
        expected = {"b": "all_the_same"}
        remaining = [coord.attributes for coord in coords]
        assert all(attrs == expected for attrs in remaining)


if __name__ == "__main__":
tests.main()
38 changes: 30 additions & 8 deletions lib/iris/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2059,31 +2059,53 @@ def equalise_attributes(cubes):
See more at :doc:`/userguide/real_and_lazy_data`.

"""
# deferred imports to avoid circularity problem
from iris.common._split_attribute_dicts import (
_convert_splitattrs_to_pairedkeys_dict,
)
from iris.cube import CubeAttrsDict

cube_attrs = [cube.attributes for cube in cubes]
removed = []

# *IF* they are cube attributes, then convert to "paired-keys" form.
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved
is_split_dicts = any(
isinstance(attrs, CubeAttrsDict) for attrs in cube_attrs
)
if is_split_dicts:
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved
# Note: we only test this in case someone was passing some *other* objects for
# attributes processing (though the docstring does not admit this is possible).
cube_attrs = [
_convert_splitattrs_to_pairedkeys_dict(dic) for dic in cube_attrs
]

# Work out which attributes are identical across all the cubes.
common_keys = list(cubes[0].attributes.keys())
common_keys = list(cube_attrs[0].keys())
keys_to_remove = set(common_keys)
for cube in cubes[1:]:
cube_keys = list(cube.attributes.keys())
for attrs in cube_attrs[1:]:
cube_keys = list(attrs.keys())
keys_to_remove.update(cube_keys)
common_keys = [
key
for key in common_keys
if (
key in cube_keys
and np.all(cube.attributes[key] == cubes[0].attributes[key])
)
if (key in cube_keys and np.all(attrs[key] == cube_attrs[0][key]))
]
keys_to_remove.difference_update(common_keys)

# Remove all the other attributes.
# rework the keys list if we were handling split-attributes.
if is_split_dicts:
# Collect all the ('local'/'global', name) keys + retain just the names.
pp-mo marked this conversation as resolved.
Show resolved Hide resolved
keys_to_remove = set(key_pair[1] for key_pair in keys_to_remove)

# Remove all the non-matching attributes.
for cube in cubes:
deleted_attributes = {
key: cube.attributes.pop(key)
for key in keys_to_remove
if key in cube.attributes
}
removed.append(deleted_attributes)

return removed


Expand Down
Loading