Splitattrs ncsave redo (#5410)
* Add docs and future switch, no function yet.

* Typing enables code completion for Cube.attributes.

* Make roundtrip checking more precise + improve some tests accordingly (cf. #5403).

* Rework all tests to use common setup + results-checking code.

* Saver supports split-attributes saving (no tests yet).

* Tiny docs fix.

* Explain test routines better.

* Fix init of FUTURE object.

* Remove spurious re-test of FUTURE.save_split_attrs.

* Don't create Cube attrs of 'None' (n.b. but no effect as currently used).

* Remove/repair refs to obsolete routines.

* Check all warnings from save operations.

* Remove TestSave test numbers.

* More save cases: no match with missing, and different cube attribute types.

* Run save/roundtrip tests both with+without split saves.

* Fix.

* Review changes.

* Fix changed warning messages.

* Move warnings checking from 'run' to 'check' phase.

* Simplify and improve warnings checking code.

* Fix wrong testcase.

* Minor review changes.

* Fix reverted code.

* Use sets to simplify demoted-attributes code.

* WIP

* Working with iris 3.6.1, no errors in TestSave or TestRoundtrip.

* Interim save (incomplete?).

* Different form of results for split tests; working for roundtrip.

* Check that all param lists are sorted.

* Check matrix result-files compatibility; add test_save_matrix.

* test_load_matrix added; two types of load result.

* Finalise special-case attributes.

* Small docs tweaks.

* Add some more testcases.

* Ensure valid sort-order for globals of possibly different types.

* Initialise matrix results with legacy values from v3.6.1 -- all matching.

* Add full current matrix results, i.e. snapshot current behaviours.

* Review changes: rename some matrix testcases, for clarity.
pp-mo authored Oct 10, 2023
1 parent 57eec4d commit fa7962e
Showing 7 changed files with 4,283 additions and 530 deletions.
16 changes: 13 additions & 3 deletions lib/iris/__init__.py
@@ -142,7 +142,9 @@ def callback(cube, field, filename):
class Future(threading.local):
"""Run-time configuration controller."""

def __init__(self, datum_support=False, pandas_ndim=False):
def __init__(
self, datum_support=False, pandas_ndim=False, save_split_attrs=False
):
"""
A container for run-time options controls.
@@ -164,6 +166,11 @@ def __init__(self, datum_support=False, pandas_ndim=False):
pandas_ndim : bool, default=False
See :func:`iris.pandas.as_data_frame` for details - opts in to the
newer n-dimensional behaviour.
save_split_attrs : bool, default=False
Save "global" and "local" cube attributes to netcdf in appropriately
different ways : "global" ones are saved as dataset attributes, where
possible, while "local" ones are saved as data-variable attributes.
See :func:`iris.fileformats.netcdf.saver.save`.
"""
# The flag 'example_future_flag' is provided as a reference for the
@@ -175,12 +182,15 @@ def __init__(self, datum_support=False, pandas_ndim=False):
# self.__dict__['example_future_flag'] = example_future_flag
self.__dict__["datum_support"] = datum_support
self.__dict__["pandas_ndim"] = pandas_ndim
self.__dict__["save_split_attrs"] = save_split_attrs

def __repr__(self):
# msg = ('Future(example_future_flag={})')
# return msg.format(self.example_future_flag)
msg = "Future(datum_support={}, pandas_ndim={})"
return msg.format(self.datum_support, self.pandas_ndim)
msg = "Future(datum_support={}, pandas_ndim={}, save_split_attrs={})"
return msg.format(
self.datum_support, self.pandas_ndim, self.save_split_attrs
)

# deprecated_options = {'example_future_flag': 'warning',}
deprecated_options = {}
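The new save_split_attrs control is opt-in through iris.FUTURE, like the other Future flags above. As a rough sketch of how it might be used (the cube, attribute names and output filename below are invented for illustration, assuming an iris build that includes this change):

import numpy as np
import iris
from iris.cube import Cube

# Build a toy cube carrying one "global" and one "local" attribute.
cube = Cube(np.arange(4.0), var_name="x")
cube.attributes.globals["history"] = "created for a split-attributes demo"
cube.attributes.locals["note"] = "an attribute intended for the data variable only"

# Enable split-attribute saving for this call only, via the Future context manager;
# alternatively, set iris.FUTURE.save_split_attrs = True for the whole session.
with iris.FUTURE.context(save_split_attrs=True):
    iris.save(cube, "split_attrs_demo.nc")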
16 changes: 10 additions & 6 deletions lib/iris/cube.py
@@ -935,19 +935,19 @@ def _normalise_attrs(
return attributes

@property
def locals(self):
def locals(self) -> LimitedAttributeDict:
return self._locals

@locals.setter
def locals(self, attributes):
def locals(self, attributes: Optional[Mapping]):
self._locals = self._normalise_attrs(attributes)

@property
def globals(self):
def globals(self) -> LimitedAttributeDict:
return self._globals

@globals.setter
def globals(self, attributes):
def globals(self, attributes: Optional[Mapping]):
self._globals = self._normalise_attrs(attributes)

#
@@ -1340,8 +1340,12 @@ def _names(self):
#
# Ensure that .attributes is always a :class:`CubeAttrsDict`.
#
@CFVariableMixin.attributes.setter
def attributes(self, attributes):
@property
def attributes(self) -> CubeAttrsDict:
return super().attributes

@attributes.setter
def attributes(self, attributes: Optional[Mapping]):
"""
An override to CfVariableMixin.attributes.setter, which ensures that Cube
attributes are stored in a way which distinguishes global + local ones.
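A brief illustrative sketch of the split attributes view on Cube.attributes used above (the cube and attribute names are made up; the exact rules for routing plain-dict assignments are those documented on CubeAttrsDict):

import numpy as np
from iris.cube import Cube

cube = Cube(np.zeros(3), var_name="tas")

# .attributes still behaves as a single mapping for ordinary use ...
cube.attributes["experiment_id"] = "demo"

# ... but also exposes the two underlying LimitedAttributeDict parts.
cube.attributes.globals["history"] = "produced by an example script"
cube.attributes.locals["processing_note"] = "applies to this data variable only"

print(dict(cube.attributes))          # combined view of all attributes
print(dict(cube.attributes.globals))  # candidates for dataset attributes
print(dict(cube.attributes.locals))   # candidates for data-variable attributes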
210 changes: 166 additions & 44 deletions lib/iris/fileformats/netcdf/saver.py
@@ -541,6 +541,10 @@ def write(
matching keys will become attributes on the data variable rather
than global attributes.
.. Note::
Has no effect if :attr:`iris.FUTURE.save_split_attrs` is ``True``.
* unlimited_dimensions (iterable of strings and/or
:class:`iris.coords.Coord` objects):
List of coordinate names (or coordinate objects)
@@ -633,6 +637,9 @@ def write(
3 files that do not use HDF5.
"""
# TODO: when iris.FUTURE.save_split_attrs defaults to True, we can deprecate the
# "local_keys" arg, and finally remove it when we finally remove the
# save_split_attrs switch.
if unlimited_dimensions is None:
unlimited_dimensions = []

@@ -709,20 +716,23 @@ def write(
# aux factory in the cube.
self._add_aux_factories(cube, cf_var_cube, cube_dimensions)

# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add global attributes taking into account local_keys.
global_attributes = {
k: v
for k, v in cube.attributes.items()
if (k not in local_keys and k.lower() != "conventions")
}
self.update_global_attributes(global_attributes)
if not iris.FUTURE.save_split_attrs:
# In the "old" way, we update global attributes as we go.
# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add global attributes taking into account local_keys.
cube_attributes = cube.attributes
global_attributes = {
k: v
for k, v in cube_attributes.items()
if (k not in local_keys and k.lower() != "conventions")
}
self.update_global_attributes(global_attributes)

if cf_profile_available:
cf_patch = iris.site_configuration.get("cf_patch")
@@ -778,6 +788,9 @@ def update_global_attributes(self, attributes=None, **kwargs):
CF global attributes to be updated.
"""
# TODO: when iris.FUTURE.save_split_attrs is removed, this routine will
# only be called once: it can reasonably be renamed "_set_global_attributes",
# and the 'kwargs' argument can be removed.
if attributes is not None:
# Handle sequence e.g. [('fruit', 'apple'), ...].
if not hasattr(attributes, "keys"):
@@ -2195,6 +2208,8 @@ def _create_cf_data_variable(
"""
Create CF-netCDF data variable for the cube and any associated grid
mapping.
# TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
# be removed.
Args:
@@ -2219,6 +2234,8 @@
The newly created CF-netCDF data variable.
"""
# TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
# be removed.
# Get the values in a form which is valid for the file format.
data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)

@@ -2307,16 +2324,20 @@ def set_packing_ncattrs(cfvar):
if cube.units.calendar:
_setncattr(cf_var, "calendar", cube.units.calendar)

# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
if iris.FUTURE.save_split_attrs:
attr_names = cube.attributes.locals.keys()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)
# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add any cube attributes whose keys are in local_keys as
# CF-netCDF data variable attributes.
attr_names = set(cube.attributes).intersection(local_keys)

# Add any cube attributes whose keys are in local_keys as
# CF-netCDF data variable attributes.
attr_names = set(cube.attributes).intersection(local_keys)
for attr_name in sorted(attr_names):
# Do not output 'conventions' attribute.
if attr_name.lower() == "conventions":
@@ -2600,9 +2621,15 @@ def save(
Save cube(s) to a netCDF file, given the cube and the filename.
* Iris will write CF 1.7 compliant NetCDF files.
* The attributes dictionaries on each cube in the saved cube list
will be compared and common attributes saved as NetCDF global
attributes where appropriate.
* **If split-attribute saving is disabled**, i.e.
:data:`iris.FUTURE`\\ ``.save_split_attrs`` is ``False``, then attributes
dictionaries on each cube in the saved cube list will be compared, and common
attributes saved as NetCDF global attributes where appropriate.
Or, **when split-attribute saving is enabled**, then ``cube.attributes.locals``
are always saved as attributes of data-variables, and ``cube.attributes.globals``
are saved as global (dataset) attributes, where possible.
Since the 2 types are now distinguished : see :class:`~iris.cube.CubeAttrsDict`.
* Keyword arguments specifying how to save the data are applied
to each cube. To use different settings for different cubes, use
the NetCDF Context manager (:class:`~Saver`) directly.
@@ -2635,6 +2662,8 @@ def save(
An iterable of cube attribute keys. Any cube attributes with
matching keys will become attributes on the data variable rather
than global attributes.
**NOTE:** this is *ignored* if 'split-attribute saving' is **enabled**,
i.e. when ``iris.FUTURE.save_split_attrs`` is ``True``.
* unlimited_dimensions (iterable of strings and/or
:class:`iris.coords.Coord` objects):
@@ -2773,26 +2802,114 @@ def save(
else:
cubes = cube

if local_keys is None:
# Decide which cube attributes will be saved as "global" attributes
# NOTE: in 'legacy' mode, when iris.FUTURE.save_split_attrs == False, this code
# section derives a common value for 'local_keys', which is passed to 'Saver.write'
# when saving each input cube. The global attributes are then created by a call
# to "Saver.update_global_attributes" within each 'Saver.write' call (which is
# obviously a bit redundant!), plus an extra one to add 'Conventions'.
# HOWEVER, in `split_attrs` mode (iris.FUTURE.save_split_attrs == True), this code
# instead constructs a 'global_attributes' dictionary, and outputs that just once,
# after writing all the input cubes.
if iris.FUTURE.save_split_attrs:
# We don't actually use 'local_keys' in this case.
# TODO: can remove this when the iris.FUTURE.save_split_attrs is removed.
local_keys = set()

# Find any collisions in the cube global attributes and "demote" all those to
# local attributes (where possible, else warn they are lost).
# N.B. "collision" includes when not all cubes *have* that attribute.
global_names = set()
for cube in cubes:
global_names |= set(cube.attributes.globals.keys())

# Find any global attributes which are not the same on *all* cubes.
def attr_values_equal(val1, val2):
# An equality test which also works when some values are numpy arrays (!)
# As done in :meth:`iris.common.mixin.LimitedAttributeDict.__eq__`.
match = val1 == val2
try:
match = bool(match)
except ValueError:
match = match.all()
return match

cube0 = cubes[0]
invalid_globals = set(
[
attrname
for attrname in global_names
if not all(
attr_values_equal(
cube.attributes.globals.get(attrname),
cube0.attributes.globals.get(attrname),
)
for cube in cubes[1:]
)
]
)

# Establish all the global attributes which we will write to the file (at end).
global_attributes = {
attr: cube0.attributes.globals.get(attr)
for attr in global_names - invalid_globals
}
if invalid_globals:
# Some cubes have different global attributes: modify cubes as required.
warnings.warn(
f"Saving the cube global attributes {sorted(invalid_globals)} as local "
"(i.e. data-variable) attributes, where possible, since they are not "
"the same on all input cubes."
)
cubes = cubes.copy() # avoiding modifying the actual input arg.
for i_cube in range(len(cubes)):
# We iterate over cube *index*, so we can replace the list entries with
# cube *copies* -- just to avoid changing our call args.
cube = cubes[i_cube]
demote_attrs = set(cube.attributes.globals) & invalid_globals
if any(demote_attrs):
# Catch any demoted attrs where there is already a local version
blocked_attrs = demote_attrs & set(cube.attributes.locals)
if blocked_attrs:
warnings.warn(
f"Global cube attributes {sorted(blocked_attrs)} "
f'of cube "{cube.name()}" were not saved, overlaid '
"by existing local attributes with the same names."
)
demote_attrs -= blocked_attrs
if demote_attrs:
# This cube contains some 'demoted' global attributes.
# Replace input cube with a copy, so we can modify attributes.
cube = cube.copy()
cubes[i_cube] = cube
for attr in demote_attrs:
# move global to local
value = cube.attributes.globals.pop(attr)
cube.attributes.locals[attr] = value

else:
local_keys = set(local_keys)

# Determine the attribute keys that are common across all cubes and
# thereby extend the collection of local_keys for attributes
# that should be attributes on data variables.
attributes = cubes[0].attributes
common_keys = set(attributes)
for cube in cubes[1:]:
keys = set(cube.attributes)
local_keys.update(keys.symmetric_difference(common_keys))
common_keys.intersection_update(keys)
different_value_keys = []
for key in common_keys:
if np.any(attributes[key] != cube.attributes[key]):
different_value_keys.append(key)
common_keys.difference_update(different_value_keys)
local_keys.update(different_value_keys)
# Legacy mode: calculate "local_keys" to control which attributes are local
# and which global.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)

# Determine the attribute keys that are common across all cubes and
# thereby extend the collection of local_keys for attributes
# that should be attributes on data variables.
attributes = cubes[0].attributes
common_keys = set(attributes)
for cube in cubes[1:]:
keys = set(cube.attributes)
local_keys.update(keys.symmetric_difference(common_keys))
common_keys.intersection_update(keys)
different_value_keys = []
for key in common_keys:
if np.any(attributes[key] != cube.attributes[key]):
different_value_keys.append(key)
common_keys.difference_update(different_value_keys)
local_keys.update(different_value_keys)

def is_valid_packspec(p):
"""Only checks that the datatype is valid."""
@@ -2894,7 +3011,12 @@ def is_valid_packspec(p):
warnings.warn(msg)

# Add conventions attribute.
sman.update_global_attributes(Conventions=conventions)
if iris.FUTURE.save_split_attrs:
# In the "new way", we just create all the global attributes at once.
global_attributes["Conventions"] = conventions
sman.update_global_attributes(global_attributes)
else:
sman.update_global_attributes(Conventions=conventions)

if compute:
# No more to do, since we used Saver(compute=True).
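To make the collision handling in the save() changes above concrete, here is a hedged sketch of the "demotion" path (the cube names, the 'source' attribute and the output filename are invented): two cubes disagree on a global attribute, so with split saving enabled it should be written as a data-variable attribute instead, with a warning.

import warnings
import numpy as np
import iris
from iris.cube import Cube, CubeList

cube_a = Cube(np.arange(3.0), var_name="a")
cube_b = Cube(np.arange(3.0), var_name="b")
cube_a.attributes.globals["source"] = "model run 1"
cube_b.attributes.globals["source"] = "model run 2"  # differs, so cannot stay global

with iris.FUTURE.context(save_split_attrs=True):
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        iris.save(CubeList([cube_a, cube_b]), "demoted_attrs_demo.nc")

# Expect a warning along the lines of "Saving the cube global attributes
# ['source'] as local (i.e. data-variable) attributes ...".
print([str(w.message) for w in caught])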
(Diffs for the remaining 4 changed files are not shown.)
