From 78a9d3bb947972ee5bb0f4c33a1314324c04dff1 Mon Sep 17 00:00:00 2001
From: Patrick Peglar
Date: Fri, 28 Jul 2023 13:55:12 +0100
Subject: [PATCH] Updated saver changes (still untested).

When iris.FUTURE.save_split_attrs is enabled, save() now collects the
cube global attributes up front, "demotes" any which are not the same on
all cubes to local (data-variable) attributes on copies of the cubes, and
writes the remaining global attributes in a single call at the end.
Saver.write() and Saver._create_cf_data_variable() only apply the
'local_keys' handling when the flag is off.
---
 lib/iris/cube.py                     |   8 +-
 lib/iris/fileformats/netcdf/saver.py | 213 +++++++++++++++++++--------
 2 files changed, 157 insertions(+), 64 deletions(-)

diff --git a/lib/iris/cube.py b/lib/iris/cube.py
index 8bb9d7c00e7..8815ce17724 100644
--- a/lib/iris/cube.py
+++ b/lib/iris/cube.py
@@ -930,19 +930,19 @@ def _normalise_attrs(
         return attributes
 
     @property
-    def locals(self):
+    def locals(self) -> LimitedAttributeDict:
         return self._locals
 
     @locals.setter
-    def locals(self, attributes):
+    def locals(self, attributes: Optional[Mapping]):
         self._locals = self._normalise_attrs(attributes)
 
     @property
-    def globals(self):
+    def globals(self) -> LimitedAttributeDict:
         return self._globals
 
     @globals.setter
-    def globals(self, attributes):
+    def globals(self, attributes: Optional[Mapping]):
         self._globals = self._normalise_attrs(attributes)
 
     #
diff --git a/lib/iris/fileformats/netcdf/saver.py b/lib/iris/fileformats/netcdf/saver.py
index bff3cb089a0..ecaa87a47e3 100644
--- a/lib/iris/fileformats/netcdf/saver.py
+++ b/lib/iris/fileformats/netcdf/saver.py
@@ -540,6 +540,9 @@ def write(
             An interable of cube attribute keys. Any cube attributes with
             matching keys will become attributes on the data variable rather
             than global attributes.
+
+            .. note::
+                Has no effect if :attr:`iris.FUTURE.save_split_attrs` is ``True``.
 
         * unlimited_dimensions (iterable of strings and/or
           :class:`iris.coords.Coord` objects):
@@ -709,25 +712,23 @@ def write(
         # aux factory in the cube.
         self._add_aux_factories(cube, cf_var_cube, cube_dimensions)
 
-        # Add data variable-only attribute names to local_keys.
-        if local_keys is None:
-            local_keys = set()
-        else:
-            local_keys = set(local_keys)
-        local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)
-
-        # Add global attributes taking into account local_keys.
-        cube_attributes = cube.attributes
-        if iris.FUTURE.save_split_attrs:
-            # In this case, do *not* promote any 'local' attributes to global ones,
-            # only "global" cube attrs may be written as global file attributes
-            cube_attributes = cube_attributes.globals
-        global_attributes = {
-            k: v
-            for k, v in cube_attributes.items()
-            if (k not in local_keys and k.lower() != "conventions")
-        }
-        self.update_global_attributes(global_attributes)
+        if not iris.FUTURE.save_split_attrs:
+            # In the "old" way, we update global attributes as we go.
+            # Add data variable-only attribute names to local_keys.
+            if local_keys is None:
+                local_keys = set()
+            else:
+                local_keys = set(local_keys)
+            local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)
+
+            # Add global attributes taking into account local_keys.
+            cube_attributes = cube.attributes
+            global_attributes = {
+                k: v
+                for k, v in cube_attributes.items()
+                if (k not in local_keys and k.lower() != "conventions")
+            }
+            self.update_global_attributes(global_attributes)
 
         if cf_profile_available:
             cf_patch = iris.site_configuration.get("cf_patch")
@@ -783,6 +784,9 @@ def update_global_attributes(self, attributes=None, **kwargs):
             CF global attributes to be updated.
 
         """
+        # TODO: when we no longer support combined attribute saving, this routine will
+        # only be called once: it can reasonably be renamed "_set_global_attributes",
+        # and the 'kwargs' argument can be removed.
         if attributes is not None:
             # Handle sequence e.g. [('fruit', 'apple'), ...].
             if not hasattr(attributes, "keys"):
@@ -2224,6 +2228,8 @@ def _create_cf_data_variable(
             The newly created CF-netCDF data variable.
 
         """
+        # TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
+        # be removed.
         # Get the values in a form which is valid for the file format.
         data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)
 
@@ -2312,16 +2318,20 @@ def set_packing_ncattrs(cfvar):
         if cube.units.calendar:
             _setncattr(cf_var, "calendar", cube.units.calendar)
 
-        # Add data variable-only attribute names to local_keys.
-        if local_keys is None:
-            local_keys = set()
+        if iris.FUTURE.save_split_attrs:
+            attr_names = cube.attributes.locals.keys()
         else:
-            local_keys = set(local_keys)
-        local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)
+            # Add data variable-only attribute names to local_keys.
+            if local_keys is None:
+                local_keys = set()
+            else:
+                local_keys = set(local_keys)
+            local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)
+
+            # Add any cube attributes whose keys are in local_keys as
+            # CF-netCDF data variable attributes.
+            attr_names = set(cube.attributes).intersection(local_keys)
 
-        # Add any cube attributes whose keys are in local_keys as
-        # CF-netCDF data variable attributes.
-        attr_names = set(cube.attributes).intersection(local_keys)
         for attr_name in sorted(attr_names):
             # Do not output 'conventions' attribute.
             if attr_name.lower() == "conventions":
@@ -2786,38 +2796,118 @@ def save(
     else:
         cubes = cube
 
-    do_split_attrs = iris.FUTURE.save_split_attrs
-    if local_keys is None or do_split_attrs:
+    if iris.FUTURE.save_split_attrs:
+        # We don't actually use 'local_keys' in this case.
+        # TODO: can remove this when the iris.FUTURE.save_split_attrs is removed.
         local_keys = set()
+
+        # Find any collisions in the cube global attributes and "demote" all those to
+        # local attributes (where possible, else warn they are lost).
+        # N.B. "collision" includes when not all cubes *have* that attribute.
+        global_names = set()
+        for cube in cubes:
+            global_names |= set(cube.attributes.globals.keys())
+
+        # Find any global attributes which are not the same on *all* cubes.
+        def attr_values_equal(val1, val2):
+            # An equality test which also works when some values are numpy arrays (!)
+            # As done in :meth:`iris.common.mixin.LimitedAttributeDict.__eq__`.
+            match = val1 == val2
+            try:
+                match = bool(match)
+            except ValueError:
+                match = match.all()
+            return match
+
+        cube0 = cubes[0]
+        invalid_globals = [
+            attrname
+            for attrname in global_names
+            if not all(
+                attr_values_equal(
+                    cube.attributes.globals.get(attrname),
+                    cube0.attributes.globals.get(attrname),
+                )
+                for cube in cubes[1:]
+            )
+        ]
+
+        # Establish all the global attributes which we will write to the file (at end).
+        global_attributes = {
+            attr: cube0.attributes.globals[attr]
+            for attr in global_names
+            if attr not in invalid_globals
+        }
+        if invalid_globals:
+            # Some cubes have different global attributes: modify cubes as required.
+            warnings.warn(
+                f"Saving the cube global attributes {invalid_globals} as local "
+                "(i.e. data-variable) attributes, where possible, since they are not "
+                "the same on all input cubes."
+            )
+            cubes = list(cubes)  # avoiding modifying the actual input arg.
+            for i_cube in range(len(cubes)):
+                # We iterate over cube *index*, so we can replace the list entries
+                # with cube *copies* -- just to avoid changing our call args.
+                cube = cubes[i_cube]
+                demote_attrs = [
+                    attr
+                    for attr in cube.attributes.globals
+                    if attr in invalid_globals
+                ]
+                if demote_attrs:
+                    # This cube contains some 'demoted' global attributes.
+                    # Replace the input cube with a copy, so we can modify attributes.
+                    cube = cube.copy()
+                    cubes[i_cube] = cube
+                    # Catch any demoted attrs where there is already a local version
+                    blocked_attrs = [
+                        attrname
+                        for attrname in demote_attrs
+                        if attrname in cube.attributes.locals
+                    ]
+                    if blocked_attrs:
+                        warnings.warn(
+                            f"Global cube attributes {blocked_attrs} "
+                            f'of cube "{cube.name()}" have been lost, overlaid '
+                            "by existing local attributes with the same names."
+                        )
+                    for attr in demote_attrs:
+                        if attr not in blocked_attrs:
+                            cube.attributes.locals[
+                                attr
+                            ] = cube.attributes.globals[attr]
+                        cube.attributes.globals.pop(attr)
+
     else:
-        local_keys = set(local_keys)
-
-    # Determine the attribute keys that are common across all cubes and
-    # thereby extend the collection of local_keys for attributes
-    # that should be attributes on data variables.
-    # NOTE: in 'legacy' mode, this code derives a common value for 'local_keys', which
-    # is employed in saving each cube.
-    # However, in `split_attrs` mode, this considers ONLY global attributes, and the
-    # resulting 'common_keys' is the fixed result : each cube is then saved like ...
-    # "sman.write(... localkeys=list(cube.attributes) - common_keys, ...)"
-    common_attr_values = None
-    for cube in cubes:
-        cube_attributes = cube.attributes
-        if do_split_attrs:
-            # Only consider "global" type attributes for those purposes.
-            cube_attributes = cube_attributes.globals
-        keys = set(cube_attributes)
-        if common_attr_values is None:
-            common_attr_values = cube_attributes.copy()
-            common_keys = keys.copy()
-        local_keys.update(keys.symmetric_difference(common_keys))
-        common_keys.intersection_update(keys)
-        different_value_keys = []
-        for key in common_keys:
-            if np.any(common_attr_values[key] != cube_attributes[key]):
-                different_value_keys.append(key)
-        common_keys.difference_update(different_value_keys)
-        local_keys.update(different_value_keys)
+        # Determine the attribute keys that are common across all cubes and
+        # thereby extend the collection of local_keys for attributes
+        # that should be attributes on data variables.
+        # NOTE: this branch is only reached in 'legacy' mode (split-attrs saving is
+        # handled entirely in the branch above).  It derives a single common value
+        # for 'local_keys', which is then employed in saving each cube : any attribute
+        # which is missing from some cubes, or differs in value between cubes, is
+        # added to 'local_keys' so that it is saved as a data-variable attribute.
+        if local_keys is None:
+            local_keys = set()
+        else:
+            local_keys = set(local_keys)
+
+        common_attr_values = None
+        for cube in cubes:
+            cube_attributes = cube.attributes
+            keys = set(cube_attributes)
+            if common_attr_values is None:
+                common_attr_values = cube_attributes.copy()
+                common_keys = keys.copy()
+            local_keys.update(keys.symmetric_difference(common_keys))
+            common_keys.intersection_update(keys)
+            different_value_keys = []
+            for key in common_keys:
+                if np.any(common_attr_values[key] != cube_attributes[key]):
+                    different_value_keys.append(key)
+            common_keys.difference_update(different_value_keys)
+            local_keys.update(different_value_keys)
 
     def is_valid_packspec(p):
         """Only checks that the datatype is valid."""
@@ -2878,8 +2968,6 @@ def is_valid_packspec(p):
     with Saver(filename, netcdf_format, compute=compute) as sman:
         # Iterate through the cubelist.
         for cube, packspec, fill_value in zip(cubes, packspecs, fill_values):
-            if do_split_attrs:
-                local_keys = list(cube.attributes.locals.keys())
             sman.write(
                 cube,
                 local_keys,
@@ -2921,7 +3009,12 @@ def is_valid_packspec(p):
                 warnings.warn(msg)
 
         # Add conventions attribute.
-        sman.update_global_attributes(Conventions=conventions)
+        if iris.FUTURE.save_split_attrs:
+            # In the "new way", we just create all the global attributes at once.
+            global_attributes["Conventions"] = conventions
+            sman.update_global_attributes(global_attributes)
+        else:
+            sman.update_global_attributes(Conventions=conventions)
 
         if compute:
             # No more to do, since we used Saver(compute=True).