diff --git a/cf/data/data.py b/cf/data/data.py index 15bc65a9ca..d2387c3759 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -8219,10 +8219,12 @@ def outerproduct(self, a, inplace=False, i=False): d = _inplace_enabled_define_and_cleanup(self) # Cast 'a' as a Data object so that it definitely has sensible - # Units + # Units. We don't mind if the units of 'a' are incompatible + # with those of 'self', but if they are then it's nice if the + # units are conformed. a = self.asdata(a) try: - a = conform_units(a, d.Units) + a = conform_units(a, d.Units, message="") except ValueError: pass @@ -9579,8 +9581,11 @@ def where( **Broadcasting** The array and the *condition*, *x* and *y* parameters must all - be broadcastable to each other, such that the shape of the - result is identical to the orginal shape of the array. + be broadcastable across the original array, such that the size + of the result is identical to the original size of the + array. Leading size 1 dimensions of these parameters are + ignored, thereby also ensuring that the shape of the result is + identical to the orginal shape of the array. If *condition* is a `Query` object then for the purposes of broadcasting, the condition is considered to be that which is @@ -9598,7 +9603,7 @@ def where( :Parameters: - condition: array-like or `Query` + condition: array_like or `Query` The condition which determines how to assign values to the data. @@ -9731,7 +9736,7 @@ def where( >>> d = cf.Data(x) >>> e = d.where(condition, d, 10 + y) ... - ValueError: where: Broadcasting the 'condition' parameter with shape (3, 4) would change the shape of the data with shape (3, 1) + ValueError: where: 'condition' parameter with shape (3, 4) can not be broadcast across data with shape (3, 1) when the result will have a different shape to the data >>> d = cf.Data(np.arange(9).reshape(3, 3)) >>> e = d.copy() @@ -9749,6 +9754,8 @@ def where( [ 6. 7. 8. ]] """ + from .utils import where_broadcastable + d = _inplace_enabled_define_and_cleanup(self) # Missing values could be affected, so make sure that the mask @@ -9761,13 +9768,13 @@ def where( if getattr(condition, "isquery", False): # Condition is a cf.Query object: Make sure that the # condition units are OK, and convert the condition to a - # boolean dask array with the same shape as the data. + # boolean Data instance with the same shape as the data. condition = condition.copy() - condition = condition.set_condition_units(units) + condition.set_condition_units(units) condition = condition.evaluate(d) condition = type(self).asdata(condition) - _where_broadcastable(d, condition, "condition") + condition = where_broadcastable(d, condition, "condition") # If x or y is self then change it to None. This prevents an # unnecessary copy; and, at compute time, an unncessary numpy @@ -9795,18 +9802,16 @@ def where( continue arg = type(self).asdata(arg) - _where_broadcastable(d, arg, name) - - if arg.Units: - # Make sure that units are OK. - arg = arg.copy() - try: - arg.Units = units - except ValueError: - raise ValueError( - f"where: {name!r} parameter units {arg.Units!r} " - f"are not equivalent to data units {units!r}" - ) + arg = where_broadcastable(d, arg, name) + + arg_units = arg.Units + if arg_units: + arg = conform_units( + arg, + units, + message=f"where: {name!r} parameter units {arg_units!r} " + f"are not equivalent to data units {units!r}", + ) xy.append(arg.to_dask_array()) @@ -11791,90 +11796,6 @@ def _size_of_index(index, size=None): return len(index) -def _broadcast(a, shape): - """Broadcast an array to a given shape. - - It is assumed that ``len(array.shape) <= len(shape)`` and that the - array is broadcastable to the shape by the normal numpy - boradcasting rules, but neither of these things are checked. - - For example, ``d[...] = d._broadcast(e, d.shape)`` gives the same - result as ``d[...] = e`` - - :Parameters: - - a: numpy array-like - - shape: `tuple` - - :Returns: - - `numpy.ndarray` - - """ - # Replace with numpy.broadcast_to v1.10 ??/ TODO - - a_shape = np.shape(a) - if a_shape == shape: - return a - - tile = [(m if n == 1 else 1) for n, m in zip(a_shape[::-1], shape[::-1])] - tile = shape[0 : len(shape) - len(a_shape)] + tuple(tile[::-1]) - - return np.tile(a, tile) - - -def _where_broadcastable(data, x, name): - """Check broadcastability for `where` assignments. - - Raises an exception if the result of broadcasting *data* and *x* - together does not have the same shape as *data*. - - .. versionadded:: TODODASKVER - - .. seealso:: `where` - - :Parameters: - - data, x: `Data` - The arrays to compare. - - name: `str` - A name for *x* that is used in any exception error - message. - - :Returns: - - `bool` - If *x* is acceptably broadcastable to *data* then `True` - is returned, otherwise a `ValueError` is raised. - - """ - ndim_x = x.ndim - if not ndim_x: - return True - - ndim_data = data.ndim - if ndim_x > ndim_data: - raise ValueError( - f"where: Broadcasting the {name!r} parameter with {ndim_x} " - f"dimensions would change the shape of the data with " - f"{ndim_data} dimensions" - ) - - shape_x = x.shape - shape_data = data.shape - for n, m in zip(shape_x[::-1], shape_data[::-1]): - if n != m and n != 1: - raise ValueError( - f"where: Broadcasting the {name!r} parameter with shape " - f"{shape_x} would change the shape of the data with shape " - f"{shape_data}" - ) - - return True - - def _collapse( func, d, diff --git a/cf/data/utils.py b/cf/data/utils.py index ce78ad0854..b1b0d37ee3 100644 --- a/cf/data/utils.py +++ b/cf/data/utils.py @@ -553,18 +553,18 @@ def scalar_masked_array(dtype=float): return a -def conform_units(value, units): +def conform_units(value, units, message=None): """Conform units. If *value* has units defined by its `Units` attribute then - * if the value units are equal to *units* then *value* is returned + * If the value units are equal to *units* then *value* is returned unchanged; - * if the value units are equivalent to *units* then a copy of + * If the value units are equivalent to *units* then a copy of *value* converted to *units* is returned; - * if the value units are not equivalent to *units* then an + * If the value units are not equivalent to *units* then an exception is raised. In all other cases *value* is returned unchanged. @@ -579,6 +579,12 @@ def conform_units(value, units): units: `Units` The units to conform to. + message: `str`, optional + If the value units are not equivalent to *units* then use + this message when the exception is raised. By default a + message that is independent of the calling context is + used. + **Examples** >>> cf.data.utils.conform_units(1, cf.Units('m')) @@ -600,6 +606,10 @@ def conform_units(value, units): Traceback (most recent call last): ... ValueError: Units are incompatible with units + >>> cf.data.utils.conform_units(d, cf.Units('s'), message='My message') + Traceback (most recent call last): + ... + ValueError: My message """ value_units = getattr(value, "Units", None) @@ -611,9 +621,12 @@ def conform_units(value, units): value = value.copy() value.Units = units elif value_units and units: - raise ValueError( - f"Units {value_units!r} are incompatible with units {units!r}" - ) + if message is None: + message = ( + f"Units {value_units!r} are incompatible with units {units!r}" + ) + + raise ValueError(message) return value @@ -661,3 +674,72 @@ def YMDhms(d, attr): d._set_dask(dx) d.override_units(Units(None), inplace=True) return d + + +def where_broadcastable(data, x, name): + """Check broadcastability for `cf.Data.where` assignments. + + Raises an exception unless the *data* and *x* parameters are + broadcastable across each other, such that the size of the result + is identical to the size of *data*. Leading size 1 dimensions of + *x* are ignored, thereby also ensuring that the shape of the + result is identical to the shape of *data*. + + .. versionadded:: TODODASKVER + + .. seealso:: `cf.Data.where` + + :Parameters: + + data, x: `Data` + The arrays to compare. + + name: `str` + A name for *x* that is used in exception error messages. + + :Returns: + + `Data` + The input parameter *x*, or a modified copy without + leading size 1 dimensions. If *x* can not be acceptably + broadcast to *data* then a `ValueError` is raised. + + """ + ndim_x = x.ndim + if not ndim_x: + return x + + error = 0 + + shape_x = x.shape + shape_data = data.shape + + shape_x0 = shape_x + ndim_difference = ndim_x - data.ndim + + if ndim_difference > 0: + if shape_x[:ndim_difference] == (1,) * ndim_difference: + # Remove leading ize 1 dimensions + x = x.reshape(shape_x[ndim_difference:]) + shape_x = x.shape + else: + error += 1 + + for n, m in zip(shape_x[::-1], shape_data[::-1]): + if n != m and m > 1 and n > 1: + raise ValueError( + f"where: {name!r} parameter with shape {shape_x0} can not " + f"be broadcast across data with shape {shape_data}" + ) + + if m == 1 and n > 1: + error += 1 + + if error: + raise ValueError( + f"where: {name!r} parameter with shape {shape_x0} can not " + f"be broadcast across data with shape {shape_data} when the " + "result will have a different shape to the data" + ) + + return x diff --git a/cf/field.py b/cf/field.py index 029cc11218..529b0a972b 100644 --- a/cf/field.py +++ b/cf/field.py @@ -14230,163 +14230,197 @@ def where( ): """Assign to data elements depending on a condition. - Data can be changed by assigning to elements that are selected by - a condition based on the data values of the field construct or on - its metadata constructs. - - Different values can be assigned to where the conditions are, and - are not, met. + The elements to be changed are identified by a + condition. Different values can be assigned according to where + the condition is True (assignment from the *x* parameter) or + False (assignment from the *y* parameter). **Missing data** - Data array elements may be set to missing values by assigning them - to the `cf.masked` constant, or by assignment missing data - elements of array-valued *x* and *y* parameters. + Array elements may be set to missing values if either *x* or + *y* are the `cf.masked` constant, or by assignment from any + missing data elements in *x* or *y*. + + If the data mask is hard (see the `hardmask` attribute) then + missing data values in the array will not be overwritten, + regardless of the content of *x* and *y*. + + If the *condition* contains missing data then the + corresponding elements in the array will not be assigned to, + regardless of the contents of *x* and *y*. + + **Broadcasting** + + The array and the *condition*, *x* and *y* parameters must all + be broadcastable across the original array, such that the size + of the result is identical to the orginal size of the + array. Leading size 1 dimensions of these parameters are + ignored, thereby also ensuring that the shape of the result is + identical to the orginal shape of the array. + + If *condition* is a `Query` object then for the purposes of + broadcasting, the condition is considered to be that which is + produced by applying the query to the field's array. - By default the data mask is "hard", meaning that masked values can - not be changed by assigning them to another value. This behaviour - may be changed by setting the `hardmask` attribute of the field - construct to `False`, thereby making the data mask "soft" and - allowing masked elements to be set to non-masked values. + **Performance** - .. seealso:: `cf.masked`, `hardmask`, `indices`, `mask`, - `subspace`, `__setitem__` + If any of the shapes of the *condition*, *x*, or *y* + parameters, or the field, is unknown, then there is a + possibility that an unknown shape will need to be calculated + immediately by executing all delayed operations on that + object. + + .. seealso:: `hardmask`, `indices`, `mask`, `subspace`, + `__setitem__`, `cf.masked` :Parameters: - condition: - The condition which determines how to assign values to the - data. + condition: array_like, `Field` or `Query` + The condition which determines how to assign values to + the field's data. - In general it may be any scalar or array-like object (such - as a `numpy`, `Data` or `Field` object) that is - broadcastable to the shape of the data. Assignment from - the *x* and *y* parameters will be done where elements of - the condition evaluate to `True` and `False` respectively. + Assignment from the *x* and *y* parameters will be + done where elements of the condition evaluate to + `True` and `False` respectively. + + If *condition* is a `Query` object then this implies a + condition defined by applying the query to the data. *Parameter example:* - ``f.where(f.data<0, x=-999)`` will set all data values - that are less than zero to -999. + ``f.where(f.data<0, x=-999)`` will set all data + values that are less than zero to -999. *Parameter example:* - ``f.where(True, x=-999)`` will set all data values to - -999. This is equivalent to ``f[...] = -999``. + ``f.where(True, x=-999)`` will set all data values + to -999. This is equivalent to ``f[...] = -999``. *Parameter example:* - ``f.where(False, y=-999)`` will set all data values to - -999. This is equivalent to ``f[...] = -999``. + ``f.where(False, y=-999)`` will set all data values + to -999. This is equivalent to ``f[...] = -999``. *Parameter example:* If field construct ``f`` has shape ``(5, 3)`` then - ``f.where([True, False, True], x=-999, y=cf.masked)`` - will set data values in columns 0 and 2 to -999, and - data values in column 1 to missing data. This works - because the condition has shape ``(3,)`` which - broadcasts to the field construct's shape. - - If, however, *condition* is a `Query` object then this - implies a condition defined by applying the query to the - field construct's data (or a metadata construct's data if - the *construct* parameter is set). + ``f.where([True, False, True], x=-999, + y=cf.masked)`` will set data values in columns 0 and + 2 to -999, and data values in column 1 to missing + data. This works because the condition has shape + ``(3,)`` which broadcasts to the field construct's + shape. *Parameter example:* - ``f.where(cf.lt(0), x=-999)`` will set all data values - that are less than zero to -999. This is equivalent to - ``f.where(f.data<0, x=-999)``. - - If *condition* is another field construct then it is first - transformed so that it is broadcastable to the data being - assigned to. This is done by using the metadata constructs - of the two field constructs to create a mapping of - physically identical dimensions between the fields, and - then manipulating the dimensions of other field - construct's data to ensure that they are broadcastable. If - either of the field constructs does not have sufficient - metadata to create such a mapping then an exception will - be raised. In this case, any manipulation of the - dimensions must be done manually, and the `Data` instance - of *construct* (rather than the field construct itself) - may be used for the condition. + ``f.where(cf.lt(0), x=-999)`` will set all data + values that are less than zero to -999. This is + equivalent to ``f.where(f.data<0, x=-999)``. + + If *condition* is a `Field` then it is first + transformed so that it is broadcastable to the data + being assigned to. This is done by using the metadata + constructs of the two field constructs to create a + mapping of physically identical dimensions between the + fields, and then manipulating the dimensions of other + field construct's data to ensure that they are + broadcastable. If either of the field constructs does + not have sufficient metadata to create such a mapping + then an exception will be raised. In this case, any + manipulation of the dimensions must be done manually, + and the `Data` instance of *construct* (rather than + the field construct itself) may be used for the + condition. *Parameter example:* - If field construct ``f`` has shape ``(5, 3)`` and ``g = - f.transpose() < 0`` then ``f.where(g, x=-999)`` will set - all data values that are less than zero to -999, - provided there are sufficient metadata for the data - dimensions to be mapped. However, ``f.where(g.data, - x=-999)`` will always fail in this example, because the - shape of the condition is ``(3, 5)``, which does not + If field construct ``f`` has shape ``(5, 3)`` and + ``g = f.transpose() < 0`` then ``f.where(g, + x=-999)`` will set all data values that are less + than zero to -999, provided there are sufficient + metadata for the data dimensions to be + mapped. However, ``f.where(g.data, x=-999)`` will + always fail in this example, because the shape of + the condition is ``(3, 5)``, which does not broadcast to the shape of the ``f``. - x, y: *optional* - Specify the assignment values. Where the condition - evaluates to `True`, assign to the field construct's data - from *x*, and where the condition evaluates to `False`, - assign to the field construct's data from *y*. The *x* and - *y* parameters are each one of: - - * `None`. The appropriate data elements array are - unchanged. This the default. + x, y: array-like or `Field` or `None` + Specify the assignment values. Where the condition is + True assign to the data from *x*, and where the + condition is False assign to the data from *y*. - * Any scalar or array-like object (such as a `numpy`, - `Data` or `Field` object) that is broadcastable to the - shape of the data. + If *x* is `None` (the default) then no assignment is + carried out where the condition is True. - .. + If *y* is `None` (the default) then no assignment is + carried out where the condition is False. *Parameter example:* - ``f.where(condition)``, for any ``condition``, returns a - field construct with identical data values. + ``d.where(condition)``, for any ``condition``, + returns data with identical data values. *Parameter example:* - ``f.where(cf.lt(0), x=-f.data, y=cf.masked)`` will - change the sign of all negative data values, and set all + ``d.where(cf.lt(0), x=-d, y=cf.masked)`` will change + the sign of all negative data values, and set all other data values to missing data. - If *x* or *y* is another field construct then it is first - transformed so that its data is broadcastable to the data - being assigned to. This is done by using the metadata - constructs of the two field constructs to create a mapping - of physically identical dimensions between the fields, and - then manipulating the dimensions of other field - construct's data to ensure that they are broadcastable. If - either of the field constructs does not have sufficient - metadata to create such a mapping then an exception will - be raised. In this case, any manipulation of the - dimensions must be done manually, and the `Data` instance - of *x* or *y* (rather than the field construct itself) may - be used for the condition. + *Parameter example:* + ``d.where(cf.lt(0), x=-d)`` will change the sign of + all negative data values, and leave all other data + values unchanged. This is equivalent to, but faster + than, ``d.where(cf.lt(0), x=-d, y=d)`` + + *Parameter example:* + ``f.where(condition)``, for any ``condition``, + returns a field construct with identical data + values. + + *Parameter example:* + ``f.where(cf.lt(0), x=-f.data, y=cf.masked)`` will + change the sign of all negative data values, and set + all other data values to missing data. + + If *x* or *y* is a `Field` then it is first + transformed so that its data is broadcastable to the + data being assigned to. This is done by using the + metadata constructs of the two field constructs to + create a mapping of physically identical dimensions + between the fields, and then manipulating the + dimensions of other field construct's data to ensure + that they are broadcastable. If either of the field + constructs does not have sufficient metadata to create + such a mapping then an exception will be raised. In + this case, any manipulation of the dimensions must be + done manually, and the `Data` instance of *x* or *y* + (rather than the field construct itself) may be used + for the condition. *Parameter example:* - If field construct ``f`` has shape ``(5, 3)`` and ``g = - f.transpose() * 10`` then ``f.where(cf.lt(0), x=g)`` - will set all data values that are less than zero to the - equivalent elements of field construct ``g``, provided - there are sufficient metadata for the data dimensions to - be mapped. However, ``f.where(cf.lt(0), x=g.data)`` will - always fail in this example, because the shape of the - condition is ``(3, 5)``, which does not broadcast to the - shape of the ``f``. + If field construct ``f`` has shape ``(5, 3)`` and + ``g = f.transpose() * 10`` then ``f.where(cf.lt(0), + x=g)`` will set all data values that are less than + zero to the equivalent elements of field construct + ``g``, provided there are sufficient metadata for + the data dimensions to be mapped. However, + ``f.where(cf.lt(0), x=g.data)`` will always fail in + this example, because the shape of the condition is + ``(3, 5)``, which does not broadcast to the shape of + the ``f``. construct: `str`, optional - Define the condition by applying the *construct* parameter - to the given metadata construct's data, rather then the - data of the field construct. Must be + Define the condition by applying the *construct* + parameter to the given metadata construct's data, + rather than the data of the field construct. Must be - * The identity or key of a metadata coordinate construct - that has data. + * The identity or key of a metadata coordinate + construct that has data. .. - The *construct* parameter selects the metadata construct - that is returned by this call of the field construct's - `construct` method: ``f.construct(construct)``. See - `cf.Field.construct` for details. + The *construct* parameter selects the metadata + construct that is returned by this call of the field + construct's `construct` method: + ``f.construct(construct)``. See `cf.Field.construct` + for details. *Parameter example:* ``f.where(cf.wi(-30, 30), x=cf.masked, - construct='latitude')`` will set all data values within - 30 degrees of the equator to missing data. + construct='latitude')`` will set all data values + within 30 degrees of the equator to missing data. {{inplace: `bool`, optional}} diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 4f51d95073..9c39fc2526 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -2831,6 +2831,12 @@ def test_Data_where(self): self.assertTrue((e.array.mask == [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]).all()) self.assertTrue((e.array == a).all()) + d = cf.Data(np.arange(12).reshape(3, 4)) + for condition in (True, 3, [True], [[1]], [[[1]]], [[[1, 1, 1, 1]]]): + e = d.where(condition, -9) + self.assertEqual(e.shape, d.shape) + self.assertTrue((e.array == -9).all()) + def test_Data__init__compression(self): """Test Data initialised from compressed data sources.""" import cfdm diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index e338a5985d..ac393cc06f 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -2205,20 +2205,19 @@ def test_Field_where(self): for condition in (True, 1, [[[True]]], [[[[[456]]]]]): g = f.where(condition, -9) - self.assertTrue(g[0].minimum() == -9, str(condition)) - self.assertTrue(g[0].maximum() == -9, str(condition)) + self.assertTrue((g.array == -9).all()) g = f.where(cf.le(34), 34) - self.assertTrue(g[0].minimum() == 34) - self.assertTrue(g[0].maximum() == 89) + self.assertTrue(g.min() == 34) + self.assertTrue(g.max() == 89) g = f.where(cf.le(34), cf.masked) - self.assertTrue(g[0].minimum() == 35) - self.assertTrue(g[0].maximum() == 89) + self.assertTrue(g.min() == 35) + self.assertTrue(g.max() == 89) self.assertIsNone(f.where(cf.le(34), cf.masked, 45, inplace=True)) - self.assertTrue(f[0].minimum() == 45) - self.assertTrue(f[0].maximum() == 45) + self.assertTrue(f.min() == 45) + self.assertTrue(f.max() == 45) def test_Field_masked_invalid(self): f = self.f.copy()