From bdcfab524ef1c852abe6dabcfabc7292f058fddc Mon Sep 17 00:00:00 2001 From: johnomotani Date: Mon, 29 Jun 2020 20:36:24 +0100 Subject: [PATCH] Support multiple dimensions in DataArray.argmin() and DataArray.argmax() methods (#3936) * DataArray.indices_min() and DataArray.indices_max() methods These return dicts of the indices of the minimum or maximum of a DataArray over several dimensions. * Update whats-new.rst and api.rst with indices_min(), indices_max() * Fix type checking in DataArray._unravel_argminmax() * Fix expected results for TestReduce3D.test_indices_max() * Respect global default for keep_attrs * Merge behaviour of indices_min/indices_max into argmin/argmax When argmin or argmax are called with a sequence for 'dim', they now return a dict with the indices for each dimension in dim. * Basic overload of argmin() and argmax() for Dataset If single dim is passed to Dataset.argmin() or Dataset.argmax(), then pass through to _argmin_base or _argmax_base. If a sequence is passed for dim, raise an exception, because the result for each DataArray would be a dict, which cannot be stored in a Dataset. * Update Variable and dask tests with _argmin_base, _argmax_base The basic numpy-style argmin() and argmax() methods were renamed when adding support for handling multiple dimensions in DataArray.argmin() and DataArray.argmax(). Variable.argmin() and Variable.argmax() are therefore renamed as Variable._argmin_base() and Variable._argmax_base(). * Update api-hidden.rst with _argmin_base and _argmax_base * Explicitly defined class methods override injected methods If a method (such as 'argmin') has been explicitly defined on a class (so that hasattr(cls, "argmin")==True), then do not inject that method, as it would override the explicitly defined one. Instead inject a private method, prefixed by "_injected_" (such as '_injected_argmin'), so that the injected method is available to the explicitly defined one. 
Do not perform the hasattr check on binary ops, because this breaks some operations (e.g. addition between DataArray and int in test_dask.py). * Move StringAccessor back to bottom of DataArray class definition * Revert use of _argmin_base and _argmax_base Now not needed because of change to injection in ops.py. * Move implementation of argmin, argmax from DataArray to Variable Makes use of argmin and argmax more general (they are available for Variable) and is straightforward for DataArray to wrap the Variable version. * Update tests for change to coordinates on result of argmin, argmax * Add 'out' keyword to argmin/argmax methods - allow numpy call signature When np.argmin(da) is called, numpy passes an 'out' keyword argument to argmin/argmax. Need to allow this argument to avoid errors (but an exception is thrown if out is not None). * Update and correct docstrings for argmin and argmax * Correct suggested replacement for da.argmin() and da.argmax() * Remove use of _injected_ methods in argmin/argmax * Fix typo in name of argminmax_func Co-Authored-By: keewis * Mark argminmax argument to _unravel_argminmax as a string Co-Authored-By: keewis * Hidden internal methods don't need to appear in docs * Basic docstrings for Dataset.argmin() and Dataset.argmax() * Set stacklevel for DeprecationWarning in argmin/argmax methods * Revert "Explicitly defined class methods override injected methods" This reverts commit 8caf2b8d07c14a2956a26b50ee08d83323c36058. * Revert "Add 'out' keyword to argmin/argmax methods - allow numpy call signature" This reverts commit ab480b5c88a059264086260e5090eb38b98aa7fa. * Remove argmin and argmax from ops.py * Use self.reduce() in Dataset.argmin() and Dataset.argmax() Replaces need for "_injected_argmin" and "_injected_argmax". 
* Whitespace after 'title' lines in docstrings * Remove tests of np.argmax() and np.argmin() functions from test_units.py Applying numpy functions to xarray objects is not necessarily expected to work, and the wrapping of argmin() and argmax() is broken by xarray-specific interface of argmin() and argmax() methods of Variable, DataArray and Dataset. * Clearer deprecation warnings in Dataset.argmin() and Dataset.argmax() Also, previously suggested workaround was not correct. Remove suggestion as there is no workaround (but the removed behaviour is unlikely to be useful). * Add unravel_index to duck_array_ops, use in Variable._unravel_argminmax * Filter argmin/argmax DeprecationWarnings in tests * Correct test for exception for nan in test_argmax * Remove injected argmin and argmax methods from api-hidden.rst * flake8 fixes * Tidy up argmin/argmax following code review Co-authored-by: Deepak Cherian * Remove filters for warnings from argmin/argmax from tests Pass an explicit axis or dim argument instead to avoid the warning. * Swap order of reduce_dims checks in Dataset.reduce() Prefer to pass reduce_dims=None when possible, including for variables with only one dimension. Avoids an error if an 'axis' keyword was passed. * revert the changes to Dataset.reduce * use dim instead of axis * use dimension instead of Ellipsis * Make passing 'dim=...' 
to Dataset.argmin() or Dataset.argmax() an error * Better docstrings for Dataset.argmin() and Dataset.argmax() * Update doc/whats-new.rst Co-authored-by: keewis Co-authored-by: Stephan Hoyer Co-authored-by: keewis Co-authored-by: Deepak Cherian Co-authored-by: Keewis --- doc/api-hidden.rst | 20 - doc/whats-new.rst | 7 + xarray/core/dataarray.py | 203 ++++++++ xarray/core/dataset.py | 126 +++++ xarray/core/duck_array_ops.py | 1 + xarray/core/ops.py | 2 - xarray/core/variable.py | 172 ++++++- xarray/tests/test_dataarray.py | 823 +++++++++++++++++++++++++++++++++ xarray/tests/test_dataset.py | 6 + xarray/tests/test_units.py | 97 +++- xarray/tests/test_variable.py | 2 +- 11 files changed, 1415 insertions(+), 44 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 5542e488143..efef4259b74 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -41,8 +41,6 @@ core.rolling.DatasetCoarsen.all core.rolling.DatasetCoarsen.any - core.rolling.DatasetCoarsen.argmax - core.rolling.DatasetCoarsen.argmin core.rolling.DatasetCoarsen.count core.rolling.DatasetCoarsen.max core.rolling.DatasetCoarsen.mean @@ -68,8 +66,6 @@ core.groupby.DatasetGroupBy.where core.groupby.DatasetGroupBy.all core.groupby.DatasetGroupBy.any - core.groupby.DatasetGroupBy.argmax - core.groupby.DatasetGroupBy.argmin core.groupby.DatasetGroupBy.count core.groupby.DatasetGroupBy.max core.groupby.DatasetGroupBy.mean @@ -85,8 +81,6 @@ core.resample.DatasetResample.all core.resample.DatasetResample.any core.resample.DatasetResample.apply - core.resample.DatasetResample.argmax - core.resample.DatasetResample.argmin core.resample.DatasetResample.assign core.resample.DatasetResample.assign_coords core.resample.DatasetResample.bfill @@ -110,8 +104,6 @@ core.resample.DatasetResample.dims core.resample.DatasetResample.groups - core.rolling.DatasetRolling.argmax - core.rolling.DatasetRolling.argmin core.rolling.DatasetRolling.count core.rolling.DatasetRolling.max core.rolling.DatasetRolling.mean @@ 
-185,8 +177,6 @@ core.rolling.DataArrayCoarsen.all core.rolling.DataArrayCoarsen.any - core.rolling.DataArrayCoarsen.argmax - core.rolling.DataArrayCoarsen.argmin core.rolling.DataArrayCoarsen.count core.rolling.DataArrayCoarsen.max core.rolling.DataArrayCoarsen.mean @@ -211,8 +201,6 @@ core.groupby.DataArrayGroupBy.where core.groupby.DataArrayGroupBy.all core.groupby.DataArrayGroupBy.any - core.groupby.DataArrayGroupBy.argmax - core.groupby.DataArrayGroupBy.argmin core.groupby.DataArrayGroupBy.count core.groupby.DataArrayGroupBy.max core.groupby.DataArrayGroupBy.mean @@ -228,8 +216,6 @@ core.resample.DataArrayResample.all core.resample.DataArrayResample.any core.resample.DataArrayResample.apply - core.resample.DataArrayResample.argmax - core.resample.DataArrayResample.argmin core.resample.DataArrayResample.assign_coords core.resample.DataArrayResample.bfill core.resample.DataArrayResample.count @@ -252,8 +238,6 @@ core.resample.DataArrayResample.dims core.resample.DataArrayResample.groups - core.rolling.DataArrayRolling.argmax - core.rolling.DataArrayRolling.argmin core.rolling.DataArrayRolling.count core.rolling.DataArrayRolling.max core.rolling.DataArrayRolling.mean @@ -423,8 +407,6 @@ IndexVariable.all IndexVariable.any - IndexVariable.argmax - IndexVariable.argmin IndexVariable.argsort IndexVariable.astype IndexVariable.broadcast_equals @@ -564,8 +546,6 @@ CFTimeIndex.all CFTimeIndex.any CFTimeIndex.append - CFTimeIndex.argmax - CFTimeIndex.argmin CFTimeIndex.argsort CFTimeIndex.asof CFTimeIndex.asof_locs diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c1440ec1108..086cddee0a0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,6 +54,13 @@ Enhancements New Features ~~~~~~~~~~~~ +- :py:meth:`DataArray.argmin` and :py:meth:`DataArray.argmax` now support + sequences of 'dim' arguments, and if a sequence is passed return a dict + (which can be passed to :py:meth:`isel` to get the value of the minimum) of + the indices for each dimension of the 
minimum or maximum of a DataArray. + (:pull:`3936`) + By `John Omotani `_, thanks to `Keisuke Fujii + `_ for work in :pull:`1469`. - Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`). By `Pascal Bourgault `_. - ``chunks='auto'`` is now supported in the ``chunks`` argument of diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b0df874953b..0ce76a5e23a 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3819,6 +3819,209 @@ def idxmax( keep_attrs=keep_attrs, ) + def argmin( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + """Index or indices of the minimum of the DataArray over one or more dimensions. + + If a sequence is passed to 'dim', then result returned as dict of DataArrays, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a DataArray with dtype int. + + If there are multiple minima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the minimum. By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : DataArray or dict of DataArray + + See also + -------- + Variable.argmin, DataArray.idxmin + + Examples + -------- + >>> array = xr.DataArray([0, 2, -1, 3], dims="x") + >>> array.min() + + array(-1) + >>> array.argmin() + + array(2) + >>> array.argmin(...) + {'x': + array(2)} + >>> array.isel(array.argmin(...)) + array(-1) + + >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], + ... [[1, 3, 2], [2, -5, 1], [2, 3, 1]]], + ... dims=("x", "y", "z")) + >>> array.min(dim="x") + + array([[ 1, 2, 1], + [ 2, -5, 1], + [ 2, 1, 1]]) + Dimensions without coordinates: y, z + >>> array.argmin(dim="x") + + array([[1, 0, 0], + [1, 1, 1], + [0, 0, 1]]) + Dimensions without coordinates: y, z + >>> array.argmin(dim=["x"]) + {'x': + array([[1, 0, 0], + [1, 1, 1], + [0, 0, 1]]) + Dimensions without coordinates: y, z} + >>> array.min(dim=("x", "z")) + + array([ 1, -5, 1]) + Dimensions without coordinates: y + >>> array.argmin(dim=["x", "z"]) + {'x': + array([0, 1, 0]) + Dimensions without coordinates: y, 'z': + array([2, 1, 1]) + Dimensions without coordinates: y} + >>> array.isel(array.argmin(dim=["x", "z"])) + + array([ 1, -5, 1]) + Dimensions without coordinates: y + """ + result = self.variable.argmin(dim, axis, keep_attrs, skipna) + if isinstance(result, dict): + return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()} + else: + return self._replace_maybe_drop_dims(result) + + def argmax( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["DataArray", Dict[Hashable, "DataArray"]]: + """Index or indices of the maximum of the DataArray over one or more dimensions. 
+ + If a sequence is passed to 'dim', then result returned as dict of DataArrays, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a DataArray with dtype int. + + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the maximum. By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : DataArray or dict of DataArray + + See also + -------- + Variable.argmax, DataArray.idxmax + + Examples + -------- + >>> array = xr.DataArray([0, 2, -1, 3], dims="x") + >>> array.max() + + array(3) + >>> array.argmax() + + array(3) + >>> array.argmax(...) + {'x': + array(3)} + >>> array.isel(array.argmax(...)) + + array(3) + + >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]], + ... [[1, 3, 2], [2, 5, 1], [2, 3, 1]]], + ... 
dims=("x", "y", "z")) + >>> array.max(dim="x") + + array([[3, 3, 2], + [3, 5, 2], + [2, 3, 3]]) + Dimensions without coordinates: y, z + >>> array.argmax(dim="x") + + array([[0, 1, 1], + [0, 1, 0], + [0, 1, 0]]) + Dimensions without coordinates: y, z + >>> array.argmax(dim=["x"]) + {'x': + array([[0, 1, 1], + [0, 1, 0], + [0, 1, 0]]) + Dimensions without coordinates: y, z} + >>> array.max(dim=("x", "z")) + + array([3, 5, 3]) + Dimensions without coordinates: y + >>> array.argmax(dim=["x", "z"]) + {'x': + array([0, 1, 0]) + Dimensions without coordinates: y, 'z': + array([0, 1, 2]) + Dimensions without coordinates: y} + >>> array.isel(array.argmax(dim=["x", "z"])) + + array([3, 5, 3]) + Dimensions without coordinates: y + """ + result = self.variable.argmax(dim, axis, keep_attrs, skipna) + if isinstance(result, dict): + return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()} + else: + return self._replace_maybe_drop_dims(result) + # this needs to be at the end, or mypy will confuse with `str` # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names str = utils.UncachedAccessor(StringAccessor) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index a024324bcb1..b46b1d6dce0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6368,5 +6368,131 @@ def idxmax( ) ) + def argmin(self, dim=None, axis=None, **kwargs): + """Indices of the minima of the member variables. + + If there are multiple minima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : str, optional + The dimension over which to find the minimum. By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will be an error, since DataArray.argmin will + return a dict with indices for all dimensions, which does not make sense for + a Dataset. + axis : int, optional + Axis over which to apply `argmin`. 
Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Dataset + + See also + -------- + DataArray.argmin + + """ + if dim is None and axis is None: + warnings.warn( + "Once the behaviour of DataArray.argmin() and Variable.argmin() with " + "neither dim nor axis argument changes to return a dict of indices of " + "each dimension, for consistency it will be an error to call " + "Dataset.argmin() with no argument, since we don't return a dict of " + "Datasets.", + DeprecationWarning, + stacklevel=2, + ) + if ( + dim is None + or axis is not None + or (not isinstance(dim, Sequence) and dim is not ...) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + argmin_func = getattr(duck_array_ops, "argmin") + return self.reduce(argmin_func, dim=dim, axis=axis, **kwargs) + else: + raise ValueError( + "When dim is a sequence or ..., DataArray.argmin() returns a dict. " + "dicts cannot be contained in a Dataset, so cannot call " + "Dataset.argmin() with a sequence or ... for dim" + ) + + def argmax(self, dim=None, axis=None, **kwargs): + """Indices of the maxima of the member variables. + + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : str, optional + The dimension over which to find the maximum. 
By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will be an error, since DataArray.argmax will + return a dict with indices for all dimensions, which does not make sense for + a Dataset. + axis : int, optional + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Dataset + + See also + -------- + DataArray.argmax + + """ + if dim is None and axis is None: + warnings.warn( + "Once the behaviour of DataArray.argmax() and Variable.argmax() with " + "neither dim nor axis argument changes to return a dict of indices of " + "each dimension, for consistency it will be an error to call " + "Dataset.argmax() with no argument, since we don't return a dict of " + "Datasets.", + DeprecationWarning, + stacklevel=2, + ) + if ( + dim is None + or axis is not None + or (not isinstance(dim, Sequence) and dim is not ...) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + argmax_func = getattr(duck_array_ops, "argmax") + return self.reduce(argmax_func, dim=dim, axis=axis, **kwargs) + else: + raise ValueError( + "When dim is a sequence or ..., DataArray.argmax() returns a dict. " + "dicts cannot be contained in a Dataset, so cannot call " + "Dataset.argmax() with a sequence or ... 
for dim" + ) + ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 76719699168..df579d23544 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -359,6 +359,7 @@ def f(values, axis=None, skipna=None, **kwargs): cumprod_1d.numeric_only = True cumsum_1d = _create_nan_agg_method("cumsum") cumsum_1d.numeric_only = True +unravel_index = _dask_or_eager_func("unravel_index") _mean = _create_nan_agg_method("mean") diff --git a/xarray/core/ops.py b/xarray/core/ops.py index b789f93b4f1..d4aeea37aad 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -47,8 +47,6 @@ # methods which remove an axis REDUCE_METHODS = ["all", "any"] NAN_REDUCE_METHODS = [ - "argmax", - "argmin", "max", "min", "mean", diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e19132b1b06..c505c749557 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -6,7 +6,17 @@ from collections import defaultdict from datetime import timedelta from distutils.version import LooseVersion -from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + Hashable, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) import numpy as np import pandas as pd @@ -2069,6 +2079,166 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float): ) return type(self)(self.dims, numeric_array, self._attrs) + def _unravel_argminmax( + self, + argminmax: str, + dim: Union[Hashable, Sequence[Hashable], None], + axis: Union[int, None], + keep_attrs: Optional[bool], + skipna: Optional[bool], + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Apply argmin or argmax over one or more dimensions, returning the result as a + dict of Variable that can be passed directly to isel. 
+ """ + if dim is None and axis is None: + warnings.warn( + "Behaviour of argmin/argmax with neither dim nor axis argument will " + "change to return a dict of indices of each dimension. To get a " + "single, flat index, please use np.argmin(da.data) or " + "np.argmax(da.data) instead of da.argmin() or da.argmax().", + DeprecationWarning, + stacklevel=3, + ) + + argminmax_func = getattr(duck_array_ops, argminmax) + + if dim is ...: + # In future, should do this also when (dim is None and axis is None) + dim = self.dims + if ( + dim is None + or axis is not None + or not isinstance(dim, Sequence) + or isinstance(dim, str) + ): + # Return int index if single dimension is passed, and is not part of a + # sequence + return self.reduce( + argminmax_func, dim=dim, axis=axis, keep_attrs=keep_attrs, skipna=skipna + ) + + # Get a name for the new dimension that does not conflict with any existing + # dimension + newdimname = "_unravel_argminmax_dim_0" + count = 1 + while newdimname in self.dims: + newdimname = "_unravel_argminmax_dim_{}".format(count) + count += 1 + + stacked = self.stack({newdimname: dim}) + + result_dims = stacked.dims[:-1] + reduce_shape = tuple(self.sizes[d] for d in dim) + + result_flat_indices = stacked.reduce(argminmax_func, axis=-1, skipna=skipna) + + result_unravelled_indices = duck_array_ops.unravel_index( + result_flat_indices.data, reduce_shape + ) + + result = { + d: Variable(dims=result_dims, data=i) + for d, i in zip(dim, result_unravelled_indices) + } + + if keep_attrs is None: + keep_attrs = _get_keep_attrs(default=False) + if keep_attrs: + for v in result.values(): + v.attrs = self.attrs + + return result + + def argmin( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Index or indices of the minimum of the Variable over one or more dimensions. 
+ If a sequence is passed to 'dim', then result returned as dict of Variables, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a Variable with dtype int. + + If there are multiple minima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the minimum. By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Variable or dict of Variable + + See also + -------- + DataArray.argmin, DataArray.idxmin + """ + return self._unravel_argminmax("argmin", dim, axis, keep_attrs, skipna) + + def argmax( + self, + dim: Union[Hashable, Sequence[Hashable]] = None, + axis: int = None, + keep_attrs: bool = None, + skipna: bool = None, + ) -> Union["Variable", Dict[Hashable, "Variable"]]: + """Index or indices of the maximum of the Variable over one or more dimensions. + If a sequence is passed to 'dim', then result returned as dict of Variables, + which can be passed directly to isel(). If a single str is passed to 'dim' then + returns a Variable with dtype int. 
+ + If there are multiple maxima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : hashable, sequence of hashable or ..., optional + The dimensions over which to find the maximum. By default, finds maximum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will return a dict with indices for all + dimensions; to return a dict with all dimensions now, pass '...'. + axis : int, optional + Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments + can be supplied. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). 
+ + Returns + ------- + result : Variable or dict of Variable + + See also + -------- + DataArray.argmax, DataArray.idxmax + """ + return self._unravel_argminmax("argmax", dim, axis, keep_attrs, skipna) + ops.inject_all_ops_and_reduce_methods(Variable) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index d942667a4c7..793090cc122 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -4493,6 +4493,9 @@ def test_max(self, x, minindex, maxindex, nanindex): assert_identical(result2, expected2) + @pytest.mark.filterwarnings( + "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" + ) def test_argmin(self, x, minindex, maxindex, nanindex): ar = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs @@ -4522,6 +4525,9 @@ def test_argmin(self, x, minindex, maxindex, nanindex): assert_identical(result2, expected2) + @pytest.mark.filterwarnings( + "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" + ) def test_argmax(self, x, minindex, maxindex, nanindex): ar = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs @@ -4763,6 +4769,78 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): result7 = ar0.idxmax(fill_value=-1j) assert_identical(result7, expected7) + @pytest.mark.filterwarnings( + "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" + ) + def test_argmin_dim(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs + ) + indarr = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"]) + + if np.isnan(minindex): + with pytest.raises(ValueError): + ar.argmin() + return + + expected0 = {"x": indarr[minindex]} + result0 = ar.argmin(...) 
+ for key in expected0: + assert_identical(result0[key], expected0[key]) + + result1 = ar.argmin(..., keep_attrs=True) + expected1 = deepcopy(expected0) + for da in expected1.values(): + da.attrs = self.attrs + for key in expected1: + assert_identical(result1[key], expected1[key]) + + result2 = ar.argmin(..., skipna=False) + if nanindex is not None and ar.dtype.kind != "O": + expected2 = {"x": indarr.isel(x=nanindex, drop=True)} + expected2["x"].attrs = {} + else: + expected2 = expected0 + + for key in expected2: + assert_identical(result2[key], expected2[key]) + + @pytest.mark.filterwarnings( + "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" + ) + def test_argmax_dim(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs + ) + indarr = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"]) + + if np.isnan(maxindex): + with pytest.raises(ValueError): + ar.argmax() + return + + expected0 = {"x": indarr[maxindex]} + result0 = ar.argmax(...) 
+ for key in expected0: + assert_identical(result0[key], expected0[key]) + + result1 = ar.argmax(..., keep_attrs=True) + expected1 = deepcopy(expected0) + for da in expected1.values(): + da.attrs = self.attrs + for key in expected1: + assert_identical(result1[key], expected1[key]) + + result2 = ar.argmax(..., skipna=False) + if nanindex is not None and ar.dtype.kind != "O": + expected2 = {"x": indarr.isel(x=nanindex, drop=True)} + expected2["x"].attrs = {} + else: + expected2 = expected0 + + for key in expected2: + assert_identical(result2[key], expected2[key]) + @pytest.mark.parametrize( "x, minindex, maxindex, nanindex", @@ -5256,6 +5334,751 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): result7 = ar0.idxmax(dim="x", fill_value=-5j) assert_identical(result7, expected7) + @pytest.mark.filterwarnings( + "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" + ) + def test_argmin_dim(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, + dims=["y", "x"], + coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, + attrs=self.attrs, + ) + indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) + indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords) + + if np.isnan(minindex).any(): + with pytest.raises(ValueError): + ar.argmin(dim="x") + return + + expected0 = [ + indarr.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(minindex) + ] + expected0 = {"x": xr.concat(expected0, dim="y")} + + result0 = ar.argmin(dim=["x"]) + for key in expected0: + assert_identical(result0[key], expected0[key]) + + result1 = ar.argmin(dim=["x"], keep_attrs=True) + expected1 = deepcopy(expected0) + expected1["x"].attrs = self.attrs + for key in expected1: + assert_identical(result1[key], expected1[key]) + + minindex = [ + x if y is None or ar.dtype.kind == "O" else y + for x, y in zip(minindex, nanindex) + ] + expected2 = [ + indarr.isel(y=yi).isel(x=indi, drop=True) + for yi, indi 
in enumerate(minindex) + ] + expected2 = {"x": xr.concat(expected2, dim="y")} + expected2["x"].attrs = {} + + result2 = ar.argmin(dim=["x"], skipna=False) + + for key in expected2: + assert_identical(result2[key], expected2[key]) + + result3 = ar.argmin(...) + min_xind = ar.isel(expected0).argmin() + expected3 = { + "y": DataArray(min_xind), + "x": DataArray(minindex[min_xind.item()]), + } + + for key in expected3: + assert_identical(result3[key], expected3[key]) + + @pytest.mark.filterwarnings( + "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" + ) + def test_argmax_dim(self, x, minindex, maxindex, nanindex): + ar = xr.DataArray( + x, + dims=["y", "x"], + coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, + attrs=self.attrs, + ) + indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) + indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords) + + if np.isnan(maxindex).any(): + with pytest.raises(ValueError): + ar.argmax(dim="x") + return + + expected0 = [ + indarr.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex) + ] + expected0 = {"x": xr.concat(expected0, dim="y")} + + result0 = ar.argmax(dim=["x"]) + for key in expected0: + assert_identical(result0[key], expected0[key]) + + result1 = ar.argmax(dim=["x"], keep_attrs=True) + expected1 = deepcopy(expected0) + expected1["x"].attrs = self.attrs + for key in expected1: + assert_identical(result1[key], expected1[key]) + + maxindex = [ + x if y is None or ar.dtype.kind == "O" else y + for x, y in zip(maxindex, nanindex) + ] + expected2 = [ + indarr.isel(y=yi).isel(x=indi, drop=True) + for yi, indi in enumerate(maxindex) + ] + expected2 = {"x": xr.concat(expected2, dim="y")} + expected2["x"].attrs = {} + + result2 = ar.argmax(dim=["x"], skipna=False) + + for key in expected2: + assert_identical(result2[key], expected2[key]) + + result3 = ar.argmax(...) 
+ max_xind = ar.isel(expected0).argmax() + expected3 = { + "y": DataArray(max_xind), + "x": DataArray(maxindex[max_xind.item()]), + } + + for key in expected3: + assert_identical(result3[key], expected3[key]) + + +@pytest.mark.parametrize( + "x, minindices_x, minindices_y, minindices_z, minindices_xy, " + "minindices_xz, minindices_yz, minindices_xyz, maxindices_x, " + "maxindices_y, maxindices_z, maxindices_xy, maxindices_xz, maxindices_yz, " + "maxindices_xyz, nanindices_x, nanindices_y, nanindices_z, nanindices_xy, " + "nanindices_xz, nanindices_yz, nanindices_xyz", + [ + ( + np.array( + [ + [[0, 1, 2, 0], [-2, -4, 2, 0]], + [[1, 1, 1, 1], [1, 1, 1, 1]], + [[0, 0, -10, 5], [20, 0, 0, 0]], + ] + ), + {"x": np.array([[0, 2, 2, 0], [0, 0, 2, 0]])}, + {"y": np.array([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]])}, + {"z": np.array([[0, 1], [0, 0], [2, 1]])}, + {"x": np.array([0, 0, 2, 0]), "y": np.array([1, 1, 0, 0])}, + {"x": np.array([2, 0]), "z": np.array([2, 1])}, + {"y": np.array([1, 0, 0]), "z": np.array([1, 0, 2])}, + {"x": np.array(2), "y": np.array(0), "z": np.array(2)}, + {"x": np.array([[1, 0, 0, 2], [2, 1, 0, 1]])}, + {"y": np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 1, 0]])}, + {"z": np.array([[2, 2], [0, 0], [3, 0]])}, + {"x": np.array([2, 0, 0, 2]), "y": np.array([1, 0, 0, 0])}, + {"x": np.array([2, 2]), "z": np.array([3, 0])}, + {"y": np.array([0, 0, 1]), "z": np.array([2, 0, 0])}, + {"x": np.array(2), "y": np.array(1), "z": np.array(0)}, + {"x": np.array([[None, None, None, None], [None, None, None, None]])}, + { + "y": np.array( + [ + [None, None, None, None], + [None, None, None, None], + [None, None, None, None], + ] + ) + }, + {"z": np.array([[None, None], [None, None], [None, None]])}, + { + "x": np.array([None, None, None, None]), + "y": np.array([None, None, None, None]), + }, + {"x": np.array([None, None]), "z": np.array([None, None])}, + {"y": np.array([None, None, None]), "z": np.array([None, None, None])}, + {"x": np.array(None), "y": 
np.array(None), "z": np.array(None)}, + ), + ( + np.array( + [ + [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]], + [[-4.0, np.NaN, 2.0, np.NaN], [-2.0, -4.0, 2.0, 0.0]], + [[np.NaN] * 4, [np.NaN] * 4], + ] + ), + {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])}, + { + "y": np.array( + [[1, 1, 0, 0], [0, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]] + ) + }, + {"z": np.array([[3, 1], [0, 1], [np.NaN, np.NaN]])}, + {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])}, + {"x": np.array([1, 0]), "z": np.array([0, 1])}, + {"y": np.array([1, 0, np.NaN]), "z": np.array([1, 0, np.NaN])}, + {"x": np.array(0), "y": np.array(1), "z": np.array(1)}, + {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])}, + { + "y": np.array( + [[0, 0, 0, 0], [1, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]] + ) + }, + {"z": np.array([[0, 2], [2, 2], [np.NaN, np.NaN]])}, + {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])}, + {"x": np.array([0, 0]), "z": np.array([2, 2])}, + {"y": np.array([0, 0, np.NaN]), "z": np.array([0, 2, np.NaN])}, + {"x": np.array(0), "y": np.array(0), "z": np.array(0)}, + {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])}, + { + "y": np.array( + [[None, None, None, None], [None, 0, None, 0], [0, 0, 0, 0]] + ) + }, + {"z": np.array([[None, None], [1, None], [0, 0]])}, + {"x": np.array([2, 1, 2, 1]), "y": np.array([0, 0, 0, 0])}, + {"x": np.array([1, 2]), "z": np.array([1, 0])}, + {"y": np.array([None, 0, 0]), "z": np.array([None, 1, 0])}, + {"x": np.array(1), "y": np.array(0), "z": np.array(1)}, + ), + ( + np.array( + [ + [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]], + [[-4.0, np.NaN, 2.0, np.NaN], [-2.0, -4.0, 2.0, 0.0]], + [[np.NaN] * 4, [np.NaN] * 4], + ] + ).astype("object"), + {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])}, + { + "y": np.array( + [[1, 1, 0, 0], [0, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]] + ) + }, + {"z": np.array([[3, 1], [0, 1], [np.NaN, np.NaN]])}, + {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])}, + {"x": np.array([1, 0]), "z": 
np.array([0, 1])}, + {"y": np.array([1, 0, np.NaN]), "z": np.array([1, 0, np.NaN])}, + {"x": np.array(0), "y": np.array(1), "z": np.array(1)}, + {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])}, + { + "y": np.array( + [[0, 0, 0, 0], [1, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]] + ) + }, + {"z": np.array([[0, 2], [2, 2], [np.NaN, np.NaN]])}, + {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])}, + {"x": np.array([0, 0]), "z": np.array([2, 2])}, + {"y": np.array([0, 0, np.NaN]), "z": np.array([0, 2, np.NaN])}, + {"x": np.array(0), "y": np.array(0), "z": np.array(0)}, + {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])}, + { + "y": np.array( + [[None, None, None, None], [None, 0, None, 0], [0, 0, 0, 0]] + ) + }, + {"z": np.array([[None, None], [1, None], [0, 0]])}, + {"x": np.array([2, 1, 2, 1]), "y": np.array([0, 0, 0, 0])}, + {"x": np.array([1, 2]), "z": np.array([1, 0])}, + {"y": np.array([None, 0, 0]), "z": np.array([None, 1, 0])}, + {"x": np.array(1), "y": np.array(0), "z": np.array(1)}, + ), + ( + np.array( + [ + [["2015-12-31", "2020-01-02"], ["2020-01-01", "2016-01-01"]], + [["2020-01-02", "2020-01-02"], ["2020-01-02", "2020-01-02"]], + [["1900-01-01", "1-02-03"], ["1900-01-02", "1-02-03"]], + ], + dtype="datetime64[ns]", + ), + {"x": np.array([[2, 2], [2, 2]])}, + {"y": np.array([[0, 1], [0, 0], [0, 0]])}, + {"z": np.array([[0, 1], [0, 0], [1, 1]])}, + {"x": np.array([2, 2]), "y": np.array([0, 0])}, + {"x": np.array([2, 2]), "z": np.array([1, 1])}, + {"y": np.array([0, 0, 0]), "z": np.array([0, 0, 1])}, + {"x": np.array(2), "y": np.array(0), "z": np.array(1)}, + {"x": np.array([[1, 0], [1, 1]])}, + {"y": np.array([[1, 0], [0, 0], [1, 0]])}, + {"z": np.array([[1, 0], [0, 0], [0, 0]])}, + {"x": np.array([1, 0]), "y": np.array([0, 0])}, + {"x": np.array([0, 1]), "z": np.array([1, 0])}, + {"y": np.array([0, 0, 1]), "z": np.array([1, 0, 0])}, + {"x": np.array(0), "y": np.array(0), "z": np.array(1)}, + {"x": np.array([[None, None], [None, None]])}, + {"y": 
np.array([[None, None], [None, None], [None, None]])}, + {"z": np.array([[None, None], [None, None], [None, None]])}, + {"x": np.array([None, None]), "y": np.array([None, None])}, + {"x": np.array([None, None]), "z": np.array([None, None])}, + {"y": np.array([None, None, None]), "z": np.array([None, None, None])}, + {"x": np.array(None), "y": np.array(None), "z": np.array(None)}, + ), + ], +) +class TestReduce3D(TestReduce): + def test_argmin_dim( + self, + x, + minindices_x, + minindices_y, + minindices_z, + minindices_xy, + minindices_xz, + minindices_yz, + minindices_xyz, + maxindices_x, + maxindices_y, + maxindices_z, + maxindices_xy, + maxindices_xz, + maxindices_yz, + maxindices_xyz, + nanindices_x, + nanindices_y, + nanindices_z, + nanindices_xy, + nanindices_xz, + nanindices_yz, + nanindices_xyz, + ): + + ar = xr.DataArray( + x, + dims=["x", "y", "z"], + coords={ + "x": np.arange(x.shape[0]) * 4, + "y": 1 - np.arange(x.shape[1]), + "z": 2 + 3 * np.arange(x.shape[2]), + }, + attrs=self.attrs, + ) + xindarr = np.tile( + np.arange(x.shape[0], dtype=np.intp)[:, np.newaxis, np.newaxis], + [1, x.shape[1], x.shape[2]], + ) + xindarr = xr.DataArray(xindarr, dims=ar.dims, coords=ar.coords) + yindarr = np.tile( + np.arange(x.shape[1], dtype=np.intp)[np.newaxis, :, np.newaxis], + [x.shape[0], 1, x.shape[2]], + ) + yindarr = xr.DataArray(yindarr, dims=ar.dims, coords=ar.coords) + zindarr = np.tile( + np.arange(x.shape[2], dtype=np.intp)[np.newaxis, np.newaxis, :], + [x.shape[0], x.shape[1], 1], + ) + zindarr = xr.DataArray(zindarr, dims=ar.dims, coords=ar.coords) + + for inds in [ + minindices_x, + minindices_y, + minindices_z, + minindices_xy, + minindices_xz, + minindices_yz, + minindices_xyz, + ]: + if np.array([np.isnan(i) for i in inds.values()]).any(): + with pytest.raises(ValueError): + ar.argmin(dim=[d for d in inds]) + return + + result0 = ar.argmin(dim=["x"]) + expected0 = { + key: xr.DataArray(value, dims=("y", "z")) + for key, value in minindices_x.items() 
+ } + for key in expected0: + assert_identical(result0[key].drop_vars(["y", "z"]), expected0[key]) + + result1 = ar.argmin(dim=["y"]) + expected1 = { + key: xr.DataArray(value, dims=("x", "z")) + for key, value in minindices_y.items() + } + for key in expected1: + assert_identical(result1[key].drop_vars(["x", "z"]), expected1[key]) + + result2 = ar.argmin(dim=["z"]) + expected2 = { + key: xr.DataArray(value, dims=("x", "y")) + for key, value in minindices_z.items() + } + for key in expected2: + assert_identical(result2[key].drop_vars(["x", "y"]), expected2[key]) + + result3 = ar.argmin(dim=("x", "y")) + expected3 = { + key: xr.DataArray(value, dims=("z")) for key, value in minindices_xy.items() + } + for key in expected3: + assert_identical(result3[key].drop_vars("z"), expected3[key]) + + result4 = ar.argmin(dim=("x", "z")) + expected4 = { + key: xr.DataArray(value, dims=("y")) for key, value in minindices_xz.items() + } + for key in expected4: + assert_identical(result4[key].drop_vars("y"), expected4[key]) + + result5 = ar.argmin(dim=("y", "z")) + expected5 = { + key: xr.DataArray(value, dims=("x")) for key, value in minindices_yz.items() + } + for key in expected5: + assert_identical(result5[key].drop_vars("x"), expected5[key]) + + result6 = ar.argmin(...) 
+ expected6 = {key: xr.DataArray(value) for key, value in minindices_xyz.items()} + for key in expected6: + assert_identical(result6[key], expected6[key]) + + minindices_x = { + key: xr.where( + nanindices_x[key] == None, # noqa: E711 + minindices_x[key], + nanindices_x[key], + ) + for key in minindices_x + } + expected7 = { + key: xr.DataArray(value, dims=("y", "z")) + for key, value in minindices_x.items() + } + + result7 = ar.argmin(dim=["x"], skipna=False) + for key in expected7: + assert_identical(result7[key].drop_vars(["y", "z"]), expected7[key]) + + minindices_y = { + key: xr.where( + nanindices_y[key] == None, # noqa: E711 + minindices_y[key], + nanindices_y[key], + ) + for key in minindices_y + } + expected8 = { + key: xr.DataArray(value, dims=("x", "z")) + for key, value in minindices_y.items() + } + + result8 = ar.argmin(dim=["y"], skipna=False) + for key in expected8: + assert_identical(result8[key].drop_vars(["x", "z"]), expected8[key]) + + minindices_z = { + key: xr.where( + nanindices_z[key] == None, # noqa: E711 + minindices_z[key], + nanindices_z[key], + ) + for key in minindices_z + } + expected9 = { + key: xr.DataArray(value, dims=("x", "y")) + for key, value in minindices_z.items() + } + + result9 = ar.argmin(dim=["z"], skipna=False) + for key in expected9: + assert_identical(result9[key].drop_vars(["x", "y"]), expected9[key]) + + minindices_xy = { + key: xr.where( + nanindices_xy[key] == None, # noqa: E711 + minindices_xy[key], + nanindices_xy[key], + ) + for key in minindices_xy + } + expected10 = { + key: xr.DataArray(value, dims="z") for key, value in minindices_xy.items() + } + + result10 = ar.argmin(dim=("x", "y"), skipna=False) + for key in expected10: + assert_identical(result10[key].drop_vars("z"), expected10[key]) + + minindices_xz = { + key: xr.where( + nanindices_xz[key] == None, # noqa: E711 + minindices_xz[key], + nanindices_xz[key], + ) + for key in minindices_xz + } + expected11 = { + key: xr.DataArray(value, dims="y") for key, 
value in minindices_xz.items() + } + + result11 = ar.argmin(dim=("x", "z"), skipna=False) + for key in expected11: + assert_identical(result11[key].drop_vars("y"), expected11[key]) + + minindices_yz = { + key: xr.where( + nanindices_yz[key] == None, # noqa: E711 + minindices_yz[key], + nanindices_yz[key], + ) + for key in minindices_yz + } + expected12 = { + key: xr.DataArray(value, dims="x") for key, value in minindices_yz.items() + } + + result12 = ar.argmin(dim=("y", "z"), skipna=False) + for key in expected12: + assert_identical(result12[key].drop_vars("x"), expected12[key]) + + minindices_xyz = { + key: xr.where( + nanindices_xyz[key] == None, # noqa: E711 + minindices_xyz[key], + nanindices_xyz[key], + ) + for key in minindices_xyz + } + expected13 = {key: xr.DataArray(value) for key, value in minindices_xyz.items()} + + result13 = ar.argmin(..., skipna=False) + for key in expected13: + assert_identical(result13[key], expected13[key]) + + def test_argmax_dim( + self, + x, + minindices_x, + minindices_y, + minindices_z, + minindices_xy, + minindices_xz, + minindices_yz, + minindices_xyz, + maxindices_x, + maxindices_y, + maxindices_z, + maxindices_xy, + maxindices_xz, + maxindices_yz, + maxindices_xyz, + nanindices_x, + nanindices_y, + nanindices_z, + nanindices_xy, + nanindices_xz, + nanindices_yz, + nanindices_xyz, + ): + + ar = xr.DataArray( + x, + dims=["x", "y", "z"], + coords={ + "x": np.arange(x.shape[0]) * 4, + "y": 1 - np.arange(x.shape[1]), + "z": 2 + 3 * np.arange(x.shape[2]), + }, + attrs=self.attrs, + ) + xindarr = np.tile( + np.arange(x.shape[0], dtype=np.intp)[:, np.newaxis, np.newaxis], + [1, x.shape[1], x.shape[2]], + ) + xindarr = xr.DataArray(xindarr, dims=ar.dims, coords=ar.coords) + yindarr = np.tile( + np.arange(x.shape[1], dtype=np.intp)[np.newaxis, :, np.newaxis], + [x.shape[0], 1, x.shape[2]], + ) + yindarr = xr.DataArray(yindarr, dims=ar.dims, coords=ar.coords) + zindarr = np.tile( + np.arange(x.shape[2], dtype=np.intp)[np.newaxis, 
np.newaxis, :], + [x.shape[0], x.shape[1], 1], + ) + zindarr = xr.DataArray(zindarr, dims=ar.dims, coords=ar.coords) + + for inds in [ + maxindices_x, + maxindices_y, + maxindices_z, + maxindices_xy, + maxindices_xz, + maxindices_yz, + maxindices_xyz, + ]: + if np.array([np.isnan(i) for i in inds.values()]).any(): + with pytest.raises(ValueError): + ar.argmax(dim=[d for d in inds]) + return + + result0 = ar.argmax(dim=["x"]) + expected0 = { + key: xr.DataArray(value, dims=("y", "z")) + for key, value in maxindices_x.items() + } + for key in expected0: + assert_identical(result0[key].drop_vars(["y", "z"]), expected0[key]) + + result1 = ar.argmax(dim=["y"]) + expected1 = { + key: xr.DataArray(value, dims=("x", "z")) + for key, value in maxindices_y.items() + } + for key in expected1: + assert_identical(result1[key].drop_vars(["x", "z"]), expected1[key]) + + result2 = ar.argmax(dim=["z"]) + expected2 = { + key: xr.DataArray(value, dims=("x", "y")) + for key, value in maxindices_z.items() + } + for key in expected2: + assert_identical(result2[key].drop_vars(["x", "y"]), expected2[key]) + + result3 = ar.argmax(dim=("x", "y")) + expected3 = { + key: xr.DataArray(value, dims=("z")) for key, value in maxindices_xy.items() + } + for key in expected3: + assert_identical(result3[key].drop_vars("z"), expected3[key]) + + result4 = ar.argmax(dim=("x", "z")) + expected4 = { + key: xr.DataArray(value, dims=("y")) for key, value in maxindices_xz.items() + } + for key in expected4: + assert_identical(result4[key].drop_vars("y"), expected4[key]) + + result5 = ar.argmax(dim=("y", "z")) + expected5 = { + key: xr.DataArray(value, dims=("x")) for key, value in maxindices_yz.items() + } + for key in expected5: + assert_identical(result5[key].drop_vars("x"), expected5[key]) + + result6 = ar.argmax(...) 
+ expected6 = {key: xr.DataArray(value) for key, value in maxindices_xyz.items()} + for key in expected6: + assert_identical(result6[key], expected6[key]) + + maxindices_x = { + key: xr.where( + nanindices_x[key] == None, # noqa: E711 + maxindices_x[key], + nanindices_x[key], + ) + for key in maxindices_x + } + expected7 = { + key: xr.DataArray(value, dims=("y", "z")) + for key, value in maxindices_x.items() + } + + result7 = ar.argmax(dim=["x"], skipna=False) + for key in expected7: + assert_identical(result7[key].drop_vars(["y", "z"]), expected7[key]) + + maxindices_y = { + key: xr.where( + nanindices_y[key] == None, # noqa: E711 + maxindices_y[key], + nanindices_y[key], + ) + for key in maxindices_y + } + expected8 = { + key: xr.DataArray(value, dims=("x", "z")) + for key, value in maxindices_y.items() + } + + result8 = ar.argmax(dim=["y"], skipna=False) + for key in expected8: + assert_identical(result8[key].drop_vars(["x", "z"]), expected8[key]) + + maxindices_z = { + key: xr.where( + nanindices_z[key] == None, # noqa: E711 + maxindices_z[key], + nanindices_z[key], + ) + for key in maxindices_z + } + expected9 = { + key: xr.DataArray(value, dims=("x", "y")) + for key, value in maxindices_z.items() + } + + result9 = ar.argmax(dim=["z"], skipna=False) + for key in expected9: + assert_identical(result9[key].drop_vars(["x", "y"]), expected9[key]) + + maxindices_xy = { + key: xr.where( + nanindices_xy[key] == None, # noqa: E711 + maxindices_xy[key], + nanindices_xy[key], + ) + for key in maxindices_xy + } + expected10 = { + key: xr.DataArray(value, dims="z") for key, value in maxindices_xy.items() + } + + result10 = ar.argmax(dim=("x", "y"), skipna=False) + for key in expected10: + assert_identical(result10[key].drop_vars("z"), expected10[key]) + + maxindices_xz = { + key: xr.where( + nanindices_xz[key] == None, # noqa: E711 + maxindices_xz[key], + nanindices_xz[key], + ) + for key in maxindices_xz + } + expected11 = { + key: xr.DataArray(value, dims="y") for key, 
value in maxindices_xz.items() + } + + result11 = ar.argmax(dim=("x", "z"), skipna=False) + for key in expected11: + assert_identical(result11[key].drop_vars("y"), expected11[key]) + + maxindices_yz = { + key: xr.where( + nanindices_yz[key] == None, # noqa: E711 + maxindices_yz[key], + nanindices_yz[key], + ) + for key in maxindices_yz + } + expected12 = { + key: xr.DataArray(value, dims="x") for key, value in maxindices_yz.items() + } + + result12 = ar.argmax(dim=("y", "z"), skipna=False) + for key in expected12: + assert_identical(result12[key].drop_vars("x"), expected12[key]) + + maxindices_xyz = { + key: xr.where( + nanindices_xyz[key] == None, # noqa: E711 + maxindices_xyz[key], + nanindices_xyz[key], + ) + for key in maxindices_xyz + } + expected13 = {key: xr.DataArray(value) for key, value in maxindices_xyz.items()} + + result13 = ar.argmax(..., skipna=False) + for key in expected13: + assert_identical(result13[key], expected13[key]) + class TestReduceND(TestReduce): @pytest.mark.parametrize("op", ["idxmin", "idxmax"]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 9c8d40724da..0c4082a553e 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4597,6 +4597,9 @@ def test_reduce_non_numeric(self): assert_equal(data1.mean(), data2.mean()) assert_equal(data1.mean(dim="dim1"), data2.mean(dim="dim1")) + @pytest.mark.filterwarnings( + "ignore:Once the behaviour of DataArray:DeprecationWarning" + ) def test_reduce_strings(self): expected = Dataset({"x": "a"}) ds = Dataset({"x": ("y", ["a", "b"])}) @@ -4668,6 +4671,9 @@ def test_reduce_keep_attrs(self): for k, v in ds.data_vars.items(): assert v.attrs == data[k].attrs + @pytest.mark.filterwarnings( + "ignore:Once the behaviour of DataArray:DeprecationWarning" + ) def test_reduce_argmin(self): # regression test for #205 ds = Dataset({"a": ("x", [0, 1])}) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index fb9063ca49e..20a5f0e8613 100644 
--- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -297,19 +297,29 @@ def __call__(self, obj, *args, **kwargs): all_args = merge_args(self.args, args) all_kwargs = {**self.kwargs, **kwargs} + xarray_classes = ( + xr.Variable, + xr.DataArray, + xr.Dataset, + xr.core.groupby.GroupBy, + ) + + if not isinstance(obj, xarray_classes): + # remove typical xarray args like "dim" + exclude_kwargs = ("dim", "dims") + all_kwargs = { + key: value + for key, value in all_kwargs.items() + if key not in exclude_kwargs + } + func = getattr(obj, self.name, None) + if func is None or not isinstance(func, Callable): # fall back to module level numpy functions if not a xarray object if not isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)): numpy_func = getattr(np, self.name) func = partial(numpy_func, obj) - # remove typical xarray args like "dim" - exclude_kwargs = ("dim", "dims") - all_kwargs = { - key: value - for key, value in all_kwargs.items() - if key not in exclude_kwargs - } else: raise AttributeError(f"{obj} has no method named '{self.name}'") @@ -1408,8 +1418,8 @@ def test_real_and_imag(self): ( method("all"), method("any"), - method("argmax"), - method("argmin"), + method("argmax", dim="x"), + method("argmin", dim="x"), method("argsort"), method("cumprod"), method("cumsum"), @@ -1433,7 +1443,11 @@ def test_aggregation(self, func, dtype): ) variable = xr.Variable("x", array) - units = extract_units(func(array)) + numpy_kwargs = func.kwargs.copy() + if "dim" in func.kwargs: + numpy_kwargs["axis"] = variable.get_axis_num(numpy_kwargs.pop("dim")) + + units = extract_units(func(array, **numpy_kwargs)) expected = attach_units(func(strip_units(variable)), units) actual = func(variable) @@ -2243,8 +2257,20 @@ def test_repr(self, func, variant, dtype): ( function("all"), function("any"), - function("argmax"), - function("argmin"), + pytest.param( + function("argmax"), + marks=pytest.mark.skip( + reason="calling np.argmax as a function on xarray objects is 
not " + "supported" + ), + ), + pytest.param( + function("argmin"), + marks=pytest.mark.skip( + reason="calling np.argmin as a function on xarray objects is not " + "supported" + ), + ), function("max"), function("mean"), pytest.param( @@ -2265,8 +2291,8 @@ def test_repr(self, func, variant, dtype): function("cumprod"), method("all"), method("any"), - method("argmax"), - method("argmin"), + method("argmax", dim="x"), + method("argmin", dim="x"), method("max"), method("mean"), method("median"), @@ -2289,6 +2315,10 @@ def test_aggregation(self, func, dtype): ) data_array = xr.DataArray(data=array, dims="x") + numpy_kwargs = func.kwargs.copy() + if "dim" in numpy_kwargs: + numpy_kwargs["axis"] = data_array.get_axis_num(numpy_kwargs.pop("dim")) + # units differ based on the applied function, so we need to # first compute the units units = extract_units(func(array)) @@ -3803,8 +3833,20 @@ def test_repr(self, func, variant, dtype): ( function("all"), function("any"), - function("argmax"), - function("argmin"), + pytest.param( + function("argmax"), + marks=pytest.mark.skip( + reason="calling np.argmax as a function on xarray objects is not " + "supported" + ), + ), + pytest.param( + function("argmin"), + marks=pytest.mark.skip( + reason="calling np.argmin as a function on xarray objects is not " + "supported" + ), + ), function("max"), function("min"), function("mean"), @@ -3823,8 +3865,8 @@ def test_repr(self, func, variant, dtype): function("cumprod"), method("all"), method("any"), - method("argmax"), - method("argmin"), + method("argmax", dim="x"), + method("argmin", dim="x"), method("max"), method("min"), method("mean"), @@ -3853,8 +3895,23 @@ def test_aggregation(self, func, dtype): ds = xr.Dataset({"a": ("x", a), "b": ("x", b)}) - units_a = array_extract_units(func(a)) - units_b = array_extract_units(func(b)) + if "dim" in func.kwargs: + numpy_kwargs = func.kwargs.copy() + dim = numpy_kwargs.pop("dim") + + axis_a = ds.a.get_axis_num(dim) + axis_b = 
ds.b.get_axis_num(dim) + + numpy_kwargs_a = numpy_kwargs.copy() + numpy_kwargs_a["axis"] = axis_a + numpy_kwargs_b = numpy_kwargs.copy() + numpy_kwargs_b["axis"] = axis_b + else: + numpy_kwargs_a = {} + numpy_kwargs_b = {} + + units_a = array_extract_units(func(a, **numpy_kwargs_a)) + units_b = array_extract_units(func(b, **numpy_kwargs_b)) units = {"a": units_a, "b": units_b} actual = func(ds) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 3003e0d66f3..d79d40d67c0 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1657,7 +1657,7 @@ def test_reduce_funcs(self): assert_identical(v.all(dim="x"), Variable([], False)) v = Variable("t", pd.date_range("2000-01-01", periods=3)) - assert v.argmax(skipna=True) == 2 + assert v.argmax(skipna=True, dim="t") == 2 assert_identical(v.max(), Variable([], pd.Timestamp("2000-01-03")))