diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 24120270444..2c30db99bcd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -45,7 +45,12 @@ New Features By `Julia Signell `_. - :py:meth:`Dataset.where` and :py:meth:`DataArray.where` accept a lambda as a first argument, which is then called on the input; replicating pandas' behavior. - By `Maximilian Roos `_ + By `Maximilian Roos `_. +- Implement ``skipna`` in :py:meth:`Dataset.quantile`, :py:meth:`DataArray.quantile`, + :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile` + (:issue:`3843`, :pull:`3844`) + By `Aaron Spring `_. + Bug fixes ~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4e80ef222c2..7fcb42bf9d2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2939,6 +2939,7 @@ def quantile( dim: Union[Hashable, Sequence[Hashable], None] = None, interpolation: str = "linear", keep_attrs: bool = None, + skipna: bool = True, ) -> "DataArray": """Compute the qth quantile of the data along the specified dimension. @@ -2966,6 +2967,8 @@ def quantile( If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -2978,7 +2981,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile Examples -------- @@ -3015,7 +3018,11 @@ def quantile( """ ds = self._to_temp_dataset().quantile( - q, dim=dim, keep_attrs=keep_attrs, interpolation=interpolation + q, + dim=dim, + keep_attrs=keep_attrs, + interpolation=interpolation, + skipna=skipna, ) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 52940e98b27..f286236dd45 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5140,7 +5140,13 @@ def sortby(self, variables, ascending=True): return aligned_self.isel(**indices) def quantile( - self, q, dim=None, interpolation="linear", numeric_only=False, keep_attrs=None + self, + q, + dim=None, + interpolation="linear", + numeric_only=False, + keep_attrs=None, + skipna=True, ): """Compute the qth quantile of the data along the specified dimension. @@ -5171,6 +5177,8 @@ def quantile( object will be returned without attributes. numeric_only : bool, optional If True, only apply ``func`` to variables with a numeric dtype. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -5183,7 +5191,7 @@ def quantile( See Also -------- - numpy.nanquantile, pandas.Series.quantile, DataArray.quantile + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, DataArray.quantile Examples -------- @@ -5258,6 +5266,7 @@ def quantile( dim=reduce_dims, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) else: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index f2a9ebac6eb..4223d9dc255 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -558,7 +558,9 @@ def fillna(self, value): out = ops.fillna(self, value) return out - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile over each array in the groups and concatenate them together into a new array. @@ -582,6 +584,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): * higher: ``j``. * nearest: ``i`` or ``j``, whichever is nearest. * midpoint: ``(i + j) / 2``. + skipna : bool, optional + Whether to skip missing values when aggregating. Returns ------- @@ -595,7 +599,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): See Also -------- - numpy.nanquantile, pandas.Series.quantile, Dataset.quantile, + numpy.nanquantile, numpy.quantile, pandas.Series.quantile, Dataset.quantile, DataArray.quantile Examples @@ -656,6 +660,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): dim=dim, interpolation=interpolation, keep_attrs=keep_attrs, + skipna=skipna, ) return out diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 62f9fde6a2e..435edb6f014 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1678,7 +1678,9 @@ def no_conflicts(self, other, equiv=duck_array_ops.array_notnull_equiv): """ return self.broadcast_equals(other, equiv=equiv) - def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): + def quantile( + self, q, dim=None, interpolation="linear", keep_attrs=None, skipna=True + ): """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -1725,6 +1727,8 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): from .computation import apply_ufunc + _quantile_func = np.nanquantile if skipna else np.quantile + if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) @@ -1739,7 +1743,7 @@ def quantile(self, q, dim=None, interpolation="linear", keep_attrs=None): def _wrapper(npa, **kwargs): # move quantile axis to end. required for apply_ufunc - return np.moveaxis(np.nanquantile(npa, **kwargs), 0, -1) + return np.moveaxis(_quantile_func(npa, **kwargs), 0, -1) axis = np.arange(-1, -1 * len(dim) - 1, -1) result = apply_ufunc( diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b8a9c5edaf9..33f1b403eb8 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2368,13 +2368,15 @@ def test_reduce_out(self): with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): - actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True) - expected = np.nanpercentile(self.dv.values, np.array(q) * 100, axis=axis) + def test_quantile(self, q, axis, dim, skipna): + actual = DataArray(self.va).quantile(q, dim=dim, keep_attrs=True, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.dv.values, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) if is_scalar(q): assert "quantile" not in actual.dims diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 44ffafb23b1..d2e8c6b7609 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4697,12 +4697,13 @@ def test_reduce_keepdims(self): ) assert_identical(expected, actual) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q): + def test_quantile(self, q, skipna): ds = create_test_data(seed=123) for dim in [None, "dim1", ["dim1"]]: - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) if is_scalar(q): assert "quantile" not in ds_quantile.dims else: @@ -4710,12 +4711,27 @@ def test_quantile(self, q): for var, dar in ds.data_vars.items(): assert var in ds_quantile - assert_identical(ds_quantile[var], dar.quantile(q, dim=dim)) + assert_identical( + ds_quantile[var], dar.quantile(q, dim=dim, skipna=skipna) + ) dim = ["dim1", "dim2"] - ds_quantile = ds.quantile(q, dim=dim) + ds_quantile = ds.quantile(q, dim=dim, skipna=skipna) assert "dim3" in ds_quantile.dims assert all(d not in ds_quantile.dims for d in dim) + @pytest.mark.parametrize("skipna", [True, False]) + def test_quantile_skipna(self, skipna): + q = 0.1 + dim = "time" + ds = Dataset({"a": ([dim], np.arange(0, 11))}) + ds = ds.where(ds >= 1) + + result = ds.quantile(q=q, dim=dim, skipna=skipna) + + value = 1.9 if skipna else np.nan + expected = Dataset({"a": value}, coords={"quantile": q}) + assert_identical(result, expected) + @requires_bottleneck def test_rank(self): ds = create_test_data(seed=1234) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c86ecd0121f..c600f7a77d0 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1511,14 +1511,16 @@ def test_reduce(self): with pytest.warns(DeprecationWarning, match="allow_lazy is deprecated"): v.mean(dim="x", allow_lazy=False) + @pytest.mark.parametrize("skipna", [True, False]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim): + def test_quantile(self, q, axis, dim, skipna): v = Variable(["x", "y"], self.d) - actual = v.quantile(q, dim=dim) - expected = np.nanpercentile(self.d, np.array(q) * 100, axis=axis) + actual = v.quantile(q, dim=dim, skipna=skipna) + _percentile_func = np.nanpercentile if skipna else np.percentile + expected = _percentile_func(self.d, np.array(q) * 100, axis=axis) np.testing.assert_allclose(actual.values, expected) @requires_dask