From 6a60c4ed54d4fe32d137152f691d89b1f63dc46b Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Fri, 23 Aug 2019 09:01:56 -0700 Subject: [PATCH] Fix sparse ops that were calling bottleneck. min and max are now working. notnull was already fixed by one of my earlier PRs. std/var/median are still broken, but only because sparse hasn't implemented the corresponding NumPy functions yet (nanstd, nanvar and nanmedian). rank needs a pure NumPy implementation (not via bottleneck) if we want it to work on sparse or dask arrays. --- xarray/core/nputils.py | 1 + xarray/core/variable.py | 12 ++++++++--- xarray/tests/test_sparse.py | 40 ++++++++++++++++++++++--------------- 3 files changed, 34 insertions(+), 19 deletions(-) diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index a9971e7125a..769af03fe6a 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -209,6 +209,7 @@ def f(values, axis=None, **kwargs): if ( _USE_BOTTLENECK + and isinstance(values, np.ndarray) and bn_func is not None and not isinstance(axis, tuple) and values.dtype.kind in "uifc" diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 4c095f3a062..c10ea7b4996 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1697,18 +1697,24 @@ def rank(self, dim, pct=False): """ import bottleneck as bn - if isinstance(self.data, dask_array_type): + data = self.data + + if isinstance(data, dask_array_type): raise TypeError( "rank does not work for arrays stored as dask " "arrays. Load the data via .compute() or .load() " "prior to calling this method."
) + elif not isinstance(data, np.ndarray): + raise TypeError( + "rank is not implemented for {} objects.".format(type(data)) + ) axis = self.get_axis_num(dim) func = bn.nanrankdata if self.dtype.kind == "f" else bn.rankdata - ranked = func(self.data, axis=axis) + ranked = func(data, axis=axis) if pct: - count = np.sum(~np.isnan(self.data), axis=axis, keepdims=True) + count = np.sum(~np.isnan(data), axis=axis, keepdims=True) ranked /= count return Variable(self.dims, ranked) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 74805b225fa..3c1b49f9e43 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -173,11 +173,13 @@ def test_variable_property(prop): False, marks=xfail(reason="'COO' object has no attribute 'item'"), ), - param(do("max"), False, marks=xfail(reason="Coercion to dense via bottleneck")), param( - do("median"), False, marks=xfail(reason="Coercion to dense via bottleneck") + do("median"), + False, + marks=xfail(reason="Missing implementation for np.nanmedian"), ), - param(do("min"), False, marks=xfail(reason="Coercion to dense via bottleneck")), + param(do("max"), False), + param(do("min"), False), param( do("no_conflicts", other=make_xrvar({"x": 10, "y": 5})), True, @@ -201,7 +203,7 @@ def test_variable_property(prop): param( do("rank", dim="x"), False, - marks=xfail(reason="Coercion to dense via bottleneck"), + marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"), ), param( do("reduce", func=np.sum, dim="x"), @@ -216,13 +218,17 @@ def test_variable_property(prop): param( do("shift", x=2), True, marks=xfail(reason="mixed sparse-dense operation") ), - param(do("std"), False, marks=xfail(reason="Coercion to dense via bottleneck")), + param( + do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") + ), param( do("sum"), False, marks=xfail(reason="Missing implementation for np.result_type"), ), - param(do("var"), False, marks=xfail(reason="Coercion to dense via 
bottleneck")), + param( + do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar") + ), param(do("to_dict"), False, marks=xfail(reason="Coercion to dense")), param( do("where", cond=make_xrvar({"x": 10, "y": 5}) > 0.5), @@ -478,16 +484,14 @@ def test_dataarray_property(prop): False, marks=xfail(reason="'COO' object has no attribute 'item'"), ), - param(do("max"), False, marks=xfail(reason="Coercion to dense via bottleneck")), + param(do("max"), False), + param(do("min"), False), param( - do("median"), False, marks=xfail(reason="Coercion to dense via bottleneck") - ), - param(do("min"), False, marks=xfail(reason="Coercion to dense via bottleneck")), - param( - do("notnull"), + do("median"), False, - marks=xfail(reason="'COO' object has no attribute 'notnull'"), + marks=xfail(reason="Missing implementation for np.nanmedian"), ), + param(do("notnull"), True), param( do("pipe", np.sum, axis=1), True, @@ -506,7 +510,7 @@ def test_dataarray_property(prop): param( do("rank", "x"), False, - marks=xfail(reason="Coercion to dense via bottleneck"), + marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"), ), param( do("reduce", np.sum, dim="x"), @@ -534,13 +538,17 @@ def test_dataarray_property(prop): True, marks=xfail(reason="Indexing COO with more than one iterable index"), ), # noqa - param(do("std"), False, marks=xfail(reason="Coercion to dense via bottleneck")), + param( + do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd") + ), param( do("sum"), False, marks=xfail(reason="Missing implementation for np.result_type"), ), - param(do("var"), False, marks=xfail(reason="Coercion to dense via bottleneck")), + param( + do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar") + ), param( do("where", make_xrarray({"x": 10, "y": 5}) > 0.5), False,