Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix duck array ops that were calling bottleneck on sparse arrays #3254

Merged
merged 1 commit into from
Aug 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions xarray/core/nputils.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ def f(values, axis=None, **kwargs):

if (
_USE_BOTTLENECK
and isinstance(values, np.ndarray)
and bn_func is not None
and not isinstance(axis, tuple)
and values.dtype.kind in "uifc"
Expand Down
12 changes: 9 additions & 3 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1697,18 +1697,24 @@ def rank(self, dim, pct=False):
"""
import bottleneck as bn

if isinstance(self.data, dask_array_type):
data = self.data

if isinstance(data, dask_array_type):
raise TypeError(
"rank does not work for arrays stored as dask "
"arrays. Load the data via .compute() or .load() "
"prior to calling this method."
)
elif not isinstance(data, np.ndarray):
raise TypeError(
"rank is not implemented for {} objects.".format(type(data))
)

axis = self.get_axis_num(dim)
func = bn.nanrankdata if self.dtype.kind == "f" else bn.rankdata
ranked = func(self.data, axis=axis)
ranked = func(data, axis=axis)
if pct:
count = np.sum(~np.isnan(self.data), axis=axis, keepdims=True)
count = np.sum(~np.isnan(data), axis=axis, keepdims=True)
ranked /= count
return Variable(self.dims, ranked)

Expand Down
40 changes: 24 additions & 16 deletions xarray/tests/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,13 @@ def test_variable_property(prop):
False,
marks=xfail(reason="'COO' object has no attribute 'item'"),
),
param(do("max"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
param(
do("median"), False, marks=xfail(reason="Coercion to dense via bottleneck")
do("median"),
False,
marks=xfail(reason="Missing implementation for np.nanmedian"),
),
param(do("min"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
param(do("max"), False),
param(do("min"), False),
param(
do("no_conflicts", other=make_xrvar({"x": 10, "y": 5})),
True,
Expand All @@ -201,7 +203,7 @@ def test_variable_property(prop):
param(
do("rank", dim="x"),
False,
marks=xfail(reason="Coercion to dense via bottleneck"),
marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"),
),
param(
do("reduce", func=np.sum, dim="x"),
Expand All @@ -216,13 +218,17 @@ def test_variable_property(prop):
param(
do("shift", x=2), True, marks=xfail(reason="mixed sparse-dense operation")
),
param(do("std"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
param(
do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd")
),
param(
do("sum"),
False,
marks=xfail(reason="Missing implementation for np.result_type"),
),
param(do("var"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
param(
do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar")
),
param(do("to_dict"), False, marks=xfail(reason="Coercion to dense")),
param(
do("where", cond=make_xrvar({"x": 10, "y": 5}) > 0.5),
Expand Down Expand Up @@ -478,16 +484,14 @@ def test_dataarray_property(prop):
False,
marks=xfail(reason="'COO' object has no attribute 'item'"),
),
param(do("max"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
param(do("max"), False),
param(do("min"), False),
param(
do("median"), False, marks=xfail(reason="Coercion to dense via bottleneck")
),
param(do("min"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
param(
do("notnull"),
do("median"),
False,
marks=xfail(reason="'COO' object has no attribute 'notnull'"),
marks=xfail(reason="Missing implementation for np.nanmedian"),
),
param(do("notnull"), True),
param(
do("pipe", np.sum, axis=1),
True,
Expand All @@ -506,7 +510,7 @@ def test_dataarray_property(prop):
param(
do("rank", "x"),
False,
marks=xfail(reason="Coercion to dense via bottleneck"),
marks=xfail(reason="Only implemented for NumPy arrays (via bottleneck)"),
),
param(
do("reduce", np.sum, dim="x"),
Expand Down Expand Up @@ -534,13 +538,17 @@ def test_dataarray_property(prop):
True,
marks=xfail(reason="Indexing COO with more than one iterable index"),
), # noqa
param(do("std"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
param(
do("std"), False, marks=xfail(reason="Missing implementation for np.nanstd")
),
param(
do("sum"),
False,
marks=xfail(reason="Missing implementation for np.result_type"),
),
param(do("var"), False, marks=xfail(reason="Coercion to dense via bottleneck")),
param(
do("var"), False, marks=xfail(reason="Missing implementation for np.nanvar")
),
param(
do("where", make_xrarray({"x": 10, "y": 5}) > 0.5),
False,
Expand Down