Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable numbagg for reductions #8316

Merged
merged 9 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions xarray/core/nputils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,20 @@
try:
import bottleneck as bn

_USE_BOTTLENECK = True
_BOTTLENECK_AVAILABLE = True
except ImportError:
# use numpy methods instead
bn = np
_USE_BOTTLENECK = False
_BOTTLENECK_AVAILABLE = False

try:
import numbagg

_NUMBAGG_AVAILABLE = True
except ImportError:
# use numpy methods instead
numbagg = np
_NUMBAGG_AVAILABLE = False
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI there's a version of this elsewhere, `has_numbagg = numbagg.__version__`, which has the advantage that it contains the version, which we likely want to filter on...

(though I'm not sure my approach is that good! Open to feedback...)



def _select_along_axis(values, idx, axis):
Expand Down Expand Up @@ -165,9 +174,25 @@ def _create_bottleneck_method(name, npmodule=np):
def f(values, axis=None, **kwargs):
dtype = kwargs.get("dtype", None)
bn_func = getattr(bn, name, None)
nba_func = getattr(numbagg, name, None)

if (
_USE_BOTTLENECK
_NUMBAGG_AVAILABLE
and OPTIONS["use_numbagg"]
and isinstance(values, np.ndarray)
and nba_func is not None
# numbagg uses ddof=1 only
and "var" not in name
# TODO: bool?
and values.dtype.kind in "uifc"
# and values.dtype.isnative
and (dtype is None or np.dtype(dtype) == values.dtype)
):
# bottleneck does not take care of dtype, min_count; numbagg is assumed to behave the same here (TODO confirm)
kwargs.pop("dtype", None)
result = nba_func(values, axis=axis, **kwargs)
elif (
_BOTTLENECK_AVAILABLE
and OPTIONS["use_bottleneck"]
and isinstance(values, np.ndarray)
and bn_func is not None
Expand Down
7 changes: 7 additions & 0 deletions xarray/core/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"keep_attrs",
"warn_for_unclosed_files",
"use_bottleneck",
"use_numbagg",
"use_flox",
]

Expand All @@ -50,6 +51,7 @@ class T_Options(TypedDict):
warn_for_unclosed_files: bool
use_bottleneck: bool
use_flox: bool
use_numbagg: bool


OPTIONS: T_Options = {
Expand All @@ -72,6 +74,7 @@ class T_Options(TypedDict):
"warn_for_unclosed_files": False,
"use_bottleneck": True,
"use_flox": True,
"use_numbagg": True,
}

_JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"])
Expand All @@ -98,6 +101,7 @@ def _positive_integer(value: int) -> bool:
"file_cache_maxsize": _positive_integer,
"keep_attrs": lambda choice: choice in [True, False, "default"],
"use_bottleneck": lambda value: isinstance(value, bool),
"use_numbagg": lambda value: isinstance(value, bool),
"use_flox": lambda value: isinstance(value, bool),
"warn_for_unclosed_files": lambda value: isinstance(value, bool),
}
Expand Down Expand Up @@ -230,6 +234,9 @@ class set_options:
use_flox : bool, default: True
Whether to use ``numpy_groupies`` and ``flox`` to
accelerate groupby and resampling reductions.
use_numbagg : bool, default: True
Whether to use ``numbagg`` to accelerate reductions.
Takes precedence over ``use_bottleneck`` when both are True.
warn_for_unclosed_files : bool, default: False
Whether or not to issue a warning when unclosed files are
deallocated. This is mostly useful for debugging.
Expand Down
Loading