From ca72f79da7fe23f822add7b33ca2abbd816fa340 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 16 Oct 2021 23:23:00 +0530 Subject: [PATCH 01/16] Generator for groupby reductions --- xarray/core/_reductions.py | 785 +++++++++++++++++++++++++++++ xarray/core/arithmetic.py | 4 - xarray/core/groupby.py | 5 +- xarray/util/generate_reductions.py | 163 ++++++ 4 files changed, 951 insertions(+), 6 deletions(-) create mode 100644 xarray/core/_reductions.py create mode 100644 xarray/util/generate_reductions.py diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py new file mode 100644 index 00000000000..c5d5f90621b --- /dev/null +++ b/xarray/core/_reductions.py @@ -0,0 +1,785 @@ +"""Mixin classes with reduction operations.""" +# This file was generated using xarray.util.generate_reductions. Do not edit manually. + +from . import duck_array_ops + + +class DatasetGroupByReductions: + __slots__ = () + + def count(self, dim=None, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `count` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `count` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `count` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all(self, dim=None, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `all` along some dimension(s). 
+ + Parameters + ---------- + dim : hashable, optional + + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `all` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `all` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any(self, dim=None, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `any` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `any` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `any` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `max` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). 
+ keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `max` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `max` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `min` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `min` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `min` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + numeric_only=False, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `mean` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `mean` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `mean` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `prod` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. 
+ **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `prod` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `prod` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `sum` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `sum` on this object's data. 
+ + Returns + ------- + reduced : Dataset + New Dataset with `sum` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `std` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `std` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `std` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `var` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `var` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `var` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this Dataset's data by applying `median` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `median` on this object's data. + + Returns + ------- + reduced : Dataset + New Dataset with `median` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DataArrayGroupByReductions: + __slots__ = () + + def count(self, dim=None, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `count` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `count` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `count` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all(self, dim=None, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `all` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `all` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `all` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any(self, dim=None, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `any` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `any` on this object's data. 
+ + Returns + ------- + reduced : DataArray + New DataArray with `any` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `max` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `max` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `max` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `min` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. 
+ **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `min` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `min` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `mean` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `mean` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `mean` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `prod` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). 
+ min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `prod` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `prod` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `sum` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `sum` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `sum` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `std` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `std` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `std` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `var` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). 
By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `var` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with `var` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + """ + Reduce this DataArray's data by applying `median` along some dimension(s). + + Parameters + ---------- + dim : hashable, optional + + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `median` on this object's data. 
+ + Returns + ------- + reduced : DataArray + New DataArray with `median` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 27ec5ab8dd9..814e9a59877 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -128,8 +128,6 @@ class DataArrayArithmetic( class DataArrayGroupbyArithmetic( - ImplementsArrayReduce, - IncludeReduceMethods, SupportsArithmetic, DataArrayGroupByOpsMixin, ): @@ -137,8 +135,6 @@ class DataArrayGroupbyArithmetic( class DatasetGroupbyArithmetic( - ImplementsDatasetReduce, - IncludeReduceMethods, SupportsArithmetic, DatasetGroupByOpsMixin, ): diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 1ca5de965d0..7e9cb5589cb 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -5,6 +5,7 @@ import pandas as pd from . import dtypes, duck_array_ops, nputils, ops +from ._reductions import DataArrayGroupByReductions, DatasetGroupByReductions from .arithmetic import DataArrayGroupbyArithmetic, DatasetGroupbyArithmetic from .concat import concat from .formatting import format_array_flat @@ -712,7 +713,7 @@ def _maybe_reorder(xarray_obj, dim, positions): return xarray_obj[{dim: order}] -class DataArrayGroupBy(GroupBy, DataArrayGroupbyArithmetic): +class DataArrayGroupBy(GroupBy, DataArrayGroupbyArithmetic, DataArrayGroupByReductions): """GroupBy object specialized to grouping DataArray objects""" __slots__ = () @@ -877,7 +878,7 @@ def reduce_array(ar): return self.map(reduce_array, shortcut=shortcut) -class DatasetGroupBy(GroupBy, DatasetGroupbyArithmetic): +class DatasetGroupBy(GroupBy, DatasetGroupbyArithmetic, DatasetGroupByReductions): __slots__ = () diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py new file mode 100644 index 00000000000..042dc850dec --- /dev/null +++ 
b/xarray/util/generate_reductions.py
@@ -0,0 +1,163 @@
+"""Generate module with reduction operations for various xarray classes.
+
+For internal xarray development use only.
+
+Usage:
+    python xarray/util/generate_reductions.py > xarray/core/_reductions.py
+"""
+
+import textwrap
+from typing import Optional
+
+MODULE_PREAMBLE = '''\
+"""Mixin classes with reduction operations."""
+# This file was generated using xarray.util.generate_reductions. Do not edit manually.
+
+from . import duck_array_ops'''
+
+CLASS_PREAMBLE = """
+
+class {cls}GroupByReductions:
+    __slots__ = ()"""
+
+_SKIPNA_DOCSTRING = """
+skipna : bool, optional
+    If True, skip missing values (as marked by NaN). By default, only
+    skips missing values for float dtypes; other dtypes either do not
+    have a sentinel missing value (int) or skipna=True has not been
+    implemented (object, datetime64 or timedelta64)."""
+
+_MINCOUNT_DOCSTRING = """
+min_count : int, default: None
+    The required number of valid values to perform the operation. If
+    fewer than min_count non-NA values are present the result will be
+    NA. Only used if skipna is set to True or defaults to True for the
+    array's dtype. Changed in version 0.17.0: if specified on an integer
+    array and skipna=True, the result will be a float array."""
+
+
+REDUCE_METHODS = ["all", "any"]
+NAN_REDUCE_METHODS = [
+    "max",
+    "min",
+    "mean",
+    "prod",
+    "sum",
+    "std",
+    "var",
+    "median",
+]
+NAN_CUM_METHODS = ["cumsum", "cumprod"]
+MIN_COUNT_METHODS = ["prod", "sum"]
+NUMERIC_ONLY_METHODS = [
+    "mean",
+    "std",
+    "var",
+    "sum",
+    "prod",
+    "median",
+    "cumsum",
+    "cumprod",
+]
+
+TEMPLATE_REDUCTION = '''
+    def {method}(self, dim=None, {skip_na_kwarg}{min_count_kwarg}keep_attrs=None, **kwargs):
+        """
+        Reduce this {cls}'s data by applying `{method}` along some dimension(s).
+ + Parameters + ---------- + dim : hashable, optional +{extra_args}{skip_na_docs}{min_count_docs} + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating `{method}` on this object's data. + + Returns + ------- + reduced : {cls} + New {cls} with `{method}` applied to its data and the + indicated dimension(s) removed + """ + return self.reduce( + duck_array_ops.{array_method}, + dim=dim,{skip_na_reduce}{min_count_reduce}{numeric_only_reduce} + keep_attrs=keep_attrs, + **kwargs, + ) +''' + + +def generate_method( + cls: str, + method: str, + skipna: bool, + array_method: Optional[str] = None, +): + if not array_method: + array_method = method + + if cls == "Dataset": + if method in NUMERIC_ONLY_METHODS: + numeric_only_reduce = "numeric_only=True," + else: + numeric_only_reduce = "numeric_only=False," + else: + numeric_only_reduce = "" + + if skipna: + skip_na_docs = textwrap.indent(_SKIPNA_DOCSTRING, " ") + skip_na_kwarg = "skipna=True, " + skip_na_reduce = "skipna=skipna," + else: + skip_na_docs = "" + skip_na_kwarg = "" + skip_na_reduce = "" + + if method in MIN_COUNT_METHODS: + min_count_docs = textwrap.indent(_MINCOUNT_DOCSTRING, " ") + min_count_kwarg = "min_count=None, " + min_count_reduce = "min_count=min_count," + else: + min_count_docs = "" + min_count_kwarg = "" + min_count_reduce = "" + + return TEMPLATE_REDUCTION.format( + cls=cls, + method=method, + array_method=array_method, + extra_args="", + skip_na_docs=skip_na_docs, + skip_na_kwarg=skip_na_kwarg, + skip_na_reduce=skip_na_reduce, + min_count_docs=min_count_docs, + min_count_kwarg=min_count_kwarg, + min_count_reduce=min_count_reduce, + numeric_only_reduce=numeric_only_reduce, + ) + + +def render(cls): + yield CLASS_PREAMBLE.format(cls=cls) + yield 
generate_method(cls, "count", skipna=False) + for method in REDUCE_METHODS: + yield generate_method( + cls, + method, + skipna=False, + array_method=f"array_{method}", + ) + for method in NAN_REDUCE_METHODS: + yield generate_method(cls, method, skipna=True) + + +if __name__ == "__main__": + print(MODULE_PREAMBLE) + for cls in ["Dataset", "DataArray"]: + for line in render(cls=cls): + print(line) From bc9d5d33db35c4dea1cd61747a1ae2bbe8741d3b Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 17 Oct 2021 10:54:31 +0530 Subject: [PATCH 02/16] cleanup with namedtuple --- xarray/core/_reductions.py | 242 +++++++++++++++++++++-------- xarray/util/generate_reductions.py | 59 +++---- 2 files changed, 207 insertions(+), 94 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index c5d5f90621b..96d8200d7c6 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -16,18 +16,23 @@ def count(self, dim=None, keep_attrs=None, **kwargs): dim : hashable, optional keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `count` on this object's data. + function for calculating ``count`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `count` applied to its data and the + New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. 
""" return self.reduce( duck_array_ops.count, @@ -46,18 +51,23 @@ def all(self, dim=None, keep_attrs=None, **kwargs): dim : hashable, optional keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `all` on this object's data. + function for calculating ``all`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `all` applied to its data and the + New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -76,18 +86,23 @@ def any(self, dim=None, keep_attrs=None, **kwargs): dim : hashable, optional keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `any` on this object's data. + function for calculating ``any`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `any` applied to its data and the + New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.array_any, @@ -111,18 +126,23 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `max` on this object's data. + function for calculating ``max`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `max` applied to its data and the + New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -147,18 +167,23 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `min` on this object's data. + function for calculating ``min`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `min` applied to its data and the + New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -183,18 +208,23 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `mean` on this object's data. + function for calculating ``mean`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `mean` applied to its data and the + New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.mean, @@ -225,18 +255,23 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `prod` on this object's data. + function for calculating ``prod`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `prod` applied to its data and the + New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.prod, @@ -268,18 +303,23 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. 
keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `sum` on this object's data. + function for calculating ``sum`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `sum` applied to its data and the + New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.sum, @@ -305,18 +345,23 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `std` on this object's data. + function for calculating ``std`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `std` applied to its data and the + New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.std, @@ -341,18 +386,23 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). 
keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `var` on this object's data. + function for calculating ``var`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `var` applied to its data and the + New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.var, @@ -377,18 +427,23 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `median` on this object's data. + function for calculating ``median`` on this object's data. Returns ------- reduced : Dataset - New Dataset with `median` applied to its data and the + New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.median, @@ -412,18 +467,23 @@ def count(self, dim=None, keep_attrs=None, **kwargs): dim : hashable, optional keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. 
If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `count` on this object's data. + function for calculating ``count`` on this object's data. Returns ------- reduced : DataArray - New DataArray with `count` applied to its data and the + New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.count, @@ -441,18 +501,23 @@ def all(self, dim=None, keep_attrs=None, **kwargs): dim : hashable, optional keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `all` on this object's data. + function for calculating ``all`` on this object's data. Returns ------- reduced : DataArray - New DataArray with `all` applied to its data and the + New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.array_all, @@ -470,18 +535,23 @@ def any(self, dim=None, keep_attrs=None, **kwargs): dim : hashable, optional keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `any` on this object's data. + function for calculating ``any`` on this object's data. 
Returns ------- reduced : DataArray - New DataArray with `any` applied to its data and the + New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.array_any, @@ -504,18 +574,23 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `max` on this object's data. + function for calculating ``max`` on this object's data. Returns ------- reduced : DataArray - New DataArray with `max` applied to its data and the + New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.max, @@ -539,18 +614,23 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `min` on this object's data. + function for calculating ``min`` on this object's data. 
Returns ------- reduced : DataArray - New DataArray with `min` applied to its data and the + New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.min, @@ -574,18 +654,23 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `mean` on this object's data. + function for calculating ``mean`` on this object's data. Returns ------- reduced : DataArray - New DataArray with `mean` applied to its data and the + New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.mean, @@ -615,18 +700,23 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `prod` on this object's data. + function for calculating ``prod`` on this object's data. 
Returns ------- reduced : DataArray - New DataArray with `prod` applied to its data and the + New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.prod, @@ -657,18 +747,23 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): array's dtype. Changed in version 0.17.0: if specified on an integer array and skipna=True, the result will be a float array. keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `sum` on this object's data. + function for calculating ``sum`` on this object's data. Returns ------- reduced : DataArray - New DataArray with `sum` applied to its data and the + New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.sum, @@ -693,18 +788,23 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `std` on this object's data. + function for calculating ``std`` on this object's data. 
Returns ------- reduced : DataArray - New DataArray with `std` applied to its data and the + New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.std, @@ -728,18 +828,23 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `var` on this object's data. + function for calculating ``var`` on this object's data. Returns ------- reduced : DataArray - New DataArray with `var` applied to its data and the + New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.var, @@ -763,18 +868,23 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): have a sentinel missing value (int) or skipna=True has not been implemented (object, datetime64 or timedelta64). keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `median` on this object's data. + function for calculating ``median`` on this object's data. 
Returns ------- reduced : DataArray - New DataArray with `median` applied to its data and the + New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. """ return self.reduce( duck_array_ops.median, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 042dc850dec..01792d00b9f 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -6,6 +6,7 @@ python xarray/util/generate_reductions.py > xarray/core/_reductions.py """ +import collections import textwrap from typing import Optional @@ -61,31 +62,36 @@ class {cls}GroupByReductions: ] TEMPLATE_REDUCTION = ''' - def {method}(self, dim=None, {skip_na_kwarg}{min_count_kwarg}keep_attrs=None, **kwargs): + def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, **kwargs): """ Reduce this {cls}'s data by applying `{method}` along some dimension(s). Parameters ---------- dim : hashable, optional -{extra_args}{skip_na_docs}{min_count_docs} +{extra_args}{skip_na.docs}{min_count.docs} keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original + If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. **kwargs : dict Additional keyword arguments passed on to the appropriate array - function for calculating `{method}` on this object's data. + function for calculating ``{method}`` on this object's data. Returns ------- reduced : {cls} - New {cls} with `{method}` applied to its data and the + New {cls} with ``{method}`` applied to its data and the indicated dimension(s) removed + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. 
""" return self.reduce( duck_array_ops.{array_method}, - dim=dim,{skip_na_reduce}{min_count_reduce}{numeric_only_reduce} + dim=dim,{skip_na.call}{min_count.call}{numeric_only_call} keep_attrs=keep_attrs, **kwargs, ) @@ -103,42 +109,39 @@ def generate_method( if cls == "Dataset": if method in NUMERIC_ONLY_METHODS: - numeric_only_reduce = "numeric_only=True," + numeric_only_call = "numeric_only=True," else: - numeric_only_reduce = "numeric_only=False," + numeric_only_call = "numeric_only=False," else: - numeric_only_reduce = "" + numeric_only_call = "" + kwarg = collections.namedtuple("kwarg", "docs kwarg call") if skipna: - skip_na_docs = textwrap.indent(_SKIPNA_DOCSTRING, " ") - skip_na_kwarg = "skipna=True, " - skip_na_reduce = "skipna=skipna," + skip_na = kwarg( + docs=textwrap.indent(_SKIPNA_DOCSTRING, " "), + kwarg="skipna=True, ", + call="skipna=skipna,", + ) else: - skip_na_docs = "" - skip_na_kwarg = "" - skip_na_reduce = "" + skip_na = kwarg(docs="", kwarg="", call="") if method in MIN_COUNT_METHODS: - min_count_docs = textwrap.indent(_MINCOUNT_DOCSTRING, " ") - min_count_kwarg = "min_count=None, " - min_count_reduce = "min_count=min_count," + min_count = kwarg( + docs=textwrap.indent(_MINCOUNT_DOCSTRING, " "), + kwarg="min_count=None, ", + call="min_count=min_count,", + ) else: - min_count_docs = "" - min_count_kwarg = "" - min_count_reduce = "" + min_count = kwarg(docs="", kwarg="", call="") return TEMPLATE_REDUCTION.format( cls=cls, method=method, array_method=array_method, extra_args="", - skip_na_docs=skip_na_docs, - skip_na_kwarg=skip_na_kwarg, - skip_na_reduce=skip_na_reduce, - min_count_docs=min_count_docs, - min_count_kwarg=min_count_kwarg, - min_count_reduce=min_count_reduce, - numeric_only_reduce=numeric_only_reduce, + skip_na=skip_na, + min_count=min_count, + numeric_only_call=numeric_only_call, ) From 8d4eee380420c2415a33256385aa9ea4bee8bf7a Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 17 Oct 2021 11:19:02 +0530 Subject: [PATCH 03/16] 
docstring edit --- xarray/core/_reductions.py | 44 +++++++++++++++--------------- xarray/util/generate_reductions.py | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 96d8200d7c6..6e46d0f1ebf 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -9,7 +9,7 @@ class DatasetGroupByReductions: def count(self, dim=None, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `count` along some dimension(s). + Reduce this Dataset's data by applying ``count`` along some dimension(s). Parameters ---------- @@ -44,7 +44,7 @@ def count(self, dim=None, keep_attrs=None, **kwargs): def all(self, dim=None, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `all` along some dimension(s). + Reduce this Dataset's data by applying ``all`` along some dimension(s). Parameters ---------- @@ -79,7 +79,7 @@ def all(self, dim=None, keep_attrs=None, **kwargs): def any(self, dim=None, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `any` along some dimension(s). + Reduce this Dataset's data by applying ``any`` along some dimension(s). Parameters ---------- @@ -114,7 +114,7 @@ def any(self, dim=None, keep_attrs=None, **kwargs): def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `max` along some dimension(s). + Reduce this Dataset's data by applying ``max`` along some dimension(s). Parameters ---------- @@ -155,7 +155,7 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `min` along some dimension(s). + Reduce this Dataset's data by applying ``min`` along some dimension(s). 
Parameters ---------- @@ -196,7 +196,7 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `mean` along some dimension(s). + Reduce this Dataset's data by applying ``mean`` along some dimension(s). Parameters ---------- @@ -237,7 +237,7 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `prod` along some dimension(s). + Reduce this Dataset's data by applying ``prod`` along some dimension(s). Parameters ---------- @@ -285,7 +285,7 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `sum` along some dimension(s). + Reduce this Dataset's data by applying ``sum`` along some dimension(s). Parameters ---------- @@ -333,7 +333,7 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `std` along some dimension(s). + Reduce this Dataset's data by applying ``std`` along some dimension(s). Parameters ---------- @@ -374,7 +374,7 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `var` along some dimension(s). + Reduce this Dataset's data by applying ``var`` along some dimension(s). Parameters ---------- @@ -415,7 +415,7 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this Dataset's data by applying `median` along some dimension(s). 
+ Reduce this Dataset's data by applying ``median`` along some dimension(s). Parameters ---------- @@ -460,7 +460,7 @@ class DataArrayGroupByReductions: def count(self, dim=None, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `count` along some dimension(s). + Reduce this DataArray's data by applying ``count`` along some dimension(s). Parameters ---------- @@ -494,7 +494,7 @@ def count(self, dim=None, keep_attrs=None, **kwargs): def all(self, dim=None, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `all` along some dimension(s). + Reduce this DataArray's data by applying ``all`` along some dimension(s). Parameters ---------- @@ -528,7 +528,7 @@ def all(self, dim=None, keep_attrs=None, **kwargs): def any(self, dim=None, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `any` along some dimension(s). + Reduce this DataArray's data by applying ``any`` along some dimension(s). Parameters ---------- @@ -562,7 +562,7 @@ def any(self, dim=None, keep_attrs=None, **kwargs): def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `max` along some dimension(s). + Reduce this DataArray's data by applying ``max`` along some dimension(s). Parameters ---------- @@ -602,7 +602,7 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `min` along some dimension(s). + Reduce this DataArray's data by applying ``min`` along some dimension(s). Parameters ---------- @@ -642,7 +642,7 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `mean` along some dimension(s). + Reduce this DataArray's data by applying ``mean`` along some dimension(s). 
Parameters ---------- @@ -682,7 +682,7 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `prod` along some dimension(s). + Reduce this DataArray's data by applying ``prod`` along some dimension(s). Parameters ---------- @@ -729,7 +729,7 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `sum` along some dimension(s). + Reduce this DataArray's data by applying ``sum`` along some dimension(s). Parameters ---------- @@ -776,7 +776,7 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `std` along some dimension(s). + Reduce this DataArray's data by applying ``std`` along some dimension(s). Parameters ---------- @@ -816,7 +816,7 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `var` along some dimension(s). + Reduce this DataArray's data by applying ``var`` along some dimension(s). Parameters ---------- @@ -856,7 +856,7 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): """ - Reduce this DataArray's data by applying `median` along some dimension(s). + Reduce this DataArray's data by applying ``median`` along some dimension(s). 
Parameters ---------- diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 01792d00b9f..e33969fa35f 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -64,7 +64,7 @@ class {cls}GroupByReductions: TEMPLATE_REDUCTION = ''' def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, **kwargs): """ - Reduce this {cls}'s data by applying `{method}` along some dimension(s). + Reduce this {cls}'s data by applying ``{method}`` along some dimension(s). Parameters ---------- From dfcc4585567469b2445c10f2723121ce25219132 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 18 Oct 2021 11:53:40 +0530 Subject: [PATCH 04/16] Add examples --- xarray/core/_reductions.py | 352 +++++++++++++++++++++++++++++ xarray/util/generate_reductions.py | 22 ++ 2 files changed, 374 insertions(+) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 6e46d0f1ebf..3d1f0df08ac 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -29,6 +29,23 @@ def count(self, dim=None, keep_attrs=None, **kwargs): New Dataset with ``count`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").count() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 2 2 2 + See Also -------- :ref:`groupby` @@ -64,6 +81,23 @@ def all(self, dim=None, keep_attrs=None, **kwargs): New Dataset with ``all`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").all() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool True True True + See Also -------- :ref:`groupby` @@ -99,6 +133,23 @@ def any(self, dim=None, keep_attrs=None, **kwargs): New Dataset with ``any`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").any() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) bool True True True + See Also -------- :ref:`groupby` @@ -139,6 +190,23 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New Dataset with ``max`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").max() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 3 2 3 + See Also -------- :ref:`groupby` @@ -180,6 +248,23 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New Dataset with ``min`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").min() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 1 2 1 + See Also -------- :ref:`groupby` @@ -221,6 +306,23 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").mean() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 2.0 2.0 2.0 + See Also -------- :ref:`groupby` @@ -268,6 +370,23 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").prod() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 3 4 3 + See Also -------- :ref:`groupby` @@ -316,6 +435,23 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").sum() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) int64 4 4 4 + See Also -------- :ref:`groupby` @@ -358,6 +494,23 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New Dataset with ``std`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").std() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 0.0 1.0 + See Also -------- :ref:`groupby` @@ -399,6 +552,23 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New Dataset with ``var`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").var() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 1.0 0.0 1.0 + See Also -------- :ref:`groupby` @@ -440,6 +610,23 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New Dataset with ``median`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.groupby("labels").median() + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 2.0 2.0 2.0 + See Also -------- :ref:`groupby` @@ -480,6 +667,21 @@ def count(self, dim=None, keep_attrs=None, **kwargs): New DataArray with ``count`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").count() + + array([2, 2, 2]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -514,6 +716,21 @@ def all(self, dim=None, keep_attrs=None, **kwargs): New DataArray with ``all`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").all() + + array([ True, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -548,6 +765,21 @@ def any(self, dim=None, keep_attrs=None, **kwargs): New DataArray with ``any`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").any() + + array([ True, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -587,6 +819,21 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New DataArray with ``max`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... 
coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").max() + + array([3, 2, 3]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -627,6 +874,21 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New DataArray with ``min`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").min() + + array([1, 2, 1]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -667,6 +929,21 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New DataArray with ``mean`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").mean() + + array([2., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -713,6 +990,21 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) New DataArray with ``prod`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").prod() + + array([3, 4, 3]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -760,6 +1052,21 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): New DataArray with ``sum`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... 
dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").sum() + + array([4, 4, 4]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -801,6 +1108,21 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New DataArray with ``std`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").std() + + array([1., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -841,6 +1163,21 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New DataArray with ``var`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").var() + + array([1., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` @@ -881,6 +1218,21 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): New DataArray with ``median`` applied to its data and the indicated dimension(s) removed + Examples + -------- + + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + + >>> da.groupby("labels").median() + + array([2., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + See Also -------- :ref:`groupby` diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index e33969fa35f..f178e3ffd4c 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -84,6 +84,9 @@ def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, ** New {cls} with ``{method}`` applied to its data and the indicated dimension(s) removed + Examples + -------- +{example} See Also -------- :ref:`groupby` @@ -98,6 +101,24 @@ def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, ** ''' +def generate_example(cls, method): + dx = "ds" if cls == "Dataset" else "da" + calculation = f'{dx}.groupby("labels").{method}()' + if cls == "Dataset": + create = ">>> ds = xr.Dataset(dict(da=da))" + else: + create = "" + return f""" + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, 3], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + {create} + >>> {calculation} +""" + + def generate_method( cls: str, method: str, @@ -142,6 +163,7 @@ def generate_method( skip_na=skip_na, min_count=min_count, numeric_only_call=numeric_only_call, + example=generate_example(cls, method), ) From 40207652173f993dfdc5772bc317bb65b2366c40 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 18 Oct 2021 12:36:28 +0530 Subject: [PATCH 05/16] Update examples --- .pre-commit-config.yaml | 1 + xarray/core/_reductions.py | 290 ++++++++++++++++++----------- xarray/util/generate_reductions.py | 44 +++-- 3 files changed, 214 insertions(+), 121 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 51eed6a6fc9..fec2750b9f6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,6 +20,7 @@ repos: rev: v0.3.4 hooks: - id: blackdoc + exclude: "generate_reductions.py" - repo: https://gitlab.com/pycqa/flake8 rev: 3.9.2 hooks: diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 3d1f0df08ac..fcab3291769 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -13,8 +13,8 @@ def count(self, dim=None, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -31,9 +31,8 @@ def count(self, dim=None, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... 
) @@ -44,7 +43,7 @@ def count(self, dim=None, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 2 2 2 + da (labels) int64 1 2 2 See Also -------- @@ -65,8 +64,8 @@ def all(self, dim=None, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -83,9 +82,8 @@ def all(self, dim=None, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... np.array([True, True, True, True, True, False], dtype=bool), ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) @@ -96,7 +94,7 @@ def all(self, dim=None, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) bool True True True + da (labels) bool False True True See Also -------- @@ -117,8 +115,8 @@ def any(self, dim=None, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -135,9 +133,8 @@ def any(self, dim=None, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... np.array([True, True, True, True, True, False], dtype=bool), ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... 
) @@ -169,8 +166,8 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -192,9 +189,8 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) @@ -205,7 +201,14 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 3 2 3 + da (labels) float64 1.0 2.0 3.0 + >>> ds.groupby("labels").max(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 3.0 See Also -------- @@ -227,8 +230,8 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -250,9 +253,8 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... 
) @@ -263,7 +265,14 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 1 2 1 + da (labels) float64 1.0 2.0 1.0 + >>> ds.groupby("labels").min(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 1.0 See Also -------- @@ -285,8 +294,8 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -308,9 +317,8 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) @@ -321,7 +329,14 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 2.0 2.0 2.0 + da (labels) float64 1.0 2.0 2.0 + >>> ds.groupby("labels").mean(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 See Also -------- @@ -343,8 +358,8 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -372,9 +387,8 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) @@ -385,7 +399,14 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 3 4 3 + da (labels) float64 1.0 4.0 3.0 + >>> ds.groupby("labels").prod(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 See Also -------- @@ -408,8 +429,8 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -437,9 +458,8 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... 
) @@ -450,7 +470,14 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 4 4 4 + da (labels) float64 1.0 4.0 4.0 + >>> ds.groupby("labels").sum(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 See Also -------- @@ -473,8 +500,8 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -496,9 +523,8 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) @@ -509,7 +535,14 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 0.0 1.0 + da (labels) float64 0.0 0.0 1.0 + >>> ds.groupby("labels").std(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 See Also -------- @@ -531,8 +564,8 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -554,9 +587,8 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) @@ -567,7 +599,14 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 0.0 1.0 + da (labels) float64 0.0 0.0 1.0 + >>> ds.groupby("labels").var(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 0.0 1.0 See Also -------- @@ -589,8 +628,8 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -612,9 +651,8 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... 
) @@ -625,7 +663,14 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 2.0 2.0 2.0 + da (labels) float64 1.0 2.0 2.0 + >>> ds.groupby("labels").median(skipna=False) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 2.0 2.0 See Also -------- @@ -651,8 +696,8 @@ def count(self, dim=None, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -669,16 +714,15 @@ def count(self, dim=None, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").count() - array([2, 2, 2]) + array([1, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -700,8 +744,8 @@ def all(self, dim=None, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -718,16 +762,15 @@ def all(self, dim=None, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... np.array([True, True, True, True, True, False], dtype=bool), ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... 
) >>> da.groupby("labels").all() - array([ True, True, True]) + array([False, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -749,8 +792,8 @@ def any(self, dim=None, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -767,9 +810,8 @@ def any(self, dim=None, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... np.array([True, True, True, True, True, False], dtype=bool), ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) @@ -798,8 +840,8 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -821,16 +863,20 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").max() - array([3, 2, 3]) + array([1., 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").max(skipna=False) + + array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -853,8 +899,8 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. 
skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -876,16 +922,20 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").min() - array([1, 2, 1]) + array([1., 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").min(skipna=False) + + array([nan, 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -908,8 +958,8 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -931,16 +981,20 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").mean() - array([2., 2., 2.]) + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").mean(skipna=False) + + array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -963,8 +1017,8 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -992,16 +1046,20 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").prod() - array([3, 4, 3]) + array([1., 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").prod(skipna=False) + + array([nan, 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -1025,8 +1083,8 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1054,16 +1112,20 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").sum() - array([4, 4, 4]) + array([1., 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").sum(skipna=False) + + array([nan, 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -1087,8 +1149,8 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -1110,16 +1172,20 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").std() - array([1., 0., 1.]) + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").std(skipna=False) + + array([nan, 0., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -1142,8 +1208,8 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1165,16 +1231,20 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").var() - array([1., 0., 1.]) + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").var(skipna=False) + + array([nan, 0., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -1197,8 +1267,8 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Parameters ---------- - dim : hashable, optional - + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -1220,16 +1290,20 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Examples -------- - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) >>> da.groupby("labels").median() - array([2., 2., 2.]) + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").median(skipna=False) + + array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index f178e3ffd4c..d34f2ad9847 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -68,8 +68,8 @@ def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, ** Parameters ---------- - dim : hashable, optional -{extra_args}{skip_na.docs}{min_count.docs} + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``{method}``.{extra_args}{skip_na.docs}{min_count.docs} keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -85,8 +85,8 @@ def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, ** indicated dimension(s) removed Examples - -------- -{example} + --------{example} + See Also -------- :ref:`groupby` @@ -102,21 +102,39 @@ def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, ** def generate_example(cls, method): + """Generate examples for method.""" dx = "ds" if cls == "Dataset" else "da" calculation = f'{dx}.groupby("labels").{method}()' - if cls == "Dataset": - create = ">>> ds = xr.Dataset(dict(da=da))" + + if method in REDUCE_METHODS: + create_da = """ + >>> da = xr.DataArray( + ... 
np.array([True, True, True, True, True, False], dtype=bool), + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... )""" else: - create = "" - return f""" + create_da = """ >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, 3], + ... [1, 2, 3, 1, 2, np.nan], ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), - ... ) - {create} - >>> {calculation} -""" + ... )""" + + if cls == "Dataset": + maybe_dataset = ">>> ds = xr.Dataset(dict(da=da))" + else: + maybe_dataset = "" + + if method in NAN_REDUCE_METHODS: + maybe_skipna = f""" + >>> {dx}.groupby("labels").{method}(skipna=False)""" + else: + maybe_skipna = "" + + return f"""{create_da} + {maybe_dataset} + >>> {calculation}{maybe_skipna}""" def generate_method( From a7da4d733249062e8336f1207d025f1b586f06c4 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 23 Oct 2021 22:27:51 +0530 Subject: [PATCH 06/16] Generate resample reductions separately. --- xarray/core/_reductions.py | 2008 +++++++++++++++++++++++++--- xarray/core/groupby.py | 12 +- xarray/core/resample.py | 7 +- xarray/util/generate_reductions.py | 126 +- 4 files changed, 1926 insertions(+), 227 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index fcab3291769..26f4bbf274e 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,13 +1,21 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. +from typing import Optional + from . import duck_array_ops +from .types import T_DataArray, T_Dataset class DatasetGroupByReductions: __slots__ = () - def count(self, dim=None, keep_attrs=None, **kwargs): + def count( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``count`` along some dimension(s). 
@@ -58,7 +66,12 @@ def count(self, dim=None, keep_attrs=None, **kwargs): **kwargs, ) - def all(self, dim=None, keep_attrs=None, **kwargs): + def all( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``all`` along some dimension(s). @@ -109,7 +122,12 @@ def all(self, dim=None, keep_attrs=None, **kwargs): **kwargs, ) - def any(self, dim=None, keep_attrs=None, **kwargs): + def any( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``any`` along some dimension(s). @@ -160,7 +178,13 @@ def any(self, dim=None, keep_attrs=None, **kwargs): **kwargs, ) - def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def max( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``max`` along some dimension(s). @@ -224,7 +248,13 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): **kwargs, ) - def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def min( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``min`` along some dimension(s). @@ -288,7 +318,13 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): **kwargs, ) - def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def mean( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``mean`` along some dimension(s). 
@@ -352,7 +388,14 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): **kwargs, ) - def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): + def prod( + self, + dim=None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``prod`` along some dimension(s). @@ -423,7 +466,14 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) **kwargs, ) - def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): + def sum( + self, + dim=None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``sum`` along some dimension(s). @@ -494,7 +544,13 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): **kwargs, ) - def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def std( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``std`` along some dimension(s). @@ -558,7 +614,13 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): **kwargs, ) - def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def var( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``var`` along some dimension(s). @@ -622,7 +684,13 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): **kwargs, ) - def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def median( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ Reduce this Dataset's data by applying ``median`` along some dimension(s). 
@@ -687,12 +755,17 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): ) -class DataArrayGroupByReductions: +class DatasetResampleReductions: __slots__ = () - def count(self, dim=None, keep_attrs=None, **kwargs): + def count( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``count`` along some dimension(s). + Reduce this Dataset's data by applying ``count`` along some dimension(s). Parameters ---------- @@ -708,39 +781,49 @@ def count(self, dim=None, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``count`` applied to its data and the + reduced : Dataset + New Dataset with ``count`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... ) - - >>> da.groupby("labels").count() - - array([1, 2, 2]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").count() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 3 2 See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.count, dim=dim, + numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) - def all(self, dim=None, keep_attrs=None, **kwargs): + def all( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``all`` along some dimension(s). + Reduce this Dataset's data by applying ``all`` along some dimension(s). 
Parameters ---------- @@ -756,39 +839,49 @@ def all(self, dim=None, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``all`` applied to its data and the + reduced : Dataset + New Dataset with ``all`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... ) - - >>> da.groupby("labels").all() - - array([False, True, True]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").all() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True False See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.array_all, dim=dim, + numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) - def any(self, dim=None, keep_attrs=None, **kwargs): + def any( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``any`` along some dimension(s). + Reduce this Dataset's data by applying ``any`` along some dimension(s). Parameters ---------- @@ -804,39 +897,50 @@ def any(self, dim=None, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``any`` applied to its data and the + reduced : Dataset + New Dataset with ``any`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... dims="time", + ... 
coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... ) - - >>> da.groupby("labels").any() - - array([ True, True, True]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").any() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.array_any, dim=dim, + numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) - def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def max( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``max`` along some dimension(s). + Reduce this Dataset's data by applying ``max`` along some dimension(s). Parameters ---------- @@ -857,45 +961,58 @@ def max(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``max`` applied to its data and the + reduced : Dataset + New Dataset with ``max`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... 
) - - >>> da.groupby("labels").max() - - array([1., 2., 3.]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").max() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' - >>> da.groupby("labels").max(skipna=False) - - array([nan, 2., 3.]) + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True + >>> ds.resample(time="3M").max(skipna=False) + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.max, dim=dim, skipna=skipna, + numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) - def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def min( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``min`` along some dimension(s). + Reduce this Dataset's data by applying ``min`` along some dimension(s). Parameters ---------- @@ -916,45 +1033,58 @@ def min(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``min`` applied to its data and the + reduced : Dataset + New Dataset with ``min`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... 
) - - >>> da.groupby("labels").min() - - array([1., 2., 1.]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").min() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' - >>> da.groupby("labels").min(skipna=False) - - array([nan, 2., 1.]) + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True + >>> ds.resample(time="3M").min(skipna=False) + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) bool True True True See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.min, dim=dim, skipna=skipna, + numeric_only=False, keep_attrs=keep_attrs, **kwargs, ) - def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def mean( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``mean`` along some dimension(s). + Reduce this Dataset's data by applying ``mean`` along some dimension(s). Parameters ---------- @@ -975,45 +1105,59 @@ def mean(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``mean`` applied to its data and the + reduced : Dataset + New Dataset with ``mean`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... 
) - - >>> da.groupby("labels").mean() - - array([1., 2., 2.]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").mean() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' - >>> da.groupby("labels").mean(skipna=False) - - array([nan, 2., 2.]) + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 1.0 + >>> ds.resample(time="3M").mean(skipna=False) + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 1.0 See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.mean, dim=dim, skipna=skipna, + numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) - def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): + def prod( + self, + dim=None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``prod`` along some dimension(s). + Reduce this Dataset's data by applying ``prod`` along some dimension(s). Parameters ---------- @@ -1040,46 +1184,60 @@ def prod(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs) Returns ------- - reduced : DataArray - New DataArray with ``prod`` applied to its data and the + reduced : Dataset + New Dataset with ``prod`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... 
) - - >>> da.groupby("labels").prod() - - array([1., 4., 3.]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").prod() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' - >>> da.groupby("labels").prod(skipna=False) - - array([nan, 4., 3.]) + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 1 1 + >>> ds.resample(time="3M").prod(skipna=False) + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 1 1 See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.prod, dim=dim, skipna=skipna, min_count=min_count, + numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) - def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): + def sum( + self, + dim=None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``sum`` along some dimension(s). + Reduce this Dataset's data by applying ``sum`` along some dimension(s). Parameters ---------- @@ -1106,46 +1264,59 @@ def sum(self, dim=None, skipna=True, min_count=None, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``sum`` applied to its data and the + reduced : Dataset + New Dataset with ``sum`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... 
) - - >>> da.groupby("labels").sum() - - array([1., 4., 4.]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").sum() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' - >>> da.groupby("labels").sum(skipna=False) - - array([nan, 4., 4.]) + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 3 2 + >>> ds.resample(time="3M").sum(skipna=False) + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) int64 1 3 2 See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.sum, dim=dim, skipna=skipna, min_count=min_count, + numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) - def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def std( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``std`` along some dimension(s). + Reduce this Dataset's data by applying ``std`` along some dimension(s). Parameters ---------- @@ -1166,45 +1337,58 @@ def std(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``std`` applied to its data and the + reduced : Dataset + New Dataset with ``std`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... 
) - - >>> da.groupby("labels").std() - - array([0., 0., 1.]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").std() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' - >>> da.groupby("labels").std(skipna=False) - - array([nan, 0., 1.]) + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.0 0.0 + >>> ds.resample(time="3M").std(skipna=False) + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.0 0.0 See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.std, dim=dim, skipna=skipna, + numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) - def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def var( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``var`` along some dimension(s). + Reduce this Dataset's data by applying ``var`` along some dimension(s). Parameters ---------- @@ -1225,45 +1409,58 @@ def var(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``var`` applied to its data and the + reduced : Dataset + New Dataset with ``var`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... 
) - - >>> da.groupby("labels").var() - - array([0., 0., 1.]) + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").var() + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' - >>> da.groupby("labels").var(skipna=False) - - array([nan, 0., 1.]) + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.0 0.0 + >>> ds.resample(time="3M").var(skipna=False) + + Dimensions: (time: 3) Coordinates: - * labels (labels) object 'a' 'b' 'c' + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 0.0 0.0 0.0 See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`resampling` + User guide on resampling operations. """ return self.reduce( duck_array_ops.var, dim=dim, skipna=skipna, + numeric_only=True, keep_attrs=keep_attrs, **kwargs, ) - def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): + def median( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_Dataset: """ - Reduce this DataArray's data by applying ``median`` along some dimension(s). + Reduce this Dataset's data by applying ``median`` along some dimension(s). Parameters ---------- @@ -1284,26 +1481,261 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): Returns ------- - reduced : DataArray - New DataArray with ``median`` applied to its data and the + reduced : Dataset + New Dataset with ``median`` applied to its data and the indicated dimension(s) removed Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), ... 
) - - >>> da.groupby("labels").median() - - array([1., 2., 2.]) - Coordinates: + >>> ds = xr.Dataset(dict(da=da)) + >>> ds.resample(time="3M").median() + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 1.0 + >>> ds.resample(time="3M").median(skipna=False) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 1.0 1.0 1.0 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + numeric_only=True, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DataArrayGroupByReductions: + __slots__ = () + + def count( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").count() + + array([1, 2, 2]) + Coordinates: * labels (labels) object 'a' 'b' 'c' - >>> da.groupby("labels").median(skipna=False) + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").all() - array([nan, 2., 2.]) + array([False, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. 
+ **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").any() + + array([ True, True, True]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... 
coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").max() + + array([1., 2., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").max(skipna=False) + + array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -1312,6 +1744,1198 @@ def median(self, dim=None, skipna=True, keep_attrs=None, **kwargs): :ref:`groupby` User guide on groupby operations. """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + + >>> da.groupby("labels").min() + + array([1., 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").min(skipna=False) + + array([nan, 2., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").mean() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").mean(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self, + dim=None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + + >>> da.groupby("labels").prod() + + array([1., 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").prod(skipna=False) + + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self, + dim=None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... 
[1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").sum() + + array([1., 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").sum(skipna=False) + + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
) + + >>> da.groupby("labels").std() + + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").std(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").var() + + array([0., 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").var(skipna=False) + + array([nan, 0., 1.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... [1, 2, 3, 1, 2, np.nan], + ... dims="x", + ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... ) + + >>> da.groupby("labels").median() + + array([1., 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + >>> da.groupby("labels").median(skipna=False) + + array([nan, 2., 2.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + See Also + -------- + :ref:`groupby` + User guide on groupby operations. 
+ """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + +class DataArrayResampleReductions: + __slots__ = () + + def count( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``count``. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``count`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... ) + + >>> da.resample(time="3M").count() + + array([1, 3, 2]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def all( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``all``. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. 
+ **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``all`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... ) + + >>> da.resample(time="3M").all() + + array([ True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def any( + self, + dim=None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``any``. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``any`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([True, True, True, True, True, False], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... 
) + + >>> da.resample(time="3M").any() + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + keep_attrs=keep_attrs, + **kwargs, + ) + + def max( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``max``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``max`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... 
) + + >>> da.resample(time="3M").max() + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3M").max(skipna=False) + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def min( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``min``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``min`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... 
) + + >>> da.resample(time="3M").min() + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3M").min(skipna=False) + + array([ True, True, True]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def mean( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``mean``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... 
) + + >>> da.resample(time="3M").mean() + + array([1., 1., 1.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3M").mean(skipna=False) + + array([1., 1., 1.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def prod( + self, + dim=None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``prod``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. 
+ + Returns + ------- + reduced : DataArray + New DataArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... ) + + >>> da.resample(time="3M").prod() + + array([1, 1, 1]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3M").prod(skipna=False) + + array([1, 1, 1]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def sum( + self, + dim=None, + skipna: bool = True, + min_count: Optional[int] = None, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``sum``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + min_count : int, default: None + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. 
If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... ) + + >>> da.resample(time="3M").sum() + + array([1, 3, 2]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3M").sum(skipna=False) + + array([1, 3, 2]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + keep_attrs=keep_attrs, + **kwargs, + ) + + def std( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``std``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. 
+ **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``std`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... ) + + >>> da.resample(time="3M").std() + + array([0., 0., 0.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3M").std(skipna=False) + + array([0., 0., 0.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def var( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``var``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. 
+ + Returns + ------- + reduced : DataArray + New DataArray with ``var`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... ) + + >>> da.resample(time="3M").var() + + array([0., 0., 0.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3M").var(skipna=False) + + array([0., 0., 0.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + keep_attrs=keep_attrs, + **kwargs, + ) + + def median( + self, + dim=None, + skipna: bool = True, + keep_attrs=None, + **kwargs, + ) -> T_DataArray: + """ + Reduce this DataArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : hashable or iterable of hashable, optional + Name of dimension[s] along which to apply ``median``. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + keep_attrs : bool, optional + If True, ``attrs`` will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + **kwargs : dict + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. + + Returns + ------- + reduced : DataArray + New DataArray with ``median`` applied to its data and the + indicated dimension(s) removed + + Examples + -------- + >>> da = xr.DataArray( + ... 
np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... ), + ... ) + + >>> da.resample(time="3M").median() + + array([1., 1., 1.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + >>> da.resample(time="3M").median(skipna=False) + + array([1., 1., 1.]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + See Also + -------- + :ref:`resampling` + User guide on resampling operations. + """ return self.reduce( duck_array_ops.median, dim=dim, diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 7e9cb5589cb..185b4ae5bec 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -713,7 +713,7 @@ def _maybe_reorder(xarray_obj, dim, positions): return xarray_obj[{dim: order}] -class DataArrayGroupBy(GroupBy, DataArrayGroupbyArithmetic, DataArrayGroupByReductions): +class DataArrayGroupByBase(GroupBy, DataArrayGroupbyArithmetic): """GroupBy object specialized to grouping DataArray objects""" __slots__ = () @@ -878,7 +878,11 @@ def reduce_array(ar): return self.map(reduce_array, shortcut=shortcut) -class DatasetGroupBy(GroupBy, DatasetGroupbyArithmetic, DatasetGroupByReductions): +class DataArrayGroupBy(DataArrayGroupByBase, DataArrayGroupByReductions): + __slots__ = () + + +class DatasetGroupByBase(GroupBy, DatasetGroupbyArithmetic): __slots__ = () @@ -995,3 +999,7 @@ def assign(self, **kwargs): Dataset.assign """ return self.map(lambda ds: ds.assign(**kwargs)) + + +class DatasetGroupBy(DatasetGroupByBase, DatasetGroupByReductions): + __slots__ = () diff --git a/xarray/core/resample.py b/xarray/core/resample.py index c7749a7e5ca..e2f599e8b4e 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,6 +1,7 @@ import warnings -from .groupby import DataArrayGroupBy, DatasetGroupBy +from ._reductions import DataArrayResampleReductions, DatasetResampleReductions +from .groupby 
import DataArrayGroupByBase, DatasetGroupByBase RESAMPLE_DIM = "__resample_dim__" @@ -156,7 +157,7 @@ def _interpolate(self, kind="linear"): ) -class DataArrayResample(DataArrayGroupBy, Resample): +class DataArrayResample(DataArrayResampleReductions, DataArrayGroupByBase, Resample): """DataArrayGroupBy object specialized to time resampling operations over a specified dimension """ @@ -247,7 +248,7 @@ def apply(self, func, args=(), shortcut=None, **kwargs): return self.map(func=func, shortcut=shortcut, args=args, **kwargs) -class DatasetResample(DatasetGroupBy, Resample): +class DatasetResample(DatasetResampleReductions, DatasetGroupByBase, Resample): """DatasetGroupBy object specialized to resampling a specified dimension""" def __init__(self, *args, dim=None, resample_dim=None, **kwargs): diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index d34f2ad9847..402eca18114 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -8,17 +8,20 @@ import collections import textwrap -from typing import Optional +from typing import Callable, Optional MODULE_PREAMBLE = '''\ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from . import duck_array_ops''' +from typing import Optional + +from . 
import duck_array_ops +from .types import T_DataArray, T_Dataset''' CLASS_PREAMBLE = """ -class {cls}GroupByReductions: +class {obj}{cls}Reductions: __slots__ = ()""" _SKIPNA_DOCSTRING = """ @@ -37,7 +40,7 @@ class {cls}GroupByReductions: array and skipna=True, the result will be a float array.""" -REDUCE_METHODS = ["all", "any"] +BOOL_REDUCE_METHODS = ["all", "any"] NAN_REDUCE_METHODS = [ "max", "min", @@ -62,9 +65,14 @@ class {cls}GroupByReductions: ] TEMPLATE_REDUCTION = ''' - def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, **kwargs): + def {method}( + self, + dim=None, {skip_na.kwarg}{min_count.kwarg} + keep_attrs=None, + **kwargs, + ) -> T_{obj}: """ - Reduce this {cls}'s data by applying ``{method}`` along some dimension(s). + Reduce this {obj}'s data by applying ``{method}`` along some dimension(s). Parameters ---------- @@ -80,8 +88,8 @@ def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, ** Returns ------- - reduced : {cls} - New {cls} with ``{method}`` applied to its data and the + reduced : {obj} + New {obj} with ``{method}`` applied to its data and the indicated dimension(s) removed Examples @@ -89,8 +97,8 @@ def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, ** See Also -------- - :ref:`groupby` - User guide on groupby operations. + :ref:`{docref}` + User guide on {docref} operations. """ return self.reduce( duck_array_ops.{array_method}, @@ -101,12 +109,12 @@ def {method}(self, dim=None, {skip_na.kwarg}{min_count.kwarg}keep_attrs=None, ** ''' -def generate_example(cls, method): +def generate_groupby_example(obj, method): """Generate examples for method.""" - dx = "ds" if cls == "Dataset" else "da" + dx = "ds" if obj == "Dataset" else "da" calculation = f'{dx}.groupby("labels").{method}()' - if method in REDUCE_METHODS: + if method in BOOL_REDUCE_METHODS: create_da = """ >>> da = xr.DataArray( ... 
np.array([True, True, True, True, True, False], dtype=bool), @@ -121,7 +129,7 @@ def generate_example(cls, method): ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... )""" - if cls == "Dataset": + if obj == "Dataset": maybe_dataset = ">>> ds = xr.Dataset(dict(da=da))" else: maybe_dataset = "" @@ -137,16 +145,53 @@ def generate_example(cls, method): >>> {calculation}{maybe_skipna}""" +def generate_resample_example(obj: str, method: str): + """Generate examples for method.""" + dx = "ds" if obj == "Dataset" else "da" + calculation = f'{dx}.resample(time="3M").{method}()' + + if method in BOOL_REDUCE_METHODS: + np_array = """ + ... np.array([True, True, True, True, True, False], dtype=bool),""" + + else: + np_array = """ + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool),""" + + create_da = f""" + >>> da = xr.DataArray({np_array} + ... dims="time", + ... coords=dict(time=("time", pd.date_range("01-01-2001", freq="M", periods=6))), + ... )""" + + if obj == "Dataset": + maybe_dataset = ">>> ds = xr.Dataset(dict(da=da))" + else: + maybe_dataset = "" + + if method in NAN_REDUCE_METHODS: + maybe_skipna = f""" + >>> {dx}.resample(time="3M").{method}(skipna=False)""" + else: + maybe_skipna = "" + + return f"""{create_da} + {maybe_dataset} + >>> {calculation}{maybe_skipna}""" + + def generate_method( - cls: str, + obj: str, + docref: str, method: str, skipna: bool, + example_generator: Callable, array_method: Optional[str] = None, ): if not array_method: array_method = method - if cls == "Dataset": + if obj == "Dataset": if method in NUMERIC_ONLY_METHODS: numeric_only_call = "numeric_only=True," else: @@ -158,7 +203,7 @@ def generate_method( if skipna: skip_na = kwarg( docs=textwrap.indent(_SKIPNA_DOCSTRING, " "), - kwarg="skipna=True, ", + kwarg="skipna: bool=True, ", call="skipna=skipna,", ) else: @@ -167,40 +212,61 @@ def generate_method( if method in MIN_COUNT_METHODS: min_count = kwarg( docs=textwrap.indent(_MINCOUNT_DOCSTRING, " "), - 
kwarg="min_count=None, ", + kwarg="min_count: Optional[int]=None, ", call="min_count=min_count,", ) else: min_count = kwarg(docs="", kwarg="", call="") return TEMPLATE_REDUCTION.format( - cls=cls, + obj=obj, + docref=docref, method=method, array_method=array_method, extra_args="", skip_na=skip_na, min_count=min_count, numeric_only_call=numeric_only_call, - example=generate_example(cls, method), + example=example_generator(obj, method), ) -def render(cls): - yield CLASS_PREAMBLE.format(cls=cls) - yield generate_method(cls, "count", skipna=False) - for method in REDUCE_METHODS: +def render(obj: str, cls: str, docref: str, example_generator: Callable): + yield CLASS_PREAMBLE.format(obj=obj, cls=cls) + yield generate_method( + obj, + method="count", + docref=docref, + skipna=False, + example_generator=example_generator, + ) + for method in BOOL_REDUCE_METHODS: yield generate_method( - cls, - method, + obj, + method=method, + docref=docref, skipna=False, array_method=f"array_{method}", + example_generator=example_generator, ) for method in NAN_REDUCE_METHODS: - yield generate_method(cls, method, skipna=True) + yield generate_method( + obj, + method=method, + docref=docref, + skipna=True, + example_generator=example_generator, + ) if __name__ == "__main__": print(MODULE_PREAMBLE) - for cls in ["Dataset", "DataArray"]: - for line in render(cls=cls): - print(line) + for obj in ["Dataset", "DataArray"]: + for cls, docref, examplegen in ( + ("GroupBy", "groupby", generate_groupby_example), + ("Resample", "resampling", generate_resample_example), + ): + for line in render( + obj=obj, cls=cls, docref=docref, example_generator=examplegen + ): + print(line) From dece7fa5a0655df4de54b32cc611eb5a6231a8f4 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 23 Oct 2021 22:29:52 +0530 Subject: [PATCH 07/16] Add whats-new --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 87c85d45454..2bcc751e13d 100644 --- 
a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -86,6 +86,8 @@ Documentation - Users are instructed to try ``use_cftime=True`` if a ``TypeError`` occurs when combining datasets and one of the types involved is a subclass of ``cftime.datetime`` (:pull:`5776`). By `Zeb Nicholls `_. +- Better examples in docstrings for groupby and resampling reductions. + By `Deepak Cherian `_. Internal Changes ~~~~~~~~~~~~~~~~ From c3df4b0d5248d2a3fcd2a4bfcbdf9d5a2ba5e070 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 23 Oct 2021 22:36:58 +0530 Subject: [PATCH 08/16] Minor docstring improvement --- xarray/core/_reductions.py | 11 ----------- xarray/util/generate_reductions.py | 6 +++--- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 26f4bbf274e..a66e5ac4d29 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1562,7 +1562,6 @@ def count( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").count() array([1, 2, 2]) @@ -1615,7 +1614,6 @@ def all( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").all() array([False, True, True]) @@ -1668,7 +1666,6 @@ def any( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").any() array([ True, True, True]) @@ -1727,7 +1724,6 @@ def max( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").max() array([1., 2., 3.]) @@ -1792,7 +1788,6 @@ def min( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").min() array([1., 2., 1.]) @@ -1857,7 +1852,6 @@ def mean( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... 
) - >>> da.groupby("labels").mean() array([1., 2., 2.]) @@ -1929,7 +1923,6 @@ def prod( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").prod() array([1., 4., 3.]) @@ -2002,7 +1995,6 @@ def sum( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").sum() array([1., 4., 4.]) @@ -2068,7 +2060,6 @@ def std( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").std() array([0., 0., 1.]) @@ -2133,7 +2124,6 @@ def var( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").var() array([0., 0., 1.]) @@ -2198,7 +2188,6 @@ def median( ... dims="x", ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), ... ) - >>> da.groupby("labels").median() array([1., 2., 2.]) diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 402eca18114..5a0db39a3f5 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -130,7 +130,8 @@ def generate_groupby_example(obj, method): ... 
)""" if obj == "Dataset": - maybe_dataset = ">>> ds = xr.Dataset(dict(da=da))" + maybe_dataset = """ + >>> ds = xr.Dataset(dict(da=da))""" else: maybe_dataset = "" @@ -140,8 +141,7 @@ def generate_groupby_example(obj, method): else: maybe_skipna = "" - return f"""{create_da} - {maybe_dataset} + return f"""{create_da}{maybe_dataset} >>> {calculation}{maybe_skipna}""" From 4d976b1364b2f755aecf8742e92073e42288cb60 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sat, 23 Oct 2021 23:08:10 +0530 Subject: [PATCH 09/16] Some consolidation --- xarray/core/_reductions.py | 423 ++++++++++++++++++++--------- xarray/util/generate_reductions.py | 74 ++--- 2 files changed, 320 insertions(+), 177 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index a66e5ac4d29..8d164cf0fc4 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -40,9 +40,12 @@ def count( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").count() @@ -51,10 +54,12 @@ def count( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 1 2 2 + da (labels) int64 2 2 2 See Also -------- + numpy.count + Dataset.count :ref:`groupby` User guide on groupby operations. """ @@ -97,8 +102,11 @@ def all( -------- >>> da = xr.DataArray( ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").all() @@ -111,6 +119,8 @@ def all( See Also -------- + numpy.all + Dataset.all :ref:`groupby` User guide on groupby operations. """ @@ -153,8 +163,11 @@ def any( -------- >>> da = xr.DataArray( ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").any() @@ -167,6 +180,8 @@ def any( See Also -------- + numpy.any + Dataset.any :ref:`groupby` User guide on groupby operations. """ @@ -214,9 +229,12 @@ def max( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").max() @@ -225,17 +243,19 @@ def max( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 2.0 3.0 + da (labels) bool True True True >>> ds.groupby("labels").max(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 2.0 3.0 + da (labels) bool True True True See Also -------- + numpy.max + Dataset.max :ref:`groupby` User guide on groupby operations. """ @@ -284,9 +304,12 @@ def min( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... 
coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").min() @@ -295,17 +318,19 @@ def min( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 2.0 1.0 + da (labels) bool True True True >>> ds.groupby("labels").min(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 2.0 1.0 + da (labels) bool True True True See Also -------- + numpy.min + Dataset.min :ref:`groupby` User guide on groupby operations. """ @@ -354,9 +379,12 @@ def mean( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").mean() @@ -365,17 +393,19 @@ def mean( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 2.0 2.0 + da (labels) float64 1.0 1.0 1.0 >>> ds.groupby("labels").mean(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 2.0 2.0 + da (labels) float64 1.0 1.0 1.0 See Also -------- + numpy.mean + Dataset.mean :ref:`groupby` User guide on groupby operations. """ @@ -431,9 +461,12 @@ def prod( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... 
coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").prod() @@ -442,17 +475,19 @@ def prod( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 4.0 3.0 + da (labels) int64 1 1 1 >>> ds.groupby("labels").prod(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 4.0 3.0 + da (labels) int64 1 1 1 See Also -------- + numpy.prod + Dataset.prod :ref:`groupby` User guide on groupby operations. """ @@ -509,9 +544,12 @@ def sum( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").sum() @@ -520,17 +558,19 @@ def sum( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 4.0 4.0 + da (labels) int64 2 2 2 >>> ds.groupby("labels").sum(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 4.0 4.0 + da (labels) int64 2 2 2 See Also -------- + numpy.sum + Dataset.sum :ref:`groupby` User guide on groupby operations. """ @@ -580,9 +620,12 @@ def std( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... 
np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").std() @@ -591,17 +634,19 @@ def std( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 0.0 0.0 1.0 + da (labels) float64 0.0 0.0 0.0 >>> ds.groupby("labels").std(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 0.0 1.0 + da (labels) float64 0.0 0.0 0.0 See Also -------- + numpy.std + Dataset.std :ref:`groupby` User guide on groupby operations. """ @@ -650,9 +695,12 @@ def var( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").var() @@ -661,17 +709,19 @@ def var( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 0.0 0.0 1.0 + da (labels) float64 0.0 0.0 0.0 >>> ds.groupby("labels").var(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 0.0 1.0 + da (labels) float64 0.0 0.0 0.0 See Also -------- + numpy.var + Dataset.var :ref:`groupby` User guide on groupby operations. """ @@ -720,9 +770,12 @@ def median( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... 
coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) >>> ds.groupby("labels").median() @@ -731,17 +784,19 @@ def median( Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 2.0 2.0 + da (labels) float64 1.0 1.0 1.0 >>> ds.groupby("labels").median(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 nan 2.0 2.0 + da (labels) float64 1.0 1.0 1.0 See Also -------- + numpy.median + Dataset.median :ref:`groupby` User guide on groupby operations. """ @@ -791,7 +846,8 @@ def count( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -805,6 +861,8 @@ def count( See Also -------- + numpy.count + Dataset.count :ref:`resampling` User guide on resampling operations. """ @@ -849,7 +907,8 @@ def all( ... np.array([True, True, True, True, True, False], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -863,6 +922,8 @@ def all( See Also -------- + numpy.all + Dataset.all :ref:`resampling` User guide on resampling operations. """ @@ -907,7 +968,8 @@ def any( ... np.array([True, True, True, True, True, False], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -921,6 +983,8 @@ def any( See Also -------- + numpy.any + Dataset.any :ref:`resampling` User guide on resampling operations. """ @@ -971,7 +1035,8 @@ def max( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -992,6 +1057,8 @@ def max( See Also -------- + numpy.max + Dataset.max :ref:`resampling` User guide on resampling operations. """ @@ -1043,7 +1110,8 @@ def min( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -1064,6 +1132,8 @@ def min( See Also -------- + numpy.min + Dataset.min :ref:`resampling` User guide on resampling operations. """ @@ -1115,7 +1185,8 @@ def mean( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -1136,6 +1207,8 @@ def mean( See Also -------- + numpy.mean + Dataset.mean :ref:`resampling` User guide on resampling operations. """ @@ -1194,7 +1267,8 @@ def prod( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -1215,6 +1289,8 @@ def prod( See Also -------- + numpy.prod + Dataset.prod :ref:`resampling` User guide on resampling operations. """ @@ -1274,7 +1350,8 @@ def sum( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -1295,6 +1372,8 @@ def sum( See Also -------- + numpy.sum + Dataset.sum :ref:`resampling` User guide on resampling operations. """ @@ -1347,7 +1426,8 @@ def std( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -1368,6 +1448,8 @@ def std( See Also -------- + numpy.std + Dataset.std :ref:`resampling` User guide on resampling operations. """ @@ -1419,7 +1501,8 @@ def var( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -1440,6 +1523,8 @@ def var( See Also -------- + numpy.var + Dataset.var :ref:`resampling` User guide on resampling operations. """ @@ -1491,7 +1576,8 @@ def median( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) @@ -1512,6 +1598,8 @@ def median( See Also -------- + numpy.median + Dataset.median :ref:`resampling` User guide on resampling operations. """ @@ -1558,18 +1646,23 @@ def count( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").count() - array([1, 2, 2]) + array([2, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.count + DataArray.count :ref:`groupby` User guide on groupby operations. """ @@ -1611,8 +1704,11 @@ def all( -------- >>> da = xr.DataArray( ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").all() @@ -1622,6 +1718,8 @@ def all( See Also -------- + numpy.all + DataArray.all :ref:`groupby` User guide on groupby operations. """ @@ -1663,8 +1761,11 @@ def any( -------- >>> da = xr.DataArray( ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... 
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").any() @@ -1674,6 +1775,8 @@ def any( See Also -------- + numpy.any + DataArray.any :ref:`groupby` User guide on groupby operations. """ @@ -1720,23 +1823,28 @@ def max( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").max() - array([1., 2., 3.]) + array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' >>> da.groupby("labels").max(skipna=False) - array([nan, 2., 3.]) + array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.max + DataArray.max :ref:`groupby` User guide on groupby operations. """ @@ -1784,23 +1892,28 @@ def min( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").min() - array([1., 2., 1.]) + array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' >>> da.groupby("labels").min(skipna=False) - array([nan, 2., 1.]) + array([ True, True, True]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.min + DataArray.min :ref:`groupby` User guide on groupby operations. """ @@ -1848,23 +1961,28 @@ def mean( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... 
coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").mean() - array([1., 2., 2.]) + array([1., 1., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' >>> da.groupby("labels").mean(skipna=False) - array([nan, 2., 2.]) + array([1., 1., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.mean + DataArray.mean :ref:`groupby` User guide on groupby operations. """ @@ -1919,23 +2037,28 @@ def prod( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").prod() - array([1., 4., 3.]) + array([1, 1, 1]) Coordinates: * labels (labels) object 'a' 'b' 'c' >>> da.groupby("labels").prod(skipna=False) - array([nan, 4., 3.]) + array([1, 1, 1]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.prod + DataArray.prod :ref:`groupby` User guide on groupby operations. """ @@ -1991,23 +2114,28 @@ def sum( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... 
) >>> da.groupby("labels").sum() - array([1., 4., 4.]) + array([2, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' >>> da.groupby("labels").sum(skipna=False) - array([nan, 4., 4.]) + array([2, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.sum + DataArray.sum :ref:`groupby` User guide on groupby operations. """ @@ -2056,23 +2184,28 @@ def std( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").std() - array([0., 0., 1.]) + array([0., 0., 0.]) Coordinates: * labels (labels) object 'a' 'b' 'c' >>> da.groupby("labels").std(skipna=False) - array([nan, 0., 1.]) + array([0., 0., 0.]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.std + DataArray.std :ref:`groupby` User guide on groupby operations. """ @@ -2120,23 +2253,28 @@ def var( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").var() - array([0., 0., 1.]) + array([0., 0., 0.]) Coordinates: * labels (labels) object 'a' 'b' 'c' >>> da.groupby("labels").var(skipna=False) - array([nan, 0., 1.]) + array([0., 0., 0.]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.var + DataArray.var :ref:`groupby` User guide on groupby operations. 
""" @@ -2184,23 +2322,28 @@ def median( Examples -------- >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), + ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... ) >>> da.groupby("labels").median() - array([1., 2., 2.]) + array([1., 1., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' >>> da.groupby("labels").median(skipna=False) - array([nan, 2., 2.]) + array([1., 1., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' See Also -------- + numpy.median + DataArray.median :ref:`groupby` User guide on groupby operations. """ @@ -2249,10 +2392,10 @@ def count( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").count() array([1, 3, 2]) @@ -2261,6 +2404,8 @@ def count( See Also -------- + numpy.count + DataArray.count :ref:`resampling` User guide on resampling operations. """ @@ -2304,10 +2449,10 @@ def all( ... np.array([True, True, True, True, True, False], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").all() array([ True, True, False]) @@ -2316,6 +2461,8 @@ def all( See Also -------- + numpy.all + DataArray.all :ref:`resampling` User guide on resampling operations. """ @@ -2359,10 +2506,10 @@ def any( ... 
np.array([True, True, True, True, True, False], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").any() array([ True, True, True]) @@ -2371,6 +2518,8 @@ def any( See Also -------- + numpy.any + DataArray.any :ref:`resampling` User guide on resampling operations. """ @@ -2420,10 +2569,10 @@ def max( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").max() array([ True, True, True]) @@ -2437,6 +2586,8 @@ def max( See Also -------- + numpy.max + DataArray.max :ref:`resampling` User guide on resampling operations. """ @@ -2487,10 +2638,10 @@ def min( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").min() array([ True, True, True]) @@ -2504,6 +2655,8 @@ def min( See Also -------- + numpy.min + DataArray.min :ref:`resampling` User guide on resampling operations. """ @@ -2554,10 +2707,10 @@ def mean( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... 
) - >>> da.resample(time="3M").mean() array([1., 1., 1.]) @@ -2571,6 +2724,8 @@ def mean( See Also -------- + numpy.mean + DataArray.mean :ref:`resampling` User guide on resampling operations. """ @@ -2628,10 +2783,10 @@ def prod( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").prod() array([1, 1, 1]) @@ -2645,6 +2800,8 @@ def prod( See Also -------- + numpy.prod + DataArray.prod :ref:`resampling` User guide on resampling operations. """ @@ -2703,10 +2860,10 @@ def sum( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").sum() array([1, 3, 2]) @@ -2720,6 +2877,8 @@ def sum( See Also -------- + numpy.sum + DataArray.sum :ref:`resampling` User guide on resampling operations. """ @@ -2771,10 +2930,10 @@ def std( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").std() array([0., 0., 0.]) @@ -2788,6 +2947,8 @@ def std( See Also -------- + numpy.std + DataArray.std :ref:`resampling` User guide on resampling operations. """ @@ -2838,10 +2999,10 @@ def var( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").var() array([0., 0., 0.]) @@ -2855,6 +3016,8 @@ def var( See Also -------- + numpy.var + DataArray.var :ref:`resampling` User guide on resampling operations. """ @@ -2905,10 +3068,10 @@ def median( ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), ... dims="time", ... coords=dict( - ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)) + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) - >>> da.resample(time="3M").median() array([1., 1., 1.]) @@ -2922,6 +3085,8 @@ def median( See Also -------- + numpy.median + DataArray.median :ref:`resampling` User guide on resampling operations. """ diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 5a0db39a3f5..6db6a48ce29 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -8,6 +8,7 @@ import collections import textwrap +from functools import partial from typing import Callable, Optional MODULE_PREAMBLE = '''\ @@ -97,6 +98,8 @@ def {method}( See Also -------- + numpy.{method} + {obj}.{method} :ref:`{docref}` User guide on {docref} operations. """ @@ -109,46 +112,15 @@ def {method}( ''' -def generate_groupby_example(obj, method): +def generate_groupby_example(obj: str, cls: str, method: str): """Generate examples for method.""" dx = "ds" if obj == "Dataset" else "da" - calculation = f'{dx}.groupby("labels").{method}()' - - if method in BOOL_REDUCE_METHODS: - create_da = """ - >>> da = xr.DataArray( - ... np.array([True, True, True, True, True, False], dtype=bool), - ... dims="x", - ... coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), - ... )""" - else: - create_da = """ - >>> da = xr.DataArray( - ... [1, 2, 3, 1, 2, np.nan], - ... dims="x", - ... 
coords=dict(labels=("x", np.array(["a", "b", "c", "c", "b", "a"]))), - ... )""" - - if obj == "Dataset": - maybe_dataset = """ - >>> ds = xr.Dataset(dict(da=da))""" - else: - maybe_dataset = "" - - if method in NAN_REDUCE_METHODS: - maybe_skipna = f""" - >>> {dx}.groupby("labels").{method}(skipna=False)""" + if cls == "Resample": + calculation = f'{dx}.resample(time="3M").{method}' + elif cls == "GroupBy": + calculation = f'{dx}.groupby("labels").{method}' else: - maybe_skipna = "" - - return f"""{create_da}{maybe_dataset} - >>> {calculation}{maybe_skipna}""" - - -def generate_resample_example(obj: str, method: str): - """Generate examples for method.""" - dx = "ds" if obj == "Dataset" else "da" - calculation = f'{dx}.resample(time="3M").{method}()' + raise ValueError if method in BOOL_REDUCE_METHODS: np_array = """ @@ -161,23 +133,26 @@ def generate_resample_example(obj: str, method: str): create_da = f""" >>> da = xr.DataArray({np_array} ... dims="time", - ... coords=dict(time=("time", pd.date_range("01-01-2001", freq="M", periods=6))), + ... coords=dict( + ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), ... 
)""" if obj == "Dataset": - maybe_dataset = ">>> ds = xr.Dataset(dict(da=da))" + maybe_dataset = """ + >>> ds = xr.Dataset(dict(da=da))""" else: maybe_dataset = "" if method in NAN_REDUCE_METHODS: maybe_skipna = f""" - >>> {dx}.resample(time="3M").{method}(skipna=False)""" + >>> {calculation}(skipna=False)""" else: maybe_skipna = "" - return f"""{create_da} - {maybe_dataset} - >>> {calculation}{maybe_skipna}""" + return f"""{create_da}{maybe_dataset} + >>> {calculation}(){maybe_skipna}""" def generate_method( @@ -227,7 +202,7 @@ def generate_method( skip_na=skip_na, min_count=min_count, numeric_only_call=numeric_only_call, - example=example_generator(obj, method), + example=example_generator(obj=obj, method=method), ) @@ -262,11 +237,14 @@ def render(obj: str, cls: str, docref: str, example_generator: Callable): if __name__ == "__main__": print(MODULE_PREAMBLE) for obj in ["Dataset", "DataArray"]: - for cls, docref, examplegen in ( - ("GroupBy", "groupby", generate_groupby_example), - ("Resample", "resampling", generate_resample_example), + for cls, docref in ( + ("GroupBy", "groupby"), + ("Resample", "resampling"), ): for line in render( - obj=obj, cls=cls, docref=docref, example_generator=examplegen + obj=obj, + cls=cls, + docref=docref, + example_generator=partial(generate_groupby_example, cls=cls), ): print(line) From 499b4880cd65e134dc13bf41e15f4b765bbc6454 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sat, 23 Oct 2021 17:11:20 -0700 Subject: [PATCH 10/16] Fix mypy & black --- xarray/core/_reductions.py | 116 ++++++++++++++++++----------- xarray/util/generate_reductions.py | 44 ++++++++--- 2 files changed, 104 insertions(+), 56 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 8d164cf0fc4..74f5c1e1183 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,17 +1,30 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. 
Do not edit manually. -from typing import Optional +from typing import Optional, Callable, Union, Sequence, Hashable, Protocol, Any from . import duck_array_ops from .types import T_DataArray, T_Dataset +class DatasetReduce(Protocol): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_Dataset: + ... + + class DatasetGroupByReductions: __slots__ = () def count( - self, + self: DatasetReduce, dim=None, keep_attrs=None, **kwargs, @@ -72,7 +85,7 @@ def count( ) def all( - self, + self: DatasetReduce, dim=None, keep_attrs=None, **kwargs, @@ -133,7 +146,7 @@ def all( ) def any( - self, + self: DatasetReduce, dim=None, keep_attrs=None, **kwargs, @@ -194,7 +207,7 @@ def any( ) def max( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -269,7 +282,7 @@ def max( ) def min( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -344,7 +357,7 @@ def min( ) def mean( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -419,7 +432,7 @@ def mean( ) def prod( - self, + self: DatasetReduce, dim=None, skipna: bool = True, min_count: Optional[int] = None, @@ -502,7 +515,7 @@ def prod( ) def sum( - self, + self: DatasetReduce, dim=None, skipna: bool = True, min_count: Optional[int] = None, @@ -585,7 +598,7 @@ def sum( ) def std( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -660,7 +673,7 @@ def std( ) def var( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -735,7 +748,7 @@ def var( ) def median( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -814,7 +827,7 @@ class DatasetResampleReductions: __slots__ = () def count( - self, + self: DatasetReduce, dim=None, keep_attrs=None, **kwargs, @@ -875,7 +888,7 @@ def count( 
) def all( - self, + self: DatasetReduce, dim=None, keep_attrs=None, **kwargs, @@ -936,7 +949,7 @@ def all( ) def any( - self, + self: DatasetReduce, dim=None, keep_attrs=None, **kwargs, @@ -997,7 +1010,7 @@ def any( ) def max( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1072,7 +1085,7 @@ def max( ) def min( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1147,7 +1160,7 @@ def min( ) def mean( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1222,7 +1235,7 @@ def mean( ) def prod( - self, + self: DatasetReduce, dim=None, skipna: bool = True, min_count: Optional[int] = None, @@ -1305,7 +1318,7 @@ def prod( ) def sum( - self, + self: DatasetReduce, dim=None, skipna: bool = True, min_count: Optional[int] = None, @@ -1388,7 +1401,7 @@ def sum( ) def std( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1463,7 +1476,7 @@ def std( ) def var( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1538,7 +1551,7 @@ def var( ) def median( - self, + self: DatasetReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1613,11 +1626,24 @@ def median( ) +class DataArrayReduce(Protocol): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_DataArray: + ... 
+ + class DataArrayGroupByReductions: __slots__ = () def count( - self, + self: DataArrayReduce, dim=None, keep_attrs=None, **kwargs, @@ -1674,7 +1700,7 @@ def count( ) def all( - self, + self: DataArrayReduce, dim=None, keep_attrs=None, **kwargs, @@ -1731,7 +1757,7 @@ def all( ) def any( - self, + self: DataArrayReduce, dim=None, keep_attrs=None, **kwargs, @@ -1788,7 +1814,7 @@ def any( ) def max( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1857,7 +1883,7 @@ def max( ) def min( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1926,7 +1952,7 @@ def min( ) def mean( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -1995,7 +2021,7 @@ def mean( ) def prod( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, min_count: Optional[int] = None, @@ -2072,7 +2098,7 @@ def prod( ) def sum( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, min_count: Optional[int] = None, @@ -2149,7 +2175,7 @@ def sum( ) def std( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -2218,7 +2244,7 @@ def std( ) def var( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -2287,7 +2313,7 @@ def var( ) def median( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -2360,7 +2386,7 @@ class DataArrayResampleReductions: __slots__ = () def count( - self, + self: DataArrayReduce, dim=None, keep_attrs=None, **kwargs, @@ -2417,7 +2443,7 @@ def count( ) def all( - self, + self: DataArrayReduce, dim=None, keep_attrs=None, **kwargs, @@ -2474,7 +2500,7 @@ def all( ) def any( - self, + self: DataArrayReduce, dim=None, keep_attrs=None, **kwargs, @@ -2531,7 +2557,7 @@ def any( ) def max( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -2600,7 +2626,7 @@ def max( ) def min( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, 
keep_attrs=None, @@ -2669,7 +2695,7 @@ def min( ) def mean( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -2738,7 +2764,7 @@ def mean( ) def prod( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, min_count: Optional[int] = None, @@ -2815,7 +2841,7 @@ def prod( ) def sum( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, min_count: Optional[int] = None, @@ -2892,7 +2918,7 @@ def sum( ) def std( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -2961,7 +2987,7 @@ def std( ) def var( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, @@ -3030,7 +3056,7 @@ def var( ) def median( - self, + self: DataArrayReduce, dim=None, skipna: bool = True, keep_attrs=None, diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 6db6a48ce29..b22a86f0aa9 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -4,6 +4,13 @@ Usage: python xarray/util/generate_reductions.py > xarray/core/_reductions.py + pytest --doctest-modules xarray/core/_reductions.py --accept || true + pytest --doctest-modules xarray/core/_reductions.py --accept + +This requires [pytest-accept](https://github.com/max-sixty/pytest-accept). +The second run of pytest is deliberate, since the first will return an error +while replacing the doctests. + """ import collections @@ -15,11 +22,26 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import Optional +from typing import Optional, Callable, Union, Sequence, Hashable, Protocol, Any from . 
import duck_array_ops from .types import T_DataArray, T_Dataset''' +OBJ_PREAMBLE = """ + +class {obj}Reduce(Protocol): + def reduce( + self, + func: Callable[..., Any], + dim: Union[None, Hashable, Sequence[Hashable]] = None, + axis: Union[None, int, Sequence[int]] = None, + keep_attrs: bool = None, + keepdims: bool = False, + **kwargs: Any, + ) -> T_{obj}: + ...""" + + CLASS_PREAMBLE = """ class {obj}{cls}Reductions: @@ -67,8 +89,8 @@ class {obj}{cls}Reductions: TEMPLATE_REDUCTION = ''' def {method}( - self, - dim=None, {skip_na.kwarg}{min_count.kwarg} + self: {obj}Reduce, + dim=None,{skip_na.kwarg}{min_count.kwarg} keep_attrs=None, **kwargs, ) -> T_{obj}: @@ -108,8 +130,7 @@ def {method}( dim=dim,{skip_na.call}{min_count.call}{numeric_only_call} keep_attrs=keep_attrs, **kwargs, - ) -''' + )''' def generate_groupby_example(obj: str, cls: str, method: str): @@ -168,9 +189,9 @@ def generate_method( if obj == "Dataset": if method in NUMERIC_ONLY_METHODS: - numeric_only_call = "numeric_only=True," + numeric_only_call = "\n numeric_only=True," else: - numeric_only_call = "numeric_only=False," + numeric_only_call = "\n numeric_only=False," else: numeric_only_call = "" @@ -178,8 +199,8 @@ def generate_method( if skipna: skip_na = kwarg( docs=textwrap.indent(_SKIPNA_DOCSTRING, " "), - kwarg="skipna: bool=True, ", - call="skipna=skipna,", + kwarg="\n skipna: bool = True,", + call="\n skipna=skipna,", ) else: skip_na = kwarg(docs="", kwarg="", call="") @@ -187,8 +208,8 @@ def generate_method( if method in MIN_COUNT_METHODS: min_count = kwarg( docs=textwrap.indent(_MINCOUNT_DOCSTRING, " "), - kwarg="min_count: Optional[int]=None, ", - call="min_count=min_count,", + kwarg="\n min_count: Optional[int] = None,", + call="\n min_count=min_count,", ) else: min_count = kwarg(docs="", kwarg="", call="") @@ -237,6 +258,7 @@ def render(obj: str, cls: str, docref: str, example_generator: Callable): if __name__ == "__main__": print(MODULE_PREAMBLE) for obj in ["Dataset", "DataArray"]: + 
print(OBJ_PREAMBLE.format(obj=obj)) for cls, docref in ( ("GroupBy", "groupby"), ("Resample", "resampling"), From ed10140709dbed4c2298786ae50feeb3c15e1b25 Mon Sep 17 00:00:00 2001 From: dcherian Date: Sun, 24 Oct 2021 10:56:28 +0530 Subject: [PATCH 11/16] Fix isort + update whats-new --- doc/whats-new.rst | 4 +++- xarray/core/_reductions.py | 2 +- xarray/util/generate_reductions.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1271ce37b91..344d4fe2185 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -87,7 +87,9 @@ Documentation - Users are instructed to try ``use_cftime=True`` if a ``TypeError`` occurs when combining datasets and one of the types involved is a subclass of ``cftime.datetime`` (:pull:`5776`). By `Zeb Nicholls `_. - Better examples in docstrings for groupby and resampling reductions. - By `Deepak Cherian `_. + By `Deepak Cherian `_, + `Maximilian Roos `_, + `Jimmy Westling `_ . - A clearer error is now raised if a user attempts to assign a Dataset to a single key of another Dataset. (:pull:`5839`) By `Tom Nicholas `_. diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 74f5c1e1183..1da8038c2cf 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,7 +1,7 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import Optional, Callable, Union, Sequence, Hashable, Protocol, Any +from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union from . 
import duck_array_ops from .types import T_DataArray, T_Dataset diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index b22a86f0aa9..2afae42a2fc 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -22,7 +22,7 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import Optional, Callable, Union, Sequence, Hashable, Protocol, Any +from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union from . import duck_array_ops from .types import T_DataArray, T_Dataset''' From a12ba89565ace090b1250bb611d71c536714fa87 Mon Sep 17 00:00:00 2001 From: dcherian Date: Mon, 25 Oct 2021 23:08:54 +0530 Subject: [PATCH 12/16] More typing --- xarray/core/_reductions.py | 176 ++++++++++++++--------------- xarray/util/generate_reductions.py | 4 +- 2 files changed, 90 insertions(+), 90 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 1da8038c2cf..969be9358ee 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -25,8 +25,8 @@ class DatasetGroupByReductions: def count( self: DatasetReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -86,8 +86,8 @@ def count( def all( self: DatasetReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -147,8 +147,8 @@ def all( def any( self: DatasetReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -208,9 +208,9 @@ def any( def max( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> 
T_Dataset: """ @@ -283,9 +283,9 @@ def max( def min( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -358,9 +358,9 @@ def min( def mean( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -433,10 +433,10 @@ def mean( def prod( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, min_count: Optional[int] = None, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -516,10 +516,10 @@ def prod( def sum( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, min_count: Optional[int] = None, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -599,9 +599,9 @@ def sum( def std( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -674,9 +674,9 @@ def std( def var( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -749,9 +749,9 @@ def var( def median( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -828,8 +828,8 @@ class DatasetResampleReductions: def count( self: DatasetReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -889,8 +889,8 @@ def count( def all( self: DatasetReduce, - dim=None, - keep_attrs=None, + dim: 
Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -950,8 +950,8 @@ def all( def any( self: DatasetReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1011,9 +1011,9 @@ def any( def max( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1086,9 +1086,9 @@ def max( def min( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1161,9 +1161,9 @@ def min( def mean( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1236,10 +1236,10 @@ def mean( def prod( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, min_count: Optional[int] = None, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1319,10 +1319,10 @@ def prod( def sum( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, min_count: Optional[int] = None, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1402,9 +1402,9 @@ def sum( def std( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1477,9 +1477,9 @@ def std( def var( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1552,9 +1552,9 @@ def 
var( def median( self: DatasetReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_Dataset: """ @@ -1644,8 +1644,8 @@ class DataArrayGroupByReductions: def count( self: DataArrayReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -1701,8 +1701,8 @@ def count( def all( self: DataArrayReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -1758,8 +1758,8 @@ def all( def any( self: DataArrayReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -1815,9 +1815,9 @@ def any( def max( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -1884,9 +1884,9 @@ def max( def min( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -1953,9 +1953,9 @@ def min( def mean( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2022,10 +2022,10 @@ def mean( def prod( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, min_count: Optional[int] = None, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2099,10 +2099,10 @@ def prod( def sum( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, min_count: 
Optional[int] = None, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2176,9 +2176,9 @@ def sum( def std( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2245,9 +2245,9 @@ def std( def var( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2314,9 +2314,9 @@ def var( def median( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2387,8 +2387,8 @@ class DataArrayResampleReductions: def count( self: DataArrayReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2444,8 +2444,8 @@ def count( def all( self: DataArrayReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2501,8 +2501,8 @@ def all( def any( self: DataArrayReduce, - dim=None, - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2558,9 +2558,9 @@ def any( def max( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2627,9 +2627,9 @@ def max( def min( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2696,9 +2696,9 @@ def min( def mean( self: DataArrayReduce, - dim=None, + 
dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2765,10 +2765,10 @@ def mean( def prod( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, min_count: Optional[int] = None, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2842,10 +2842,10 @@ def prod( def sum( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, min_count: Optional[int] = None, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2919,9 +2919,9 @@ def sum( def std( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -2988,9 +2988,9 @@ def std( def var( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ @@ -3057,9 +3057,9 @@ def var( def median( self: DataArrayReduce, - dim=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None, skipna: bool = True, - keep_attrs=None, + keep_attrs: bool = None, **kwargs, ) -> T_DataArray: """ diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 2afae42a2fc..8e17216772d 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -90,8 +90,8 @@ class {obj}{cls}Reductions: TEMPLATE_REDUCTION = ''' def {method}( self: {obj}Reduce, - dim=None,{skip_na.kwarg}{min_count.kwarg} - keep_attrs=None, + dim: Union[None, Hashable, Sequence[Hashable]] = None,{skip_na.kwarg}{min_count.kwarg} + keep_attrs: bool = None, **kwargs, ) -> T_{obj}: """ From 88894227a7a2121a527babdf4014af6619235263 Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 
27 Oct 2021 16:13:28 +0530 Subject: [PATCH 13/16] Update docstrings. min_count examples; --- xarray/core/_reductions.py | 772 +++++++++++++++++++++++++---- xarray/util/generate_reductions.py | 30 +- 2 files changed, 692 insertions(+), 110 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 969be9358ee..143ebf4c4e4 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -36,6 +36,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -53,7 +54,7 @@ def count( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -61,13 +62,22 @@ def count( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").count() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 2 2 2 + da (labels) int64 1 2 2 See Also -------- @@ -97,6 +107,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -122,6 +133,15 @@ def all( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.groupby("labels").all() Dimensions: (labels: 3) @@ -158,6 +178,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -183,6 +204,15 @@ def any( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").any() Dimensions: (labels: 3) @@ -220,6 +250,7 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -242,7 +273,7 @@ def max( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -250,20 +281,32 @@ def max( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").max() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) bool True True True + da (labels) float64 1.0 2.0 3.0 + + Use ``skipna`` to control whether NaNs are ignored. 
+ >>> ds.groupby("labels").max(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) bool True True True + da (labels) float64 nan 2.0 3.0 See Also -------- @@ -295,6 +338,7 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -317,7 +361,7 @@ def min( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -325,20 +369,32 @@ def min( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").min() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) bool True True True + da (labels) float64 1.0 2.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.groupby("labels").min(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) bool True True True + da (labels) float64 nan 2.0 1.0 See Also -------- @@ -370,6 +426,7 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -392,7 +449,7 @@ def mean( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... 
np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -400,20 +457,32 @@ def mean( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").mean() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 1.0 1.0 + da (labels) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.groupby("labels").mean(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 1.0 1.0 + da (labels) float64 nan 2.0 2.0 See Also -------- @@ -446,6 +515,7 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -474,7 +544,7 @@ def prod( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -482,20 +552,42 @@ def prod( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").prod() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 1 1 1 + da (labels) float64 1.0 4.0 3.0 + + Use ``skipna`` to control whether NaNs are ignored. 
+ >>> ds.groupby("labels").prod(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 1 1 1 + da (labels) float64 nan 4.0 3.0 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.groupby("labels").prod(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 3.0 See Also -------- @@ -529,6 +621,7 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -557,7 +650,7 @@ def sum( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -565,20 +658,42 @@ def sum( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").sum() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 2 2 2 + da (labels) float64 1.0 4.0 4.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.groupby("labels").sum(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) int64 2 2 2 + da (labels) float64 nan 4.0 4.0 + + Specify ``min_count`` for finer control over when NaNs are ignored. 
+ + >>> ds.groupby("labels").sum(skipna=True, min_count=2) + + Dimensions: (labels: 3) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + Data variables: + da (labels) float64 nan 4.0 4.0 See Also -------- @@ -611,6 +726,7 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -633,7 +749,7 @@ def std( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -641,20 +757,32 @@ def std( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").std() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 0.0 0.0 0.0 + da (labels) float64 0.0 0.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.groupby("labels").std(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 0.0 0.0 0.0 + da (labels) float64 nan 0.0 1.0 See Also -------- @@ -686,6 +814,7 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -708,7 +837,7 @@ def var( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... 
np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -716,20 +845,32 @@ def var( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").var() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 0.0 0.0 0.0 + da (labels) float64 0.0 0.0 1.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.groupby("labels").var(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 0.0 0.0 0.0 + da (labels) float64 nan 0.0 1.0 See Also -------- @@ -761,6 +902,7 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -783,7 +925,7 @@ def median( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -791,20 +933,32 @@ def median( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.groupby("labels").median() Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 1.0 1.0 + da (labels) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. 
+ >>> ds.groupby("labels").median(skipna=False) Dimensions: (labels: 3) Coordinates: * labels (labels) object 'a' 'b' 'c' Data variables: - da (labels) float64 1.0 1.0 1.0 + da (labels) float64 nan 2.0 2.0 See Also -------- @@ -839,6 +993,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -856,7 +1011,7 @@ def count( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -864,13 +1019,22 @@ def count( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").count() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) int64 1 3 2 + da (time) int64 1 3 1 See Also -------- @@ -900,6 +1064,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -925,6 +1090,15 @@ def all( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").all() Dimensions: (time: 3) @@ -961,6 +1135,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. 
+ For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -986,6 +1161,15 @@ def any( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").any() Dimensions: (time: 3) @@ -1023,6 +1207,7 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1045,7 +1230,7 @@ def max( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -1053,20 +1238,32 @@ def max( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").max() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) bool True True True + da (time) float64 1.0 3.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.resample(time="3M").max(skipna=False) Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) bool True True True + da (time) float64 1.0 3.0 nan See Also -------- @@ -1098,6 +1295,7 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. + For e.g. 
``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1120,7 +1318,7 @@ def min( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -1128,20 +1326,32 @@ def min( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").min() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) bool True True True + da (time) float64 1.0 1.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.resample(time="3M").min(skipna=False) Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) bool True True True + da (time) float64 1.0 1.0 nan See Also -------- @@ -1173,6 +1383,7 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1195,7 +1406,7 @@ def mean( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -1203,20 +1414,32 @@ def mean( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3M").mean() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 1.0 1.0 + da (time) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.resample(time="3M").mean(skipna=False) Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 1.0 1.0 + da (time) float64 1.0 2.0 nan See Also -------- @@ -1249,6 +1472,7 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1277,7 +1501,7 @@ def prod( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -1285,20 +1509,42 @@ def prod( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").prod() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) int64 1 1 1 + da (time) float64 1.0 6.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.resample(time="3M").prod(skipna=False) Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) int64 1 1 1 + da (time) float64 1.0 6.0 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. 
+ + >>> ds.resample(time="3M").prod(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan See Also -------- @@ -1332,6 +1578,7 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1360,7 +1607,7 @@ def sum( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -1368,20 +1615,42 @@ def sum( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").sum() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) int64 1 3 2 + da (time) float64 1.0 6.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.resample(time="3M").sum(skipna=False) Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) int64 1 3 2 + da (time) float64 1.0 6.0 nan + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> ds.resample(time="3M").sum(skipna=True, min_count=2) + + Dimensions: (time: 3) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + Data variables: + da (time) float64 nan 6.0 nan See Also -------- @@ -1414,6 +1683,7 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. + For e.g. 
``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1436,7 +1706,7 @@ def std( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -1444,20 +1714,32 @@ def std( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").std() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 0.0 0.0 0.0 + da (time) float64 0.0 0.8165 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.resample(time="3M").std(skipna=False) Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 0.0 0.0 0.0 + da (time) float64 0.0 0.8165 nan See Also -------- @@ -1489,6 +1771,7 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1511,7 +1794,7 @@ def var( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -1519,20 +1802,32 @@ def var( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 
2001-06-30 + labels (time) >> ds.resample(time="3M").var() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 0.0 0.0 0.0 + da (time) float64 0.0 0.6667 0.0 + + Use ``skipna`` to control whether NaNs are ignored. + >>> ds.resample(time="3M").var(skipna=False) Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 0.0 0.0 0.0 + da (time) float64 0.0 0.6667 nan See Also -------- @@ -1564,6 +1859,7 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1586,7 +1882,7 @@ def median( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), @@ -1594,20 +1890,32 @@ def median( ... ), ... ) >>> ds = xr.Dataset(dict(da=da)) + >>> ds + + Dimensions: (time: 6) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> ds.resample(time="3M").median() Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 1.0 1.0 + da (time) float64 1.0 2.0 2.0 + + Use ``skipna`` to control whether NaNs are ignored. 
+ >>> ds.resample(time="3M").median(skipna=False) Dimensions: (time: 3) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 Data variables: - da (time) float64 1.0 1.0 1.0 + da (time) float64 1.0 2.0 nan See Also -------- @@ -1655,6 +1963,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1672,16 +1981,23 @@ def count( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").count() - array([2, 2, 2]) + array([1, 2, 2]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -1712,6 +2028,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1736,6 +2053,13 @@ def all( ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").all() array([False, True, True]) @@ -1769,6 +2093,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. 
+ For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1793,6 +2118,13 @@ def any( ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").any() array([ True, True, True]) @@ -1827,6 +2159,7 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1849,21 +2182,31 @@ def max( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").max() - array([ True, True, True]) + array([1., 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.groupby("labels").max(skipna=False) - array([ True, True, True]) + array([nan, 2., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -1896,6 +2239,7 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -1918,21 +2262,31 @@ def min( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").min() - array([ True, True, True]) + array([1., 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.groupby("labels").min(skipna=False) - array([ True, True, True]) + array([nan, 2., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -1965,6 +2319,7 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1987,21 +2342,31 @@ def mean( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").mean() - array([1., 1., 1.]) + array([1., 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. 
+ >>> da.groupby("labels").mean(skipna=False) - array([1., 1., 1.]) + array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2035,6 +2400,7 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2063,21 +2429,39 @@ def prod( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").prod() - array([1, 1, 1]) + array([1., 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.groupby("labels").prod(skipna=False) - array([1, 1, 1]) + array([nan, 4., 3.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.groupby("labels").prod(skipna=True, min_count=2) + + array([nan, 4., 3.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2112,6 +2496,7 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2140,21 +2525,39 @@ def sum( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... 
np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").sum() - array([2, 2, 2]) + array([1., 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.groupby("labels").sum(skipna=False) - array([2, 2, 2]) + array([nan, 4., 4.]) + Coordinates: + * labels (labels) object 'a' 'b' 'c' + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.groupby("labels").sum(skipna=True, min_count=2) + + array([nan, 4., 4.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2188,6 +2591,7 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2210,21 +2614,31 @@ def std( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").std() - array([0., 0., 0.]) + array([0., 0., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. 
+ >>> da.groupby("labels").std(skipna=False) - array([0., 0., 0.]) + array([nan, 0., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2257,6 +2671,7 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2279,21 +2694,31 @@ def var( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").var() - array([0., 0., 0.]) + array([0., 0., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.groupby("labels").var(skipna=False) - array([0., 0., 0.]) + array([nan, 0., 1.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2326,6 +2751,7 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2348,21 +2774,31 @@ def median( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... 
) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.groupby("labels").median() - array([1., 1., 1.]) + array([1., 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.groupby("labels").median(skipna=False) - array([1., 1., 1.]) + array([nan, 2., 2.]) Coordinates: * labels (labels) object 'a' 'b' 'c' @@ -2398,6 +2834,7 @@ def count( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``count``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2415,16 +2852,23 @@ def count( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").count() - array([1, 3, 2]) + array([1, 3, 1]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 @@ -2455,6 +2899,7 @@ def all( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``all``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2479,6 +2924,13 @@ def all( ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... 
) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").all() array([ True, True, False]) @@ -2512,6 +2964,7 @@ def any( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``any``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2536,6 +2989,13 @@ def any( ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ True, True, True, True, True, False]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").any() array([ True, True, True]) @@ -2570,6 +3030,7 @@ def max( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``max``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2592,21 +3053,31 @@ def max( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").max() - array([ True, True, True]) + array([1., 3., 2.]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. 
+ >>> da.resample(time="3M").max(skipna=False) - array([ True, True, True]) + array([ 1., 3., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 @@ -2639,6 +3110,7 @@ def min( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``min``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2661,21 +3133,31 @@ def min( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").min() - array([ True, True, True]) + array([1., 1., 2.]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.resample(time="3M").min(skipna=False) - array([ True, True, True]) + array([ 1., 1., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 @@ -2708,6 +3190,7 @@ def mean( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``mean``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2730,21 +3213,31 @@ def mean( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... 
time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").mean() - array([1., 1., 1.]) + array([1., 2., 2.]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.resample(time="3M").mean(skipna=False) - array([1., 1., 1.]) + array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 @@ -2778,6 +3271,7 @@ def prod( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``prod``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2806,21 +3300,39 @@ def prod( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").prod() - array([1, 1, 1]) + array([1., 6., 2.]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.resample(time="3M").prod(skipna=False) - array([1, 1, 1]) + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Specify ``min_count`` for finer control over when NaNs are ignored. 
+ + >>> da.resample(time="3M").prod(skipna=True, min_count=2) + + array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 @@ -2855,6 +3367,7 @@ def sum( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``sum``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2883,21 +3396,39 @@ def sum( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").sum() - array([1, 3, 2]) + array([1., 6., 2.]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.resample(time="3M").sum(skipna=False) - array([1, 3, 2]) + array([ 1., 6., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> da.resample(time="3M").sum(skipna=True, min_count=2) + + array([nan, 6., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 @@ -2931,6 +3462,7 @@ def std( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``std``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -2953,21 +3485,31 @@ def std( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").std() - array([0., 0., 0.]) + array([0. , 0.81649658, 0. ]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.resample(time="3M").std(skipna=False) - array([0., 0., 0.]) + array([0. , 0.81649658, nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 @@ -3000,6 +3542,7 @@ def var( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``var``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3022,21 +3565,31 @@ def var( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").var() - array([0., 0., 0.]) + array([0. , 0.66666667, 0. 
]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. + >>> da.resample(time="3M").var(skipna=False) - array([0., 0., 0.]) + array([0. , 0.66666667, nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 @@ -3069,6 +3622,7 @@ def median( ---------- dim : hashable or iterable of hashable, optional Name of dimension[s] along which to apply ``median``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]`` skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3091,21 +3645,31 @@ def median( Examples -------- >>> da = xr.DataArray( - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool), + ... np.array([1, 2, 3, 1, 2, np.nan]), ... dims="time", ... coords=dict( ... time=("time", pd.date_range("01-01-2001", freq="M", periods=6)), ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ... ), ... ) + >>> da + + array([ 1., 2., 3., 1., 2., nan]) + Coordinates: + * time (time) datetime64[ns] 2001-01-31 2001-02-28 ... 2001-06-30 + labels (time) >> da.resample(time="3M").median() - array([1., 1., 1.]) + array([1., 2., 2.]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 + + Use ``skipna`` to control whether NaNs are ignored. 
+ >>> da.resample(time="3M").median(skipna=False) - array([1., 1., 1.]) + array([ 1., 2., nan]) Coordinates: * time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31 diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 8e17216772d..b9eb6f90776 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -100,7 +100,8 @@ def {method}( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``{method}``.{extra_args}{skip_na.docs}{min_count.docs} + Name of dimension[s] along which to apply ``{method}``. + For e.g. ``dim="x"`` or ``dim=["x", "y"]``{extra_args}{skip_na.docs}{min_count.docs} keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -149,7 +150,7 @@ def generate_groupby_example(obj: str, cls: str, method: str): else: np_array = """ - ... np.array([1, 2, 3, 1, 2, np.nan], dtype=bool),""" + ... np.array([1, 2, 3, 1, 2, np.nan]),""" create_da = f""" >>> da = xr.DataArray({np_array} @@ -162,18 +163,35 @@ def generate_groupby_example(obj: str, cls: str, method: str): if obj == "Dataset": maybe_dataset = """ - >>> ds = xr.Dataset(dict(da=da))""" + >>> ds = xr.Dataset(dict(da=da)) + >>> ds""" else: - maybe_dataset = "" + maybe_dataset = """ + >>> da""" if method in NAN_REDUCE_METHODS: maybe_skipna = f""" + + Use ``skipna`` to control whether NaNs are ignored. + >>> {calculation}(skipna=False)""" else: maybe_skipna = "" - return f"""{create_da}{maybe_dataset} - >>> {calculation}(){maybe_skipna}""" + if method in MIN_COUNT_METHODS: + maybe_mincount = f""" + + Specify ``min_count`` for finer control over when NaNs are ignored. 
+ + >>> {calculation}(skipna=True, min_count=2)""" + else: + maybe_mincount = "" + + return ( + f"""{create_da}{maybe_dataset} + + >>> {calculation}(){maybe_skipna}{maybe_mincount}""" + ) def generate_method( From 85b63b6be7c9a368df0d57fed33a79a278eb50df Mon Sep 17 00:00:00 2001 From: dcherian Date: Wed, 27 Oct 2021 10:08:43 -0700 Subject: [PATCH 14/16] more docstring update --- xarray/core/_reductions.py | 220 +++++++++++++++++------------ xarray/util/generate_reductions.py | 6 +- 2 files changed, 136 insertions(+), 90 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 143ebf4c4e4..83aaa10a20c 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -35,8 +35,9 @@ def count( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``count``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -106,8 +107,9 @@ def all( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``all``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -177,8 +179,9 @@ def any( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``any``. - For e.g. 
``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -249,8 +252,9 @@ def max( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``max``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -337,8 +341,9 @@ def min( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``min``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -425,8 +430,9 @@ def mean( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``mean``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -514,8 +520,9 @@ def prod( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``prod``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -620,8 +627,9 @@ def sum( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``sum``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -725,8 +733,9 @@ def std( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``std``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -813,8 +822,9 @@ def var( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``var``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -901,8 +911,9 @@ def median( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -992,8 +1003,9 @@ def count( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``count``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1063,8 +1075,9 @@ def all( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``all``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1134,8 +1147,9 @@ def any( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``any``. - For e.g. 
``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -1206,8 +1220,9 @@ def max( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``max``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1294,8 +1309,9 @@ def min( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``min``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1382,8 +1398,9 @@ def mean( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``mean``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -1471,8 +1488,9 @@ def prod( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``prod``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1577,8 +1595,9 @@ def sum( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``sum``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1682,8 +1701,9 @@ def std( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``std``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1770,8 +1790,9 @@ def var( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``var``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1858,8 +1879,9 @@ def median( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -1962,8 +1984,9 @@ def count( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``count``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2027,8 +2050,9 @@ def all( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``all``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2092,8 +2116,9 @@ def any( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``any``. 
- For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2158,8 +2183,9 @@ def max( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``max``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2238,8 +2264,9 @@ def min( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``min``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2318,8 +2345,9 @@ def mean( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``mean``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -2399,8 +2427,9 @@ def prod( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``prod``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2495,8 +2524,9 @@ def sum( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``sum``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2590,8 +2620,9 @@ def std( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``std``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2670,8 +2701,9 @@ def var( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``var``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2750,8 +2782,9 @@ def median( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -2833,8 +2866,9 @@ def count( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``count``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2898,8 +2932,9 @@ def all( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``all``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -2963,8 +2998,9 @@ def any( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``any``. 
- For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -3029,8 +3065,9 @@ def max( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``max``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3109,8 +3146,9 @@ def min( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``min``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3189,8 +3227,9 @@ def mean( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``mean``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not @@ -3270,8 +3309,9 @@ def prod( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``prod``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3366,8 +3406,9 @@ def sum( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``sum``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3461,8 +3502,9 @@ def std( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``std``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3541,8 +3583,9 @@ def var( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``var``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. 
If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not @@ -3621,8 +3664,9 @@ def median( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``median``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]`` + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If ``None``, will reduce over all dimensions + present in the grouped variable. skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index b9eb6f90776..580166e915d 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -100,8 +100,8 @@ def {method}( Parameters ---------- dim : hashable or iterable of hashable, optional - Name of dimension[s] along which to apply ``{method}``. - For e.g. ``dim="x"`` or ``dim=["x", "y"]``{extra_args}{skip_na.docs}{min_count.docs} + Name of dimension[s] along which to apply ``{method}``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. {extra_dim}{extra_args}{skip_na.docs}{min_count.docs} keep_attrs : bool, optional If True, ``attrs`` will be copied from the original object to the new one. If False (default), the new object will be @@ -237,6 +237,8 @@ def generate_method( docref=docref, method=method, array_method=array_method, + extra_dim="""If ``None``, will reduce over all dimensions + present in the grouped variable.""", extra_args="", skip_na=skip_na, min_count=min_count, From 8106a4abd8be9e43784ea4c72f3d9d02856f5d60 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 29 Oct 2021 10:48:12 -0600 Subject: [PATCH 15/16] Fix typing. 
--- xarray/core/_reductions.py | 8 +++++++- xarray/util/generate_reductions.py | 14 +++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/xarray/core/_reductions.py b/xarray/core/_reductions.py index 83aaa10a20c..67fbbd482d0 100644 --- a/xarray/core/_reductions.py +++ b/xarray/core/_reductions.py @@ -1,11 +1,17 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union +import sys +from typing import Any, Callable, Hashable, Optional, Sequence, Union from . import duck_array_ops from .types import T_DataArray, T_Dataset +if sys.version_info >= (3, 8): + from typing import Protocol +else: + from typing_extensions import Protocol + class DatasetReduce(Protocol): def reduce( diff --git a/xarray/util/generate_reductions.py b/xarray/util/generate_reductions.py index 580166e915d..72449195d1e 100644 --- a/xarray/util/generate_reductions.py +++ b/xarray/util/generate_reductions.py @@ -22,10 +22,16 @@ """Mixin classes with reduction operations.""" # This file was generated using xarray.util.generate_reductions. Do not edit manually. -from typing import Any, Callable, Hashable, Optional, Protocol, Sequence, Union +import sys +from typing import Any, Callable, Hashable, Optional, Sequence, Union from . 
import duck_array_ops -from .types import T_DataArray, T_Dataset''' +from .types import T_DataArray, T_Dataset + +if sys.version_info >= (3, 8): + from typing import Protocol +else: + from typing_extensions import Protocol''' OBJ_PREAMBLE = """ @@ -187,11 +193,9 @@ def generate_groupby_example(obj: str, cls: str, method: str): else: maybe_mincount = "" - return ( - f"""{create_da}{maybe_dataset} + return f"""{create_da}{maybe_dataset} >>> {calculation}(){maybe_skipna}{maybe_mincount}""" - ) def generate_method( From 59f8586a747b1f0a7b5e42e0ee186a32a3a36819 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 5 Nov 2021 11:18:16 -0600 Subject: [PATCH 16/16] fix whats-new --- doc/whats-new.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b2bc3784684..0128e70caed 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -39,6 +39,11 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Better examples in docstrings for groupby and resampling reductions (:pull:`5871`). + By `Deepak Cherian `_, + `Maximilian Roos `_, + `Jimmy Westling `_ . + Internal Changes ~~~~~~~~~~~~~~~~ @@ -174,10 +179,6 @@ Documentation - Users are instructed to try ``use_cftime=True`` if a ``TypeError`` occurs when combining datasets and one of the types involved is a subclass of ``cftime.datetime`` (:pull:`5776`). By `Zeb Nicholls `_. -- Better examples in docstrings for groupby and resampling reductions. - By `Deepak Cherian `_, - `Maximilian Roos `_, - `Jimmy Westling `_ . - A clearer error is now raised if a user attempts to assign a Dataset to a single key of another Dataset. (:pull:`5839`) By `Tom Nicholas `_.