From e0c163947d481625d51f666bc1d14e1fecde8c5f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 1 Sep 2020 18:56:40 -0700 Subject: [PATCH] REF: handle axis=None case inside DataFrame.any/all to simplify _reduce (#35899) * REF: remove unnecessary try/except * TST: add test for agg on ordered categorical cols (#35630) * TST: resample does not yield empty groups (#10603) (#35799) * revert accidental rebase * REF: handle axis=None cases inside DataFrame.all/any * annotate * dummy commit to force Travis Co-authored-by: Karthik Mathur <22126205+mathurk1@users.noreply.github.com> Co-authored-by: tkmz-n <60312218+tkmz-n@users.noreply.github.com> --- pandas/core/frame.py | 61 +++++++++++++++--------------------------- pandas/core/generic.py | 8 ++++++ 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 312d449e36022..e78c15d125e8d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8617,14 +8617,11 @@ def _reduce( cols = self.columns[~dtype_is_dt] self = self[cols] - if axis is None and filter_type == "bool": - labels = None - constructor = None - else: - # TODO: Make other agg func handle axis=None properly - axis = self._get_axis_number(axis) - labels = self._get_agg_axis(axis) - constructor = self._constructor + # TODO: Make other agg func handle axis=None properly + axis = self._get_axis_number(axis) + labels = self._get_agg_axis(axis) + constructor = self._constructor + assert axis in [0, 1] def func(values): if is_extension_array_dtype(values.dtype): @@ -8632,7 +8629,7 @@ def func(values): else: return op(values, axis=axis, skipna=skipna, **kwds) - def _get_data(axis_matters): + def _get_data(axis_matters: bool) -> "DataFrame": if filter_type is None: data = self._get_numeric_data() elif filter_type == "bool": @@ -8649,7 +8646,7 @@ def _get_data(axis_matters): raise NotImplementedError(msg) return data - if numeric_only is not None and axis in [0, 1]: + if numeric_only is not None: df = 
self if numeric_only is True: df = _get_data(axis_matters=True) @@ -8675,6 +8672,8 @@ def blk_func(values): out[:] = coerce_to_dtypes(out.values, df.dtypes) return out + assert numeric_only is None + if not self._is_homogeneous_type or self._mgr.any_extension_types: # try to avoid self.values call @@ -8702,40 +8701,24 @@ def blk_func(values): result = result.iloc[0].rename(None) return result - if numeric_only is None: - data = self - values = data.values - - try: - result = func(values) - - except TypeError: - # e.g. in nanops trying to convert strs to float + data = self + values = data.values - # TODO: why doesnt axis matter here? - data = _get_data(axis_matters=False) - labels = data._get_agg_axis(axis) + try: + result = func(values) - values = data.values - with np.errstate(all="ignore"): - result = func(values) + except TypeError: + # e.g. in nanops trying to convert strs to float - else: - if numeric_only: - data = _get_data(axis_matters=True) - labels = data._get_agg_axis(axis) + # TODO: why doesnt axis matter here? + data = _get_data(axis_matters=False) + labels = data._get_agg_axis(axis) - values = data.values - else: - data = self - values = data.values - result = func(values) + values = data.values + with np.errstate(all="ignore"): + result = func(values) - if filter_type == "bool" and is_object_dtype(values) and axis is None: - # work around https://github.com/numpy/numpy/issues/10489 - # TODO: can we de-duplicate parts of this with the next blocK? 
- result = np.bool_(result) - elif hasattr(result, "dtype") and is_object_dtype(result.dtype): + if is_object_dtype(result.dtype): try: if filter_type is None: result = result.astype(np.float64) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3bad2d6dd18b9..c80a95f79a7a0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11499,6 +11499,14 @@ def logical_func(self, axis=0, bool_only=None, skipna=True, level=None, **kwargs "Option bool_only is not implemented with option level." ) return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) + + if self.ndim > 1 and axis is None: + # Reduce along one dimension then the other, to simplify DataFrame._reduce + res = logical_func( + self, axis=0, bool_only=bool_only, skipna=skipna, **kwargs + ) + return logical_func(res, skipna=skipna, **kwargs) + return self._reduce( func, name=name,