CLN: Enforce deprecation of argmin/max and idxmin/max with NA values (#…

…57971) * CLN: Enforce deprecation of argmin/max and idxmin/max with NA values * Docstrings
pandas-dev · Mar 25, 2024 · 63cad6b · 63cad6b
1 parent 2750652
commit 63cad6b
Show file tree

Hide file tree

Showing 10 changed files with 139 additions and 241 deletions.
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -259,6 +259,7 @@ Removal of prior version deprecations/changes
 - Removed deprecated keyword ``verbose`` on :func:`read_csv` and :func:`read_table` (:issue:`56556`)
 - Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
 - Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
+- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise in such cases (:issue:`33941`, :issue:`51276`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.performance:

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -14,7 +14,6 @@
     final,
     overload,
 )
-import warnings
 
 import numpy as np
 
@@ -35,7 +34,6 @@
     cache_readonly,
     doc,
 )
-from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import can_hold_element
 from pandas.core.dtypes.common import (
@@ -686,7 +684,8 @@ def argmax(
         axis : {{None}}
             Unused. Parameter needed for compatibility with DataFrame.
         skipna : bool, default True
-            Exclude NA/null values when showing the result.
+            Exclude NA/null values. If the entire Series is NA, or if ``skipna=False``
+            and there is an NA value, this method will raise a ``ValueError``.
         *args, **kwargs
             Additional arguments and keywords for compatibility with NumPy.
 
@@ -736,28 +735,15 @@ def argmax(
         nv.validate_minmax_axis(axis)
         skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
 
+        if skipna and len(delegate) > 0 and isna(delegate).all():
+            raise ValueError("Encountered all NA values")
+        elif not skipna and isna(delegate).any():
+            raise ValueError("Encountered an NA value with skipna=False")
+
         if isinstance(delegate, ExtensionArray):
-            if not skipna and delegate.isna().any():
-                warnings.warn(
-                    f"The behavior of {type(self).__name__}.argmax/argmin "
-                    "with skipna=False and NAs, or with all-NAs is deprecated. "
-                    "In a future version this will raise ValueError.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                return -1
-            else:
-                return delegate.argmax()
+            return delegate.argmax()
         else:
             result = nanops.nanargmax(delegate, skipna=skipna)
-            if result == -1:
-                warnings.warn(
-                    f"The behavior of {type(self).__name__}.argmax/argmin "
-                    "with skipna=False and NAs, or with all-NAs is deprecated. "
-                    "In a future version this will raise ValueError.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
             # error: Incompatible return value type (got "Union[int, ndarray]", expected
             # "int")
             return result  # type: ignore[return-value]
@@ -770,28 +756,15 @@ def argmin(
         nv.validate_minmax_axis(axis)
         skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
 
+        if skipna and len(delegate) > 0 and isna(delegate).all():
+            raise ValueError("Encountered all NA values")
+        elif not skipna and isna(delegate).any():
+            raise ValueError("Encountered an NA value with skipna=False")
+
         if isinstance(delegate, ExtensionArray):
-            if not skipna and delegate.isna().any():
-                warnings.warn(
-                    f"The behavior of {type(self).__name__}.argmax/argmin "
-                    "with skipna=False and NAs, or with all-NAs is deprecated. "
-                    "In a future version this will raise ValueError.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                return -1
-            else:
-                return delegate.argmin()
+            return delegate.argmin()
         else:
             result = nanops.nanargmin(delegate, skipna=skipna)
-            if result == -1:
-                warnings.warn(
-                    f"The behavior of {type(self).__name__}.argmax/argmin "
-                    "with skipna=False and NAs, or with all-NAs is deprecated. "
-                    "In a future version this will raise ValueError.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
             # error: Incompatible return value type (got "Union[int, ndarray]", expected
             # "int")
             return result  # type: ignore[return-value]

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -6976,16 +6976,10 @@ def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
 
         if not self._is_multi and self.hasnans:
             # Take advantage of cache
-            mask = self._isnan
-            if not skipna or mask.all():
-                warnings.warn(
-                    f"The behavior of {type(self).__name__}.argmax/argmin "
-                    "with skipna=False and NAs, or with all-NAs is deprecated. "
-                    "In a future version this will raise ValueError.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                return -1
+            if self._isnan.all():
+                raise ValueError("Encountered all NA values")
+            elif not skipna:
+                raise ValueError("Encountered an NA value with skipna=False")
         return super().argmin(skipna=skipna)
 
     @Appender(IndexOpsMixin.argmax.__doc__)
@@ -6995,16 +6989,10 @@ def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
 
         if not self._is_multi and self.hasnans:
             # Take advantage of cache
-            mask = self._isnan
-            if not skipna or mask.all():
-                warnings.warn(
-                    f"The behavior of {type(self).__name__}.argmax/argmin "
-                    "with skipna=False and NAs, or with all-NAs is deprecated. "
-                    "In a future version this will raise ValueError.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
-                return -1
+            if self._isnan.all():
+                raise ValueError("Encountered all NA values")
+            elif not skipna:
+                raise ValueError("Encountered an NA value with skipna=False")
         return super().argmax(skipna=skipna)
 
     def min(self, axis=None, skipna: bool = True, *args, **kwargs):

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -1441,17 +1441,18 @@ def _maybe_arg_null_out(
     if axis is None or not getattr(result, "ndim", False):
         if skipna:
             if mask.all():
-                return -1
+                raise ValueError("Encountered all NA values")
         else:
             if mask.any():
-                return -1
+                raise ValueError("Encountered an NA value with skipna=False")
     else:
-        if skipna:
-            na_mask = mask.all(axis)
-        else:
-            na_mask = mask.any(axis)
+        na_mask = mask.all(axis)
         if na_mask.any():
-            result[na_mask] = -1
+            raise ValueError("Encountered all NA values")
+        elif not skipna:
+            na_mask = mask.any(axis)
+            if na_mask.any():
+                raise ValueError("Encountered an NA value with skipna=False")
     return result
 
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2333,8 +2333,8 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
         axis : {0 or 'index'}
             Unused. Parameter needed for compatibility with DataFrame.
         skipna : bool, default True
-            Exclude NA/null values. If the entire Series is NA, the result
-            will be NA.
+            Exclude NA/null values. If the entire Series is NA, or if ``skipna=False``
+            and there is an NA value, this method will raise a ``ValueError``.
         *args, **kwargs
             Additional arguments and keywords have no effect but might be
             accepted for compatibility with NumPy.
@@ -2376,32 +2376,10 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
 
         >>> s.idxmin()
         'A'
-
-        If `skipna` is False and there is an NA value in the data,
-        the function returns ``nan``.
-
-        >>> s.idxmin(skipna=False)
-        nan
         """
         axis = self._get_axis_number(axis)
-        with warnings.catch_warnings():
-            # TODO(3.0): this catching/filtering can be removed
-            # ignore warning produced by argmin since we will issue a different
-            #  warning for idxmin
-            warnings.simplefilter("ignore")
-            i = self.argmin(axis, skipna, *args, **kwargs)
-
-        if i == -1:
-            # GH#43587 give correct NA value for Index.
-            warnings.warn(
-                f"The behavior of {type(self).__name__}.idxmin with all-NA "
-                "values, or any-NA and skipna=False, is deprecated. In a future "
-                "version this will raise ValueError",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            return self.index._na_value
-        return self.index[i]
+        iloc = self.argmin(axis, skipna, *args, **kwargs)
+        return self.index[iloc]
 
     def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
         """
@@ -2415,8 +2393,8 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
         axis : {0 or 'index'}
             Unused. Parameter needed for compatibility with DataFrame.
         skipna : bool, default True
-            Exclude NA/null values. If the entire Series is NA, the result
-            will be NA.
+            Exclude NA/null values. If the entire Series is NA, or if ``skipna=False``
+            and there is an NA value, this method will raise a ``ValueError``.
         *args, **kwargs
             Additional arguments and keywords have no effect but might be
             accepted for compatibility with NumPy.
@@ -2459,32 +2437,10 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
 
         >>> s.idxmax()
         'C'
-
-        If `skipna` is False and there is an NA value in the data,
-        the function returns ``nan``.
-
-        >>> s.idxmax(skipna=False)
-        nan
         """
         axis = self._get_axis_number(axis)
-        with warnings.catch_warnings():
-            # TODO(3.0): this catching/filtering can be removed
-            # ignore warning produced by argmax since we will issue a different
-            #  warning for argmax
-            warnings.simplefilter("ignore")
-            i = self.argmax(axis, skipna, *args, **kwargs)
-
-        if i == -1:
-            # GH#43587 give correct NA value for Index.
-            warnings.warn(
-                f"The behavior of {type(self).__name__}.idxmax with all-NA "
-                "values, or any-NA and skipna=False, is deprecated. In a future "
-                "version this will raise ValueError",
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-            return self.index._na_value
-        return self.index[i]
+        iloc = self.argmax(axis, skipna, *args, **kwargs)
+        return self.index[iloc]
 
     def round(self, decimals: int = 0, *args, **kwargs) -> Series:
         """

diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -692,8 +692,8 @@
     axis : {{0 or 'index', 1 or 'columns'}}, default 0
         The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
     skipna : bool, default True
-        Exclude NA/null values. If an entire row/column is NA, the result
-        will be NA.
+        Exclude NA/null values. If the entire Series is NA, or if ``skipna=False``
+        and there is an NA value, this method will raise a ``ValueError``.
     numeric_only : bool, default {numeric_only_default}
         Include only `float`, `int` or `boolean` data.
 
@@ -757,8 +757,8 @@
     axis : {{0 or 'index', 1 or 'columns'}}, default 0
         The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
     skipna : bool, default True
-        Exclude NA/null values. If an entire row/column is NA, the result
-        will be NA.
+        Exclude NA/null values. If the entire Series is NA, or if ``skipna=False``
+        and there is an NA value, this method will raise a ``ValueError``.
     numeric_only : bool, default {numeric_only_default}
         Include only `float`, `int` or `boolean` data.
 

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -169,8 +169,8 @@ def test_argmin_argmax_all_na(self, method, data, na_value):
             ("idxmin", True, 2),
             ("argmax", True, 0),
             ("argmin", True, 2),
-            ("idxmax", False, np.nan),
-            ("idxmin", False, np.nan),
+            ("idxmax", False, -1),
+            ("idxmin", False, -1),
             ("argmax", False, -1),
             ("argmin", False, -1),
         ],
@@ -179,17 +179,13 @@ def test_argreduce_series(
         self, data_missing_for_sorting, op_name, skipna, expected
     ):
         # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
-        warn = None
-        msg = "The behavior of Series.argmax/argmin"
-        if op_name.startswith("arg") and expected == -1:
-            warn = FutureWarning
-        if op_name.startswith("idx") and np.isnan(expected):
-            warn = FutureWarning
-            msg = f"The behavior of Series.{op_name}"
         ser = pd.Series(data_missing_for_sorting)
-        with tm.assert_produces_warning(warn, match=msg):
+        if expected == -1:
+            with pytest.raises(ValueError, match="Encountered an NA value"):
+                getattr(ser, op_name)(skipna=skipna)
+        else:
             result = getattr(ser, op_name)(skipna=skipna)
-        tm.assert_almost_equal(result, expected)
+            tm.assert_almost_equal(result, expected)
 
     def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting):
         # GH#38733