From 1007fb64dc7ff584b324d3df9a8df609e10d6022 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 14 Jun 2023 09:58:38 -0700 Subject: [PATCH] DEPR: method, limit in NDFrame.replace (#53492) * DEPR: method, limit in NDFrame.replace * update test, docs * suppress doctest warning * doctests --- doc/source/user_guide/missing_data.rst | 7 ---- doc/source/whatsnew/v2.1.0.rst | 2 + pandas/conftest.py | 2 + pandas/core/generic.py | 33 +++++++++++++++ pandas/core/shared_docs.py | 7 ++++ pandas/tests/frame/methods/test_replace.py | 11 ++++- pandas/tests/frame/test_subclass.py | 4 +- pandas/tests/series/methods/test_replace.py | 45 ++++++++++++++++----- 8 files changed, 91 insertions(+), 20 deletions(-) diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst index ed58554896a4f..443fdd4f59e3f 100644 --- a/doc/source/user_guide/missing_data.rst +++ b/doc/source/user_guide/missing_data.rst @@ -551,13 +551,6 @@ For a DataFrame, you can specify individual values by column: df.replace({"a": 0, "b": 5}, 100) -Instead of replacing with specified values, you can treat all given values as -missing and interpolate over them: - -.. ipython:: python - - ser.replace([1, 2, 3], method="pad") - .. _missing_data.replace_expression: String/regular expression replacement diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index ceda799ebb959..806abf670f32f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -286,9 +286,11 @@ Deprecations - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`) - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`) - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`) +- Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`) - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) +- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) - diff --git a/pandas/conftest.py b/pandas/conftest.py index fbef2fb272ed6..ed05ddd1b2f31 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -134,6 +134,8 @@ def pytest_collection_modifyitems(items, config) -> None: ("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"), ("is_categorical_dtype", "is_categorical_dtype is deprecated"), ("is_sparse", "is_sparse is deprecated"), + ("NDFrame.replace", "The 'method' keyword"), + ("NDFrame.replace", "Series.replace without 'value'"), # Docstring divides by zero to show behavior difference ("missing.mask_zero_div_zero", "divide by zero encountered"), ( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a859c7323c31d..f53d935c42c06 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -7481,6 +7481,39 @@ def replace( regex: bool_t = False, method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default, ) -> Self | None: + if method is not lib.no_default: + warnings.warn( + # GH#33302 + f"The 'method' keyword in {type(self).__name__}.replace is " + "deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + elif limit is not None: + warnings.warn( + # GH#33302 + f"The 'limit' keyword in {type(self).__name__}.replace is " + "deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if ( + value is lib.no_default + and method is lib.no_default + and not is_dict_like(to_replace) + and regex is False + ): + # case that goes through _replace_single and defaults to method="pad" + warnings.warn( + # GH#33302 + f"{type(self).__name__}.replace without 'value' and with " + "non-dict-like 'to_replace' is deprecated " + "and will raise in a future version. " + "Explicitly specify the new values instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + if not ( is_scalar(to_replace) or is_re_compilable(to_replace) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 7bddaad780b8c..7579f816d0ace 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -562,6 +562,8 @@ {inplace} limit : int, default None Maximum size gap to forward or backward fill. + + .. deprecated:: 2.1.0 regex : bool or same types as `to_replace`, default False Whether to interpret `to_replace` and/or `value` as regular expressions. If this is ``True`` then `to_replace` *must* be a @@ -572,6 +574,8 @@ The method to use when for replacement, when `to_replace` is a scalar, list or tuple and `value` is ``None``. + .. deprecated:: 2.1.0 + Returns ------- {klass} @@ -766,6 +770,9 @@ 4 b dtype: object + .. deprecated:: 2.1.0 + The 'method' parameter and padding behavior are deprecated. + On the other hand, if ``None`` is explicitly passed for ``value``, it will be respected: diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index d5668020bab5d..9256df72cdf7b 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1236,7 +1236,9 @@ def test_replace_method(self, to_replace, method, expected): # GH 19632 df = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]}) - result = df.replace(to_replace=to_replace, value=None, method=method) + msg = "The 'method' keyword in DataFrame.replace is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.replace(to_replace=to_replace, value=None, method=method) expected = DataFrame(expected) tm.assert_frame_equal(result, expected) @@ -1327,8 +1329,13 @@ def test_replace_invalid_to_replace(self): r"Expecting 'to_replace' to be either a scalar, array-like, " r"dict or None, got invalid type.*" ) + msg2 = ( + "DataFrame.replace without 'value' and with non-dict-like " + "'to_replace' is deprecated" + ) with pytest.raises(TypeError, match=msg): - df.replace(lambda x: x.strip()) + with tm.assert_produces_warning(FutureWarning, match=msg2): + df.replace(lambda x: x.strip()) @pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"]) @pytest.mark.parametrize("value", [np.nan, pd.NA]) diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 5c44a957b9373..3d1e9d26c1ea6 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -732,7 +732,9 @@ def test_equals_subclass(self): def test_replace_list_method(self): # https://github.com/pandas-dev/pandas/pull/46018 df = tm.SubclassedDataFrame({"A": [0, 1, 2]}) - result = df.replace([1, 2], method="ffill") + msg = "The 'method' keyword in SubclassedDataFrame.replace is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.replace([1, 2], method="ffill") expected = tm.SubclassedDataFrame({"A": [0, 0, 0]}) assert isinstance(result, tm.SubclassedDataFrame) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 2880e3f3e85db..d3cdae63d26f3 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -131,12 +131,18 @@ def test_replace_gh5319(self): # GH 5319 ser = pd.Series([0, np.nan, 2, 3, 4]) expected = ser.ffill() - result = ser.replace([np.nan]) + msg = ( + "Series.replace without 'value' and with non-dict-like " + "'to_replace' is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.replace([np.nan]) tm.assert_series_equal(result, expected) ser = pd.Series([0, np.nan, 2, 3, 4]) expected = ser.ffill() - result = ser.replace(np.nan) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = ser.replace(np.nan) tm.assert_series_equal(result, expected) def test_replace_datetime64(self): @@ -169,11 +175,17 @@ def test_replace_timedelta_td64(self): def test_replace_with_single_list(self): ser = pd.Series([0, 1, 2, 3, 4]) - result = ser.replace([1, 2, 3]) + msg2 = ( + "Series.replace without 'value' and with non-dict-like " + "'to_replace' is deprecated" + ) + with tm.assert_produces_warning(FutureWarning, match=msg2): + result = ser.replace([1, 2, 3]) tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4])) s = ser.copy() - return_value = s.replace([1, 2, 3], inplace=True) + with tm.assert_produces_warning(FutureWarning, match=msg2): + return_value = s.replace([1, 2, 3], inplace=True) assert return_value is None tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4])) @@ -183,8 +195,10 @@ def test_replace_with_single_list(self): r"Invalid fill method\. Expecting pad \(ffill\) or backfill " r"\(bfill\)\. Got crash_cymbal" ) + msg3 = "The 'method' keyword in Series.replace is deprecated" with pytest.raises(ValueError, match=msg): - return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal") + with tm.assert_produces_warning(FutureWarning, match=msg3): + return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal") assert return_value is None tm.assert_series_equal(s, ser) @@ -450,8 +464,13 @@ def test_replace_invalid_to_replace(self): r"Expecting 'to_replace' to be either a scalar, array-like, " r"dict or None, got invalid type.*" ) + msg2 = ( + "Series.replace without 'value' and with non-dict-like " + "'to_replace' is deprecated" + ) with pytest.raises(TypeError, match=msg): - series.replace(lambda x: x.strip()) + with tm.assert_produces_warning(FutureWarning, match=msg2): + series.replace(lambda x: x.strip()) @pytest.mark.parametrize("frame", [False, True]) def test_replace_nonbool_regex(self, frame): @@ -502,19 +521,25 @@ def test_replace_extension_other(self, frame_or_series): def _check_replace_with_method(self, ser: pd.Series): df = ser.to_frame() - res = ser.replace(ser[1], method="pad") + msg1 = "The 'method' keyword in Series.replace is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg1): + res = ser.replace(ser[1], method="pad") expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype) tm.assert_series_equal(res, expected) - res_df = df.replace(ser[1], method="pad") + msg2 = "The 'method' keyword in DataFrame.replace is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg2): + res_df = df.replace(ser[1], method="pad") tm.assert_frame_equal(res_df, expected.to_frame()) ser2 = ser.copy() - res2 = ser2.replace(ser[1], method="pad", inplace=True) + with tm.assert_produces_warning(FutureWarning, match=msg1): + res2 = ser2.replace(ser[1], method="pad", inplace=True) assert res2 is None tm.assert_series_equal(ser2, expected) - res_df2 = df.replace(ser[1], method="pad", inplace=True) + with tm.assert_produces_warning(FutureWarning, match=msg2): + res_df2 = df.replace(ser[1], method="pad", inplace=True) assert res_df2 is None tm.assert_frame_equal(df, expected.to_frame())