Skip to content

Commit

Permalink
DEPR: method, limit in NDFrame.replace (pandas-dev#53492)
Browse files Browse the repository at this point in the history
* DEPR: method, limit in NDFrame.replace

* update test, docs

* suppress doctest warning

* doctests
  • Loading branch information
jbrockmendel authored and im-vinicius committed Jul 8, 2023
1 parent 0294273 commit 376bbf7
Show file tree
Hide file tree
Showing 8 changed files with 91 additions and 20 deletions.
7 changes: 0 additions & 7 deletions doc/source/user_guide/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -551,13 +551,6 @@ For a DataFrame, you can specify individual values by column:
df.replace({"a": 0, "b": 5}, 100)
Instead of replacing with specified values, you can treat all given values as
missing and interpolate over them:

.. ipython:: python
ser.replace([1, 2, 3], method="pad")
.. _missing_data.replace_expression:

String/regular expression replacement
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,11 @@ Deprecations
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
- Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
- Deprecated option "mode.use_inf_as_na"; convert inf entries to ``NaN`` beforehand instead (:issue:`51684`)
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
- Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
- Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
-

Expand Down
2 changes: 2 additions & 0 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ def pytest_collection_modifyitems(items, config) -> None:
("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
("is_sparse", "is_sparse is deprecated"),
("NDFrame.replace", "The 'method' keyword"),
("NDFrame.replace", "Series.replace without 'value'"),
# Docstring divides by zero to show behavior difference
("missing.mask_zero_div_zero", "divide by zero encountered"),
(
Expand Down
33 changes: 33 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7470,6 +7470,39 @@ def replace(
regex: bool_t = False,
method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
) -> Self | None:
if method is not lib.no_default:
warnings.warn(
# GH#33302
f"The 'method' keyword in {type(self).__name__}.replace is "
"deprecated and will be removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
elif limit is not None:
warnings.warn(
# GH#33302
f"The 'limit' keyword in {type(self).__name__}.replace is "
"deprecated and will be removed in a future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
if (
value is lib.no_default
and method is lib.no_default
and not is_dict_like(to_replace)
and regex is False
):
# case that goes through _replace_single and defaults to method="pad"
warnings.warn(
# GH#33302
f"{type(self).__name__}.replace without 'value' and with "
"non-dict-like 'to_replace' is deprecated "
"and will raise in a future version. "
"Explicitly specify the new values instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

if not (
is_scalar(to_replace)
or is_re_compilable(to_replace)
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,8 @@
{inplace}
limit : int, default None
Maximum size gap to forward or backward fill.
.. deprecated:: 2.1.0
regex : bool or same types as `to_replace`, default False
Whether to interpret `to_replace` and/or `value` as regular
expressions. If this is ``True`` then `to_replace` *must* be a
Expand All @@ -572,6 +574,8 @@
The method to use for replacement, when `to_replace` is a
scalar, list or tuple and `value` is ``None``.
.. deprecated:: 2.1.0
Returns
-------
{klass}
Expand Down Expand Up @@ -766,6 +770,9 @@
4 b
dtype: object
.. deprecated:: 2.1.0
The 'method' parameter and padding behavior are deprecated.
On the other hand, if ``None`` is explicitly passed for ``value``, it will
be respected:
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1236,7 +1236,9 @@ def test_replace_method(self, to_replace, method, expected):
# GH 19632
df = DataFrame({"A": [0, 1, 2], "B": [5, np.nan, 7], "C": ["a", "b", "c"]})

result = df.replace(to_replace=to_replace, value=None, method=method)
msg = "The 'method' keyword in DataFrame.replace is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.replace(to_replace=to_replace, value=None, method=method)
expected = DataFrame(expected)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1327,8 +1329,13 @@ def test_replace_invalid_to_replace(self):
r"Expecting 'to_replace' to be either a scalar, array-like, "
r"dict or None, got invalid type.*"
)
msg2 = (
"DataFrame.replace without 'value' and with non-dict-like "
"'to_replace' is deprecated"
)
with pytest.raises(TypeError, match=msg):
df.replace(lambda x: x.strip())
with tm.assert_produces_warning(FutureWarning, match=msg2):
df.replace(lambda x: x.strip())

@pytest.mark.parametrize("dtype", ["float", "float64", "int64", "Int64", "boolean"])
@pytest.mark.parametrize("value", [np.nan, pd.NA])
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/frame/test_subclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,9 @@ def test_equals_subclass(self):
def test_replace_list_method(self):
# https://github.com/pandas-dev/pandas/pull/46018
df = tm.SubclassedDataFrame({"A": [0, 1, 2]})
result = df.replace([1, 2], method="ffill")
msg = "The 'method' keyword in SubclassedDataFrame.replace is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.replace([1, 2], method="ffill")
expected = tm.SubclassedDataFrame({"A": [0, 0, 0]})
assert isinstance(result, tm.SubclassedDataFrame)
tm.assert_frame_equal(result, expected)
45 changes: 35 additions & 10 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,18 @@ def test_replace_gh5319(self):
# GH 5319
ser = pd.Series([0, np.nan, 2, 3, 4])
expected = ser.ffill()
result = ser.replace([np.nan])
msg = (
"Series.replace without 'value' and with non-dict-like "
"'to_replace' is deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace([np.nan])
tm.assert_series_equal(result, expected)

ser = pd.Series([0, np.nan, 2, 3, 4])
expected = ser.ffill()
result = ser.replace(np.nan)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace(np.nan)
tm.assert_series_equal(result, expected)

def test_replace_datetime64(self):
Expand Down Expand Up @@ -169,11 +175,17 @@ def test_replace_timedelta_td64(self):

def test_replace_with_single_list(self):
ser = pd.Series([0, 1, 2, 3, 4])
result = ser.replace([1, 2, 3])
msg2 = (
"Series.replace without 'value' and with non-dict-like "
"'to_replace' is deprecated"
)
with tm.assert_produces_warning(FutureWarning, match=msg2):
result = ser.replace([1, 2, 3])
tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))

s = ser.copy()
return_value = s.replace([1, 2, 3], inplace=True)
with tm.assert_produces_warning(FutureWarning, match=msg2):
return_value = s.replace([1, 2, 3], inplace=True)
assert return_value is None
tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))

Expand All @@ -183,8 +195,10 @@ def test_replace_with_single_list(self):
r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
r"\(bfill\)\. Got crash_cymbal"
)
msg3 = "The 'method' keyword in Series.replace is deprecated"
with pytest.raises(ValueError, match=msg):
return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
with tm.assert_produces_warning(FutureWarning, match=msg3):
return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
assert return_value is None
tm.assert_series_equal(s, ser)

Expand Down Expand Up @@ -450,8 +464,13 @@ def test_replace_invalid_to_replace(self):
r"Expecting 'to_replace' to be either a scalar, array-like, "
r"dict or None, got invalid type.*"
)
msg2 = (
"Series.replace without 'value' and with non-dict-like "
"'to_replace' is deprecated"
)
with pytest.raises(TypeError, match=msg):
series.replace(lambda x: x.strip())
with tm.assert_produces_warning(FutureWarning, match=msg2):
series.replace(lambda x: x.strip())

@pytest.mark.parametrize("frame", [False, True])
def test_replace_nonbool_regex(self, frame):
Expand Down Expand Up @@ -502,19 +521,25 @@ def test_replace_extension_other(self, frame_or_series):
def _check_replace_with_method(self, ser: pd.Series):
df = ser.to_frame()

res = ser.replace(ser[1], method="pad")
msg1 = "The 'method' keyword in Series.replace is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg1):
res = ser.replace(ser[1], method="pad")
expected = pd.Series([ser[0], ser[0]] + list(ser[2:]), dtype=ser.dtype)
tm.assert_series_equal(res, expected)

res_df = df.replace(ser[1], method="pad")
msg2 = "The 'method' keyword in DataFrame.replace is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg2):
res_df = df.replace(ser[1], method="pad")
tm.assert_frame_equal(res_df, expected.to_frame())

ser2 = ser.copy()
res2 = ser2.replace(ser[1], method="pad", inplace=True)
with tm.assert_produces_warning(FutureWarning, match=msg1):
res2 = ser2.replace(ser[1], method="pad", inplace=True)
assert res2 is None
tm.assert_series_equal(ser2, expected)

res_df2 = df.replace(ser[1], method="pad", inplace=True)
with tm.assert_produces_warning(FutureWarning, match=msg2):
res_df2 = df.replace(ser[1], method="pad", inplace=True)
assert res_df2 is None
tm.assert_frame_equal(df, expected.to_frame())

Expand Down

0 comments on commit 376bbf7

Please sign in to comment.