Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST (string dtype): resolve xfails for frame fillna and replace tests + fix bug in replace for string #60295

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 2 additions & 0 deletions pandas/core/array_algos/replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,6 @@ def re_replacer(s):
if mask is None:
values[:] = f(values)
else:
if values.ndim != mask.ndim:
mask = np.broadcast_to(mask, values.shape)
values[mask] = f(values[mask])
7 changes: 7 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1688,6 +1688,13 @@ def where(self, other, cond) -> list[Block]:
if isinstance(self.dtype, (IntervalDtype, StringDtype)):
# TestSetitemFloatIntervalWithIntIntervalValues
blk = self.coerce_to_target_dtype(orig_other, raise_on_upcast=False)
if (
self.ndim == 2
and isinstance(orig_cond, np.ndarray)
and orig_cond.ndim == 1
and not is_1d_only_ea_dtype(blk.dtype)
):
orig_cond = orig_cond[:, None]
return blk.where(orig_other, orig_cond)

elif isinstance(self, NDArrayBackedExtensionBlock):
Expand Down
57 changes: 22 additions & 35 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
Categorical,
DataFrame,
Expand Down Expand Up @@ -65,15 +63,20 @@ def test_fillna_datetime(self, datetime_frame):
with pytest.raises(TypeError, match=msg):
datetime_frame.fillna()

# TODO(infer_string) test as actual error instead of xfail
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
def test_fillna_mixed_type(self, float_string_frame):
def test_fillna_mixed_type(self, float_string_frame, using_infer_string):
mf = float_string_frame
mf.loc[mf.index[5:20], "foo"] = np.nan
mf.loc[mf.index[-10:], "A"] = np.nan
# TODO: make stronger assertion here, GH 25640
mf.fillna(value=0)
mf.ffill()

result = mf.ffill()
assert (
result.loc[result.index[-10:], "A"] == result.loc[result.index[-11], "A"]
).all()
assert (result.loc[result.index[5:20], "foo"] == "bar").all()

result = mf.fillna(value=0)
assert (result.loc[result.index[-10:], "A"] == 0).all()
assert (result.loc[result.index[5:20], "foo"] == 0).all()

def test_fillna_mixed_float(self, mixed_float_frame):
# mixed numeric (but no float16)
Expand All @@ -84,28 +87,21 @@ def test_fillna_mixed_float(self, mixed_float_frame):
result = mf.ffill()
_check_mixed_float(result, dtype={"C": None})

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_fillna_different_dtype(self, using_infer_string):
def test_fillna_different_dtype(self):
# with different dtype (GH#3386)
df = DataFrame(
[["a", "a", np.nan, "a"], ["b", "b", np.nan, "b"], ["c", "c", np.nan, "c"]]
)

if using_infer_string:
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
result = df.fillna({2: "foo"})
else:
result = df.fillna({2: "foo"})
result = df.fillna({2: "foo"})
expected = DataFrame(
[["a", "a", "foo", "a"], ["b", "b", "foo", "b"], ["c", "c", "foo", "c"]]
)
# column is originally float (all-NaN) -> filling with string gives object dtype
expected[2] = expected[2].astype("object")
tm.assert_frame_equal(result, expected)

if using_infer_string:
with tm.assert_produces_warning(FutureWarning, match="Downcasting"):
return_value = df.fillna({2: "foo"}, inplace=True)
else:
return_value = df.fillna({2: "foo"}, inplace=True)
return_value = df.fillna({2: "foo"}, inplace=True)
tm.assert_frame_equal(df, expected)
assert return_value is None

Expand Down Expand Up @@ -276,8 +272,7 @@ def test_fillna_dictlike_value_duplicate_colnames(self, columns):
expected["A"] = 0.0
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_fillna_dtype_conversion(self, using_infer_string):
def test_fillna_dtype_conversion(self):
# make sure that fillna on an empty frame works
df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
result = df.dtypes
Expand All @@ -292,7 +287,7 @@ def test_fillna_dtype_conversion(self, using_infer_string):
# empty block
df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
result = df.fillna("nan")
expected = DataFrame("nan", index=range(3), columns=["A", "B"])
expected = DataFrame("nan", dtype="object", index=range(3), columns=["A", "B"])
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("val", ["", 1, np.nan, 1.0])
Expand Down Expand Up @@ -540,18 +535,10 @@ def test_fillna_col_reordering(self):
filled = df.ffill()
assert df.columns.tolist() == filled.columns.tolist()

# TODO(infer_string) test as actual error instead of xfail
@pytest.mark.xfail(using_string_dtype(), reason="can't fill 0 in string")
def test_fill_corner(self, float_frame, float_string_frame):
mf = float_string_frame
mf.loc[mf.index[5:20], "foo"] = np.nan
mf.loc[mf.index[-10:], "A"] = np.nan

filled = float_string_frame.fillna(value=0)
assert (filled.loc[filled.index[5:20], "foo"] == 0).all()
del float_string_frame["foo"]

float_frame.reindex(columns=[]).fillna(value=0)
def test_fill_empty(self, float_frame):
df = float_frame.reindex(columns=[])
result = df.fillna(value=0)
tm.assert_frame_equal(result, df)

def test_fillna_with_columns_and_limit(self):
# GH40989
Expand Down
Loading