Skip to content

Commit

Permalink
BUG: BooleanArray match non-masked behavior div/pow/mod (#46063)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Feb 27, 2022
1 parent 1efa4fb commit 7c59260
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 47 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ Time Zones
Numeric
^^^^^^^
- Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`)
- Bug in division, ``pow`` and ``mod`` operations on array-likes with ``dtype="boolean"`` not matching the behavior of their ``np.bool_`` counterparts (:issue:`46063`)
- Bug in multiplying a :class:`Series` with ``IntegerDtype`` or ``FloatingDtype`` by an array-like with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`45622`)
-

Expand Down
23 changes: 14 additions & 9 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,10 +626,21 @@ def _arith_method(self, other, op):
if other is libmissing.NA:
result = np.ones_like(self._data)
if self.dtype.kind == "b":
if op_name in {"floordiv", "rfloordiv", "mod", "rmod", "pow", "rpow"}:
if op_name in {
"floordiv",
"rfloordiv",
"pow",
"rpow",
"truediv",
"rtruediv",
}:
# GH#41165 Try to match non-masked Series behavior
# This is still imperfect GH#46043
raise NotImplementedError(
f"operator '{op_name}' not implemented for bool dtypes"
)
elif op_name in {"mod", "rmod"}:
dtype = "int8"
elif op_name in {"truediv", "rtruediv"}:
dtype = "float64"
else:
dtype = "bool"
result = result.astype(dtype)
Expand All @@ -646,12 +657,6 @@ def _arith_method(self, other, op):
# types with respect to floordiv-by-zero
pd_op = op

elif self.dtype.kind == "b" and (
"div" in op_name or "pow" in op_name or "mod" in op_name
):
# TODO(GH#41165): should these be disallowed?
pd_op = op

with np.errstate(all="ignore"):
result = pd_op(self._data, other)

Expand Down
22 changes: 12 additions & 10 deletions pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

import pandas as pd
import pandas._testing as tm
from pandas.arrays import FloatingArray


@pytest.fixture
Expand Down Expand Up @@ -55,15 +54,13 @@ def test_sub(left_array, right_array):


def test_div(left_array, right_array):
result = left_array / right_array
expected = FloatingArray(
np.array(
[1.0, np.inf, np.nan, 0.0, np.nan, np.nan, np.nan, np.nan, np.nan],
dtype="float64",
),
np.array([False, False, True, False, False, True, True, True, True]),
)
tm.assert_extension_array_equal(result, expected)
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
# check that we are matching the non-masked Series behavior
pd.Series(left_array._data) / pd.Series(right_array._data)

with pytest.raises(NotImplementedError, match=msg):
left_array / right_array


@pytest.mark.parametrize(
Expand All @@ -76,6 +73,11 @@ def test_div(left_array, right_array):
)
def test_op_int8(left_array, right_array, opname):
op = getattr(operator, opname)
if opname != "mod":
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
result = op(left_array, right_array)
return
result = op(left_array, right_array)
expected = op(left_array.astype("Int8"), right_array.astype("Int8"))
tm.assert_extension_array_equal(result, expected)
Expand Down
87 changes: 63 additions & 24 deletions pandas/tests/arrays/masked/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ def check_skip(data, op_name):
pytest.skip("subtract not implemented for boolean")


def is_bool_not_implemented(data, op_name):
    """
    Report whether ``op_name`` is expected to raise for boolean ``data``.

    Surrounding underscores and any leading "r" characters are removed
    from ``op_name`` before it is compared against the operations that
    are treated as not implemented for bool dtype.
    """
    # match non-masked behavior
    if data.dtype.kind != "b":
        return False
    bare_name = op_name.strip("_")
    forward_name = bare_name.lstrip("r")
    return forward_name in ("pow", "truediv", "floordiv")


# Test equivalence of scalars, numpy arrays with array ops
# -----------------------------------------------------------------------------

Expand All @@ -42,9 +51,16 @@ def test_array_scalar_like_equivalence(data, all_arithmetic_operators):

# TODO also add len-1 array (np.array([scalar], dtype=data.dtype.numpy_dtype))
for scalar in [scalar, data.dtype.type(scalar)]:
result = op(data, scalar)
expected = op(data, scalar_array)
tm.assert_extension_array_equal(result, expected)
if is_bool_not_implemented(data, all_arithmetic_operators):
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
op(data, scalar)
with pytest.raises(NotImplementedError, match=msg):
op(data, scalar_array)
else:
result = op(data, scalar)
expected = op(data, scalar_array)
tm.assert_extension_array_equal(result, expected)


def test_array_NA(data, all_arithmetic_operators):
Expand All @@ -56,6 +72,15 @@ def test_array_NA(data, all_arithmetic_operators):
scalar_array = pd.array([pd.NA] * len(data), dtype=data.dtype)

mask = data._mask.copy()

if is_bool_not_implemented(data, all_arithmetic_operators):
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
op(data, scalar)
# GH#45421 check op doesn't alter data._mask inplace
tm.assert_numpy_array_equal(mask, data._mask)
return

result = op(data, scalar)
# GH#45421 check op doesn't alter data._mask inplace
tm.assert_numpy_array_equal(mask, data._mask)
Expand All @@ -74,6 +99,14 @@ def test_numpy_array_equivalence(data, all_arithmetic_operators):
numpy_array = np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype)
pd_array = pd.array(numpy_array, dtype=data.dtype)

if is_bool_not_implemented(data, all_arithmetic_operators):
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
op(data, numpy_array)
with pytest.raises(NotImplementedError, match=msg):
op(data, pd_array)
return

result = op(data, numpy_array)
expected = op(data, pd_array)
tm.assert_extension_array_equal(result, expected)
Expand All @@ -91,6 +124,14 @@ def test_frame(data, all_arithmetic_operators):
# DataFrame with scalar
df = pd.DataFrame({"A": data})

if is_bool_not_implemented(data, all_arithmetic_operators):
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
op(df, scalar)
with pytest.raises(NotImplementedError, match=msg):
op(data, scalar)
return

result = op(df, scalar)
expected = pd.DataFrame({"A": op(data, scalar)})
tm.assert_frame_equal(result, expected)
Expand All @@ -101,30 +142,25 @@ def test_series(data, all_arithmetic_operators):
op = tm.get_op_from_name(all_arithmetic_operators)
check_skip(data, all_arithmetic_operators)

s = pd.Series(data)
ser = pd.Series(data)

# Series with scalar
result = op(s, scalar)
expected = pd.Series(op(data, scalar))
tm.assert_series_equal(result, expected)
others = [
scalar,
np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype),
pd.array([scalar] * len(data), dtype=data.dtype),
pd.Series([scalar] * len(data), dtype=data.dtype),
]

# Series with np.ndarray
other = np.array([scalar] * len(data), dtype=data.dtype.numpy_dtype)
result = op(s, other)
expected = pd.Series(op(data, other))
tm.assert_series_equal(result, expected)
for other in others:
if is_bool_not_implemented(data, all_arithmetic_operators):
msg = "operator '.*' not implemented for bool dtypes"
with pytest.raises(NotImplementedError, match=msg):
op(ser, other)

# Series with pd.array
other = pd.array([scalar] * len(data), dtype=data.dtype)
result = op(s, other)
expected = pd.Series(op(data, other))
tm.assert_series_equal(result, expected)

# Series with Series
other = pd.Series([scalar] * len(data), dtype=data.dtype)
result = op(s, other)
expected = pd.Series(op(data, other.array))
tm.assert_series_equal(result, expected)
else:
result = op(ser, other)
expected = pd.Series(op(data, other))
tm.assert_series_equal(result, expected)


# Test generic characteristics / errors
Expand Down Expand Up @@ -169,6 +205,9 @@ def test_error_len_mismatch(data, all_arithmetic_operators):
r"numpy boolean subtract, the `\-` operator, is not supported, use "
r"the bitwise_xor, the `\^` operator, or the logical_xor function instead"
)
elif is_bool_not_implemented(data, all_arithmetic_operators):
msg = "operator '.*' not implemented for bool dtypes"
err = NotImplementedError

for other in [other, np.array(other)]:
with pytest.raises(err, match=msg):
Expand Down
22 changes: 18 additions & 4 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests):

def check_opname(self, s, op_name, other, exc=None):
# overwriting to indicate ops don't raise an error
super().check_opname(s, op_name, other, exc=None)
exc = None
if op_name.strip("_").lstrip("r") in ["pow", "truediv", "floordiv"]:
# match behavior with non-masked bool dtype
exc = NotImplementedError
super().check_opname(s, op_name, other, exc=exc)

def _check_op(self, obj, op, other, op_name, exc=NotImplementedError):
if exc is None:
Expand Down Expand Up @@ -144,9 +148,19 @@ def _check_op(self, obj, op, other, op_name, exc=NotImplementedError):
with pytest.raises(exc):
op(obj, other)

def _check_divmod_op(self, s, op, other, exc=None):
# override to not raise an error
super()._check_divmod_op(s, op, other, None)
@pytest.mark.xfail(
reason="Inconsistency between floordiv and divmod; we raise for floordiv "
"but not for divmod. This matches what we do for non-masked bool dtype."
)
def test_divmod_series_array(self, data, data_for_twos):
super().test_divmod_series_array(data, data_for_twos)

@pytest.mark.xfail(
reason="Inconsistency between floordiv and divmod; we raise for floordiv "
"but not for divmod. This matches what we do for non-masked bool dtype."
)
def test_divmod(self, data):
super().test_divmod(data)


class TestComparisonOps(base.BaseComparisonOpsTests):
Expand Down

0 comments on commit 7c59260

Please sign in to comment.