From 00ae553211c5ab66c14aa63faa7bc64b57e762a1 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 1 Oct 2020 09:49:47 -0700 Subject: [PATCH] Backport PR #36552: REGR: Series.__mod__ behaves different with numexpr (#36750) Co-authored-by: Simon Hawkins --- doc/source/whatsnew/v1.1.3.rst | 1 + pandas/core/computation/expressions.py | 5 +++- pandas/core/ops/methods.py | 2 -- pandas/tests/test_expressions.py | 40 +++++++++++++++++++++++++- 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index 91b9cf59687b3..15777abcb8084 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -34,6 +34,7 @@ Fixed regressions - Fixed regression when adding a :meth:`timedelta_range` to a :class:`Timestamp` raised a ``ValueError`` (:issue:`35897`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) +- Fixed regression in modulo of :class:`Index`, :class:`Series` and :class:`DataFrame` using ``numexpr`` using C not Python semantics (:issue:`36047`, :issue:`36526`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) - Fixed regression in :meth:`DataFrame.replace` inconsistent replace when using a float in the replace method (:issue:`35376`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 0e9077e6d557e..da290db362019 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -132,7 +132,10 @@ def _evaluate_numexpr(op, op_str, a, b): roperator.rtruediv: "/", operator.floordiv: "//", roperator.rfloordiv: "//", - operator.mod: "%", + # we require Python semantics for mod of negative for backwards compatibility + # see https://github.com/pydata/numexpr/issues/365 + # so sticking with unaccelerated for now + operator.mod: None, roperator.rmod: "%", operator.pow: "**", roperator.rpow: "**", diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index a4694a6e5134f..c60b67fa2f4f6 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -171,8 +171,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special): mul=arith_method(cls, operator.mul, special), truediv=arith_method(cls, operator.truediv, special), floordiv=arith_method(cls, operator.floordiv, special), - # Causes a floating point exception in the tests when numexpr enabled, - # so for now no speedup mod=arith_method(cls, operator.mod, special), pow=arith_method(cls, operator.pow, special), # not entirely sure why this is necessary, but previously was included diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 2368e93ddc256..cc8a134ebcc9f 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -6,7 +6,7 @@ import pytest import pandas._testing as tm -from pandas.core.api import DataFrame +from pandas.core.api import DataFrame, Index, Series from pandas.core.computation import expressions as expr _frame = DataFrame(randn(10000, 4), columns=list("ABCD"), dtype="float64") @@ -380,3 +380,41 @@ def test_frame_series_axis(self, axis, arith): result = op_func(other, axis=axis) tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "op", + [ + "__mod__", + pytest.param("__rmod__", marks=pytest.mark.xfail(reason="GH-36552")), + "__floordiv__", + "__rfloordiv__", + ], + ) + @pytest.mark.parametrize("box", [DataFrame, Series, Index]) + @pytest.mark.parametrize("scalar", [-5, 5]) + def test_python_semantics_with_numexpr_installed(self, op, box, scalar): + # https://github.com/pandas-dev/pandas/issues/36047 + expr._MIN_ELEMENTS = 0 + data = np.arange(-50, 50) + obj = box(data) + method = getattr(obj, op) + result = method(scalar) + + # compare result with numpy + expr.set_use_numexpr(False) + expected = method(scalar) + expr.set_use_numexpr(True) + tm.assert_equal(result, expected) + + # compare result element-wise with Python + for i, elem in enumerate(data): + if box == DataFrame: + scalar_result = result.iloc[i, 0] + else: + scalar_result = result[i] + try: + expected = getattr(int(elem), op)(scalar) + except ZeroDivisionError: + pass + else: + assert scalar_result == expected