Backport PR #36552: REGR: Series.__mod__ behaves different with numex…

…pr (#36750) Co-authored-by: Simon Hawkins <[email protected]>
pandas-dev · Oct 1, 2020 · 00ae553 · 00ae553
1 parent 637bdc3
commit 00ae553
Show file tree

Hide file tree

Showing 4 changed files with 44 additions and 4 deletions.
diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst
@@ -34,6 +34,7 @@ Fixed regressions
 - Fixed regression when adding a :meth:`timedelta_range` to a :class:`Timestamp` raised a ``ValueError`` (:issue:`35897`)
 - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`)
 - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`)
+- Fixed regression in modulo of :class:`Index`, :class:`Series` and :class:`DataFrame` where evaluation with ``numexpr`` used C rather than Python semantics (:issue:`36047`, :issue:`36526`)
 - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`)
 - Fixed regression in :meth:`DataFrame.replace` inconsistent replace when using a float in the replace method (:issue:`35376`)
 - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`)

diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py
@@ -132,7 +132,10 @@ def _evaluate_numexpr(op, op_str, a, b):
     roperator.rtruediv: "/",
     operator.floordiv: "//",
     roperator.rfloordiv: "//",
-    operator.mod: "%",
+    # We require Python semantics for mod with negative operands, for
+    # backwards compatibility (see https://github.com/pydata/numexpr/issues/365),
+    # so mod is left unaccelerated for now.
+    operator.mod: None,
     roperator.rmod: "%",
     operator.pow: "**",
     roperator.rpow: "**",

diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py
@@ -171,8 +171,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special):
         mul=arith_method(cls, operator.mul, special),
         truediv=arith_method(cls, operator.truediv, special),
         floordiv=arith_method(cls, operator.floordiv, special),
-        # Causes a floating point exception in the tests when numexpr enabled,
-        # so for now no speedup
         mod=arith_method(cls, operator.mod, special),
         pow=arith_method(cls, operator.pow, special),
         # not entirely sure why this is necessary, but previously was included

diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py
@@ -6,7 +6,7 @@
 import pytest
 
 import pandas._testing as tm
-from pandas.core.api import DataFrame
+from pandas.core.api import DataFrame, Index, Series
 from pandas.core.computation import expressions as expr
 
 _frame = DataFrame(randn(10000, 4), columns=list("ABCD"), dtype="float64")
@@ -380,3 +380,41 @@ def test_frame_series_axis(self, axis, arith):
 
         result = op_func(other, axis=axis)
         tm.assert_frame_equal(expected, result)
+
+    @pytest.mark.parametrize(
+        "op",
+        [
+            "__mod__",
+            pytest.param("__rmod__", marks=pytest.mark.xfail(reason="GH-36552")),
+            "__floordiv__",
+            "__rfloordiv__",
+        ],
+    )
+    @pytest.mark.parametrize("box", [DataFrame, Series, Index])
+    @pytest.mark.parametrize("scalar", [-5, 5])
+    def test_python_semantics_with_numexpr_installed(self, op, box, scalar):
+        """
+        Check that mod/floordiv give the same results with numexpr enabled
+        as with numexpr disabled, and that both match Python's int semantics
+        for negative operands.
+
+        Module-level state on ``expr`` that the test changes is restored in
+        ``finally`` blocks so a failure here cannot leak into other tests.
+        """
+        # https://github.com/pandas-dev/pandas/issues/36047
+        data = np.arange(-50, 50)
+        obj = box(data)
+        method = getattr(obj, op)
+
+        # Presumably lowers the size threshold so that this 100-element input
+        # is eligible for the numexpr path -- confirm against expressions.py.
+        saved_min_elements = expr._MIN_ELEMENTS
+        expr._MIN_ELEMENTS = 0
+        try:
+            result = method(scalar)
+
+            # compare result with numpy
+            expr.set_use_numexpr(False)
+            try:
+                expected = method(scalar)
+            finally:
+                # re-enable even if the unaccelerated call raised
+                expr.set_use_numexpr(True)
+        finally:
+            expr._MIN_ELEMENTS = saved_min_elements
+        tm.assert_equal(result, expected)
+
+        # compare result element-wise with Python
+        for i, elem in enumerate(data):
+            try:
+                py_expected = getattr(int(elem), op)(scalar)
+            except ZeroDivisionError:
+                # reflected ops divide the scalar by elem, which is 0 once;
+                # there is no Python value to compare against in that case
+                continue
+            if box is DataFrame:
+                scalar_result = result.iloc[i, 0]
+            else:
+                scalar_result = result[i]
+            assert scalar_result == py_expected