From 41bb091bc0d547c608b89f39aaf86675faeb0072 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 22 Sep 2020 20:15:34 +0100 Subject: [PATCH 1/6] REGR: Series.__mod__ behaves different with numexpr --- pandas/core/computation/expressions.py | 5 ++++- pandas/core/ops/methods.py | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 0032fe97b8b33..5bfd2e93a9247 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -133,7 +133,10 @@ def _evaluate_numexpr(op, op_str, a, b): roperator.rtruediv: "/", operator.floordiv: "//", roperator.rfloordiv: "//", - operator.mod: "%", + # we require Python semantics for mod of negative for backwards compatibility + # see https://github.com/pydata/numexpr/issues/365 + # so sticking with unaccelerated for now + operator.mod: None, roperator.rmod: "%", operator.pow: "**", roperator.rpow: "**", diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index e04db92b58c36..852157e52d5fe 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -171,8 +171,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special): mul=arith_method(cls, operator.mul, special), truediv=arith_method(cls, operator.truediv, special), floordiv=arith_method(cls, operator.floordiv, special), - # Causes a floating point exception in the tests when numexpr enabled, - # so for now no speedup mod=arith_method(cls, operator.mod, special), pow=arith_method(cls, operator.pow, special), # not entirely sure why this is necessary, but previously was included From 1e2a0868fc7edf829e2109f106918e74bfe61825 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 23 Sep 2020 16:47:00 +0100 Subject: [PATCH 2/6] add test --- pandas/tests/test_expressions.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git 
a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index da7f8b9b4a721..c9185ecd84f7d 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -6,7 +6,7 @@ import pytest import pandas._testing as tm -from pandas.core.api import DataFrame +from pandas.core.api import DataFrame, Index, Series from pandas.core.computation import expressions as expr _frame = DataFrame(randn(10000, 4), columns=list("ABCD"), dtype="float64") @@ -380,3 +380,33 @@ def test_frame_series_axis(self, axis, arith): result = op_func(other, axis=axis) tm.assert_frame_equal(expected, result) + + @pytest.mark.parametrize( + "op", + [ + "__mod__", + pytest.param("__rmod__", marks=pytest.mark.xfail(reason="GH-36552")), + "__floordiv__", + "__rfloordiv__", + ], + ) + @pytest.mark.parametrize( + "box, tester", + [ + (DataFrame, tm.assert_frame_equal), + (Series, tm.assert_series_equal), + (Index, tm.assert_index_equal), + ], + ) + @pytest.mark.parametrize("scalar", [-5, 5]) + def test_python_semantics_with_numexpr_installed(self, op, box, tester, scalar): + # https://github.com/pandas-dev/pandas/issues/36047 + expr._MIN_ELEMENTS = 0 + data = np.arange(-50, 50) + obj = box(data) + method = getattr(obj, op) + result = method(scalar) + expr.set_use_numexpr(False) + expected = method(scalar) + expr.set_use_numexpr(True) + tester(result, expected) From 4527d23551e59c32bc38db21253230094fe2b38e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 23 Sep 2020 16:54:40 +0100 Subject: [PATCH 3/6] release note --- doc/source/whatsnew/v1.1.3.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.3.rst b/doc/source/whatsnew/v1.1.3.rst index c1effad34ab93..a32514e4fdf3c 100644 --- a/doc/source/whatsnew/v1.1.3.rst +++ b/doc/source/whatsnew/v1.1.3.rst @@ -33,6 +33,7 @@ Fixed regressions - Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`) - Fixed regression in 
:meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`) - Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`) +- Fixed regression in modulo of :class:`Index`, :class:`Series` and :class:`DataFrame` when using ``numexpr``, which applied C rather than Python semantics (:issue:`36047`, :issue:`36526`) - Fixed regression in :meth:`read_excel` with ``engine="odf"`` caused ``UnboundLocalError`` in some cases where cells had nested child nodes (:issue:`36122`, :issue:`35802`) - Fixed regression in :class:`DataFrame` and :class:`Series` comparisons between numeric arrays and strings (:issue:`35700`, :issue:`36377`) - Fixed regression when setting empty :class:`DataFrame` column to a :class:`Series` in preserving name of index in frame (:issue:`36527`) From 85db057aa05fa2923fd0a7067ba2d2a1bad55afc Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 25 Sep 2020 13:48:35 +0100 Subject: [PATCH 4/6] compare result element-wise with Python --- pandas/tests/test_expressions.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index c9185ecd84f7d..2cbb2f226bcf1 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -406,7 +406,20 @@ def test_python_semantics_with_numexpr_installed(self, op, box, tester, scalar): obj = box(data) method = getattr(obj, op) result = method(scalar) + + # compare result with numpy expr.set_use_numexpr(False) expected = method(scalar) expr.set_use_numexpr(True) tester(result, expected) + + # compare result element-wise with Python + for i, elem in enumerate(data): + if box == DataFrame: + scalar_result = result.iloc[i, 0] + else: + scalar_result = result[i] + try: + assert scalar_result == getattr(int(elem), op)(scalar) + except ZeroDivisionError: + pass From ff9e85bb7fe3609d737b5d04d469b853a834d158 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 25 Sep 
2020 13:59:00 +0100 Subject: [PATCH 5/6] fix test --- pandas/tests/test_expressions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 2cbb2f226bcf1..0542ba2324153 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -420,6 +420,8 @@ def test_python_semantics_with_numexpr_installed(self, op, box, tester, scalar): else: scalar_result = result[i] try: - assert scalar_result == getattr(int(elem), op)(scalar) + expected = getattr(int(elem), op)(scalar) except ZeroDivisionError: pass + else: + assert scalar_result == expected From 4f9b91d56ad4812fc15e134159e5714e939195aa Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 30 Sep 2020 18:59:04 +0100 Subject: [PATCH 6/6] use tm.assert_equal --- pandas/tests/test_expressions.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 0542ba2324153..6db1078fcde4f 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -390,16 +390,9 @@ def test_frame_series_axis(self, axis, arith): "__rfloordiv__", ], ) - @pytest.mark.parametrize( - "box, tester", - [ - (DataFrame, tm.assert_frame_equal), - (Series, tm.assert_series_equal), - (Index, tm.assert_index_equal), - ], - ) + @pytest.mark.parametrize("box", [DataFrame, Series, Index]) @pytest.mark.parametrize("scalar", [-5, 5]) - def test_python_semantics_with_numexpr_installed(self, op, box, tester, scalar): + def test_python_semantics_with_numexpr_installed(self, op, box, scalar): # https://github.com/pandas-dev/pandas/issues/36047 expr._MIN_ELEMENTS = 0 data = np.arange(-50, 50) @@ -411,7 +404,7 @@ def test_python_semantics_with_numexpr_installed(self, op, box, tester, scalar): expr.set_use_numexpr(False) expected = method(scalar) expr.set_use_numexpr(True) - tester(result, expected) + tm.assert_equal(result, expected) 
# compare result element-wise with Python for i, elem in enumerate(data):