From 244a2feedd4cec6d1736359e7fcf38e2589f785d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 15 Jan 2023 23:57:20 +0100 Subject: [PATCH 01/16] BUG: eval and query not working with ea dtypes --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/computation/common.py | 17 ++++++++++++ pandas/core/computation/eval.py | 5 ++++ pandas/tests/frame/test_query_eval.py | 40 +++++++++++++++++++++++++++ 4 files changed, 63 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 033f47f0c994d..f2b019f2adbec 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -942,6 +942,7 @@ Conversion - Bug in :meth:`Series.to_numpy` converting to NumPy array before applying ``na_value`` (:issue:`48951`) - Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`) - Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`) +- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` raising for extension array dtypes (:issue:`29618`, :issue:`50261`) - Strings diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py index a1ac3dfa06ee0..2d6efd07b82ee 100644 --- a/pandas/core/computation/common.py +++ b/pandas/core/computation/common.py @@ -26,3 +26,20 @@ def result_type_many(*arrays_and_dtypes): except ValueError: # we have > NPY_MAXARGS terms in our expression return reduce(np.result_type, arrays_and_dtypes) + except TypeError: + from pandas.core.dtypes.cast import find_common_type + from pandas.core.dtypes.common import is_extension_array_dtype + + arr_and_dtypes = list(arrays_and_dtypes) + ea_dtypes, non_ea_dtypes = [], [] + for arr_or_dtype in arr_and_dtypes: + if is_extension_array_dtype(arr_or_dtype): + ea_dtypes.append(arr_or_dtype) + else: + non_ea_dtypes.append(arr_or_dtype) + + if non_ea_dtypes: + np_dtype = np.result_type(*non_ea_dtypes) + return find_common_type(ea_dtypes + [np_dtype]) + + return find_common_type(ea_dtypes) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index f0127ae05182a..af3ae9bdd9e73 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -9,6 +9,8 @@ from pandas.util._validators import validate_bool_kwarg +from pandas.core.dtypes.common import is_extension_array_dtype + from pandas.core.computation.engines import ENGINES from pandas.core.computation.expr import ( PARSERS, @@ -333,6 +335,9 @@ def eval( parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) + if is_extension_array_dtype(parsed_expr.terms.return_type): + engine = "python" + # construct the engine and evaluate the parsed expression eng = ENGINES[engine] eng_inst = eng(parsed_expr) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 159dab04e7da6..51c2127588823 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1273,3 +1273,43 @@ def func(*_): with pytest.raises(TypeError, match="Only named functions are supported"): df.eval("@funcs[0].__call__()") + + def test_ea_dtypes(self): + # GH#29618 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64") + result = df.eval("c = b - a") + expected = DataFrame( + [[1, 2, 1], [3, 4, 1]], columns=["a", "b", "c"], dtype="Float64" + ) + tm.assert_frame_equal(result, expected) + + def test_ea_dtypes_and_scalar(self): + # GH#29618 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64") + result = df.eval("c = b - 1") + expected = DataFrame( + [[1, 2, 1], [3, 4, 3]], columns=["a", "b", "c"], dtype="Float64" + ) + tm.assert_frame_equal(result, expected) + + def test_ea_dtypes_and_scalar_operation(self): + # GH#29618 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64") + result = df.eval("c = 2 - 1") + expected = DataFrame( + { + "a": Series([1, 3], dtype="Float64"), + "b": Series([2, 4], dtype="Float64"), + "c": 1, + } + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + def test_query_ea_dtypes(self, dtype): + # GH#50261 + df = DataFrame({"a": Series([1, 2], dtype=dtype)}) + ref = {2} # noqa:F841 + result = df.query("a in @ref") + expected = DataFrame({"a": Series([2], dtype=dtype, index=[1])}) + tm.assert_frame_equal(result, expected) From 5a6697044857f2e488eb0c0f220b171f2da63762 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 16 Jan 2023 11:13:31 +0100 Subject: [PATCH 02/16] Fix windows build --- pandas/tests/frame/test_query_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 51c2127588823..79a9f95f96646 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1300,7 +1300,7 @@ def test_ea_dtypes_and_scalar_operation(self): { "a": Series([1, 3], dtype="Float64"), "b": Series([2, 4], dtype="Float64"), - "c": 1, + "c": Series([1, 1], dtype=np.intp), } ) tm.assert_frame_equal(result, expected) From 2fee5695d1c56a26f7158f77c07bb3c797a0d4eb Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 16 Jan 2023 12:22:47 +0100 Subject: [PATCH 03/16] Fix --- pandas/tests/frame/test_query_eval.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 79a9f95f96646..d610bed0b1aa5 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -3,6 +3,7 @@ import numpy as np import pytest +from pandas.compat import is_platform_windows from pandas.errors import UndefinedVariableError import pandas.util._test_decorators as td @@ -1300,7 +1301,9 @@ def test_ea_dtypes_and_scalar_operation(self): { "a": Series([1, 3], dtype="Float64"), "b": Series([2, 4], dtype="Float64"), - "c": Series([1, 1], dtype=np.intp), + "c": Series( + [1, 1], dtype="int64" if not is_platform_windows() else "int32" + ), } ) tm.assert_frame_equal(result, expected) From c010ca9fbcdaf109513079db6c0e095ec67d5c74 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 18 Jan 2023 22:57:58 +0100 Subject: [PATCH 04/16] Fix another bug --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/computation/eval.py | 4 +++- pandas/tests/frame/test_query_eval.py | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 0c1750ce7768d..6eb1de505a5e8 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -954,7 +954,7 @@ Conversion - Bug in :meth:`Series.to_numpy` converting to NumPy array before applying ``na_value`` (:issue:`48951`) - Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`) - Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`) -- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` raising for extension array dtypes (:issue:`29618`, :issue:`50261`) +- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` raising for extension array dtypes (:issue:`29618`, :issue:`50261`, :issue:`31913`) - Strings diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index af3ae9bdd9e73..c94b4c10f44a3 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -335,7 +335,9 @@ def eval( parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) - if is_extension_array_dtype(parsed_expr.terms.return_type): + if is_extension_array_dtype(parsed_expr.terms.return_type) or any( + is_extension_array_dtype(elem) for elem in parsed_expr.terms.operand_types + ): engine = "python" # construct the engine and evaluate the parsed expression diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index d610bed0b1aa5..3d98e945e8493 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1316,3 +1316,18 @@ def test_query_ea_dtypes(self, dtype): result = df.query("a in @ref") expected = DataFrame({"a": Series([2], dtype=dtype, index=[1])}) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + def test_query_ea_equality_compariso(self, dtype): + # GH#50261 + df = DataFrame( + {"A": Series([1, 1, 2], dtype="Int64"), "B": Series([1, 2, 2], dtype=dtype)} + ) + result = df.query("A == B") + expected = DataFrame( + { + "A": Series([1, 2], dtype="Int64", index=[0, 2]), + "B": Series([1, 2], dtype=dtype, index=[0, 2]), + } + ) + tm.assert_frame_equal(result, expected) From 5e2327ec45f5617d5c0df11ef9b314d5d4754a50 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 19 Jan 2023 17:01:49 +0100 Subject: [PATCH 05/16] Fix eval --- pandas/core/computation/eval.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index c94b4c10f44a3..c8c11d5bb4b0a 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -335,8 +335,13 @@ def eval( parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) - if is_extension_array_dtype(parsed_expr.terms.return_type) or any( - is_extension_array_dtype(elem) for elem in parsed_expr.terms.operand_types + if ( + is_extension_array_dtype(parsed_expr.terms.return_type) + or getattr(parsed_expr.terms, "operand_typesany") is not None + and ( + is_extension_array_dtype(elem) + for elem in parsed_expr.terms.operand_types + ) ): engine = "python" From fddee233531b0a95d5378872012f87636ea25fef Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 25 Jan 2023 20:56:09 -0500 Subject: [PATCH 06/16] Fix --- pandas/core/computation/eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index c8c11d5bb4b0a..923141b1ebb6f 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -337,8 +337,8 @@ def eval( if ( is_extension_array_dtype(parsed_expr.terms.return_type) - or getattr(parsed_expr.terms, "operand_typesany") is not None - and ( + or getattr(parsed_expr.terms, "operand_types") is not None + and any( is_extension_array_dtype(elem) for elem in parsed_expr.terms.operand_types ) From a23523b9f8824e0896f6a548b7d41514d36873f8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 25 Jan 2023 21:39:37 -0500 Subject: [PATCH 07/16] Fix --- pandas/core/computation/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 923141b1ebb6f..a549d9d6dfbfc 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -337,7 +337,7 @@ def eval( if ( is_extension_array_dtype(parsed_expr.terms.return_type) - or getattr(parsed_expr.terms, "operand_types") is not None + or getattr(parsed_expr.terms, "operand_types", None) is not None and any( is_extension_array_dtype(elem) for elem in parsed_expr.terms.operand_types From 467127fafd180efd6d2d74407d90f8e99161668e Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 26 Jan 2023 20:27:04 -0500 Subject: [PATCH 08/16] Add arrow tests --- pandas/tests/frame/test_query_eval.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 3d98e945e8493..81343966abfea 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1275,12 +1275,16 @@ def func(*_): with pytest.raises(TypeError, match="Only named functions are supported"): df.eval("@funcs[0].__call__()") - def test_ea_dtypes(self): + def test_ea_dtypes(self, any_numeric_ea_and_arrow_dtype): # GH#29618 - df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64") + df = DataFrame( + [[1, 2], [3, 4]], columns=["a", "b"], dtype=any_numeric_ea_and_arrow_dtype + ) result = df.eval("c = b - a") expected = DataFrame( - [[1, 2, 1], [3, 4, 1]], columns=["a", "b", "c"], dtype="Float64" + [[1, 2, 1], [3, 4, 1]], + columns=["a", "b", "c"], + dtype=any_numeric_ea_and_arrow_dtype, ) tm.assert_frame_equal(result, expected) @@ -1293,14 +1297,16 @@ def test_ea_dtypes_and_scalar(self): ) tm.assert_frame_equal(result, expected) - def test_ea_dtypes_and_scalar_operation(self): + def test_ea_dtypes_and_scalar_operation(self, any_numeric_ea_and_arrow_dtype): # GH#29618 - df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64") + df = DataFrame( + [[1, 2], [3, 4]], columns=["a", "b"], dtype=any_numeric_ea_and_arrow_dtype + ) result = df.eval("c = 2 - 1") expected = DataFrame( { - "a": Series([1, 3], dtype="Float64"), - "b": Series([2, 4], dtype="Float64"), + "a": Series([1, 3], dtype=any_numeric_ea_and_arrow_dtype), + "b": Series([2, 4], dtype=any_numeric_ea_and_arrow_dtype), "c": Series( [1, 1], dtype="int64" if not is_platform_windows() else "int32" ), @@ -1308,7 +1314,7 @@ def test_ea_dtypes_and_scalar_operation(self): ) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"]) def test_query_ea_dtypes(self, dtype): # GH#50261 df = DataFrame({"a": Series([1, 2], dtype=dtype)}) @@ -1317,8 +1323,8 @@ def test_query_ea_dtypes(self, dtype): expected = DataFrame({"a": Series([2], dtype=dtype, index=[1])}) tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("dtype", ["int64", "Int64"]) - def test_query_ea_equality_compariso(self, dtype): + @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"]) + def test_query_ea_equality_comparison(self, dtype): # GH#50261 df = DataFrame( {"A": Series([1, 1, 2], dtype="Int64"), "B": Series([1, 2, 2], dtype=dtype)} From 9022e2e40973b3f914a6503f7df33bed3c977b8a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 1 Feb 2023 23:12:03 +0100 Subject: [PATCH 09/16] Fix pyarrow-less ci --- pandas/tests/frame/test_query_eval.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 81343966abfea..fe83b65068e38 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1316,6 +1316,8 @@ def test_ea_dtypes_and_scalar_operation(self, any_numeric_ea_and_arrow_dtype): @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"]) def test_query_ea_dtypes(self, dtype): + if dtype == "int64[pyarrow]": + pytest.importorskip("pyarrow") # GH#50261 df = DataFrame({"a": Series([1, 2], dtype=dtype)}) ref = {2} # noqa:F841 @@ -1326,6 +1328,8 @@ def test_query_ea_dtypes(self, dtype): @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"]) def test_query_ea_equality_comparison(self, dtype): # GH#50261 + if dtype == "int64[pyarrow]": + pytest.importorskip("pyarrow") df = DataFrame( {"A": Series([1, 1, 2], dtype="Int64"), "B": Series([1, 2, 2], dtype=dtype)} ) From 52685695c58942dde700498df6763ac01843450d Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 2 Feb 2023 17:37:28 +0100 Subject: [PATCH 10/16] Add try except --- pandas/core/computation/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py index 2d6efd07b82ee..115191829f044 100644 --- a/pandas/core/computation/common.py +++ b/pandas/core/computation/common.py @@ -39,7 +39,10 @@ def result_type_many(*arrays_and_dtypes): non_ea_dtypes.append(arr_or_dtype) if non_ea_dtypes: - np_dtype = np.result_type(*non_ea_dtypes) + try: + np_dtype = np.result_type(*non_ea_dtypes) + except ValueError: + np_dtype = reduce(np.result_type, arrays_and_dtypes) return find_common_type(ea_dtypes + [np_dtype]) return find_common_type(ea_dtypes) From 9f75603f5d0b41e2177af2a75e26fb8e47de9ed8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 2 Feb 2023 18:15:53 +0100 Subject: [PATCH 11/16] Add warning --- pandas/core/computation/eval.py | 9 ++++++++- pandas/tests/frame/test_query_eval.py | 13 +++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index a549d9d6dfbfc..e2227b8f51e35 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING import warnings +from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import is_extension_array_dtype @@ -335,7 +336,7 @@ def eval( parsed_expr = Expr(expr, engine=engine, parser=parser, env=env) - if ( + if engine == "numexpr" and ( is_extension_array_dtype(parsed_expr.terms.return_type) or getattr(parsed_expr.terms, "operand_types", None) is not None and any( @@ -343,6 +344,12 @@ def eval( for elem in parsed_expr.terms.operand_types ) ): + warnings.warn( + "Engine is switched to 'python' because numexpr does not support " + "extension array dtypes. Please set your engine to python manually.", + RuntimeWarning, + stacklevel=find_stack_level(), + ) engine = "python" # construct the engine and evaluate the parsed expression diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 6a7e8394c42ca..3e63d6065e88b 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1298,7 +1298,8 @@ def test_ea_dtypes(self, any_numeric_ea_and_arrow_dtype): df = DataFrame( [[1, 2], [3, 4]], columns=["a", "b"], dtype=any_numeric_ea_and_arrow_dtype ) - result = df.eval("c = b - a") + with tm.assert_produces_warning(RuntimeWarning): + result = df.eval("c = b - a") expected = DataFrame( [[1, 2, 1], [3, 4, 1]], columns=["a", "b", "c"], @@ -1309,7 +1310,8 @@ def test_ea_dtypes(self, any_numeric_ea_and_arrow_dtype): def test_ea_dtypes_and_scalar(self): # GH#29618 df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64") - result = df.eval("c = b - 1") + with tm.assert_produces_warning(RuntimeWarning): + result = df.eval("c = b - 1") expected = DataFrame( [[1, 2, 1], [3, 4, 3]], columns=["a", "b", "c"], dtype="Float64" ) @@ -1343,15 +1345,18 @@ def test_query_ea_dtypes(self, dtype): expected = DataFrame({"a": Series([2], dtype=dtype, index=[1])}) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("engine", ["python", "numexpr"]) @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"]) - def test_query_ea_equality_comparison(self, dtype): + def test_query_ea_equality_comparison(self, dtype, engine): # GH#50261 + warning = RuntimeWarning if engine == "numexpr" else None if dtype == "int64[pyarrow]": pytest.importorskip("pyarrow") df = DataFrame( {"A": Series([1, 1, 2], dtype="Int64"), "B": Series([1, 2, 2], dtype=dtype)} ) - result = df.query("A == B") + with tm.assert_produces_warning(warning): + result = df.query("A == B", engine=engine) expected = DataFrame( { "A": Series([1, 2], dtype="Int64", index=[0, 2]), From b7560733e5eb4d8eed1e5115da75c12fe758e681 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 6 Feb 2023 21:09:12 +0100 Subject: [PATCH 12/16] Adjust warning --- pandas/tests/frame/test_query_eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 3e63d6065e88b..8d061777c497f 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1298,7 +1298,7 @@ def test_ea_dtypes(self, any_numeric_ea_and_arrow_dtype): df = DataFrame( [[1, 2], [3, 4]], columns=["a", "b"], dtype=any_numeric_ea_and_arrow_dtype ) - with tm.assert_produces_warning(RuntimeWarning): + with tm.assert_produces_warning(RuntimeWarning) and NUMEXPR_INSTALLED: result = df.eval("c = b - a") expected = DataFrame( [[1, 2, 1], [3, 4, 1]], @@ -1310,7 +1310,7 @@ def test_ea_dtypes(self, any_numeric_ea_and_arrow_dtype): def test_ea_dtypes_and_scalar(self): # GH#29618 df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64") - with tm.assert_produces_warning(RuntimeWarning): + with tm.assert_produces_warning(RuntimeWarning) and NUMEXPR_INSTALLED: result = df.eval("c = b - 1") expected = DataFrame( [[1, 2, 1], [3, 4, 3]], columns=["a", "b", "c"], dtype="Float64" From 931c723b7e3b40b354c5fb68f070cd30d051d486 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 6 Feb 2023 21:51:06 +0100 Subject: [PATCH 13/16] Fix warning --- pandas/tests/frame/test_query_eval.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 8d061777c497f..393827d3ac084 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1298,7 +1298,8 @@ def test_ea_dtypes(self, any_numeric_ea_and_arrow_dtype): df = DataFrame( [[1, 2], [3, 4]], columns=["a", "b"], dtype=any_numeric_ea_and_arrow_dtype ) - with tm.assert_produces_warning(RuntimeWarning) and NUMEXPR_INSTALLED: + warning = RuntimeWarning if NUMEXPR_INSTALLED else None + with tm.assert_produces_warning(warning): result = df.eval("c = b - a") expected = DataFrame( [[1, 2, 1], [3, 4, 1]], @@ -1310,7 +1311,8 @@ def test_ea_dtypes(self, any_numeric_ea_and_arrow_dtype): def test_ea_dtypes_and_scalar(self): # GH#29618 df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64") - with tm.assert_produces_warning(RuntimeWarning) and NUMEXPR_INSTALLED: + warning = RuntimeWarning if NUMEXPR_INSTALLED else None + with tm.assert_produces_warning(warning): result = df.eval("c = b - 1") expected = DataFrame( [[1, 2, 1], [3, 4, 3]], columns=["a", "b", "c"], dtype="Float64" From 4fb7a39586f4664d3f140d54a3a89cef42a5b172 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 8 Feb 2023 19:38:48 +0100 Subject: [PATCH 14/16] Fix --- pandas/tests/frame/test_query_eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 393827d3ac084..a2b1f79dfb587 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1351,7 +1351,7 @@ def test_query_ea_dtypes(self, dtype): @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"]) def test_query_ea_equality_comparison(self, dtype, engine): # GH#50261 - warning = RuntimeWarning if engine == "numexpr" else None + warning = RuntimeWarning if engine == "numexpr" and NUMEXPR_INSTALLED else None if dtype == "int64[pyarrow]": pytest.importorskip("pyarrow") df = DataFrame( From 1a2c36a0bf853546093bb342f256d3652d35778f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 9 Feb 2023 01:30:50 +0100 Subject: [PATCH 15/16] Update test_query_eval.py --- pandas/tests/frame/test_query_eval.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index a2b1f79dfb587..c815b897e6a14 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -1351,7 +1351,9 @@ def test_query_ea_dtypes(self, dtype): @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"]) def test_query_ea_equality_comparison(self, dtype, engine): # GH#50261 - warning = RuntimeWarning if engine == "numexpr" and NUMEXPR_INSTALLED else None + warning = RuntimeWarning if engine == "numexpr" else None + if engine == "numexpr" and not NUMEXPR_INSTALLED: + pytest.skip("numexpr not installed") if dtype == "int64[pyarrow]": pytest.importorskip("pyarrow") df = DataFrame( From 73d72ed0749c76ac7093e031559781761212d791 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 9 Feb 2023 08:44:49 -0800 Subject: [PATCH 16/16] Update pandas/core/computation/eval.py --- pandas/core/computation/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index e2227b8f51e35..0326760a1ff24 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -345,7 +345,7 @@ def eval( ) ): warnings.warn( - "Engine is switched to 'python' because numexpr does not support " + "Engine has switched to 'python' because numexpr does not support " "extension array dtypes. Please set your engine to python manually.", RuntimeWarning, stacklevel=find_stack_level(),