add some xfails

pandas-dev · Aug 8, 2024 · 0b2bd61 · 0b2bd61
1 parent d414dc9
commit 0b2bd61
Show file tree

Hide file tree

Showing 33 changed files with 176 additions and 10 deletions.
diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
@@ -12,6 +12,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas.errors import SpecificationError
 
 from pandas import (
@@ -209,6 +211,8 @@ def transform(row):
         data.apply(transform, axis=1)
 
 
+# we should raise a proper TypeError instead of propagating the pyarrow error
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "df, func, expected",
     tm.get_cython_table_params(
@@ -229,6 +233,8 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_str
             df.agg(func, axis=axis)
 
 
+# we should raise a proper TypeError instead of propagating the pyarrow error
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "series, func, expected",
     chain(

diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py
@@ -104,6 +104,7 @@ def test_numba_nonunique_unsupported(apply_axis):
 
 
 def test_numba_unsupported_dtypes(apply_axis):
+    pytest.importorskip("pyarrow")
     f = lambda x: x
     df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
     df["c"] = df["c"].astype("double[pyarrow]")

diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py
@@ -3,6 +3,10 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 import pandas._testing as tm
 
@@ -90,6 +94,9 @@ def test_op_int8(left_array, right_array, opname):
 # -----------------------------------------------------------------------------
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+)
 def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
     # invalid ops
 

diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
@@ -140,6 +140,7 @@ class BaseArithmeticOpsTests(BaseOpsUtil):
     series_array_exc: type[Exception] | None = TypeError
     divmod_exc: type[Exception] | None = TypeError
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
         # series & scalar
         if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype):
@@ -149,6 +150,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
         ser = pd.Series(data)
         self.check_opname(ser, op_name, ser.iloc[0])
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
         # frame & scalar
         if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype):
@@ -158,12 +160,14 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
         df = pd.DataFrame({"A": data})
         self.check_opname(df, op_name, data[0])
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_arith_series_with_array(self, data, all_arithmetic_operators):
         # ndarray & other series
         op_name = all_arithmetic_operators
         ser = pd.Series(data)
         self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)))
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_divmod(self, data):
         ser = pd.Series(data)
         self._check_divmod_op(ser, divmod, 1)
@@ -179,6 +183,7 @@ def test_divmod_series_array(self, data, data_for_twos):
         other = pd.Series(other)
         self._check_divmod_op(other, ops.rdivmod, ser)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_add_series_with_extension_array(self, data):
         # Check adding an ExtensionArray to a Series of the same dtype matches
         # the behavior of adding the arrays directly and then wrapping in a

diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
@@ -140,6 +140,7 @@ def test_map(self, data, na_action):
         result = data.map(lambda x: x, na_action=na_action)
         tm.assert_extension_array_equal(result, data)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
         # frame & scalar
         op_name = all_arithmetic_operators
@@ -151,6 +152,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
             )
         super().test_arith_frame_with_scalar(data, op_name)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request):
         op_name = all_arithmetic_operators
         if op_name == "__rmod__":

diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
@@ -19,6 +19,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas.core.dtypes.dtypes import NumpyEADtype
 
 import pandas as pd
@@ -255,13 +257,15 @@ def test_insert_invalid(self, data, invalid_scalar):
     frame_scalar_exc = None
     series_array_exc = None
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_divmod(self, data):
         divmod_exc = None
         if data.dtype.kind == "O":
             divmod_exc = TypeError
         self.divmod_exc = divmod_exc
         super().test_divmod(data)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_divmod_series_array(self, data):
         ser = pd.Series(data)
         exc = None
@@ -270,6 +274,7 @@ def test_divmod_series_array(self, data):
             self.divmod_exc = exc
         self._check_divmod_op(ser, divmod, data)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request):
         opname = all_arithmetic_operators
         series_scalar_exc = None
@@ -283,6 +288,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request)
         self.series_scalar_exc = series_scalar_exc
         super().test_arith_series_with_scalar(data, all_arithmetic_operators)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_arith_series_with_array(self, data, all_arithmetic_operators):
         opname = all_arithmetic_operators
         series_array_exc = None
@@ -291,6 +297,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
         self.series_array_exc = series_array_exc
         super().test_arith_series_with_array(data, all_arithmetic_operators)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
         opname = all_arithmetic_operators
         frame_scalar_exc = None

diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
@@ -6,6 +6,8 @@
 
 from pandas._config import using_string_dtype
 
+from pandas.compat import HAS_PYARROW
+
 from pandas.core.dtypes.common import is_scalar
 
 import pandas as pd
@@ -1018,6 +1020,9 @@ def test_where_producing_ea_cond_for_np_dtype():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
+)
 @pytest.mark.parametrize(
     "replacement", [0.001, True, "snake", None, datetime(2022, 5, 4)]
 )

diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py
@@ -10,6 +10,7 @@
 from pandas._config import using_string_dtype
 
 from pandas.compat import (
+    HAS_PYARROW,
     IS64,
     PYPY,
 )
@@ -520,7 +521,7 @@ def test_info_int_columns():
     assert result == expected
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
 def test_memory_usage_empty_no_warning():
     # GH#50066
     df = DataFrame(index=["a", "b"])

diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
@@ -6,10 +6,13 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas._libs.algos import (
     Infinity,
     NegInfinity,
 )
+from pandas.compat import HAS_PYARROW
 
 from pandas import (
     DataFrame,
@@ -464,9 +467,18 @@ def test_rank_inf_nans_na_option(
         ],
     )
     def test_rank_object_first(
-        self, frame_or_series, na_option, ascending, expected, using_infer_string
+        self,
+        request,
+        frame_or_series,
+        na_option,
+        ascending,
+        expected,
+        using_infer_string,
     ):
         obj = frame_or_series(["foo", "foo", None, "foo"])
+        if using_string_dtype() and not HAS_PYARROW and isinstance(obj, Series):
+            request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
+
         result = obj.rank(method="first", na_option=na_option, ascending=ascending)
         expected = frame_or_series(expected)
         if using_infer_string and isinstance(obj, Series):

diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
@@ -1,6 +1,10 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 import pandas._testing as tm
 
@@ -132,6 +136,9 @@ def test_data_frame_value_counts_dropna_true(nulls_fixture):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+)
 def test_data_frame_value_counts_dropna_false(nulls_fixture):
     # GH 41334
     df = pd.DataFrame(

diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
@@ -8,6 +8,8 @@
 from pandas._config import using_string_dtype
 from pandas._config.config import option_context
 
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -113,7 +115,9 @@ def test_not_hashable(self):
         with pytest.raises(TypeError, match=msg):
             hash(empty_frame)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="surrogates not allowed")
+    @pytest.mark.xfail(
+        using_string_dtype() and HAS_PYARROW, reason="surrogates not allowed"
+    )
     def test_column_name_contains_unicode_surrogate(self):
         # GH 25509
         colname = "\ud83d"

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -13,6 +13,8 @@
 
 from pandas._config import using_string_dtype
 
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -1542,7 +1544,9 @@ def test_comparisons(self, simple_frame, float_frame, func):
         with pytest.raises(ValueError, match=msg):
             func(simple_frame, simple_frame[:2])
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+    @pytest.mark.xfail(
+        using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne):
         # GH 11565
         df = DataFrame(

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -24,6 +24,7 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import lib
+from pandas.compat import HAS_PYARROW
 from pandas.compat.numpy import np_version_gt2
 from pandas.errors import IntCastingNaNError
 
@@ -299,7 +300,7 @@ def test_constructor_dtype_nocast_view_2d_array(self):
         df2 = DataFrame(df.values, dtype=df[0].dtype)
         assert df2._mgr.blocks[0].values.flags.c_contiguous
 
-    @pytest.mark.xfail(using_string_dtype(), reason="conversion copies")
+    @pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="conversion copies")
     def test_1d_object_array_does_not_copy(self):
         # https://github.com/pandas-dev/pandas/issues/39272
         arr = np.array(["a", "b"], dtype="object")

diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py
@@ -4,6 +4,10 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
+from pandas.compat import HAS_PYARROW
+
 from pandas import (
     CategoricalIndex,
     DataFrame,
@@ -96,6 +100,9 @@ def test_logical_ops_int_frame(self):
         res_ser = df1a_int["A"] | df1a_bool["A"]
         tm.assert_series_equal(res_ser, df1a_bool["A"])
 
+    @pytest.mark.xfail(
+        using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_logical_ops_invalid(self, using_infer_string):
         # GH#5808
 

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -226,6 +226,7 @@ def float_frame_with_na():
 class TestDataFrameAnalytics:
     # ---------------------------------------------------------------------
     # Reductions
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("axis", [0, 1])
     @pytest.mark.parametrize(
         "opname",
@@ -431,6 +432,7 @@ def test_stat_operators_attempt_obj_array(self, method, df, axis):
             expected[expected.isna()] = None
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"])
     def test_mixed_ops(self, op):
         # GH#16116
@@ -532,7 +534,7 @@ def test_mean_mixed_string_decimal(self):
         df = DataFrame(d)
 
         with pytest.raises(
-            TypeError, match="unsupported operand type|does not support"
+            TypeError, match="unsupported operand type|does not support|Cannot perform"
         ):
             df.mean()
         result = df[["A", "C"]].mean()
@@ -690,6 +692,7 @@ def test_mode_dropna(self, dropna, expected):
         expected = DataFrame(expected)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     def test_mode_sortwarning(self, using_infer_string):
         # Check for the warning that is raised when the mode
         # results cannot be sorted
@@ -979,7 +982,7 @@ def test_sum_mixed_datetime(self):
 
     def test_mean_corner(self, float_frame, float_string_frame):
         # unit test when have object data
-        msg = "Could not convert|does not support"
+        msg = "Could not convert|does not support|Cannot perform"
         with pytest.raises(TypeError, match=msg):
             float_string_frame.mean(axis=0)
 
@@ -1093,6 +1096,7 @@ def test_idxmin_empty(self, index, skipna, axis):
         expected = Series(dtype=index.dtype)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("numeric_only", [True, False])
     def test_idxmin_numeric_only(self, numeric_only):
         df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")})
@@ -1143,6 +1147,7 @@ def test_idxmax_empty(self, index, skipna, axis):
         expected = Series(dtype=index.dtype)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("numeric_only", [True, False])
     def test_idxmax_numeric_only(self, numeric_only):
         df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")})
@@ -1964,7 +1969,7 @@ def test_minmax_extensionarray(method, numeric_only):
 def test_frame_mixed_numeric_object_with_timestamp(ts_value):
     # GH 13912
     df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]})
-    with pytest.raises(TypeError, match="does not support operation"):
+    with pytest.raises(TypeError, match="does not support operation|Cannot perform"):
         df.sum()