From 63af5da968fb9a533422c7a42bd94e28f9b1e08d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 13 Nov 2023 13:21:41 -0800
Subject: [PATCH 1/2] TST: Skip pyarrow csv tests that raise ParseErrors

---
 .../io/parser/common/test_common_basic.py     | 31 ++++++++++---------
 .../tests/io/parser/common/test_data_list.py  |  4 +--
 .../io/parser/common/test_file_buffer_url.py  |  3 +-
 pandas/tests/io/parser/common/test_float.py   |  5 +--
 pandas/tests/io/parser/common/test_index.py   | 17 +++++-----
 pandas/tests/io/parser/common/test_inf.py     |  4 +--
 pandas/tests/io/parser/common/test_ints.py    |  5 +--
 .../io/parser/common/test_read_errors.py      |  3 +-
 pandas/tests/io/parser/conftest.py            | 20 ++++++++++++
 .../io/parser/dtypes/test_categorical.py      |  8 ++---
 pandas/tests/io/parser/dtypes/test_empty.py   | 18 +++++------
 pandas/tests/io/parser/test_encoding.py       | 11 ++++---
 pandas/tests/io/parser/test_header.py         | 15 ++++-----
 pandas/tests/io/parser/test_index_col.py      |  9 +++---
 pandas/tests/io/parser/test_mangle_dupes.py   | 12 +++----
 pandas/tests/io/parser/test_na_values.py      |  5 +--
 pandas/tests/io/parser/test_parse_dates.py    |  7 +++--
 pandas/tests/io/parser/test_quoting.py        |  3 +-
 pandas/tests/io/parser/test_skiprows.py       |  4 +--
 .../io/parser/usecols/test_parse_dates.py     |  3 +-
 .../io/parser/usecols/test_usecols_basic.py   |  9 +++---
 21 files changed, 115 insertions(+), 81 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py
index a2ffec45cfc7f..0c28db245de31 100644
--- a/pandas/tests/io/parser/common/test_common_basic.py
+++ b/pandas/tests/io/parser/common/test_common_basic.py
@@ -34,6 +34,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 def test_override_set_noconvert_columns():
@@ -137,7 +138,7 @@ def test_1000_sep(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: Found non-unique column index
 def test_unnamed_columns(all_parsers):
     data = """A,B,C,,
 1,2,3,4,5
@@ -278,7 +279,7 @@ def test_nrows_skipfooter_errors(all_parsers):
         parser.read_csv(StringIO(data), skipfooter=1, nrows=5)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_missing_trailing_delimiters(all_parsers):
     parser = all_parsers
     data = """A,B,C,D
@@ -366,7 +367,7 @@ def test_skip_initial_space(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_trailing_delimiters(all_parsers):
     # see gh-2442
     data = """A,B,C
@@ -398,7 +399,7 @@ def test_escapechar(all_parsers):
     tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: the 'pyarrow' engine does not support regex separators
 def test_ignore_leading_whitespace(all_parsers):
     # see gh-3374, gh-6607
     parser = all_parsers
@@ -409,7 +410,7 @@ def test_ignore_leading_whitespace(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 @pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]])
 def test_uneven_lines_with_usecols(all_parsers, usecols):
     # see gh-12203
@@ -432,7 +433,7 @@ def test_uneven_lines_with_usecols(all_parsers, usecols):
         tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 @pytest.mark.parametrize(
     "data,kwargs,expected",
     [
@@ -593,7 +594,7 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_whitespace_lines(all_parsers):
     parser = all_parsers
     data = """
@@ -609,7 +610,7 @@ def test_whitespace_lines(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: the 'pyarrow' engine does not support regex separators
 @pytest.mark.parametrize(
     "data,expected",
     [
@@ -707,7 +708,7 @@ def test_read_csv_and_table_sys_setprofile(all_parsers, read_func):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_first_row_bom(all_parsers):
     # see gh-26545
     parser = all_parsers
@@ -718,7 +719,7 @@ def test_first_row_bom(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_first_row_bom_unquoted(all_parsers):
     # see gh-36343
     parser = all_parsers
@@ -751,7 +752,7 @@ def test_blank_lines_between_header_and_data_rows(all_parsers, nrows):
     tm.assert_frame_equal(df, ref[:nrows])
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_no_header_two_extra_columns(all_parsers):
     # GH 26218
     column_names = ["one", "two", "three"]
@@ -852,7 +853,7 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
         parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_dict_keys_as_names(all_parsers):
     # GH: 36928
     data = "1,2"
@@ -865,7 +866,7 @@ def test_dict_keys_as_names(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 0
 def test_encoding_surrogatepass(all_parsers):
     # GH39017
     parser = all_parsers
@@ -893,7 +894,7 @@ def test_malformed_second_line(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_short_single_line(all_parsers):
     # GH 47566
     parser = all_parsers
@@ -904,7 +905,7 @@ def test_short_single_line(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: Length mismatch: Expected axis has 2 elements
 def test_short_multi_line(all_parsers):
     # GH 47566
     parser = all_parsers
diff --git a/pandas/tests/io/parser/common/test_data_list.py b/pandas/tests/io/parser/common/test_data_list.py
index 3b0ff9e08d349..5c798316e2cea 100644
--- a/pandas/tests/io/parser/common/test_data_list.py
+++ b/pandas/tests/io/parser/common/test_data_list.py
@@ -16,10 +16,10 @@
     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
 )
 
-xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_read_data_list(all_parsers):
     parser = all_parsers
     kwargs = {"index_col": 0}
diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index 7fd86e956b543..a6e68cb984ef4 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -27,6 +27,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 @pytest.mark.network
@@ -431,7 +432,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
             assert not reader.handles.handle.closed
 
 
-@xfail_pyarrow  # ParserError: Empty CSV file
+@skip_pyarrow  # ParserError: Empty CSV file
 def test_file_descriptor_leak(all_parsers, using_copy_on_write):
     # GH 31488
     parser = all_parsers
diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py
index 63ad3bcb249ea..4b23774ee2d5b 100644
--- a/pandas/tests/io/parser/common/test_float.py
+++ b/pandas/tests/io/parser/common/test_float.py
@@ -16,9 +16,10 @@
     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
 )
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
-@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
+@skip_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_float_parser(all_parsers):
     # see gh-9565
     parser = all_parsers
@@ -50,7 +51,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
 @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
 def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
     # GH#38753
diff --git a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py
index 7df14043f478c..038c684c90c9e 100644
--- a/pandas/tests/io/parser/common/test_index.py
+++ b/pandas/tests/io/parser/common/test_index.py
@@ -20,6 +20,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 @pytest.mark.parametrize(
@@ -108,7 +109,7 @@ def test_multi_index_no_level_names(all_parsers, index_col):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_multi_index_no_level_names_implicit(all_parsers):
     parser = all_parsers
     data = """A,B,C,D
@@ -142,7 +143,7 @@ def test_multi_index_no_level_names_implicit(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # TypeError: an integer is required
 @pytest.mark.parametrize(
     "data,expected,header",
     [
@@ -164,7 +165,7 @@ def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: DataFrame.columns are different
 def test_no_unnamed_index(all_parsers):
     parser = all_parsers
     data = """ id c0 c1 c2
@@ -207,7 +208,7 @@ def test_read_duplicate_index_explicit(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_read_duplicate_index_implicit(all_parsers):
     data = """A,B,C,D
 foo,2,3,4,5
@@ -235,7 +236,7 @@ def test_read_duplicate_index_implicit(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_read_csv_no_index_name(all_parsers, csv_dir_path):
     parser = all_parsers
     csv2 = os.path.join(csv_dir_path, "test2.csv")
@@ -263,7 +264,7 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_empty_with_index(all_parsers):
     # see gh-10184
     data = "x,y"
@@ -275,7 +276,7 @@ def test_empty_with_index(all_parsers):
 
 
 # CSV parse error: Empty CSV file or block: cannot infer number of columns
-@xfail_pyarrow
+@skip_pyarrow
 def test_empty_with_multi_index(all_parsers):
     # see gh-10467
     data = "x,y,z"
@@ -289,7 +290,7 @@ def test_empty_with_multi_index(all_parsers):
 
 
 # CSV parse error: Empty CSV file or block: cannot infer number of columns
-@xfail_pyarrow
+@skip_pyarrow
 def test_empty_with_reversed_multi_index(all_parsers):
     data = "x,y,z"
     parser = all_parsers
diff --git a/pandas/tests/io/parser/common/test_inf.py b/pandas/tests/io/parser/common/test_inf.py
index e1dc87ed0071e..74596b178d35d 100644
--- a/pandas/tests/io/parser/common/test_inf.py
+++ b/pandas/tests/io/parser/common/test_inf.py
@@ -20,7 +20,7 @@
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: DataFrame.index are different
 @pytest.mark.parametrize("na_filter", [True, False])
 def test_inf_parsing(all_parsers, na_filter):
     parser = all_parsers
@@ -44,7 +44,7 @@ def test_inf_parsing(all_parsers, na_filter):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: DataFrame.index are different
 @pytest.mark.parametrize("na_filter", [True, False])
 def test_infinity_parsing(all_parsers, na_filter):
     parser = all_parsers
diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py
index 41bfbb55d818f..a3167346c64ef 100644
--- a/pandas/tests/io/parser/common/test_ints.py
+++ b/pandas/tests/io/parser/common/test_ints.py
@@ -18,6 +18,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 def test_int_conversion(all_parsers):
@@ -179,7 +180,7 @@ def test_int64_overflow(all_parsers, conv, request):
             parser.read_csv(StringIO(data), converters={"ID": conv})
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 @pytest.mark.parametrize(
     "val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
 )
@@ -193,7 +194,7 @@ def test_int64_uint64_range(all_parsers, val):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 @pytest.mark.parametrize(
     "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
 )
diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py
index 7e841ed8b4ebd..f3794c056a256 100644
--- a/pandas/tests/io/parser/common/test_read_errors.py
+++ b/pandas/tests/io/parser/common/test_read_errors.py
@@ -22,6 +22,7 @@
 import pandas._testing as tm
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 def test_empty_decimal_marker(all_parsers):
@@ -139,7 +140,7 @@ def test_catch_too_many_names(all_parsers):
             parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 @pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
 def test_raise_on_no_columns(all_parsers, nrows):
     parser = all_parsers
diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py
index 471f525e229e5..202fbbc1206c7 100644
--- a/pandas/tests/io/parser/conftest.py
+++ b/pandas/tests/io/parser/conftest.py
@@ -282,6 +282,8 @@ def numeric_decimal(request):
 def pyarrow_xfail(request):
     """
     Fixture that xfails a test if the engine is pyarrow.
+
+    Use if failure is due to unsupported keywords or inconsistent results.
     """
     if "all_parsers" in request.fixturenames:
         parser = request.getfixturevalue("all_parsers")
@@ -293,3 +295,21 @@ def pyarrow_xfail(request):
     if parser.engine == "pyarrow":
         mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
         request.applymarker(mark)
+
+
+@pytest.fixture
+def pyarrow_skip(request):
+    """
+    Fixture that skips a test if the engine is pyarrow.
+
+    Use if failure is do pyarrow failing to parse the input.
+    """
+    if "all_parsers" in request.fixturenames:
+        parser = request.getfixturevalue("all_parsers")
+    elif "all_parsers_all_precisions" in request.fixturenames:
+        # Return value is tuple of (engine, precision)
+        parser = request.getfixturevalue("all_parsers_all_precisions")[0]
+    else:
+        return
+    if parser.engine == "pyarrow":
+        pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py
index b1b35447b60c2..f4aff14a5ce32 100644
--- a/pandas/tests/io/parser/dtypes/test_categorical.py
+++ b/pandas/tests/io/parser/dtypes/test_categorical.py
@@ -27,7 +27,7 @@
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
 @pytest.mark.parametrize(
     "dtype",
     [
@@ -76,7 +76,7 @@ def test_categorical_dtype_single(all_parsers, dtype, request):
     tm.assert_frame_equal(actual, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
 def test_categorical_dtype_unsorted(all_parsers):
     # see gh-10153
     parser = all_parsers
@@ -95,7 +95,7 @@ def test_categorical_dtype_unsorted(all_parsers):
     tm.assert_frame_equal(actual, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
 def test_categorical_dtype_missing(all_parsers):
     # see gh-10153
     parser = all_parsers
@@ -114,7 +114,7 @@ def test_categorical_dtype_missing(all_parsers):
     tm.assert_frame_equal(actual, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
 @pytest.mark.slow
 def test_categorical_dtype_high_cardinality_numeric(all_parsers, monkeypatch):
     # see gh-18186
diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py
index 8759c52485533..f34385b190c5f 100644
--- a/pandas/tests/io/parser/dtypes/test_empty.py
+++ b/pandas/tests/io/parser/dtypes/test_empty.py
@@ -17,10 +17,10 @@
 )
 import pandas._testing as tm
 
-xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_dtype_all_columns_empty(all_parsers):
     # see gh-12048
     parser = all_parsers
@@ -30,7 +30,7 @@ def test_dtype_all_columns_empty(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_empty_pass_dtype(all_parsers):
     parser = all_parsers
 
@@ -43,7 +43,7 @@ def test_empty_pass_dtype(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_empty_with_index_pass_dtype(all_parsers):
     parser = all_parsers
 
@@ -58,7 +58,7 @@ def test_empty_with_index_pass_dtype(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_empty_with_multi_index_pass_dtype(all_parsers):
     parser = all_parsers
 
@@ -75,7 +75,7 @@ def test_empty_with_multi_index_pass_dtype(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
     parser = all_parsers
 
@@ -88,7 +88,7 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
     parser = all_parsers
 
@@ -101,7 +101,7 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers):
     # see gh-9424
     parser = all_parsers
@@ -171,7 +171,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers):
         ),
     ],
 )
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_empty_dtype(all_parsers, dtype, expected):
     # see gh-14712
     parser = all_parsers
diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py
index 9e1200c142d6b..3580c040688d8 100644
--- a/pandas/tests/io/parser/test_encoding.py
+++ b/pandas/tests/io/parser/test_encoding.py
@@ -24,6 +24,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 def test_bytes_io_input(all_parsers):
@@ -37,7 +38,7 @@ def test_bytes_io_input(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_read_csv_unicode(all_parsers):
     parser = all_parsers
     data = BytesIO("\u0141aski, Jan;1".encode())
@@ -47,7 +48,7 @@ def test_read_csv_unicode(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 @pytest.mark.parametrize("sep", [",", "\t"])
 @pytest.mark.parametrize("encoding", ["utf-16", "utf-16le", "utf-16be"])
 def test_utf16_bom_skiprows(all_parsers, sep, encoding):
@@ -237,7 +238,7 @@ def test_parse_encoded_special_characters(encoding):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: The 'memory_map' option is not supported
 @pytest.mark.parametrize("encoding", ["utf-8", None, "utf-16", "cp1255", "latin-1"])
 def test_encoding_memory_map(all_parsers, encoding):
     # GH40986
@@ -255,7 +256,7 @@ def test_encoding_memory_map(all_parsers, encoding):
     tm.assert_frame_equal(df, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: The 'memory_map' option is not supported
 def test_chunk_splits_multibyte_char(all_parsers):
     """
     Chunk splits a multibyte character with memory_map=True
@@ -275,7 +276,7 @@ def test_chunk_splits_multibyte_char(all_parsers):
     tm.assert_frame_equal(dfr, df)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: The 'memory_map' option is not supported
 def test_readcsv_memmap_utf8(all_parsers):
     """
     GH 43787
diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py
index 2edb389a0c830..f55f8497f318c 100644
--- a/pandas/tests/io/parser/test_header.py
+++ b/pandas/tests/io/parser/test_header.py
@@ -23,6 +23,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 @xfail_pyarrow  # TypeError: an integer is required
@@ -79,7 +80,7 @@ def test_bool_header_arg(all_parsers, header):
         parser.read_csv(StringIO(data), header=header)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: DataFrame are different
 def test_header_with_index_col(all_parsers):
     parser = all_parsers
     data = """foo,1,2,3
@@ -183,7 +184,7 @@ def test_header_multi_index_invalid(all_parsers, kwargs, msg):
 _TestTuple = namedtuple("_TestTuple", ["first", "second"])
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # TypeError: an integer is required
 @pytest.mark.parametrize(
     "kwargs",
     [
@@ -231,7 +232,7 @@ def test_header_multi_index_common_format1(all_parsers, kwargs):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # TypeError: an integer is required
 @pytest.mark.parametrize(
     "kwargs",
     [
@@ -278,7 +279,7 @@ def test_header_multi_index_common_format2(all_parsers, kwargs):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # TypeError: an integer is required
 @pytest.mark.parametrize(
     "kwargs",
     [
@@ -419,7 +420,7 @@ def test_header_names_backward_compat(all_parsers, data, header, request):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block: cannot infer
+@skip_pyarrow  # CSV parse error: Empty CSV file or block: cannot infer
 @pytest.mark.parametrize("kwargs", [{}, {"index_col": False}])
 def test_read_only_header_no_rows(all_parsers, kwargs):
     # See gh-7773
@@ -561,7 +562,7 @@ def test_multi_index_unnamed(all_parsers, index_col, columns):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Expected 2 columns, got 3
+@skip_pyarrow  # CSV parse error: Expected 2 columns, got 3
 def test_names_longer_than_header_but_equal_with_data_rows(all_parsers):
     # GH#38453
     parser = all_parsers
@@ -622,7 +623,7 @@ def test_read_csv_multi_header_length_check(all_parsers):
         parser.read_csv(StringIO(case), header=[0, 2])
 
 
-@xfail_pyarrow  # CSV parse error: Expected 3 columns, got 2
+@skip_pyarrow  # CSV parse error: Expected 3 columns, got 2
 def test_header_none_and_implicit_index(all_parsers):
     # GH#22144
     parser = all_parsers
diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
index b938b129ac38d..ba15d061b2deb 100644
--- a/pandas/tests/io/parser/test_index_col.py
+++ b/pandas/tests/io/parser/test_index_col.py
@@ -20,6 +20,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 @pytest.mark.parametrize("with_header", [True, False])
@@ -76,7 +77,7 @@ def test_index_col_is_true(all_parsers):
         parser.read_csv(StringIO(data), index_col=True)
 
 
-@xfail_pyarrow  # CSV parse error: Expected 3 columns, got 4
+@skip_pyarrow  # CSV parse error: Expected 3 columns, got 4
 def test_infer_index_col(all_parsers):
     data = """A,B,C
 foo,1,2,3
@@ -94,7 +95,7 @@ def test_infer_index_col(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 @pytest.mark.parametrize(
     "index_col,kwargs",
     [
@@ -143,7 +144,7 @@ def test_index_col_empty_data(all_parsers, index_col, kwargs):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_empty_with_index_col_false(all_parsers):
     # see gh-10413
     data = "x,y"
@@ -317,7 +318,7 @@ def test_multiindex_columns_index_col_with_data(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Empty CSV file or block
+@skip_pyarrow  # CSV parse error: Empty CSV file or block
 def test_infer_types_boolean_sum(all_parsers):
     # GH#44079
     parser = all_parsers
diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py
index 7d148ae6c5a27..1d245f81f027c 100644
--- a/pandas/tests/io/parser/test_mangle_dupes.py
+++ b/pandas/tests/io/parser/test_mangle_dupes.py
@@ -18,7 +18,7 @@
 )
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: Found non-unique column index
 def test_basic(all_parsers):
     parser = all_parsers
 
@@ -29,7 +29,7 @@ def test_basic(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: Found non-unique column index
 def test_basic_names(all_parsers):
     # See gh-7160
     parser = all_parsers
@@ -50,7 +50,7 @@ def test_basic_names_raise(all_parsers):
         parser.read_csv(StringIO(data), names=["a", "b", "a"])
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: Found non-unique column index
 @pytest.mark.parametrize(
     "data,expected",
     [
@@ -118,7 +118,7 @@ def test_thorough_mangle_names(all_parsers, data, names, expected):
         parser.read_csv(StringIO(data), names=names)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: DataFrame.columns are different
 def test_mangled_unnamed_placeholders(all_parsers):
     # xref gh-13017
     orig_key = "0"
@@ -141,7 +141,7 @@ def test_mangled_unnamed_placeholders(all_parsers):
         tm.assert_frame_equal(df, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: Found non-unique column index
 def test_mangle_dupe_cols_already_exists(all_parsers):
     # GH#14704
     parser = all_parsers
@@ -155,7 +155,7 @@ def test_mangle_dupe_cols_already_exists(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # ValueError: Found non-unique column index
 def test_mangle_dupe_cols_already_exists_unnamed_col(all_parsers):
     # GH#14704
     parser = all_parsers
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
index 59dae1eaa7e6c..437a5fb5e9f09 100644
--- a/pandas/tests/io/parser/test_na_values.py
+++ b/pandas/tests/io/parser/test_na_values.py
@@ -21,6 +21,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 def test_string_nas(all_parsers):
@@ -398,7 +399,7 @@ def test_na_values_na_filter_override(all_parsers, na_filter, row_data):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Expected 8 columns, got 5:
+@skip_pyarrow  # CSV parse error: Expected 8 columns, got 5:
 def test_na_trailing_columns(all_parsers):
     parser = all_parsers
     data = """Date,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax
@@ -630,7 +631,7 @@ def test_nan_multi_index(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # Failed: DID NOT RAISE <class 'ValueError'>
 def test_bool_and_nan_to_bool(all_parsers):
     # GH#42808
     parser = all_parsers
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 47e654fc606af..70d9171fa3c22 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -41,6 +41,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 @xfail_pyarrow
@@ -786,7 +787,7 @@ def test_nat_parse(all_parsers):
         tm.assert_frame_equal(result, df)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_csv_custom_parser(all_parsers):
     data = """A,B,C
 20090101,a,1,2
@@ -806,7 +807,7 @@ def test_csv_custom_parser(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@skip_pyarrow
 def test_parse_dates_implicit_first_col(all_parsers):
     data = """A,B,C
 20090101,a,1,2
@@ -2101,7 +2102,7 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst):
     tm.assert_index_equal(expected, res)
 
 
-@xfail_pyarrow  # CSV parse error: Expected 3 columns, got 4
+@skip_pyarrow  # CSV parse error: Expected 3 columns, got 4
 def test_infer_first_column_as_index(all_parsers):
     # GH#11019
     parser = all_parsers
diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py
index a677d9caa4b19..0a1ba0252f106 100644
--- a/pandas/tests/io/parser/test_quoting.py
+++ b/pandas/tests/io/parser/test_quoting.py
@@ -18,6 +18,7 @@
     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
 )
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 @pytest.mark.parametrize(
@@ -31,7 +32,7 @@
         ({"quotechar": 2}, '"quotechar" must be string( or None)?, not int'),
     ],
 )
-@xfail_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
+@skip_pyarrow  # ParserError: CSV parse error: Empty CSV file or block
 def test_bad_quote_char(all_parsers, kwargs, msg):
     data = "1,2,3"
     parser = all_parsers
diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py
index 9146af3f969e6..47c3739c979a3 100644
--- a/pandas/tests/io/parser/test_skiprows.py
+++ b/pandas/tests/io/parser/test_skiprows.py
@@ -67,7 +67,7 @@ def test_deep_skip_rows(all_parsers):
     tm.assert_frame_equal(result, condensed_result)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: DataFrame are different
 def test_skip_rows_blank(all_parsers):
     # see gh-9832
     parser = all_parsers
@@ -225,7 +225,7 @@ def test_skiprows_lineterminator(all_parsers, lineterminator, request):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
+@xfail_pyarrow  # AssertionError: DataFrame are different
 def test_skiprows_infield_quote(all_parsers):
     # see gh-14459
     parser = all_parsers
diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py
index bcb1c6af80df6..042c3814ef72a 100644
--- a/pandas/tests/io/parser/usecols/test_parse_dates.py
+++ b/pandas/tests/io/parser/usecols/test_parse_dates.py
@@ -17,6 +17,7 @@
     "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
 )
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 
 @xfail_pyarrow  # TypeError: expected bytes, int found
@@ -38,7 +39,7 @@ def test_usecols_with_parse_dates(all_parsers, usecols):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # pyarrow.lib.ArrowKeyError: Column 'fdate' in include_columns
+@skip_pyarrow  # pyarrow.lib.ArrowKeyError: Column 'fdate' in include_columns
 def test_usecols_with_parse_dates2(all_parsers):
     # see gh-13604
     parser = all_parsers
diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py
index 7a620768040a7..055be81d2996d 100644
--- a/pandas/tests/io/parser/usecols/test_usecols_basic.py
+++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py
@@ -30,6 +30,7 @@
 )
 
 xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
+skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
 
 pytestmark = pytest.mark.filterwarnings(
     "ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning"
@@ -148,7 +149,7 @@ def test_usecols_single_string(all_parsers):
         parser.read_csv(StringIO(data), usecols="foo")
 
 
-@xfail_pyarrow  # CSV parse error in one case, AttributeError in another
+@skip_pyarrow  # CSV parse error in one case, AttributeError in another
 @pytest.mark.parametrize(
     "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"]
 )
@@ -191,7 +192,7 @@ def test_usecols_index_col_conflict2(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Expected 3 columns, got 4
+@skip_pyarrow  # CSV parse error: Expected 3 columns, got 4
 def test_usecols_implicit_index_col(all_parsers):
     # see gh-2654
     parser = all_parsers
@@ -337,7 +338,7 @@ def test_callable_usecols(all_parsers, usecols, expected):
 
 
 # ArrowKeyError: Column 'fa' in include_columns does not exist in CSV file
-@xfail_pyarrow
+@skip_pyarrow
 @pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]])
 def test_incomplete_first_row(all_parsers, usecols):
     # see gh-6710
@@ -350,7 +351,7 @@ def test_incomplete_first_row(all_parsers, usecols):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow  # CSV parse error: Expected 3 columns, got 4
+@skip_pyarrow  # CSV parse error: Expected 3 columns, got 4
 @pytest.mark.parametrize(
     "data,usecols,kwargs,expected",
     [

From a59b01d5eb5de9116b20a48c111c6a335a8c70a9 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 13 Nov 2023 13:24:33 -0800
Subject: [PATCH 2/2] Clarify

---
 pandas/tests/io/parser/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py
index 202fbbc1206c7..eb7835bb27372 100644
--- a/pandas/tests/io/parser/conftest.py
+++ b/pandas/tests/io/parser/conftest.py
@@ -302,7 +302,7 @@ def pyarrow_skip(request):
     """
     Fixture that skips a test if the engine is pyarrow.
 
-    Use if failure is do pyarrow failing to parse the input.
+    Use if failure is due to a parsing failure from pyarrow.csv.read_csv
     """
     if "all_parsers" in request.fixturenames:
         parser = request.getfixturevalue("all_parsers")