From 63af5da968fb9a533422c7a42bd94e28f9b1e08d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 13 Nov 2023 13:21:41 -0800 Subject: [PATCH 1/2] TST: Skip pyarrow csv tests that raise ParseErrors --- .../io/parser/common/test_common_basic.py | 31 ++++++++++--------- .../tests/io/parser/common/test_data_list.py | 4 +-- .../io/parser/common/test_file_buffer_url.py | 3 +- pandas/tests/io/parser/common/test_float.py | 5 +-- pandas/tests/io/parser/common/test_index.py | 17 +++++----- pandas/tests/io/parser/common/test_inf.py | 4 +-- pandas/tests/io/parser/common/test_ints.py | 5 +-- .../io/parser/common/test_read_errors.py | 3 +- pandas/tests/io/parser/conftest.py | 20 ++++++++++++ .../io/parser/dtypes/test_categorical.py | 8 ++--- pandas/tests/io/parser/dtypes/test_empty.py | 18 +++++------ pandas/tests/io/parser/test_encoding.py | 11 ++++--- pandas/tests/io/parser/test_header.py | 15 ++++----- pandas/tests/io/parser/test_index_col.py | 9 +++--- pandas/tests/io/parser/test_mangle_dupes.py | 12 +++---- pandas/tests/io/parser/test_na_values.py | 5 +-- pandas/tests/io/parser/test_parse_dates.py | 7 +++-- pandas/tests/io/parser/test_quoting.py | 3 +- pandas/tests/io/parser/test_skiprows.py | 4 +-- .../io/parser/usecols/test_parse_dates.py | 3 +- .../io/parser/usecols/test_usecols_basic.py | 9 +++--- 21 files changed, 115 insertions(+), 81 deletions(-) diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index a2ffec45cfc7f..0c28db245de31 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -34,6 +34,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") def test_override_set_noconvert_columns(): @@ -137,7 +138,7 @@ def test_1000_sep(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # 
ValueError: Found non-unique column index def test_unnamed_columns(all_parsers): data = """A,B,C,, 1,2,3,4,5 @@ -278,7 +279,7 @@ def test_nrows_skipfooter_errors(all_parsers): parser.read_csv(StringIO(data), skipfooter=1, nrows=5) -@xfail_pyarrow +@skip_pyarrow def test_missing_trailing_delimiters(all_parsers): parser = all_parsers data = """A,B,C,D @@ -366,7 +367,7 @@ def test_skip_initial_space(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_trailing_delimiters(all_parsers): # see gh-2442 data = """A,B,C @@ -398,7 +399,7 @@ def test_escapechar(all_parsers): tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"])) -@xfail_pyarrow +@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators def test_ignore_leading_whitespace(all_parsers): # see gh-3374, gh-6607 parser = all_parsers @@ -409,7 +410,7 @@ def test_ignore_leading_whitespace(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]]) def test_uneven_lines_with_usecols(all_parsers, usecols): # see gh-12203 @@ -432,7 +433,7 @@ def test_uneven_lines_with_usecols(all_parsers, usecols): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize( "data,kwargs,expected", [ @@ -593,7 +594,7 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_whitespace_lines(all_parsers): parser = all_parsers data = """ @@ -609,7 +610,7 @@ def test_whitespace_lines(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators @pytest.mark.parametrize( "data,expected", [ @@ -707,7 +708,7 @@ def test_read_csv_and_table_sys_setprofile(all_parsers, read_func): tm.assert_frame_equal(result, expected) -@xfail_pyarrow 
+@skip_pyarrow def test_first_row_bom(all_parsers): # see gh-26545 parser = all_parsers @@ -718,7 +719,7 @@ def test_first_row_bom(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_first_row_bom_unquoted(all_parsers): # see gh-36343 parser = all_parsers @@ -751,7 +752,7 @@ def test_blank_lines_between_header_and_data_rows(all_parsers, nrows): tm.assert_frame_equal(df, ref[:nrows]) -@xfail_pyarrow +@skip_pyarrow def test_no_header_two_extra_columns(all_parsers): # GH 26218 column_names = ["one", "two", "three"] @@ -852,7 +853,7 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): parser.read_table(f, delim_whitespace=True, delimiter=delimiter) -@xfail_pyarrow +@skip_pyarrow def test_dict_keys_as_names(all_parsers): # GH: 36928 data = "1,2" @@ -865,7 +866,7 @@ def test_dict_keys_as_names(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 0 def test_encoding_surrogatepass(all_parsers): # GH39017 parser = all_parsers @@ -893,7 +894,7 @@ def test_malformed_second_line(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_short_single_line(all_parsers): # GH 47566 parser = all_parsers @@ -904,7 +905,7 @@ def test_short_single_line(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # ValueError: Length mismatch: Expected axis has 2 elements def test_short_multi_line(all_parsers): # GH 47566 parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_data_list.py b/pandas/tests/io/parser/common/test_data_list.py index 3b0ff9e08d349..5c798316e2cea 100644 --- a/pandas/tests/io/parser/common/test_data_list.py +++ b/pandas/tests/io/parser/common/test_data_list.py @@ -16,10 +16,10 @@ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) -xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = 
pytest.mark.usefixtures("pyarrow_skip") -@xfail_pyarrow +@skip_pyarrow def test_read_data_list(all_parsers): parser = all_parsers kwargs = {"index_col": 0} diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 7fd86e956b543..a6e68cb984ef4 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -27,6 +27,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @pytest.mark.network @@ -431,7 +432,7 @@ def test_context_manageri_user_provided(all_parsers, datapath): assert not reader.handles.handle.closed -@xfail_pyarrow # ParserError: Empty CSV file +@skip_pyarrow # ParserError: Empty CSV file def test_file_descriptor_leak(all_parsers, using_copy_on_write): # GH 31488 parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_float.py b/pandas/tests/io/parser/common/test_float.py index 63ad3bcb249ea..4b23774ee2d5b 100644 --- a/pandas/tests/io/parser/common/test_float.py +++ b/pandas/tests/io/parser/common/test_float.py @@ -16,9 +16,10 @@ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") -@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block +@skip_pyarrow # ParserError: CSV parse error: Empty CSV file or block def test_float_parser(all_parsers): # see gh-9565 parser = all_parsers @@ -50,7 +51,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different @pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999]) def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request): # GH#38753 diff --git 
a/pandas/tests/io/parser/common/test_index.py b/pandas/tests/io/parser/common/test_index.py index 7df14043f478c..038c684c90c9e 100644 --- a/pandas/tests/io/parser/common/test_index.py +++ b/pandas/tests/io/parser/common/test_index.py @@ -20,6 +20,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @pytest.mark.parametrize( @@ -108,7 +109,7 @@ def test_multi_index_no_level_names(all_parsers, index_col): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_multi_index_no_level_names_implicit(all_parsers): parser = all_parsers data = """A,B,C,D @@ -142,7 +143,7 @@ def test_multi_index_no_level_names_implicit(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # TypeError: an integer is required @pytest.mark.parametrize( "data,expected,header", [ @@ -164,7 +165,7 @@ def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: DataFrame.columns are different def test_no_unnamed_index(all_parsers): parser = all_parsers data = """ id c0 c1 c2 @@ -207,7 +208,7 @@ def test_read_duplicate_index_explicit(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_read_duplicate_index_implicit(all_parsers): data = """A,B,C,D foo,2,3,4,5 @@ -235,7 +236,7 @@ def test_read_duplicate_index_implicit(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_read_csv_no_index_name(all_parsers, csv_dir_path): parser = all_parsers csv2 = os.path.join(csv_dir_path, "test2.csv") @@ -263,7 +264,7 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_empty_with_index(all_parsers): # see gh-10184 data = "x,y" @@ -275,7 +276,7 @@ def test_empty_with_index(all_parsers): # CSV parse error: Empty CSV file 
or block: cannot infer number of columns -@xfail_pyarrow +@skip_pyarrow def test_empty_with_multi_index(all_parsers): # see gh-10467 data = "x,y,z" @@ -289,7 +290,7 @@ def test_empty_with_multi_index(all_parsers): # CSV parse error: Empty CSV file or block: cannot infer number of columns -@xfail_pyarrow +@skip_pyarrow def test_empty_with_reversed_multi_index(all_parsers): data = "x,y,z" parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_inf.py b/pandas/tests/io/parser/common/test_inf.py index e1dc87ed0071e..74596b178d35d 100644 --- a/pandas/tests/io/parser/common/test_inf.py +++ b/pandas/tests/io/parser/common/test_inf.py @@ -20,7 +20,7 @@ xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") -@xfail_pyarrow +@xfail_pyarrow # AssertionError: DataFrame.index are different @pytest.mark.parametrize("na_filter", [True, False]) def test_inf_parsing(all_parsers, na_filter): parser = all_parsers @@ -44,7 +44,7 @@ def test_inf_parsing(all_parsers, na_filter): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: DataFrame.index are different @pytest.mark.parametrize("na_filter", [True, False]) def test_infinity_parsing(all_parsers, na_filter): parser = all_parsers diff --git a/pandas/tests/io/parser/common/test_ints.py b/pandas/tests/io/parser/common/test_ints.py index 41bfbb55d818f..a3167346c64ef 100644 --- a/pandas/tests/io/parser/common/test_ints.py +++ b/pandas/tests/io/parser/common/test_ints.py @@ -18,6 +18,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") def test_int_conversion(all_parsers): @@ -179,7 +180,7 @@ def test_int64_overflow(all_parsers, conv, request): parser.read_csv(StringIO(data), converters={"ID": conv}) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block @pytest.mark.parametrize( "val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min] ) 
@@ -193,7 +194,7 @@ def test_int64_uint64_range(all_parsers, val): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block @pytest.mark.parametrize( "val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1] ) diff --git a/pandas/tests/io/parser/common/test_read_errors.py b/pandas/tests/io/parser/common/test_read_errors.py index 7e841ed8b4ebd..f3794c056a256 100644 --- a/pandas/tests/io/parser/common/test_read_errors.py +++ b/pandas/tests/io/parser/common/test_read_errors.py @@ -22,6 +22,7 @@ import pandas._testing as tm xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") def test_empty_decimal_marker(all_parsers): @@ -139,7 +140,7 @@ def test_catch_too_many_names(all_parsers): parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"]) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block @pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5]) def test_raise_on_no_columns(all_parsers, nrows): parser = all_parsers diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 471f525e229e5..202fbbc1206c7 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -282,6 +282,8 @@ def numeric_decimal(request): def pyarrow_xfail(request): """ Fixture that xfails a test if the engine is pyarrow. + + Use if failure is due to unsupported keywords or inconsistent results. """ if "all_parsers" in request.fixturenames: parser = request.getfixturevalue("all_parsers") @@ -293,3 +295,21 @@ def pyarrow_xfail(request): if parser.engine == "pyarrow": mark = pytest.mark.xfail(reason="pyarrow doesn't support this.") request.applymarker(mark) + + +@pytest.fixture +def pyarrow_skip(request): + """ + Fixture that skips a test if the engine is pyarrow. 
+ + Use if failure is do pyarrow failing to parse the input. + """ + if "all_parsers" in request.fixturenames: + parser = request.getfixturevalue("all_parsers") + elif "all_parsers_all_precisions" in request.fixturenames: + # Return value is tuple of (engine, precision) + parser = request.getfixturevalue("all_parsers_all_precisions")[0] + else: + return + if parser.engine == "pyarrow": + pytest.skip(reason="https://github.com/apache/arrow/issues/38676") diff --git a/pandas/tests/io/parser/dtypes/test_categorical.py b/pandas/tests/io/parser/dtypes/test_categorical.py index b1b35447b60c2..f4aff14a5ce32 100644 --- a/pandas/tests/io/parser/dtypes/test_categorical.py +++ b/pandas/tests/io/parser/dtypes/test_categorical.py @@ -27,7 +27,7 @@ xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") -@xfail_pyarrow +@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different @pytest.mark.parametrize( "dtype", [ @@ -76,7 +76,7 @@ def test_categorical_dtype_single(all_parsers, dtype, request): tm.assert_frame_equal(actual, expected) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different def test_categorical_dtype_unsorted(all_parsers): # see gh-10153 parser = all_parsers @@ -95,7 +95,7 @@ def test_categorical_dtype_unsorted(all_parsers): tm.assert_frame_equal(actual, expected) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different def test_categorical_dtype_missing(all_parsers): # see gh-10153 parser = all_parsers @@ -114,7 +114,7 @@ def test_categorical_dtype_missing(all_parsers): tm.assert_frame_equal(actual, expected) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different @pytest.mark.slow def test_categorical_dtype_high_cardinality_numeric(all_parsers, monkeypatch): # see gh-18186 diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py index 8759c52485533..f34385b190c5f 
100644 --- a/pandas/tests/io/parser/dtypes/test_empty.py +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -17,10 +17,10 @@ ) import pandas._testing as tm -xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_dtype_all_columns_empty(all_parsers): # see gh-12048 parser = all_parsers @@ -30,7 +30,7 @@ def test_dtype_all_columns_empty(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_empty_pass_dtype(all_parsers): parser = all_parsers @@ -43,7 +43,7 @@ def test_empty_pass_dtype(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_index_pass_dtype(all_parsers): parser = all_parsers @@ -58,7 +58,7 @@ def test_empty_with_index_pass_dtype(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_multi_index_pass_dtype(all_parsers): parser = all_parsers @@ -75,7 +75,7 @@ def test_empty_with_multi_index_pass_dtype(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): parser = all_parsers @@ -88,7 +88,7 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): parser = 
all_parsers @@ -101,7 +101,7 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): # see gh-9424 parser = all_parsers @@ -171,7 +171,7 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): ), ], ) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_empty_dtype(all_parsers, dtype, expected): # see gh-14712 parser = all_parsers diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 9e1200c142d6b..3580c040688d8 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -24,6 +24,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") def test_bytes_io_input(all_parsers): @@ -37,7 +38,7 @@ def test_bytes_io_input(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_read_csv_unicode(all_parsers): parser = all_parsers data = BytesIO("\u0141aski, Jan;1".encode()) @@ -47,7 +48,7 @@ def test_read_csv_unicode(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize("sep", [",", "\t"]) @pytest.mark.parametrize("encoding", ["utf-16", "utf-16le", "utf-16be"]) def test_utf16_bom_skiprows(all_parsers, sep, encoding): @@ -237,7 +238,7 @@ def test_parse_encoded_special_characters(encoding): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # ValueError: The 'memory_map' option is not supported @pytest.mark.parametrize("encoding", ["utf-8", None, "utf-16", "cp1255", "latin-1"]) def 
test_encoding_memory_map(all_parsers, encoding): # GH40986 @@ -255,7 +256,7 @@ def test_encoding_memory_map(all_parsers, encoding): tm.assert_frame_equal(df, expected) -@xfail_pyarrow +@xfail_pyarrow # ValueError: The 'memory_map' option is not supported def test_chunk_splits_multibyte_char(all_parsers): """ Chunk splits a multibyte character with memory_map=True @@ -275,7 +276,7 @@ def test_chunk_splits_multibyte_char(all_parsers): tm.assert_frame_equal(dfr, df) -@xfail_pyarrow +@xfail_pyarrow # ValueError: The 'memory_map' option is not supported def test_readcsv_memmap_utf8(all_parsers): """ GH 43787 diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 2edb389a0c830..f55f8497f318c 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -23,6 +23,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @xfail_pyarrow # TypeError: an integer is required @@ -79,7 +80,7 @@ def test_bool_header_arg(all_parsers, header): parser.read_csv(StringIO(data), header=header) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: DataFrame are different def test_header_with_index_col(all_parsers): parser = all_parsers data = """foo,1,2,3 @@ -183,7 +184,7 @@ def test_header_multi_index_invalid(all_parsers, kwargs, msg): _TestTuple = namedtuple("_TestTuple", ["first", "second"]) -@xfail_pyarrow +@xfail_pyarrow # TypeError: an integer is required @pytest.mark.parametrize( "kwargs", [ @@ -231,7 +232,7 @@ def test_header_multi_index_common_format1(all_parsers, kwargs): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # TypeError: an integer is required @pytest.mark.parametrize( "kwargs", [ @@ -278,7 +279,7 @@ def test_header_multi_index_common_format2(all_parsers, kwargs): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # TypeError: an integer is required @pytest.mark.parametrize( "kwargs", [ 
@@ -419,7 +420,7 @@ def test_header_names_backward_compat(all_parsers, data, header, request): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block: cannot infer +@skip_pyarrow # CSV parse error: Empty CSV file or block: cannot infer @pytest.mark.parametrize("kwargs", [{}, {"index_col": False}]) def test_read_only_header_no_rows(all_parsers, kwargs): # See gh-7773 @@ -561,7 +562,7 @@ def test_multi_index_unnamed(all_parsers, index_col, columns): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Expected 2 columns, got 3 +@skip_pyarrow # CSV parse error: Expected 2 columns, got 3 def test_names_longer_than_header_but_equal_with_data_rows(all_parsers): # GH#38453 parser = all_parsers @@ -622,7 +623,7 @@ def test_read_csv_multi_header_length_check(all_parsers): parser.read_csv(StringIO(case), header=[0, 2]) -@xfail_pyarrow # CSV parse error: Expected 3 columns, got 2 +@skip_pyarrow # CSV parse error: Expected 3 columns, got 2 def test_header_none_and_implicit_index(all_parsers): # GH#22144 parser = all_parsers diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index b938b129ac38d..ba15d061b2deb 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -20,6 +20,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @pytest.mark.parametrize("with_header", [True, False]) @@ -76,7 +77,7 @@ def test_index_col_is_true(all_parsers): parser.read_csv(StringIO(data), index_col=True) -@xfail_pyarrow # CSV parse error: Expected 3 columns, got 4 +@skip_pyarrow # CSV parse error: Expected 3 columns, got 4 def test_infer_index_col(all_parsers): data = """A,B,C foo,1,2,3 @@ -94,7 +95,7 @@ def test_infer_index_col(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: 
Empty CSV file or block @pytest.mark.parametrize( "index_col,kwargs", [ @@ -143,7 +144,7 @@ def test_index_col_empty_data(all_parsers, index_col, kwargs): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_empty_with_index_col_false(all_parsers): # see gh-10413 data = "x,y" @@ -317,7 +318,7 @@ def test_multiindex_columns_index_col_with_data(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Empty CSV file or block +@skip_pyarrow # CSV parse error: Empty CSV file or block def test_infer_types_boolean_sum(all_parsers): # GH#44079 parser = all_parsers diff --git a/pandas/tests/io/parser/test_mangle_dupes.py b/pandas/tests/io/parser/test_mangle_dupes.py index 7d148ae6c5a27..1d245f81f027c 100644 --- a/pandas/tests/io/parser/test_mangle_dupes.py +++ b/pandas/tests/io/parser/test_mangle_dupes.py @@ -18,7 +18,7 @@ ) -@xfail_pyarrow +@xfail_pyarrow # ValueError: Found non-unique column index def test_basic(all_parsers): parser = all_parsers @@ -29,7 +29,7 @@ def test_basic(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # ValueError: Found non-unique column index def test_basic_names(all_parsers): # See gh-7160 parser = all_parsers @@ -50,7 +50,7 @@ def test_basic_names_raise(all_parsers): parser.read_csv(StringIO(data), names=["a", "b", "a"]) -@xfail_pyarrow +@xfail_pyarrow # ValueError: Found non-unique column index @pytest.mark.parametrize( "data,expected", [ @@ -118,7 +118,7 @@ def test_thorough_mangle_names(all_parsers, data, names, expected): parser.read_csv(StringIO(data), names=names) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: DataFrame.columns are different def test_mangled_unnamed_placeholders(all_parsers): # xref gh-13017 orig_key = "0" @@ -141,7 +141,7 @@ def test_mangled_unnamed_placeholders(all_parsers): tm.assert_frame_equal(df, expected) -@xfail_pyarrow 
+@xfail_pyarrow # ValueError: Found non-unique column index def test_mangle_dupe_cols_already_exists(all_parsers): # GH#14704 parser = all_parsers @@ -155,7 +155,7 @@ def test_mangle_dupe_cols_already_exists(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # ValueError: Found non-unique column index def test_mangle_dupe_cols_already_exists_unnamed_col(all_parsers): # GH#14704 parser = all_parsers diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 59dae1eaa7e6c..437a5fb5e9f09 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -21,6 +21,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") def test_string_nas(all_parsers): @@ -398,7 +399,7 @@ def test_na_values_na_filter_override(all_parsers, na_filter, row_data): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Expected 8 columns, got 5: +@skip_pyarrow # CSV parse error: Expected 8 columns, got 5: def test_na_trailing_columns(all_parsers): parser = all_parsers data = """Date,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax @@ -630,7 +631,7 @@ def test_nan_multi_index(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # Failed: DID NOT RAISE def test_bool_and_nan_to_bool(all_parsers): # GH#42808 parser = all_parsers diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 47e654fc606af..70d9171fa3c22 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -41,6 +41,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @xfail_pyarrow @@ -786,7 +787,7 @@ def test_nat_parse(all_parsers): tm.assert_frame_equal(result, df) -@xfail_pyarrow +@skip_pyarrow def test_csv_custom_parser(all_parsers): 
data = """A,B,C 20090101,a,1,2 @@ -806,7 +807,7 @@ def test_csv_custom_parser(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@skip_pyarrow def test_parse_dates_implicit_first_col(all_parsers): data = """A,B,C 20090101,a,1,2 @@ -2101,7 +2102,7 @@ def test_dayfirst_warnings_no_leading_zero(date_string, dayfirst): tm.assert_index_equal(expected, res) -@xfail_pyarrow # CSV parse error: Expected 3 columns, got 4 +@skip_pyarrow # CSV parse error: Expected 3 columns, got 4 def test_infer_first_column_as_index(all_parsers): # GH#11019 parser = all_parsers diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index a677d9caa4b19..0a1ba0252f106 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -18,6 +18,7 @@ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @pytest.mark.parametrize( @@ -31,7 +32,7 @@ ({"quotechar": 2}, '"quotechar" must be string( or None)?, not int'), ], ) -@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block +@skip_pyarrow # ParserError: CSV parse error: Empty CSV file or block def test_bad_quote_char(all_parsers, kwargs, msg): data = "1,2,3" parser = all_parsers diff --git a/pandas/tests/io/parser/test_skiprows.py b/pandas/tests/io/parser/test_skiprows.py index 9146af3f969e6..47c3739c979a3 100644 --- a/pandas/tests/io/parser/test_skiprows.py +++ b/pandas/tests/io/parser/test_skiprows.py @@ -67,7 +67,7 @@ def test_deep_skip_rows(all_parsers): tm.assert_frame_equal(result, condensed_result) -@xfail_pyarrow +@xfail_pyarrow # AssertionError: DataFrame are different def test_skip_rows_blank(all_parsers): # see gh-9832 parser = all_parsers @@ -225,7 +225,7 @@ def test_skiprows_lineterminator(all_parsers, lineterminator, request): tm.assert_frame_equal(result, expected) -@xfail_pyarrow +@xfail_pyarrow # 
AssertionError: DataFrame are different def test_skiprows_infield_quote(all_parsers): # see gh-14459 parser = all_parsers diff --git a/pandas/tests/io/parser/usecols/test_parse_dates.py b/pandas/tests/io/parser/usecols/test_parse_dates.py index bcb1c6af80df6..042c3814ef72a 100644 --- a/pandas/tests/io/parser/usecols/test_parse_dates.py +++ b/pandas/tests/io/parser/usecols/test_parse_dates.py @@ -17,6 +17,7 @@ "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") @xfail_pyarrow # TypeError: expected bytes, int found @@ -38,7 +39,7 @@ def test_usecols_with_parse_dates(all_parsers, usecols): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # pyarrow.lib.ArrowKeyError: Column 'fdate' in include_columns +@skip_pyarrow # pyarrow.lib.ArrowKeyError: Column 'fdate' in include_columns def test_usecols_with_parse_dates2(all_parsers): # see gh-13604 parser = all_parsers diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index 7a620768040a7..055be81d2996d 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -30,6 +30,7 @@ ) xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") +skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip") pytestmark = pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning" @@ -148,7 +149,7 @@ def test_usecols_single_string(all_parsers): parser.read_csv(StringIO(data), usecols="foo") -@xfail_pyarrow # CSV parse error in one case, AttributeError in another +@skip_pyarrow # CSV parse error in one case, AttributeError in another @pytest.mark.parametrize( "data", ["a,b,c,d\n1,2,3,4\n5,6,7,8", "a,b,c,d\n1,2,3,4,\n5,6,7,8,"] ) @@ -191,7 +192,7 @@ def test_usecols_index_col_conflict2(all_parsers): tm.assert_frame_equal(result, 
expected) -@xfail_pyarrow # CSV parse error: Expected 3 columns, got 4 +@skip_pyarrow # CSV parse error: Expected 3 columns, got 4 def test_usecols_implicit_index_col(all_parsers): # see gh-2654 parser = all_parsers @@ -337,7 +338,7 @@ def test_callable_usecols(all_parsers, usecols, expected): # ArrowKeyError: Column 'fa' in include_columns does not exist in CSV file -@xfail_pyarrow +@skip_pyarrow @pytest.mark.parametrize("usecols", [["a", "c"], lambda x: x in ["a", "c"]]) def test_incomplete_first_row(all_parsers, usecols): # see gh-6710 @@ -350,7 +351,7 @@ def test_incomplete_first_row(all_parsers, usecols): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # CSV parse error: Expected 3 columns, got 4 +@skip_pyarrow # CSV parse error: Expected 3 columns, got 4 @pytest.mark.parametrize( "data,usecols,kwargs,expected", [ From a59b01d5eb5de9116b20a48c111c6a335a8c70a9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 13 Nov 2023 13:24:33 -0800 Subject: [PATCH 2/2] Clarify --- pandas/tests/io/parser/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 202fbbc1206c7..eb7835bb27372 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -302,7 +302,7 @@ def pyarrow_skip(request): """ Fixture that skips a test if the engine is pyarrow. - Use if failure is do pyarrow failing to parse the input. + Use if failure is due to a parsing failure from pyarrow.csv.read_csv """ if "all_parsers" in request.fixturenames: parser = request.getfixturevalue("all_parsers")