Skip to content

Commit

Permalink
TST: Skip pyarrow csv tests that raise ParseErrors (#55943)
Browse files Browse the repository at this point in the history
* TST: Skip pyarrow csv tests that raise ParseErrors

* Clarify
  • Loading branch information
mroeschke authored Nov 14, 2023
1 parent f777e67 commit 7f0b890
Show file tree
Hide file tree
Showing 21 changed files with 115 additions and 81 deletions.
31 changes: 16 additions & 15 deletions pandas/tests/io/parser/common/test_common_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_override_set_noconvert_columns():
Expand Down Expand Up @@ -137,7 +138,7 @@ def test_1000_sep(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # ValueError: Found non-unique column index
def test_unnamed_columns(all_parsers):
data = """A,B,C,,
1,2,3,4,5
Expand Down Expand Up @@ -278,7 +279,7 @@ def test_nrows_skipfooter_errors(all_parsers):
parser.read_csv(StringIO(data), skipfooter=1, nrows=5)


@xfail_pyarrow
@skip_pyarrow
def test_missing_trailing_delimiters(all_parsers):
parser = all_parsers
data = """A,B,C,D
Expand Down Expand Up @@ -366,7 +367,7 @@ def test_skip_initial_space(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_trailing_delimiters(all_parsers):
# see gh-2442
data = """A,B,C
Expand Down Expand Up @@ -398,7 +399,7 @@ def test_escapechar(all_parsers):
tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))


@xfail_pyarrow
@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
def test_ignore_leading_whitespace(all_parsers):
# see gh-3374, gh-6607
parser = all_parsers
Expand All @@ -409,7 +410,7 @@ def test_ignore_leading_whitespace(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
@pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]])
def test_uneven_lines_with_usecols(all_parsers, usecols):
# see gh-12203
Expand All @@ -432,7 +433,7 @@ def test_uneven_lines_with_usecols(all_parsers, usecols):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
@pytest.mark.parametrize(
"data,kwargs,expected",
[
Expand Down Expand Up @@ -593,7 +594,7 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_whitespace_lines(all_parsers):
parser = all_parsers
data = """
Expand All @@ -609,7 +610,7 @@ def test_whitespace_lines(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
@pytest.mark.parametrize(
"data,expected",
[
Expand Down Expand Up @@ -707,7 +708,7 @@ def test_read_csv_and_table_sys_setprofile(all_parsers, read_func):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_first_row_bom(all_parsers):
# see gh-26545
parser = all_parsers
Expand All @@ -718,7 +719,7 @@ def test_first_row_bom(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_first_row_bom_unquoted(all_parsers):
# see gh-36343
parser = all_parsers
Expand Down Expand Up @@ -751,7 +752,7 @@ def test_blank_lines_between_header_and_data_rows(all_parsers, nrows):
tm.assert_frame_equal(df, ref[:nrows])


@xfail_pyarrow
@skip_pyarrow
def test_no_header_two_extra_columns(all_parsers):
# GH 26218
column_names = ["one", "two", "three"]
Expand Down Expand Up @@ -852,7 +853,7 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)


@xfail_pyarrow
@skip_pyarrow
def test_dict_keys_as_names(all_parsers):
# GH: 36928
data = "1,2"
Expand All @@ -865,7 +866,7 @@ def test_dict_keys_as_names(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 0
def test_encoding_surrogatepass(all_parsers):
# GH39017
parser = all_parsers
Expand Down Expand Up @@ -893,7 +894,7 @@ def test_malformed_second_line(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_short_single_line(all_parsers):
# GH 47566
parser = all_parsers
Expand All @@ -904,7 +905,7 @@ def test_short_single_line(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # ValueError: Length mismatch: Expected axis has 2 elements
def test_short_multi_line(all_parsers):
# GH 47566
parser = all_parsers
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/common/test_data_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
# Marker that skips (rather than xfails) a test when the parser engine is
# pyarrow; it must be backed by the ``pyarrow_skip`` fixture to match its name
# and the identical marker defined in the sibling parser test modules.
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@xfail_pyarrow
@skip_pyarrow
def test_read_data_list(all_parsers):
parser = all_parsers
kwargs = {"index_col": 0}
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@pytest.mark.network
Expand Down Expand Up @@ -431,7 +432,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
assert not reader.handles.handle.closed


@xfail_pyarrow # ParserError: Empty CSV file
@skip_pyarrow # ParserError: Empty CSV file
def test_file_descriptor_leak(all_parsers, using_copy_on_write):
# GH 31488
parser = all_parsers
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/parser/common/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block
@skip_pyarrow # ParserError: CSV parse error: Empty CSV file or block
def test_float_parser(all_parsers):
# see gh-9565
parser = all_parsers
Expand Down Expand Up @@ -50,7 +51,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
# GH#38753
Expand Down
17 changes: 9 additions & 8 deletions pandas/tests/io/parser/common/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@pytest.mark.parametrize(
Expand Down Expand Up @@ -108,7 +109,7 @@ def test_multi_index_no_level_names(all_parsers, index_col):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_multi_index_no_level_names_implicit(all_parsers):
parser = all_parsers
data = """A,B,C,D
Expand Down Expand Up @@ -142,7 +143,7 @@ def test_multi_index_no_level_names_implicit(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"data,expected,header",
[
Expand All @@ -164,7 +165,7 @@ def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # AssertionError: DataFrame.columns are different
def test_no_unnamed_index(all_parsers):
parser = all_parsers
data = """ id c0 c1 c2
Expand Down Expand Up @@ -207,7 +208,7 @@ def test_read_duplicate_index_explicit(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_read_duplicate_index_implicit(all_parsers):
data = """A,B,C,D
foo,2,3,4,5
Expand Down Expand Up @@ -235,7 +236,7 @@ def test_read_duplicate_index_implicit(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_read_csv_no_index_name(all_parsers, csv_dir_path):
parser = all_parsers
csv2 = os.path.join(csv_dir_path, "test2.csv")
Expand Down Expand Up @@ -263,7 +264,7 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_empty_with_index(all_parsers):
# see gh-10184
data = "x,y"
Expand All @@ -275,7 +276,7 @@ def test_empty_with_index(all_parsers):


# CSV parse error: Empty CSV file or block: cannot infer number of columns
@xfail_pyarrow
@skip_pyarrow
def test_empty_with_multi_index(all_parsers):
# see gh-10467
data = "x,y,z"
Expand All @@ -289,7 +290,7 @@ def test_empty_with_multi_index(all_parsers):


# CSV parse error: Empty CSV file or block: cannot infer number of columns
@xfail_pyarrow
@skip_pyarrow
def test_empty_with_reversed_multi_index(all_parsers):
data = "x,y,z"
parser = all_parsers
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/common/test_inf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow
@xfail_pyarrow # AssertionError: DataFrame.index are different
@pytest.mark.parametrize("na_filter", [True, False])
def test_inf_parsing(all_parsers, na_filter):
parser = all_parsers
Expand All @@ -44,7 +44,7 @@ def test_inf_parsing(all_parsers, na_filter):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # AssertionError: DataFrame.index are different
@pytest.mark.parametrize("na_filter", [True, False])
def test_infinity_parsing(all_parsers, na_filter):
parser = all_parsers
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/parser/common/test_ints.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_int_conversion(all_parsers):
Expand Down Expand Up @@ -179,7 +180,7 @@ def test_int64_overflow(all_parsers, conv, request):
parser.read_csv(StringIO(data), converters={"ID": conv})


@xfail_pyarrow # CSV parse error: Empty CSV file or block
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
)
Expand All @@ -193,7 +194,7 @@ def test_int64_uint64_range(all_parsers, val):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/parser/common/test_read_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import pandas._testing as tm

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_empty_decimal_marker(all_parsers):
Expand Down Expand Up @@ -139,7 +140,7 @@ def test_catch_too_many_names(all_parsers):
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])


@xfail_pyarrow # CSV parse error: Empty CSV file or block
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
def test_raise_on_no_columns(all_parsers, nrows):
parser = all_parsers
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/parser/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ def numeric_decimal(request):
def pyarrow_xfail(request):
"""
Fixture that xfails a test if the engine is pyarrow.
Use if failure is due to unsupported keywords or inconsistent results.
"""
if "all_parsers" in request.fixturenames:
parser = request.getfixturevalue("all_parsers")
Expand All @@ -293,3 +295,21 @@ def pyarrow_xfail(request):
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.applymarker(mark)


@pytest.fixture
def pyarrow_skip(request):
    """
    Skip the requesting test when its parser uses the pyarrow engine.

    Use when the failure is due to a parsing failure raised by
    pyarrow.csv.read_csv. Tests that request neither ``all_parsers`` nor
    ``all_parsers_all_precisions`` are left untouched.
    """
    if "all_parsers" in request.fixturenames:
        active_parser = request.getfixturevalue("all_parsers")
    elif "all_parsers_all_precisions" in request.fixturenames:
        # The fixture value is a tuple of (engine, precision); keep the engine.
        engine_and_precision = request.getfixturevalue("all_parsers_all_precisions")
        active_parser = engine_and_precision[0]
    else:
        # No parser fixture involved, so there is nothing to decide.
        return

    if parser_is_pyarrow := (active_parser.engine == "pyarrow"):
        pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
Loading

0 comments on commit 7f0b890

Please sign in to comment.