Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TST: Skip pyarrow csv tests that raise ParseErrors #55943

Merged
merged 2 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions pandas/tests/io/parser/common/test_common_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_override_set_noconvert_columns():
Expand Down Expand Up @@ -137,7 +138,7 @@ def test_1000_sep(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # ValueError: Found non-unique column index
def test_unnamed_columns(all_parsers):
data = """A,B,C,,
1,2,3,4,5
Expand Down Expand Up @@ -278,7 +279,7 @@ def test_nrows_skipfooter_errors(all_parsers):
parser.read_csv(StringIO(data), skipfooter=1, nrows=5)


@xfail_pyarrow
@skip_pyarrow
def test_missing_trailing_delimiters(all_parsers):
parser = all_parsers
data = """A,B,C,D
Expand Down Expand Up @@ -366,7 +367,7 @@ def test_skip_initial_space(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_trailing_delimiters(all_parsers):
# see gh-2442
data = """A,B,C
Expand Down Expand Up @@ -398,7 +399,7 @@ def test_escapechar(all_parsers):
tm.assert_index_equal(result.columns, Index(["SEARCH_TERM", "ACTUAL_URL"]))


@xfail_pyarrow
@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
def test_ignore_leading_whitespace(all_parsers):
# see gh-3374, gh-6607
parser = all_parsers
Expand All @@ -409,7 +410,7 @@ def test_ignore_leading_whitespace(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
@pytest.mark.parametrize("usecols", [None, [0, 1], ["a", "b"]])
def test_uneven_lines_with_usecols(all_parsers, usecols):
# see gh-12203
Expand All @@ -432,7 +433,7 @@ def test_uneven_lines_with_usecols(all_parsers, usecols):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
@pytest.mark.parametrize(
"data,kwargs,expected",
[
Expand Down Expand Up @@ -593,7 +594,7 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_whitespace_lines(all_parsers):
parser = all_parsers
data = """
Expand All @@ -609,7 +610,7 @@ def test_whitespace_lines(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # ValueError: the 'pyarrow' engine does not support regex separators
@pytest.mark.parametrize(
"data,expected",
[
Expand Down Expand Up @@ -707,7 +708,7 @@ def test_read_csv_and_table_sys_setprofile(all_parsers, read_func):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_first_row_bom(all_parsers):
# see gh-26545
parser = all_parsers
Expand All @@ -718,7 +719,7 @@ def test_first_row_bom(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_first_row_bom_unquoted(all_parsers):
# see gh-36343
parser = all_parsers
Expand Down Expand Up @@ -751,7 +752,7 @@ def test_blank_lines_between_header_and_data_rows(all_parsers, nrows):
tm.assert_frame_equal(df, ref[:nrows])


@xfail_pyarrow
@skip_pyarrow
def test_no_header_two_extra_columns(all_parsers):
# GH 26218
column_names = ["one", "two", "three"]
Expand Down Expand Up @@ -852,7 +853,7 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)


@xfail_pyarrow
@skip_pyarrow
def test_dict_keys_as_names(all_parsers):
# GH: 36928
data = "1,2"
Expand All @@ -865,7 +866,7 @@ def test_dict_keys_as_names(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # UnicodeDecodeError: 'utf-8' codec can't decode byte 0xed in position 0
def test_encoding_surrogatepass(all_parsers):
# GH39017
parser = all_parsers
Expand Down Expand Up @@ -893,7 +894,7 @@ def test_malformed_second_line(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_short_single_line(all_parsers):
# GH 47566
parser = all_parsers
Expand All @@ -904,7 +905,7 @@ def test_short_single_line(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # ValueError: Length mismatch: Expected axis has 2 elements
def test_short_multi_line(all_parsers):
# GH 47566
parser = all_parsers
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/common/test_data_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
# Skip (rather than xfail) tests under the pyarrow engine, as the marker name says.
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@xfail_pyarrow
@skip_pyarrow
def test_read_data_list(all_parsers):
parser = all_parsers
kwargs = {"index_col": 0}
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/parser/common/test_file_buffer_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@pytest.mark.network
Expand Down Expand Up @@ -431,7 +432,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
assert not reader.handles.handle.closed


@xfail_pyarrow # ParserError: Empty CSV file
@skip_pyarrow # ParserError: Empty CSV file
def test_file_descriptor_leak(all_parsers, using_copy_on_write):
# GH 31488
parser = all_parsers
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/parser/common/test_float.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@xfail_pyarrow # ParserError: CSV parse error: Empty CSV file or block
@skip_pyarrow # ParserError: CSV parse error: Empty CSV file or block
def test_float_parser(all_parsers):
# see gh-9565
parser = all_parsers
Expand Down Expand Up @@ -50,7 +51,7 @@ def test_very_negative_exponent(all_parsers_all_precisions, neg_exp):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # AssertionError: Attributes of DataFrame.iloc[:, 0] are different
@pytest.mark.parametrize("exp", [999999999999999999, -999999999999999999])
def test_too_many_exponent_digits(all_parsers_all_precisions, exp, request):
# GH#38753
Expand Down
17 changes: 9 additions & 8 deletions pandas/tests/io/parser/common/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@pytest.mark.parametrize(
Expand Down Expand Up @@ -108,7 +109,7 @@ def test_multi_index_no_level_names(all_parsers, index_col):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_multi_index_no_level_names_implicit(all_parsers):
parser = all_parsers
data = """A,B,C,D
Expand Down Expand Up @@ -142,7 +143,7 @@ def test_multi_index_no_level_names_implicit(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # TypeError: an integer is required
@pytest.mark.parametrize(
"data,expected,header",
[
Expand All @@ -164,7 +165,7 @@ def test_multi_index_blank_df(all_parsers, data, expected, header, round_trip):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # AssertionError: DataFrame.columns are different
def test_no_unnamed_index(all_parsers):
parser = all_parsers
data = """ id c0 c1 c2
Expand Down Expand Up @@ -207,7 +208,7 @@ def test_read_duplicate_index_explicit(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_read_duplicate_index_implicit(all_parsers):
data = """A,B,C,D
foo,2,3,4,5
Expand Down Expand Up @@ -235,7 +236,7 @@ def test_read_duplicate_index_implicit(all_parsers):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_read_csv_no_index_name(all_parsers, csv_dir_path):
parser = all_parsers
csv2 = os.path.join(csv_dir_path, "test2.csv")
Expand Down Expand Up @@ -263,7 +264,7 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@skip_pyarrow
def test_empty_with_index(all_parsers):
# see gh-10184
data = "x,y"
Expand All @@ -275,7 +276,7 @@ def test_empty_with_index(all_parsers):


# CSV parse error: Empty CSV file or block: cannot infer number of columns
@xfail_pyarrow
@skip_pyarrow
def test_empty_with_multi_index(all_parsers):
# see gh-10467
data = "x,y,z"
Expand All @@ -289,7 +290,7 @@ def test_empty_with_multi_index(all_parsers):


# CSV parse error: Empty CSV file or block: cannot infer number of columns
@xfail_pyarrow
@skip_pyarrow
def test_empty_with_reversed_multi_index(all_parsers):
data = "x,y,z"
parser = all_parsers
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/parser/common/test_inf.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


@xfail_pyarrow
@xfail_pyarrow # AssertionError: DataFrame.index are different
@pytest.mark.parametrize("na_filter", [True, False])
def test_inf_parsing(all_parsers, na_filter):
parser = all_parsers
Expand All @@ -44,7 +44,7 @@ def test_inf_parsing(all_parsers, na_filter):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow
@xfail_pyarrow # AssertionError: DataFrame.index are different
@pytest.mark.parametrize("na_filter", [True, False])
def test_infinity_parsing(all_parsers, na_filter):
parser = all_parsers
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/io/parser/common/test_ints.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_int_conversion(all_parsers):
Expand Down Expand Up @@ -179,7 +180,7 @@ def test_int64_overflow(all_parsers, conv, request):
parser.read_csv(StringIO(data), converters={"ID": conv})


@xfail_pyarrow # CSV parse error: Empty CSV file or block
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
)
Expand All @@ -193,7 +194,7 @@ def test_int64_uint64_range(all_parsers, val):
tm.assert_frame_equal(result, expected)


@xfail_pyarrow # CSV parse error: Empty CSV file or block
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/parser/common/test_read_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import pandas._testing as tm

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_empty_decimal_marker(all_parsers):
Expand Down Expand Up @@ -139,7 +140,7 @@ def test_catch_too_many_names(all_parsers):
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])


@xfail_pyarrow # CSV parse error: Empty CSV file or block
@skip_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
def test_raise_on_no_columns(all_parsers, nrows):
parser = all_parsers
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/parser/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ def numeric_decimal(request):
def pyarrow_xfail(request):
"""
Fixture that xfails a test if the engine is pyarrow.

Use if failure is due to unsupported keywords or inconsistent results.
"""
if "all_parsers" in request.fixturenames:
parser = request.getfixturevalue("all_parsers")
Expand All @@ -293,3 +295,21 @@ def pyarrow_xfail(request):
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.applymarker(mark)


@pytest.fixture
def pyarrow_skip(request):
    """
    Skip the requesting test when the parser under test uses the pyarrow engine.

    Use when the failure is due to a parsing error raised by
    pyarrow.csv.read_csv. Tests that request neither ``all_parsers`` nor
    ``all_parsers_all_precisions`` are left untouched.
    """
    requested = request.fixturenames
    if "all_parsers" in requested:
        parser = request.getfixturevalue("all_parsers")
    elif "all_parsers_all_precisions" in requested:
        # The fixture value is a tuple of (engine, precision); keep the engine
        parser = request.getfixturevalue("all_parsers_all_precisions")[0]
    else:
        # No parser fixture requested, so there is no engine to check
        return

    if parser.engine != "pyarrow":
        return
    pytest.skip(reason="https://github.com/apache/arrow/issues/38676")
Loading
Loading