Skip to content

Commit

Permalink
TST: change pyarrow skips to xfails (#55576)
Browse files Browse the repository at this point in the history
* TST: change pyarrow skips to xfails

* revert edits where CI is different from local

---------

Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
jbrockmendel and mroeschke authored Oct 23, 2023
1 parent 0c7d303 commit f64c608
Show file tree
Hide file tree
Showing 15 changed files with 268 additions and 239 deletions.
11 changes: 7 additions & 4 deletions pandas/tests/io/parser/common/test_common_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_override_set_noconvert_columns():
Expand Down Expand Up @@ -515,8 +514,6 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
tm.assert_frame_equal(result, expected)


# Skip for now, actually only one test fails though, but its tricky to xfail
@skip_pyarrow
@pytest.mark.parametrize(
"sep,skip_blank_lines,exp_data",
[
Expand All @@ -536,7 +533,7 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
),
],
)
def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data):
def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data, request):
parser = all_parsers
data = """\
A,B,C
Expand All @@ -550,6 +547,12 @@ def test_empty_lines(all_parsers, sep, skip_blank_lines, exp_data):

if sep == r"\s+":
data = data.replace(",", " ")
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(
raises=ValueError,
reason="the 'pyarrow' engine does not support regex separators",
)
request.applymarker(mark)

result = parser.read_csv(StringIO(data), sep=sep, skip_blank_lines=skip_blank_lines)
expected = DataFrame(exp_data, columns=["A", "B", "C"])
Expand Down
10 changes: 4 additions & 6 deletions pandas/tests/io/parser/common/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")

# GH#43650: Some expected failures with the pyarrow engine can occasionally
# cause a deadlock instead, so we skip these instead of xfailing
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@pytest.mark.parametrize(
"data,kwargs,expected",
Expand Down Expand Up @@ -278,7 +274,8 @@ def test_empty_with_index(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
# CSV parse error: Empty CSV file or block: cannot infer number of columns
@xfail_pyarrow
def test_empty_with_multi_index(all_parsers):
# see gh-10467
data = "x,y,z"
Expand All @@ -291,7 +288,8 @@ def test_empty_with_multi_index(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
# CSV parse error: Empty CSV file or block: cannot infer number of columns
@xfail_pyarrow
def test_empty_with_reversed_multi_index(all_parsers):
data = "x,y,z"
parser = all_parsers
Expand Down
19 changes: 11 additions & 8 deletions pandas/tests/io/parser/common/test_ints.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

# GH#43650: Some expected failures with the pyarrow engine can occasionally
# cause a deadlock instead, so we skip these instead of xfailing
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")
xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")


def test_int_conversion(all_parsers):
Expand Down Expand Up @@ -102,12 +100,16 @@ def test_parse_integers_above_fp_precision(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow # Flaky
@pytest.mark.parametrize("sep", [" ", r"\s+"])
def test_integer_overflow_bug(all_parsers, sep):
# see gh-2601
data = "65248E10 11\n55555E55 22\n"
parser = all_parsers
if parser.engine == "pyarrow" and sep != " ":
msg = "the 'pyarrow' engine does not support regex separators"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=None, sep=sep)
return

result = parser.read_csv(StringIO(data), header=None, sep=sep)
expected = DataFrame([[6.5248e14, 11], [5.5555e59, 22]])
Expand All @@ -124,7 +126,8 @@ def test_int64_min_issues(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
# ValueError: The 'converters' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
@pytest.mark.parametrize("conv", [None, np.int64, np.uint64])
def test_int64_overflow(all_parsers, conv):
data = """ID
Expand Down Expand Up @@ -168,7 +171,7 @@ def test_int64_overflow(all_parsers, conv):
parser.read_csv(StringIO(data), converters={"ID": conv})


@skip_pyarrow
@xfail_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max, np.iinfo(np.int64).max, np.iinfo(np.int64).min]
)
Expand All @@ -182,7 +185,7 @@ def test_int64_uint64_range(all_parsers, val):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@xfail_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize(
"val", [np.iinfo(np.uint64).max + 1, np.iinfo(np.int64).min - 1]
)
Expand All @@ -196,7 +199,7 @@ def test_outside_int64_uint64_range(all_parsers, val):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@xfail_pyarrow # gets float64 dtype instead of object
@pytest.mark.parametrize("exp_data", [[str(-1), str(2**63)], [str(2**63), str(-1)]])
def test_numeric_range_too_wide(all_parsers, exp_data):
# No numerical dtype can hold both negative and uint64
Expand Down
31 changes: 20 additions & 11 deletions pandas/tests/io/parser/common/test_read_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import pandas._testing as tm

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


def test_empty_decimal_marker(all_parsers):
Expand All @@ -44,7 +43,6 @@ def test_empty_decimal_marker(all_parsers):
parser.read_csv(StringIO(data), decimal="")


@skip_pyarrow
def test_bad_stream_exception(all_parsers, csv_dir_path):
# see gh-13652
#
Expand All @@ -65,7 +63,7 @@ def test_bad_stream_exception(all_parsers, csv_dir_path):
parser.read_csv(stream)


@skip_pyarrow
@xfail_pyarrow # ValueError: The 'comment' option is not supported
def test_malformed(all_parsers):
# see gh-6607
parser = all_parsers
Expand All @@ -80,7 +78,7 @@ def test_malformed(all_parsers):
parser.read_csv(StringIO(data), header=1, comment="#")


@skip_pyarrow
@xfail_pyarrow # ValueError: The 'iterator' option is not supported
@pytest.mark.parametrize("nrows", [5, 3, None])
def test_malformed_chunks(all_parsers, nrows):
data = """ignore
Expand All @@ -100,7 +98,7 @@ def test_malformed_chunks(all_parsers, nrows):
reader.read(nrows)


@skip_pyarrow
@xfail_pyarrow # does not raise
def test_catch_too_many_names(all_parsers):
# see gh-5156
data = """\
Expand All @@ -115,12 +113,17 @@ def test_catch_too_many_names(all_parsers):
else "Number of passed names did not match "
"number of header fields in the file"
)
depr_msg = "Passing a BlockManager to DataFrame is deprecated"
warn = None
if parser.engine == "pyarrow":
warn = DeprecationWarning

with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])
with tm.assert_produces_warning(warn, match=depr_msg, check_stacklevel=False):
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data), header=0, names=["a", "b", "c", "d"])


@skip_pyarrow
@xfail_pyarrow # CSV parse error: Empty CSV file or block
@pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5])
def test_raise_on_no_columns(all_parsers, nrows):
parser = all_parsers
Expand Down Expand Up @@ -208,7 +211,6 @@ def test_read_csv_wrong_num_columns(all_parsers):
parser.read_csv(StringIO(data))


@skip_pyarrow
def test_null_byte_char(request, all_parsers):
# see gh-2741
data = "\x00,foo"
Expand All @@ -226,12 +228,19 @@ def test_null_byte_char(request, all_parsers):
out = parser.read_csv(StringIO(data), names=names)
tm.assert_frame_equal(out, expected)
else:
msg = "NULL byte detected"
if parser.engine == "pyarrow":
msg = (
"CSV parse error: Empty CSV file or block: "
"cannot infer number of columns"
)
else:
msg = "NULL byte detected"
with pytest.raises(ParserError, match=msg):
parser.read_csv(StringIO(data), names=names)


@skip_pyarrow
# ValueError: the 'pyarrow' engine does not support sep=None with delim_whitespace=False
@xfail_pyarrow
@pytest.mark.filterwarnings("always::ResourceWarning")
def test_open_file(request, all_parsers):
# GH 39024
Expand Down
16 changes: 0 additions & 16 deletions pandas/tests/io/parser/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,19 +279,3 @@ def pyarrow_xfail(request):
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
request.applymarker(mark)


@pytest.fixture
def pyarrow_skip(request):
    """
    Fixture that skips a test if the engine is pyarrow.

    The parser under test is resolved from whichever parser fixture the
    test requested (``all_parsers`` or ``all_parsers_all_precisions``);
    if neither is present, the test is left to run unmodified.
    """
    if "all_parsers" in request.fixturenames:
        parser = request.getfixturevalue("all_parsers")
    elif "all_parsers_all_precisions" in request.fixturenames:
        # Return value is tuple of (engine, precision)
        parser = request.getfixturevalue("all_parsers_all_precisions")[0]
    else:
        # No parser fixture in use -> nothing to inspect, nothing to skip.
        return
    if parser.engine == "pyarrow":
        pytest.skip("pyarrow doesn't support this.")
14 changes: 10 additions & 4 deletions pandas/tests/io/parser/dtypes/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
)

xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail")
skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@xfail_pyarrow
Expand Down Expand Up @@ -55,9 +54,8 @@ def test_categorical_dtype(all_parsers, dtype):
tm.assert_frame_equal(actual, expected)


@skip_pyarrow # Flaky
@pytest.mark.parametrize("dtype", [{"b": "category"}, {1: "category"}])
def test_categorical_dtype_single(all_parsers, dtype):
def test_categorical_dtype_single(all_parsers, dtype, request):
# see gh-10153
parser = all_parsers
data = """a,b,c
Expand All @@ -67,6 +65,13 @@ def test_categorical_dtype_single(all_parsers, dtype):
expected = DataFrame(
{"a": [1, 1, 2], "b": Categorical(["a", "a", "b"]), "c": [3.4, 3.4, 4.5]}
)
if parser.engine == "pyarrow":
mark = pytest.mark.xfail(
strict=False,
reason="Flaky test sometimes gives object dtype instead of Categorical",
)
request.applymarker(mark)

actual = parser.read_csv(StringIO(data), dtype=dtype)
tm.assert_frame_equal(actual, expected)

Expand Down Expand Up @@ -141,6 +146,7 @@ def test_categorical_dtype_utf16(all_parsers, csv_dir_path):
tm.assert_frame_equal(actual, expected)


# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
def test_categorical_dtype_chunksize_infer_categories(all_parsers):
# see gh-10153
Expand All @@ -161,6 +167,7 @@ def test_categorical_dtype_chunksize_infer_categories(all_parsers):
tm.assert_frame_equal(actual, expected)


# ValueError: The 'chunksize' option is not supported with the 'pyarrow' engine
@xfail_pyarrow
def test_categorical_dtype_chunksize_explicit_categories(all_parsers):
# see gh-10153
Expand Down Expand Up @@ -253,7 +260,6 @@ def test_categorical_coerces_numeric(all_parsers):
tm.assert_frame_equal(result, expected)


@skip_pyarrow # Flaky
def test_categorical_coerces_datetime(all_parsers):
parser = all_parsers
dti = pd.DatetimeIndex(["2017-01-01", "2018-01-01", "2019-01-01"], freq=None)
Expand Down
12 changes: 0 additions & 12 deletions pandas/tests/io/parser/test_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
)

skip_pyarrow = pytest.mark.usefixtures("pyarrow_skip")


@pytest.fixture(params=[True, False])
def buffer(request):
Expand All @@ -36,7 +34,6 @@ def parser_and_data(all_parsers, csv1):
return parser, data, expected


@skip_pyarrow
@pytest.mark.parametrize("compression", ["zip", "infer", "zip2"])
def test_zip(parser_and_data, compression):
parser, data, expected = parser_and_data
Expand All @@ -54,7 +51,6 @@ def test_zip(parser_and_data, compression):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@pytest.mark.parametrize("compression", ["zip", "infer"])
def test_zip_error_multiple_files(parser_and_data, compression):
parser, data, expected = parser_and_data
Expand All @@ -70,7 +66,6 @@ def test_zip_error_multiple_files(parser_and_data, compression):
parser.read_csv(path, compression=compression)


@skip_pyarrow
def test_zip_error_no_files(parser_and_data):
parser, _, _ = parser_and_data

Expand All @@ -82,7 +77,6 @@ def test_zip_error_no_files(parser_and_data):
parser.read_csv(path, compression="zip")


@skip_pyarrow
def test_zip_error_invalid_zip(parser_and_data):
parser, _, _ = parser_and_data

Expand All @@ -92,7 +86,6 @@ def test_zip_error_invalid_zip(parser_and_data):
parser.read_csv(f, compression="zip")


@skip_pyarrow
@pytest.mark.parametrize("filename", [None, "test.{ext}"])
def test_compression(
request,
Expand Down Expand Up @@ -128,7 +121,6 @@ def test_compression(
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@pytest.mark.parametrize("ext", [None, "gz", "bz2"])
def test_infer_compression(all_parsers, csv1, buffer, ext):
# see gh-9770
Expand All @@ -148,7 +140,6 @@ def test_infer_compression(all_parsers, csv1, buffer, ext):
tm.assert_frame_equal(result, expected)


@skip_pyarrow
def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding_fmt):
# see gh-18071, gh-24130
parser = all_parsers
Expand All @@ -166,7 +157,6 @@ def test_compression_utf_encoding(all_parsers, csv_dir_path, utf_value, encoding
tm.assert_frame_equal(result, expected)


@skip_pyarrow
@pytest.mark.parametrize("invalid_compression", ["sfark", "bz3", "zipper"])
def test_invalid_compression(all_parsers, invalid_compression):
parser = all_parsers
Expand All @@ -178,7 +168,6 @@ def test_invalid_compression(all_parsers, invalid_compression):
parser.read_csv("test_file.zip", **compress_kwargs)


@skip_pyarrow
def test_compression_tar_archive(all_parsers, csv_dir_path):
parser = all_parsers
path = os.path.join(csv_dir_path, "tar_csv.tar.gz")
Expand All @@ -200,7 +189,6 @@ def test_ignore_compression_extension(all_parsers):
tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)


@skip_pyarrow
def test_writes_tar_gz(all_parsers):
parser = all_parsers
data = DataFrame(
Expand Down
Loading

0 comments on commit f64c608

Please sign in to comment.