Skip to content

Commit

Permalink
ARROW-6683: [Python] Test for fastparquet <-> pyarrow cross-compatibi…
Browse files Browse the repository at this point in the history
…lity
  • Loading branch information
jorisvandenbossche committed Sep 25, 2019
1 parent 7f2d637 commit c5d6161
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions python/pyarrow/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3345,3 +3345,40 @@ def test_filter_before_validate_schema(tempdir):
# read single file using filter
table = pq.read_table(tempdir, filters=[[('A', '==', 0)]])
assert table.column('B').equals(pa.chunked_array([[1, 2, 3]]))


@pytest.mark.pandas
@pytest.mark.filterwarnings("ignore:RangeIndex:DeprecationWarning")
def test_fastparquet_cross_compatibility(tempdir):
fp = pytest.importorskip('fastparquet')

df = pd.DataFrame(
{
"a": list("abc"),
"b": list(range(1, 4)),
"c": np.arange(4.0, 7.0, dtype="float64"),
"d": [True, False, True],
"e": pd.date_range("20130101", periods=3),
"f": pd.Categorical(["a", "b", "a"])
# "g": [[1, 2], None, [1, 2, 3]],
}
)
table = pa.table(df)

# Arrow -> fastparquet
file_arrow = str(tempdir / "cross_compat_arrow.parquet")
pq.write_table(table, file_arrow, compression=None)

fp_file = fp.ParquetFile(file_arrow)
df_fp = fp_file.to_pandas()
tm.assert_frame_equal(df, df_fp)

# Fastparquet -> arrow
file_fastparquet = str(tempdir / "cross_compat_fastparquet.parquet")
fp.write(file_fastparquet, df)

table_fp = pq.read_pandas(file_fastparquet)
# for fastparquet written file, categoricals comes back as strings
# (no arrow schema in parquet metadata)
df['f'] = df['f'].astype(object)
tm.assert_frame_equal(table_fp.to_pandas(), df)

0 comments on commit c5d6161

Please sign in to comment.