Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Jul 3, 2023
1 parent 74bb9e4 commit d758a74
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
10 changes: 10 additions & 0 deletions python/pyarrow/_parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,16 @@ cdef class ColumnChunkMetaData(_Weakrefable):
"""Uncompressed size in bytes (int)."""
return self.metadata.total_uncompressed_size()

@property
def has_offset_index(self):
"""Whether the column chunk metadata records an offset index location (bool)."""
return self.metadata.GetOffsetIndexLocation().has_value()

@property
def has_column_index(self):
"""Whether the column chunk metadata records a column index location (bool)."""
return self.metadata.GetColumnIndexLocation().has_value()


cdef class RowGroupMetaData(_Weakrefable):
"""Metadata for a single row group."""
Expand Down
13 changes: 13 additions & 0 deletions python/pyarrow/tests/parquet/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,19 @@ def test_field_id_metadata():
assert schema[4].metadata[field_id] == b'xyz'
assert schema[5].metadata[field_id] == b'-1000'

def test_parquet_file_page_index():
    """Check that column chunk metadata reports page index presence.

    The file is written once without and once with ``write_page_index``;
    ``has_offset_index`` and ``has_column_index`` on the first column chunk
    must mirror that setting, so a property that always returned the same
    value would be caught.
    """
    table = pa.table({'a': [1, 2, 3]})

    for write_page_index in (False, True):
        writer = pa.BufferOutputStream()
        _write_table(table, writer, write_page_index=write_page_index)
        reader = pa.BufferReader(writer.getvalue())

        # Page index presence is exposed on the column chunk metadata
        metadata = pq.read_metadata(reader)
        cc = metadata.row_group(0).column(0)
        assert cc.has_offset_index is write_page_index
        assert cc.has_column_index is write_page_index


@pytest.mark.pandas
def test_multi_dataset_metadata(tempdir):
Expand Down

0 comments on commit d758a74

Please sign in to comment.