Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec #13921

Merged
merged 10 commits into from
Aug 29, 2022
45 changes: 44 additions & 1 deletion python/pyarrow/io.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@ cdef class NativeFile(_Weakrefable):
def __exit__(self, exc_type, exc_value, tb):
    """
    Leave the ``with`` block by closing this file.

    Parameters
    ----------
    exc_type, exc_value, tb
        Exception details supplied by the context-manager protocol.
        They are not inspected here.
    """
    # Nothing is returned, so an exception raised inside the ``with``
    # body is not suppressed.
    self.close()

def __repr__(self):
    """
    Return a one-line summary of this file object.

    The summary contains the class name prefixed with ``pyarrow.``
    followed by the ``closed``, ``own_file``, ``is_seekable``,
    ``is_writable`` and ``is_readable`` attributes, in that order.
    """
    cls_label = f"pyarrow.{self.__class__.__name__}"
    state = (
        f"closed={self.closed} "
        f"own_file={self.own_file} "
        f"is_seekable={self.is_seekable} "
        f"is_writable={self.is_writable} "
        f"is_readable={self.is_readable}"
    )
    return f"<{cls_label} {state}>"

@property
def mode(self):
"""
Expand Down Expand Up @@ -766,6 +775,13 @@ cdef class PythonFile(NativeFile):
As a downside, there is a non-zero redirection cost in translating
Arrow stream calls to Python method calls. Furthermore, Python's
Global Interpreter Lock may limit parallelism in some situations.

Examples
--------
>>> import io
>>> import pyarrow as pa
>>> pa.PythonFile(io.BytesIO())
<pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False>
"""
cdef:
object handle
Expand Down Expand Up @@ -1053,6 +1069,14 @@ cdef class Buffer(_Weakrefable):
def __len__(self):
    """Return the value of the ``size`` attribute as the length."""
    length = self.size
    return length

def __repr__(self):
    """
    Return a one-line summary of this buffer.

    The summary contains the class name prefixed with ``pyarrow.``,
    the ``address`` attribute rendered with ``hex()``, and the ``size``,
    ``is_cpu`` and ``is_mutable`` attributes, in that order.
    """
    cls_label = f"pyarrow.{self.__class__.__name__}"
    details = (
        f"address={hex(self.address)} "
        f"size={self.size} "
        f"is_cpu={self.is_cpu} "
        f"is_mutable={self.is_mutable}"
    )
    return f"<{cls_label} {details}>"

@property
def size(self):
"""
Expand Down Expand Up @@ -1843,6 +1867,17 @@ cdef class Codec(_Weakrefable):
------
ValueError
If invalid compression value is passed.

Examples
--------
>>> import pyarrow as pa
>>> pa.Codec.is_available('gzip')
True
>>> codec = pa.Codec('gzip')
>>> codec.name
'gzip'
>>> codec.compression_level
9
"""

def __init__(self, str compression not None, compression_level=None):
Expand Down Expand Up @@ -1964,7 +1999,9 @@ cdef class Codec(_Weakrefable):
@property
def compression_level(self):
    """
    Return the compression level parameter of the codec.

    Returns
    -------
    int or None
        The value reported by the wrapped codec's ``compression_level()``,
        or None when the codec name is 'snappy'.
    """
    # The 'snappy' codec is special-cased: per the review discussion it has
    # no compression level and the wrapped call would yield invalid
    # integers.
    if self.name == 'snappy':
        return None
    # The wrapped call already returns an integer, so it is returned as-is
    # rather than being decoded with frombytes().
    return self.unwrap().compression_level()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should add a test for this? (I assume this was raising for Snappy?)

Copy link
Contributor Author

@milesgranger milesgranger Aug 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. It was failing as-is: compression_level() returns an int, and frombytes would fail trying to decode an int. I also modified the snappy variant, as snappy has no compression level and would give invalid integers.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


def compress(self, object buf, asbytes=False, memory_pool=None):
"""
Expand Down Expand Up @@ -2080,6 +2117,12 @@ cdef class Codec(_Weakrefable):

return pybuf if asbytes else out_buf

def __repr__(self):
    """
    Return a one-line summary of this codec.

    The summary contains the class name prefixed with ``pyarrow.``
    followed by the ``name`` and ``compression_level`` attributes.
    """
    cls_label = f"pyarrow.{self.__class__.__name__}"
    codec_name = self.name
    level = self.compression_level
    return f"<{cls_label} name={codec_name} compression_level={level}>"


def compress(object buf, codec='lz4', asbytes=False, memory_pool=None):
"""
Expand Down
11 changes: 11 additions & 0 deletions python/pyarrow/memory.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ cdef class MemoryPool(_Weakrefable):
"""
return frombytes(self.pool.backend_name())

def __repr__(self):
    """
    Return a one-line summary of this memory pool.

    The summary contains the class name prefixed with ``pyarrow.``, the
    ``backend_name`` attribute, and the results of calling
    ``bytes_allocated()`` and ``max_memory()``, in that order.
    """
    cls_label = f"pyarrow.{self.__class__.__name__}"
    stats = (
        f"backend_name={self.backend_name} "
        f"bytes_allocated={self.bytes_allocated()} "
        f"max_memory={self.max_memory()}"
    )
    return f"<{cls_label} {stats}>"

cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
if memory_pool is None:
Expand Down Expand Up @@ -118,6 +124,11 @@ cdef class ProxyMemoryPool(MemoryPool):
def default_memory_pool():
"""
Return the process-global memory pool.

Examples
--------
>>> import pyarrow as pa
>>> pa.default_memory_pool()
<pyarrow.MemoryPool backend_name=... bytes_allocated=0 max_memory=...>
"""
cdef:
MemoryPool pool = MemoryPool.__new__(MemoryPool)
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/table.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2013,7 +2013,7 @@ cdef class RecordBatch(_PandasConvertible):
>>> batch = pa.RecordBatch.from_arrays([n_legs, animals],
... names=["n_legs", "animals"])
>>> batch.serialize()
<pyarrow.lib.Buffer object at ...>
<pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
"""
cdef shared_ptr[CBuffer] buffer
cdef CIpcWriteOptions options = CIpcWriteOptions.Defaults()
Expand Down
6 changes: 6 additions & 0 deletions python/pyarrow/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,12 @@ def test_compression_level(compression):
if not Codec.is_available(compression):
pytest.skip("{} support is not built".format(compression))

codec = Codec(compression)
if codec.name == "snappy":
assert codec.compression_level is None
else:
assert isinstance(codec.compression_level, int)

# These codecs do not support a compression level
no_level = ['snappy']
if compression in no_level:
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/types.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -2065,7 +2065,7 @@ cdef class Schema(_Weakrefable):
Write schema to Buffer:

>>> schema.serialize()
<pyarrow.lib.Buffer object at ...>
<pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
"""
cdef:
shared_ptr[CBuffer] buffer
Expand Down