ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec (#13921)
Example:
```python
In [1]: import io
In [2]: import pyarrow as pa

In [3]: pa.PythonFile(io.BytesIO())
Out[3]: <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False>

In [4]: pa.Codec('gzip')
Out[4]: <pyarrow.Codec name=gzip compression_level=9>

In [5]: pool = pa.default_memory_pool()
In [6]: pool
Out[6]: <pyarrow.MemoryPool backend_name=jemalloc bytes_allocated=0 max_memory=0>

In [7]: pa.allocate_buffer(1024, memory_pool=pool)
Out[7]: <pyarrow.Buffer address=0x7fd660a08000 size=1024 is_cpu=True is_mutable=True>
```

Authored-by: Miles Granger <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
milesgranger authored Aug 29, 2022
1 parent bd76850 commit 6f302a3
Showing 5 changed files with 63 additions and 3 deletions.
45 changes: 44 additions & 1 deletion python/pyarrow/io.pxi
@@ -121,6 +121,15 @@ cdef class NativeFile(_Weakrefable):
def __exit__(self, exc_type, exc_value, tb):
self.close()

def __repr__(self):
name = f"pyarrow.{self.__class__.__name__}"
return (f"<{name} "
f"closed={self.closed} "
f"own_file={self.own_file} "
f"is_seekable={self.is_seekable} "
f"is_writable={self.is_writable} "
f"is_readable={self.is_readable}>")

@property
def mode(self):
"""
@@ -766,6 +775,13 @@ cdef class PythonFile(NativeFile):
As a downside, there is a non-zero redirection cost in translating
Arrow stream calls to Python method calls. Furthermore, Python's
Global Interpreter Lock may limit parallelism in some situations.

Examples
--------
>>> import io
>>> import pyarrow as pa
>>> pa.PythonFile(io.BytesIO())
<pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False>
"""
cdef:
object handle
@@ -1053,6 +1069,14 @@ cdef class Buffer(_Weakrefable):
def __len__(self):
return self.size

def __repr__(self):
name = f"pyarrow.{self.__class__.__name__}"
return (f"<{name} "
f"address={hex(self.address)} "
f"size={self.size} "
f"is_cpu={self.is_cpu} "
f"is_mutable={self.is_mutable}>")

@property
def size(self):
"""
@@ -1843,6 +1867,17 @@ cdef class Codec(_Weakrefable):
------
ValueError
If invalid compression value is passed.

Examples
--------
>>> import pyarrow as pa
>>> pa.Codec.is_available('gzip')
True
>>> codec = pa.Codec('gzip')
>>> codec.name
'gzip'
>>> codec.compression_level
9
"""

def __init__(self, str compression not None, compression_level=None):
@@ -1964,7 +1999,9 @@ cdef class Codec(_Weakrefable):
@property
def compression_level(self):
"""Returns the compression level parameter of the codec"""
return frombytes(self.unwrap().compression_level())
if self.name == 'snappy':
return None
return self.unwrap().compression_level()

def compress(self, object buf, asbytes=False, memory_pool=None):
"""
@@ -2080,6 +2117,12 @@ cdef class Codec(_Weakrefable):

return pybuf if asbytes else out_buf

def __repr__(self):
name = f"pyarrow.{self.__class__.__name__}"
return (f"<{name} "
f"name={self.name} "
f"compression_level={self.compression_level}>")


def compress(object buf, codec='lz4', asbytes=False, memory_pool=None):
"""
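Taken together, the io.pxi changes above can be exercised interactively. The following is a minimal sketch, assuming a pyarrow build with gzip and snappy support enabled; the output values shown in the comments are illustrative:

```python
import io
import pyarrow as pa

# Codecs with a configurable level now surface it in the repr.
if pa.Codec.is_available('gzip'):
    gzip_codec = pa.Codec('gzip')
    print(gzip_codec)                      # <pyarrow.Codec name=gzip compression_level=9>

# Snappy has no compression level, so the property returns None
# and the repr reflects that.
if pa.Codec.is_available('snappy'):
    snappy_codec = pa.Codec('snappy')
    print(snappy_codec.compression_level)  # None

# NativeFile subclasses report their open/seek/read/write state.
f = pa.PythonFile(io.BytesIO())
print(f)  # <pyarrow.PythonFile closed=False own_file=False is_seekable=False ...>
```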
11 changes: 11 additions & 0 deletions python/pyarrow/memory.pxi
@@ -76,6 +76,12 @@ cdef class MemoryPool(_Weakrefable):
"""
return frombytes(self.pool.backend_name())

def __repr__(self):
name = f"pyarrow.{self.__class__.__name__}"
return (f"<{name} "
f"backend_name={self.backend_name} "
f"bytes_allocated={self.bytes_allocated()} "
f"max_memory={self.max_memory()}>")

cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
if memory_pool is None:
@@ -118,6 +124,11 @@ cdef class ProxyMemoryPool(MemoryPool):
def default_memory_pool():
"""
Return the process-global memory pool.

Examples
--------
>>> default_memory_pool()
<pyarrow.MemoryPool backend_name=... bytes_allocated=0 max_memory=...>
"""
cdef:
MemoryPool pool = MemoryPool.__new__(MemoryPool)
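The new MemoryPool repr reads the pool's live statistics. A small sketch follows; the backend name and byte counts depend on how Arrow was built and on what has already been allocated:

```python
import pyarrow as pa

# The process-global pool; backend_name depends on the build
# (e.g. jemalloc, mimalloc, or the system allocator).
pool = pa.default_memory_pool()
print(pool)  # <pyarrow.MemoryPool backend_name=... bytes_allocated=... max_memory=...>

# Allocations against the pool are visible in bytes_allocated
# while the buffer is alive.
buf = pa.allocate_buffer(1024, memory_pool=pool)
assert pool.bytes_allocated() >= 1024
print(buf)   # <pyarrow.Buffer address=0x... size=1024 is_cpu=True is_mutable=True>
```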
2 changes: 1 addition & 1 deletion python/pyarrow/table.pxi
@@ -2002,7 +2002,7 @@ cdef class RecordBatch(_PandasConvertible):
>>> batch = pa.RecordBatch.from_arrays([n_legs, animals],
... names=["n_legs", "animals"])
>>> batch.serialize()
<pyarrow.lib.Buffer object at ...>
<pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
"""
cdef shared_ptr[CBuffer] buffer
cdef CIpcWriteOptions options = CIpcWriteOptions.Defaults()
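For reference, the updated doctest output corresponds to something like the following; the array contents here are illustrative placeholders rather than values taken from the docstring itself:

```python
import pyarrow as pa

# serialize() returns the IPC payload as a Buffer, which now prints its
# address, size and flags instead of the bare object repr.
n_legs = pa.array([2, 4, 5, 100])
animals = pa.array(["Flamingo", "Horse", "Brittle stars", "Centipede"])
batch = pa.RecordBatch.from_arrays([n_legs, animals], names=["n_legs", "animals"])
print(batch.serialize())
# <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
```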
6 changes: 6 additions & 0 deletions python/pyarrow/tests/test_io.py
@@ -719,6 +719,12 @@ def test_compression_level(compression):
if not Codec.is_available(compression):
pytest.skip("{} support is not built".format(compression))

codec = Codec(compression)
if codec.name == "snappy":
assert codec.compression_level is None
else:
assert isinstance(codec.compression_level, int)

# These codecs do not support a compression level
no_level = ['snappy']
if compression in no_level:
2 changes: 1 addition & 1 deletion python/pyarrow/types.pxi
@@ -2127,7 +2127,7 @@ cdef class Schema(_Weakrefable):
Write schema to Buffer:
>>> schema.serialize()
<pyarrow.lib.Buffer object at ...>
<pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True>
"""
cdef:
shared_ptr[CBuffer] buffer
