From 2c57d472b63d92924dd0685c8979761d5dba5391 Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Mon, 29 Aug 2022 12:24:44 +0200 Subject: [PATCH] ARROW-17449: [Python] Better repr for Buffer, MemoryPool, NativeFile and Codec (#13921) Example: ```python In [1]: import io In [2]: import pyarrow as pa In [3]: pa.PythonFile(io.BytesIO()) Out[3]: <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False> In [4]: pa.Codec('gzip') Out[4]: <pyarrow.Codec name=gzip compression_level=9> In [5]: pool = pa.default_memory_pool() In [6]: pool Out[6]: <pyarrow.MemoryPool backend_name=... bytes_allocated=... max_memory=...> In [7]: pa.allocate_buffer(1024, memory_pool=pool) Out[7]: <pyarrow.Buffer address=0x... size=1024 is_cpu=True is_mutable=True> ``` Signed-off-by: Joris Van den Bossche --- python/pyarrow/io.pxi | 45 ++++++++++++++++++++++++++++++++- python/pyarrow/memory.pxi | 11 ++++++++ python/pyarrow/table.pxi | 2 +- python/pyarrow/tests/test_io.py | 6 +++++ python/pyarrow/types.pxi | 2 +- 5 files changed, 63 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index d1d3feb3c175e..3dd60735c3cc8 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -121,6 +121,15 @@ cdef class NativeFile(_Weakrefable): def __exit__(self, exc_type, exc_value, tb): self.close() + def __repr__(self): + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"closed={self.closed} " + f"own_file={self.own_file} " + f"is_seekable={self.is_seekable} " + f"is_writable={self.is_writable} " + f"is_readable={self.is_readable}>") + @property def mode(self): """ @@ -766,6 +775,13 @@ cdef class PythonFile(NativeFile): As a downside, there is a non-zero redirection cost in translating Arrow stream calls to Python method calls. Furthermore, Python's Global Interpreter Lock may limit parallelism in some situations. 
+ + Examples + -------- + >>> import io + >>> import pyarrow as pa + >>> pa.PythonFile(io.BytesIO()) + <pyarrow.PythonFile closed=False own_file=False is_seekable=False is_writable=True is_readable=False> """ cdef: object handle @@ -1053,6 +1069,14 @@ cdef class Buffer(_Weakrefable): def __len__(self): return self.size + def __repr__(self): + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"address={hex(self.address)} " + f"size={self.size} " + f"is_cpu={self.is_cpu} " + f"is_mutable={self.is_mutable}>") + @property def size(self): """ @@ -1843,6 +1867,17 @@ cdef class Codec(_Weakrefable): ------ ValueError If invalid compression value is passed. + + Examples + -------- + >>> import pyarrow as pa + >>> pa.Codec.is_available('gzip') + True + >>> codec = pa.Codec('gzip') + >>> codec.name + 'gzip' + >>> codec.compression_level + 9 """ def __init__(self, str compression not None, compression_level=None): @@ -1964,7 +1999,9 @@ cdef class Codec(_Weakrefable): @property def compression_level(self): """Returns the compression level parameter of the codec""" - return frombytes(self.unwrap().compression_level()) + if self.name == 'snappy': + return None + return self.unwrap().compression_level() def compress(self, object buf, asbytes=False, memory_pool=None): """ @@ -2080,6 +2117,12 @@ cdef class Codec(_Weakrefable): return pybuf if asbytes else out_buf + def __repr__(self): + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"name={self.name} " + f"compression_level={self.compression_level}>") + def compress(object buf, codec='lz4', asbytes=False, memory_pool=None): """ diff --git a/python/pyarrow/memory.pxi b/python/pyarrow/memory.pxi index 2258be78d5479..1ddcb01ccb6ab 100644 --- a/python/pyarrow/memory.pxi +++ b/python/pyarrow/memory.pxi @@ -76,6 +76,12 @@ cdef class MemoryPool(_Weakrefable): """ return frombytes(self.pool.backend_name()) + def __repr__(self): + name = f"pyarrow.{self.__class__.__name__}" + return (f"<{name} " + f"backend_name={self.backend_name} " + f"bytes_allocated={self.bytes_allocated()} " + 
f"max_memory={self.max_memory()}>") cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool): if memory_pool is None: @@ -118,6 +124,11 @@ cdef class ProxyMemoryPool(MemoryPool): def default_memory_pool(): """ Return the process-global memory pool. + + Examples + -------- + >>> default_memory_pool() + <pyarrow.MemoryPool backend_name=... bytes_allocated=... max_memory=...> """ cdef: MemoryPool pool = MemoryPool.__new__(MemoryPool) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index b8c98df1f0e0e..931677f9848ca 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -2002,7 +2002,7 @@ cdef class RecordBatch(_PandasConvertible): >>> batch = pa.RecordBatch.from_arrays([n_legs, animals], ... names=["n_legs", "animals"]) >>> batch.serialize() - <pyarrow.lib.Buffer object at ...> + <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True> """ cdef shared_ptr[CBuffer] buffer cdef CIpcWriteOptions options = CIpcWriteOptions.Defaults() diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index ca49c5218e88b..a6488d70df53e 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -719,6 +719,12 @@ def test_compression_level(compression): if not Codec.is_available(compression): pytest.skip("{} support is not built".format(compression)) + codec = Codec(compression) + if codec.name == "snappy": + assert codec.compression_level is None + else: + assert isinstance(codec.compression_level, int) + # These codecs do not support a compression level no_level = ['snappy'] if compression in no_level: diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 1babbc41549c7..d37363e06ff30 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -2127,7 +2127,7 @@ cdef class Schema(_Weakrefable): Write schema to Buffer: >>> schema.serialize() - <pyarrow.lib.Buffer object at ...> + <pyarrow.Buffer address=0x... size=... is_cpu=True is_mutable=True> """ cdef: shared_ptr[CBuffer] buffer