diff --git a/src/blosc2/core.py b/src/blosc2/core.py index 611f113f..1a4c4df3 100644 --- a/src/blosc2/core.py +++ b/src/blosc2/core.py @@ -133,8 +133,9 @@ def compress( return blosc2_ext.compress(src, typesize, clevel, filter, codec) -def decompress(src: object, dst: object | bytearray = None, - as_bytearray: bool = False) -> str | bytes | bytearray | None: +def decompress( + src: object, dst: object | bytearray = None, as_bytearray: bool = False +) -> str | bytes | bytearray | None: """Decompresses a bytes-like compressed object. Parameters @@ -202,8 +203,12 @@ def decompress(src: object, dst: object | bytearray = None, return blosc2_ext.decompress(src, dst, as_bytearray) -def pack(obj: object, clevel: int = 9, filter: blosc2.Filter = blosc2.Filter.SHUFFLE, - codec: blosc2.Codec = blosc2.Codec.BLOSCLZ) -> str | bytes: +def pack( + obj: object, + clevel: int = 9, + filter: blosc2.Filter = blosc2.Filter.SHUFFLE, + codec: blosc2.Codec = blosc2.Codec.BLOSCLZ, +) -> str | bytes: """Pack (compress) a Python object. Parameters @@ -321,8 +326,12 @@ def unpack(packed_object: str | bytes, **kwargs: dict) -> object: return obj -def pack_array(arr: np.ndarray, clevel: int = 9, filter: blosc2.Filter = blosc2.Filter.SHUFFLE, - codec: blosc2.Codec = blosc2.Codec.BLOSCLZ) -> str | bytes: +def pack_array( + arr: np.ndarray, + clevel: int = 9, + filter: blosc2.Filter = blosc2.Filter.SHUFFLE, + codec: blosc2.Codec = blosc2.Codec.BLOSCLZ, +) -> str | bytes: """Pack (compress) a NumPy array. It is equivalent to the pack function. Parameters @@ -591,8 +600,9 @@ def load_array(urlpath: str, dparams: dict = None) -> np.ndarray: return load_tensor(urlpath, dparams=dparams) -def pack_tensor(tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, chunksize: int = None, - **kwargs: dict) -> bytes | int: +def pack_tensor( + tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, chunksize: int = None, **kwargs: dict +) -> bytes | int: """Pack (compress) a TensorFlow or PyTorch tensor or a NumPy array. Parameters @@ -719,8 +729,12 @@ def unpack_tensor(cframe: bytes) -> tensorflow.Tensor | torch.Tensor | np.ndarra return _unpack_tensor(schunk) -def save_tensor(tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, urlpath: str, chunksize: int = None, - **kwargs: dict) -> int: +def save_tensor( + tensor: tensorflow.Tensor | torch.Tensor | np.ndarray, + urlpath: str, + chunksize: int = None, + **kwargs: dict, +) -> int: """Save a serialized PyTorch or TensorFlow tensor or NumPy array in `urlpath`. Parameters @@ -1271,8 +1285,11 @@ def compute_partition(nitems, maxshape, minpart=None): def compute_chunks_blocks( - shape: tuple[int] | list, chunks: tuple | list | None = None, blocks: tuple | list | None = None, - dtype: np.dtype = np.uint8, **kwargs: dict + shape: tuple[int] | list, + chunks: tuple | list | None = None, + blocks: tuple | list | None = None, + dtype: np.dtype = np.uint8, + **kwargs: dict, ) -> tuple[(int, int)]: """ Compute educated guesses for chunks and blocks of a :ref:`NDArray`. @@ -1421,13 +1438,13 @@ def compress2(src: object, **kwargs: dict) -> str | bytes: If an internal error occurred, probably because some parameter is not a valid parameter. 
""" - if kwargs is not None and 'cparams' in kwargs: + if kwargs is not None and "cparams" in kwargs: if len(kwargs) > 1: raise AttributeError("Cannot pass both cparams and other kwargs already included in CParams") - if isinstance(kwargs.get('cparams'), blosc2.CParams): - kwargs = asdict(kwargs.get('cparams')) + if isinstance(kwargs.get("cparams"), blosc2.CParams): + kwargs = asdict(kwargs.get("cparams")) else: - kwargs = kwargs.get('cparams') + kwargs = kwargs.get("cparams") return blosc2_ext.compress2(src, **kwargs) @@ -1481,13 +1498,13 @@ def decompress2(src: object, dst: object | bytearray = None, **kwargs: dict) -> If the length of :paramref:`src` is smaller than the minimum. If :paramref:`dst` is not None and its length is 0. """ - if kwargs is not None and 'dparams' in kwargs: + if kwargs is not None and "dparams" in kwargs: if len(kwargs) > 1: raise AttributeError("Cannot pass both dparams and other kwargs already included in DParams") - if isinstance(kwargs.get('dparams'), blosc2.DParams): - kwargs = asdict(kwargs.get('dparams')) + if isinstance(kwargs.get("dparams"), blosc2.DParams): + kwargs = asdict(kwargs.get("dparams")) else: - kwargs = kwargs.get('dparams') + kwargs = kwargs.get("dparams") return blosc2_ext.decompress2(src, dst, **kwargs) @@ -1582,11 +1599,11 @@ def ndarray_from_cframe(cframe: bytes | str, copy: bool = False) -> blosc2.NDArr def register_codec( - codec_name: str, - id: int, - encoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] = None, - decoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] = None, - version: int = 1 + codec_name: str, + id: int, + encoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] = None, + decoder: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], int] = None, + version: int = 1, ) -> None: """Register a user defined codec. @@ -1664,10 +1681,10 @@ def decoder1(input, output, meta, schunk): def register_filter( - id: int, - forward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] = None, - backward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] = None, - name: str = None + id: int, + forward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] = None, + backward: Callable[[np.ndarray[np.uint8], np.ndarray[np.uint8], int, blosc2.SChunk], None] = None, + name: str = None, ) -> None: """Register an user defined filter. diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index f1d41f8c..f2b31771 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -610,7 +610,10 @@ def fill_chunk_operands( def fast_eval( - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, getitem: bool, **kwargs + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + getitem: bool, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands using a fast path. @@ -721,7 +724,11 @@ def fast_eval( def slices_eval( - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, getitem: bool, _slice=None, **kwargs + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + getitem: bool, + _slice=None, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands. 
@@ -896,7 +903,11 @@ def slices_eval( def reduce_slices( - expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, reduce_args, _slice=None, **kwargs + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], + operands: dict, + reduce_args, + _slice=None, + **kwargs, ) -> blosc2.NDArray | np.ndarray: """Evaluate the expression in chunks of operands. @@ -1131,7 +1142,9 @@ def convert_none_out(dtype, reduce_op, reduced_shape): return out -def chunked_eval(expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, item=None, **kwargs): +def chunked_eval( + expression: str | Callable[[tuple, np.ndarray, tuple[int]], None], operands: dict, item=None, **kwargs +): """ Evaluate the expression in chunks of operands. @@ -1942,8 +1955,13 @@ def _open_lazyarray(array): return expr -def lazyudf(func: Callable[[tuple, np.ndarray, tuple[int]], None], inputs: tuple | list, - dtype: np.dtype, chunked_eval: bool = True, **kwargs: dict) -> LazyUDF: +def lazyudf( + func: Callable[[tuple, np.ndarray, tuple[int]], None], + inputs: tuple | list, + dtype: np.dtype, + chunked_eval: bool = True, + **kwargs: dict, +) -> LazyUDF: """ Get a LazyUDF from a python user-defined function. @@ -2002,8 +2020,12 @@ def lazyudf(func: Callable[[tuple, np.ndarray, tuple[int]], None], inputs: tuple return LazyUDF(func, inputs, dtype, chunked_eval, **kwargs) -def lazyexpr(expression: str | bytes | LazyExpr, operands: dict = None, - out: blosc2.NDArray | np.ndarray = None, where: tuple | list = None) -> LazyExpr: +def lazyexpr( + expression: str | bytes | LazyExpr, + operands: dict = None, + out: blosc2.NDArray | np.ndarray = None, + where: tuple | list = None, +) -> LazyExpr: """ Get a LazyExpr from an expression. diff --git a/src/blosc2/ndarray.py b/src/blosc2/ndarray.py index 463cb48a..8d503076 100644 --- a/src/blosc2/ndarray.py +++ b/src/blosc2/ndarray.py @@ -34,6 +34,7 @@ def wrapper(target): return wrapper + def make_key_hashable(key): if isinstance(key, slice): return (key.start, key.stop, key.step) @@ -151,9 +152,13 @@ def _check_allowed_dtypes( ) -def sum(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def sum( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the sum of array elements over a given axis. @@ -204,9 +209,13 @@ def sum(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.sum(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) -def mean(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def mean( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the arithmetic mean along the specified axis. 
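Since the `lazyudf`/`lazyexpr` signatures are reflowed here, a quick usage sketch may help confirm nothing behavioral moved. The UDF callback shape `(inputs, output, offset)` follows the `Callable[[tuple, np.ndarray, tuple[int]], None]` annotation above; evaluating a `LazyUDF` by slicing is an assumption carried over from the `LazyExpr` API:

```python
import numpy as np
import blosc2

a = blosc2.asarray(np.linspace(0, 1, 10_000).reshape(100, 100))
b = blosc2.asarray(np.full((100, 100), 2.0))

# String expression plus operands mapping; nothing runs until eval()/slicing.
lexpr = blosc2.lazyexpr("a * b + 1", operands={"a": a, "b": b})
np.testing.assert_allclose(lexpr.eval()[:], a[:] * b[:] + 1)

# A user-defined function, evaluated chunk by chunk.
def double(inputs, output, offset):
    (x,) = inputs
    output[:] = 2 * x

lu = blosc2.lazyudf(double, (a,), dtype=np.float64)
np.testing.assert_allclose(lu[:], 2 * a[:])
```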
@@ -251,9 +260,14 @@ def mean(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int return ndarr.mean(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) -def std(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, ddof: int = 0, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | bool: +def std( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + ddof: int = 0, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | bool: """ Return the standard deviation along the specified axis. @@ -305,9 +319,14 @@ def std(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.std(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) -def var(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, ddof: int = 0, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | bool: +def var( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + ddof: int = 0, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | bool: """ Return the variance along the specified axis. @@ -360,9 +379,13 @@ def var(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.var(axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, **kwargs) -def prod(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - dtype: np.dtype = None, keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def prod( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + dtype: np.dtype = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the product of array elements over a given axis. @@ -414,9 +437,12 @@ def prod(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int return ndarr.prod(axis=axis, dtype=dtype, keepdims=keepdims, **kwargs) -def min(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def min( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the minimum along a given axis. @@ -459,9 +485,12 @@ def min(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.min(axis=axis, keepdims=keepdims, **kwargs) -def max(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | int | float | complex | bool: +def max( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | int | float | complex | bool: """ Return the maximum along a given axis. 
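The reduction wrappers being reindented in this file all delegate to the corresponding `NDArray` method; a short sketch of the NumPy-style keywords they forward (`asarray` is assumed available as in the rest of the public API):

```python
import numpy as np
import blosc2

arr = blosc2.asarray(np.arange(20, dtype=np.int64).reshape(4, 5))

assert blosc2.sum(arr) == 190                        # scalar, like np.sum
m = blosc2.mean(arr, axis=0)                         # per-column means, shape (5,)
s = blosc2.std(arr, axis=1, ddof=1, keepdims=True)   # shape (4, 1)
```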
@@ -510,9 +539,12 @@ def max(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.max(axis=axis, keepdims=keepdims, **kwargs) -def any(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | bool: +def any( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | bool: """ Test whether any array element along a given axis evaluates to True. @@ -559,9 +591,12 @@ def any(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | return ndarr.any(axis=axis, keepdims=keepdims, **kwargs) -def all(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, axis: int | tuple[int] = None, - keepdims: bool = False, **kwargs: dict - ) -> np.ndarray | NDArray | bool: +def all( + ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, + axis: int | tuple[int] = None, + keepdims: bool = False, + **kwargs: dict, +) -> np.ndarray | NDArray | bool: """ Test whether all array elements along a given axis evaluate to True. @@ -981,7 +1016,9 @@ def blocksize(self) -> int: """ return self._schunk.blocksize - def __getitem__(self, key: int | slice | Sequence[slice] | blosc2.LazyExpr | str) -> np.ndarray | blosc2.LazyExpr: + def __getitem__( + self, key: int | slice | Sequence[slice] | blosc2.LazyExpr | str + ) -> np.ndarray | blosc2.LazyExpr: """Get a (multidimensional) slice as specified in key. Parameters @@ -1144,16 +1181,19 @@ def get_chunk(self, nchunk: int) -> bytes: """ return self.schunk.get_chunk(nchunk) - def iterchunks_info(self) -> Iterator[ - NamedTuple("info", - nchunk = int, - coords = tuple, - cratio = float, - special = blosc2.SpecialValue, - repeated_value = bytes | None, - lazychunk = bytes - ) - ]: + def iterchunks_info( + self, + ) -> Iterator[ + NamedTuple( + "info", + nchunk=int, + coords=tuple, + cratio=float, + special=blosc2.SpecialValue, + repeated_value=bytes | None, + lazychunk=bytes, + ) + ]: """ Iterate over :paramref:`self` chunks, providing info on index and special values. @@ -1201,7 +1241,6 @@ def iterchunks_info(self) -> Iterator[ repeated_value = np.frombuffer(cinfo.repeated_value, dtype=self.dtype)[0] yield ChunkInfoNDArray(nchunk, coords, cratio, special, repeated_value, lazychunk) - def tobytes(self) -> bytes: """Returns a buffer with the data contents. 
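The reflowed `iterchunks_info` annotation is easier to read as a consumer. For an all-zeros array every chunk should report a special value; field names are taken from the `NamedTuple` above, and the `chunks` kwarg on `zeros` is assumed from the NDArray constructors later in this file:

```python
import numpy as np
import blosc2

a = blosc2.zeros((1_000, 1_000), dtype=np.float64, chunks=(250, 500))
for info in a.iterchunks_info():
    # nchunk/coords locate the chunk; `special` flags e.g. all-zero chunks
    print(info.nchunk, info.coords, round(info.cratio, 1), info.special)
```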
@@ -1291,10 +1330,16 @@ def copy(self, dtype: np.dtype = None, **kwargs: dict) -> NDArray: """ if dtype is None: dtype = self.dtype - kwargs["cparams"] = kwargs.get("cparams").copy() if isinstance(kwargs.get("cparams"), dict) \ + kwargs["cparams"] = ( + kwargs.get("cparams").copy() + if isinstance(kwargs.get("cparams"), dict) else asdict(self.schunk.cparams) - kwargs["dparams"] = kwargs.get("dparams").copy() if isinstance(kwargs.get("dparams"), dict) \ + ) + kwargs["dparams"] = ( + kwargs.get("dparams").copy() + if isinstance(kwargs.get("dparams"), dict) else asdict(self.schunk.dparams) + ) if "meta" not in kwargs: # Copy metalayers as well meta_dict = {meta: self.schunk.meta[meta] for meta in self.schunk.meta} @@ -1759,8 +1804,9 @@ def arctan(ndarr: NDArray | NDField | blosc2.C2Array | blosc2.LazyExpr, /) -> bl return blosc2.LazyExpr(new_op=(ndarr, "arctan", None)) -def arctan2(ndarr1: NDArray | NDField | blosc2.C2Array, - ndarr2: NDArray | NDField | blosc2.C2Array, /) -> blosc2.LazyExpr: +def arctan2( + ndarr1: NDArray | NDField | blosc2.C2Array, ndarr2: NDArray | NDField | blosc2.C2Array, / +) -> blosc2.LazyExpr: """ Element-wise arc tangent of ``ndarr1 / ndarr2`` choosing the quadrant correctly. @@ -2445,8 +2491,9 @@ def zeros(shape: int | tuple | list, dtype: np.dtype = np.uint8, **kwargs: dict) return blosc2_ext.zeros(shape, chunks, blocks, dtype, **kwargs) -def full(shape: int | tuple | list, fill_value: bytes | int | float | bool, dtype: np.dtype = None, - **kwargs: dict) -> NDArray: +def full( + shape: int | tuple | list, fill_value: bytes | int | float | bool, dtype: np.dtype = None, **kwargs: dict +) -> NDArray: """Create an array, with :paramref:`fill_value` being used as the default value for uninitialized portions of the array. @@ -2655,7 +2702,9 @@ def _check_ndarray_kwargs(**kwargs): if "storage" in kwargs: for key in kwargs: if key in list(blosc2.Storage.__annotations__): - raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage") + raise AttributeError( + "Cannot pass both `storage` and other kwargs already included in Storage" + ) storage = kwargs.get("storage") if isinstance(storage, blosc2.Storage): kwargs = {**kwargs, **asdict(storage)} @@ -2695,9 +2744,9 @@ def _check_ndarray_kwargs(**kwargs): return kwargs -def get_slice_nchunks(schunk: blosc2.SChunk, - key: tuple[(int, int)] | int | slice | Sequence[slice] - ) -> np.ndarray: +def get_slice_nchunks( + schunk: blosc2.SChunk, key: tuple[(int, int)] | int | slice | Sequence[slice] +) -> np.ndarray: """ Get the unidimensional chunk indexes needed to get a slice of a :ref:`SChunk ` or a :ref:`NDArray`. 
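The ternary rewrite in `copy()` above keeps the old semantics: a dict `cparams`/`dparams` supplied by the caller wins, otherwise the source schunk's parameters are inherited. A sketch of both paths; the assert assumes defaults fill in the keys the override dict omits:

```python
import numpy as np
import blosc2

a = blosc2.full((100, 100), np.pi)        # dtype inferred from fill_value

b = a.copy()                              # inherits a.schunk.cparams/dparams
c = a.copy(cparams={"clevel": 9})         # dict override takes precedence
assert c.schunk.cparams.clevel == 9
```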
diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py index 0104475e..b77d68df 100644 --- a/src/blosc2/schunk.py +++ b/src/blosc2/schunk.py @@ -227,14 +227,16 @@ def __init__(self, chunksize: int = None, data: object = None, **kwargs: dict): "mmap_mode", "initial_mapping_size", "_is_view", - "storage" + "storage", ] for kwarg in kwargs: if kwarg not in allowed_kwargs: raise ValueError(f"{kwarg} is not supported as keyword argument") if kwargs.get("storage") is not None: if any(key in list(blosc2.Storage.__annotations__) for key in kwargs): - raise AttributeError("Cannot pass both `storage` and other kwargs already included in Storage") + raise AttributeError( + "Cannot pass both `storage` and other kwargs already included in Storage" + ) storage = kwargs.get("storage") if isinstance(storage, blosc2.Storage): kwargs = {**kwargs, **asdict(storage)} @@ -330,8 +332,7 @@ def chunksize(self) -> int: @property def blocksize(self) -> int: - """The block size (in bytes). - """ + """The block size (in bytes).""" return super().blocksize @property @@ -345,7 +346,7 @@ def cratio(self) -> float: Compression ratio. """ if self.cbytes == 0: - return 0. + return 0.0 return self.nbytes / self.cbytes @property @@ -416,8 +417,9 @@ def append_data(self, data: object) -> int: blosc2_ext.check_access_mode(self.urlpath, self.mode) return super().append_data(data) - def fill_special(self, nitems: int, special_value: blosc2.SpecialValue, - value: bytes | int | float | bool = None) -> int: + def fill_special( + self, nitems: int, special_value: blosc2.SpecialValue, value: bytes | int | float | bool = None + ) -> int: """Fill the SChunk with a special value. SChunk must be empty. Parameters @@ -992,15 +994,18 @@ def iterchunks(self, dtype: np.dtype) -> Iterator[np.ndarray]: self.get_slice(i, i + self.chunkshape, out) yield out - def iterchunks_info(self) -> Iterator[ - NamedTuple("info", - nchunk = int, - cratio = float, - special = blosc2.SpecialValue, - repeated_value = bytes | None, - lazychunk = bytes - ) - ]: + def iterchunks_info( + self, + ) -> Iterator[ + NamedTuple( + "info", + nchunk=int, + cratio=float, + special=blosc2.SpecialValue, + repeated_value=bytes | None, + lazychunk=bytes, + ) + ]: """ Iterate over :paramref:`self` chunks, providing info on index and special values. @@ -1339,8 +1344,9 @@ def __dealloc__(self): @_inherit_doc_parameter(blosc2.Storage, "mmap_mode:", {r"\* - 'w\+'[^*]+": ""}) @_inherit_doc_parameter(blosc2.Storage, "initial_mapping_size:", {r"r\+ w\+, or c": "r+ or c"}) -def open(urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, - **kwargs: dict) -> blosc2.SChunk | blosc2.NDArray | blosc2.C2Array: +def open( + urlpath: str | pathlib.Path | blosc2.URLPath, mode: str = "a", offset: int = 0, **kwargs: dict +) -> blosc2.SChunk | blosc2.NDArray | blosc2.C2Array: """Open a persistent :ref:`SChunk` or :ref:`NDArray` or a remote :ref:`C2Array` or a :ref:`Proxy` (see the `Notes` section for more info on the latter case). 
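`fill_special` gets a signature reflow in this file; the pattern below mirrors `test_schunk_fill_special` in `tests/test_schunk_constructor.py` further down in this patch:

```python
import numpy as np
import blosc2

schunk = blosc2.SChunk(chunksize=20_000 * 4, cparams={"typesize": 4})
schunk.fill_special(100_000, blosc2.SpecialValue.NAN)   # no buffer materialized

dest = np.empty(100_000, dtype=np.float32)
schunk.get_slice(out=dest)
assert np.all(np.isnan(dest))
```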
diff --git a/src/blosc2/storage.py b/src/blosc2/storage.py index a5c2f6bc..8bd4d90b 100644 --- a/src/blosc2/storage.py +++ b/src/blosc2/storage.py @@ -15,18 +15,22 @@ def default_nthreads(): return blosc2.nthreads + def default_filters(): - return [blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.NOFILTER, - blosc2.Filter.SHUFFLE] + return [ + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.NOFILTER, + blosc2.Filter.SHUFFLE, + ] def default_filters_meta(): return [0] * 6 + @dataclass class CParams: """Dataclass for hosting the different compression parameters. @@ -64,6 +68,7 @@ class CParams: tuner: :class:`Tuner` The tuner to use. Default: :py:obj:`Tuner.STUNE `. """ + codec: blosc2.Codec | int = blosc2.Codec.ZSTD codec_meta: int = 0 clevel: int = 1 @@ -80,7 +85,7 @@ def __post_init__(self): if len(self.filters) > 6: raise ValueError("Number of filters exceeds 6") if len(self.filters) < len(self.filters_meta): - self.filters_meta = self.filters_meta[:len(self.filters)] + self.filters_meta = self.filters_meta[: len(self.filters)] warnings.warn("Changed `filters_meta` length to match `filters` length") if len(self.filters) > len(self.filters_meta): raise ValueError("Number of filters cannot exceed number of filters meta") @@ -101,6 +106,7 @@ class DParams: value of :py:obj:`blosc2.nthreads` is used. If not set with :func:`blosc2.set_nthreads`, blosc2 computes a good guess for it. """ + nthreads: int = field(default_factory=default_nthreads) @@ -191,9 +197,10 @@ class Storage: value: object The metalayer object that will be serialized using msgpack. """ + contiguous: bool = None urlpath: str = None - mode: str = 'a' + mode: str = "a" mmap_mode: str = None initial_mapping_size: int = None meta: dict = None @@ -203,8 +210,12 @@ def __post_init__(self): self.contiguous = self.urlpath is not None # Check for None values for f in fields(self): - if (getattr(self, f.name) is None and - f.name not in ['urlpath', 'mmap_mode', 'initial_mapping_size', 'meta']): + if getattr(self, f.name) is None and f.name not in [ + "urlpath", + "mmap_mode", + "initial_mapping_size", + "meta", + ]: setattr(self, f.name, getattr(Storage(), f.name)) warnings.warn(f"`{f.name}` field value changed from `None` to `{getattr(self, f.name)}`") diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index 69fd0022..a6fe3bc1 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -493,8 +493,12 @@ def test_save(): chunks = tuple(i // 2 for i in nres.shape) blocks = tuple(i // 4 for i in nres.shape) urlpath_eval = "eval_expr.b2nd" - res = expr.eval(storage=blosc2.Storage(urlpath=urlpath_eval, mode="w"), - chunks=chunks, blocks=blocks, cparams=cparams, dparams=dparams, + res = expr.eval( + storage=blosc2.Storage(urlpath=urlpath_eval, mode="w"), + chunks=chunks, + blocks=blocks, + cparams=cparams, + dparams=dparams, ) np.testing.assert_allclose(res[:], nres, rtol=tol, atol=tol) diff --git a/tests/ndarray/test_metalayers.py b/tests/ndarray/test_metalayers.py index 68c6ab48..b130edc2 100644 --- a/tests/ndarray/test_metalayers.py +++ b/tests/ndarray/test_metalayers.py @@ -41,8 +41,9 @@ def test_metalayers(shape, chunks, blocks, urlpath, contiguous, dtype): chunks=chunks, blocks=blocks, dtype=dtype, - storage=blosc2.Storage(urlpath=urlpath, contiguous=contiguous, - meta={"numpy": numpy_meta, "test": test_meta}), + storage=blosc2.Storage( + 
urlpath=urlpath, contiguous=contiguous, meta={"numpy": numpy_meta, "test": test_meta} + ), ) assert os.path.exists(urlpath) diff --git a/tests/ndarray/test_proxy.py b/tests/ndarray/test_proxy.py index e78ba342..950daeba 100644 --- a/tests/ndarray/test_proxy.py +++ b/tests/ndarray/test_proxy.py @@ -111,15 +111,17 @@ def test_open(urlpath, shape, chunks, blocks, slices, dtype): # Test the ProxyNDSources interface -@pytest.mark.parametrize("shape, chunks, blocks", [ - # One should be careful to choose aligned partitions for our source - # E.g., the following is not aligned - # ((10, 8), (4, 4), (2, 2)) - ((12,), (4,), (2,)), - ((10, 8), (2, 8), (1, 4)), - ((10, 8, 6), (2, 4, 3), (1, 2, 3)), - ((4, 8, 6, 4), (2, 4, 3, 2), (1, 2, 3, 2)), - ] +@pytest.mark.parametrize( + "shape, chunks, blocks", + [ + # One should be careful to choose aligned partitions for our source + # E.g., the following is not aligned + # ((10, 8), (4, 4), (2, 2)) + ((12,), (4,), (2,)), + ((10, 8), (2, 8), (1, 4)), + ((10, 8, 6), (2, 4, 3), (1, 2, 3)), + ((4, 8, 6, 4), (2, 4, 3, 2), (1, 2, 3, 2)), + ], ) def test_proxy_source(shape, chunks, blocks): # Define an object that will be used as a source @@ -128,6 +130,7 @@ class Source(blosc2.ProxyNDSource): A simple source that will be used to test the ProxyNDSource interface. """ + def __init__(self, data, chunks, blocks): self._data = data self._shape = data.shape diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index 496713c6..aa231338 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -65,7 +65,10 @@ def test_reduce_bool(array_fixture, reduce_op): @pytest.mark.parametrize("axis", [0, 1, (0, 1), None]) @pytest.mark.parametrize("keepdims", [True, False]) @pytest.mark.parametrize("dtype_out", [np.int16, np.float64]) -@pytest.mark.parametrize("kwargs", [{}, {"cparams": blosc2.CParams(clevel=1, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])}]) +@pytest.mark.parametrize( + "kwargs", + [{}, {"cparams": blosc2.CParams(clevel=1, filters=[blosc2.Filter.BITSHUFFLE], filters_meta=[0])}], +) def test_reduce_params(array_fixture, axis, keepdims, dtype_out, reduce_op, kwargs): a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture if axis is not None and np.isscalar(axis) and len(a1.shape) >= axis: diff --git a/tests/test_compress2.py b/tests/test_compress2.py index af5e6f92..498921e2 100644 --- a/tests/test_compress2.py +++ b/tests/test_compress2.py @@ -19,33 +19,41 @@ @pytest.mark.parametrize( "obj, cparams, dparams", [ - (random.integers(0, 10, 10), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6)}, {}), + (random.integers(0, 10, 10), {"cparams": blosc2.CParams(codec=blosc2.Codec.LZ4, clevel=6)}, {}), ( np.arange(10, dtype="float32"), # Select an absolute precision of 10 bits in mantissa - {'cparams': blosc2.CParams( - filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - filters_meta=[10, 0], - typesize=4 - )}, - {'dparams': blosc2.DParams(nthreads=4)}, + { + "cparams": blosc2.CParams( + filters=[blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], + filters_meta=[10, 0], + typesize=4, + ) + }, + {"dparams": blosc2.DParams(nthreads=4)}, ), ( np.arange(10, dtype="float32"), # Do a reduction of precision of 10 bits in mantissa - {"cparams": {"filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], - "filters_meta": [-10, 0], - "typesize": 4, - }, - }, + { + "cparams": { + "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.BITSHUFFLE], + "filters_meta": [-10, 0], + "typesize": 4, + }, + 
}, {"nthreads": 4}, ), ( random.integers(0, 1000, 1000, endpoint=True), - {'cparams': blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4)}, - {'dparams': blosc2.DParams()}, + {"cparams": blosc2.CParams(splitmode=blosc2.SplitMode.ALWAYS_SPLIT, nthreads=5, typesize=4)}, + {"dparams": blosc2.DParams()}, + ), + ( + np.arange(45, dtype=np.float64), + {"cparams": blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4)}, + {}, ), - (np.arange(45, dtype=np.float64), {'cparams': blosc2.CParams(codec=blosc2.Codec.LZ4HC, typesize=4)}, {}), (np.arange(50, dtype=np.int64), {"typesize": 4}, {"dparams": blosc2.dparams_dflts}), ], ) diff --git a/tests/test_schunk.py b/tests/test_schunk.py index b5198a10..176db54e 100644 --- a/tests/test_schunk.py +++ b/tests/test_schunk.py @@ -59,15 +59,12 @@ def test_schunk_numpy(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nc blosc2.SChunk(chunksize=chunk_len * 4, storage=storage, cparams=cparams, dparams=dparams) # Create a schunk which we can read later - storage2 = replace(storage, - mode="w" if mmap_mode is None else None, - mmap_mode="w+" if mmap_mode is not None else None) - schunk = blosc2.SChunk( - chunksize=chunk_len * 4, - storage=storage2, - cparams=cparams, - dparams=dparams + storage2 = replace( + storage, + mode="w" if mmap_mode is None else None, + mmap_mode="w+" if mmap_mode is not None else None, ) + schunk = blosc2.SChunk(chunksize=chunk_len * 4, storage=storage2, cparams=cparams, dparams=dparams) assert schunk.urlpath == urlpath assert schunk.contiguous == contiguous @@ -203,12 +200,13 @@ def test_schunk(contiguous, urlpath, mode, mmap_mode, nbytes, cparams, dparams, ) @pytest.mark.parametrize("copy", [True, False]) def test_schunk_cframe(contiguous, urlpath, mode, mmap_mode, cparams, dparams, nchunks, copy): - storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, - mode=mode, mmap_mode=mmap_mode) + storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath, mode=mode, mmap_mode=mmap_mode) blosc2.remove_urlpath(urlpath) data = np.arange(200 * 1000 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, data=data, **asdict(storage), cparams=cparams, dparams=dparams) + schunk = blosc2.SChunk( + chunksize=200 * 1000 * 4, data=data, **asdict(storage), cparams=cparams, dparams=dparams + ) cframe = schunk.to_cframe() schunk2 = blosc2.schunk_from_cframe(cframe, copy) @@ -286,7 +284,9 @@ def test_schunk_cdparams(cparams, dparams, new_cparams, new_dparams): schunk.dparams = new_dparams for field in fields(schunk.cparams): if field.name in ["filters", "filters_meta"]: - assert getattr(schunk.cparams, field.name)[: len(getattr(new_cparams, field.name))] == getattr(new_cparams, field.name) + assert getattr(schunk.cparams, field.name)[: len(getattr(new_cparams, field.name))] == getattr( + new_cparams, field.name + ) else: assert getattr(schunk.cparams, field.name) == getattr(new_cparams, field.name) diff --git a/tests/test_schunk_constructor.py b/tests/test_schunk_constructor.py index dda1e956..01dfa259 100644 --- a/tests/test_schunk_constructor.py +++ b/tests/test_schunk_constructor.py @@ -129,16 +129,17 @@ def test_schunk(contiguous, urlpath, cparams, dparams, chunksize): ({"splitmode": blosc2.SplitMode.ALWAYS_SPLIT, "nthreads": 5, "typesize": 4}, 200 * 1000 * 2 + 17), ], ) -@pytest.mark.parametrize("special_value, expected_value", - [ - (blosc2.SpecialValue.ZERO, 0), - (blosc2.SpecialValue.NAN, np.nan), - (blosc2.SpecialValue.UNINIT, 0), - (blosc2.SpecialValue.VALUE, 34), - 
(blosc2.SpecialValue.VALUE, np.pi), - (blosc2.SpecialValue.VALUE, b"0123"), - (blosc2.SpecialValue.VALUE, True), - ], +@pytest.mark.parametrize( + "special_value, expected_value", + [ + (blosc2.SpecialValue.ZERO, 0), + (blosc2.SpecialValue.NAN, np.nan), + (blosc2.SpecialValue.UNINIT, 0), + (blosc2.SpecialValue.VALUE, 34), + (blosc2.SpecialValue.VALUE, np.pi), + (blosc2.SpecialValue.VALUE, b"0123"), + (blosc2.SpecialValue.VALUE, True), + ], ) def test_schunk_fill_special(contiguous, urlpath, cparams, nitems, special_value, expected_value): kwargs = {"contiguous": contiguous, "urlpath": urlpath, "cparams": cparams} @@ -157,7 +158,7 @@ def test_schunk_fill_special(contiguous, urlpath, cparams, nitems, special_value if isinstance(expected_value, float): dtype = np.float32 elif isinstance(expected_value, bytes): - dtype = np.dtype('|S' + str(len(expected_value))) + dtype = np.dtype("|S" + str(len(expected_value))) array = np.full(nitems, expected_value, dtype=dtype) dest = np.empty(nitems, dtype=dtype) schunk.get_slice(out=dest) diff --git a/tests/test_schunk_insert.py b/tests/test_schunk_insert.py index 18abc21a..06fecc95 100644 --- a/tests/test_schunk_insert.py +++ b/tests/test_schunk_insert.py @@ -33,7 +33,9 @@ def test_schunk_insert_numpy(contiguous, urlpath, nchunks, ninserts, copy, creat storage = blosc2.Storage(contiguous=contiguous, urlpath=urlpath) blosc2.remove_urlpath(urlpath) - schunk = blosc2.SChunk(chunksize=200 * 1000 * 4, storage=storage, cparams={"nthreads": 2}, dparams={"nthreads": 2}) + schunk = blosc2.SChunk( + chunksize=200 * 1000 * 4, storage=storage, cparams={"nthreads": 2}, dparams={"nthreads": 2} + ) for i in range(nchunks): buffer = i * np.arange(200 * 1000, dtype="int32") nchunks_ = schunk.append_data(buffer) diff --git a/tests/test_schunk_set_slice.py b/tests/test_schunk_set_slice.py index f5a26683..d78b5e67 100644 --- a/tests/test_schunk_set_slice.py +++ b/tests/test_schunk_set_slice.py @@ -38,7 +38,9 @@ def test_schunk_set_slice(contiguous, urlpath, mode, cparams, dparams, nchunks, blosc2.remove_urlpath(urlpath) data = np.arange(200 * 100 * nchunks, dtype="int32") - schunk = blosc2.SChunk(chunksize=200 * 100 * 4, data=data, storage=storage, cparams=cparams, dparams=dparams) + schunk = blosc2.SChunk( + chunksize=200 * 100 * 4, data=data, storage=storage, cparams=cparams, dparams=dparams + ) _start, _stop = start, stop if _start is None: diff --git a/tests/test_storage.py b/tests/test_storage.py index 2e7fa6d5..4a4b4551 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -82,27 +82,33 @@ def test_raises_storage(contiguous, urlpath): "cparams", [ blosc2.CParams(codec=blosc2.Codec.LZ4, filters=[blosc2.Filter.BITSHUFFLE]), - {"typesize": 4, 'filters': [blosc2.Filter.TRUNC_PREC, blosc2.Filter.DELTA], 'filters_meta': [0, 0]}, - blosc2.CParams(nthreads=5, filters=[blosc2.Filter.BITSHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0] * 3), - {"codec": blosc2.Codec.LZ4HC, "typesize": 4, 'filters': [blosc2.Filter.BYTEDELTA]}, + {"typesize": 4, "filters": [blosc2.Filter.TRUNC_PREC, blosc2.Filter.DELTA], "filters_meta": [0, 0]}, + blosc2.CParams( + nthreads=5, filters=[blosc2.Filter.BITSHUFFLE, blosc2.Filter.BYTEDELTA], filters_meta=[0] * 3 + ), + {"codec": blosc2.Codec.LZ4HC, "typesize": 4, "filters": [blosc2.Filter.BYTEDELTA]}, ], ) def test_cparams_values(cparams): schunk = blosc2.SChunk(cparams=cparams) cparams_dataclass = cparams if isinstance(cparams, blosc2.CParams) else blosc2.CParams(**cparams) for field in fields(cparams_dataclass): - if field.name in 
['filters', 'filters_meta']: - assert getattr(schunk.cparams, field.name)[:len(getattr(cparams_dataclass, field.name))] == getattr(cparams_dataclass, field.name) + if field.name in ["filters", "filters_meta"]: + assert getattr(schunk.cparams, field.name)[ + : len(getattr(cparams_dataclass, field.name)) + ] == getattr(cparams_dataclass, field.name) else: assert getattr(schunk.cparams, field.name) == getattr(cparams_dataclass, field.name) array = blosc2.empty((30, 30), np.int32, cparams=cparams) for field in fields(cparams_dataclass): - if field.name in ['filters', 'filters_meta']: - assert getattr(array.schunk.cparams, field.name)[:len(getattr(cparams_dataclass, field.name))] == getattr(cparams_dataclass, field.name) - elif field.name == 'typesize': + if field.name in ["filters", "filters_meta"]: + assert getattr(array.schunk.cparams, field.name)[ + : len(getattr(cparams_dataclass, field.name)) + ] == getattr(cparams_dataclass, field.name) + elif field.name == "typesize": assert getattr(array.schunk.cparams, field.name) == array.dtype.itemsize - elif field.name != 'blocksize': + elif field.name != "blocksize": assert getattr(array.schunk.cparams, field.name) == getattr(cparams_dataclass, field.name) blosc2.set_nthreads(10) @@ -150,7 +156,7 @@ def test_raises_cparams(): (blosc2.DParams()), (blosc2.DParams(nthreads=2)), ({}), - ({'nthreads': 2}), + ({"nthreads": 2}), ], ) def test_dparams_values(dparams): @@ -168,6 +174,7 @@ def test_dparams_values(dparams): assert schunk.dparams.nthreads == dparams_dataclass.nthreads assert array.schunk.dparams.nthreads == dparams_dataclass.nthreads + def test_dparams_defaults(): dparams = blosc2.DParams() assert dparams.nthreads == blosc2.nthreads
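Finally, the `__post_init__` normalizations touched in `storage.py` are exactly what these tests pin down; a compact sketch, assuming the `contiguous` derivation only applies when the field is left unset:

```python
import blosc2

# Storage derives `contiguous` from `urlpath` when not given explicitly.
st = blosc2.Storage(urlpath="example.b2frame")
assert st.contiguous is True

# CParams trims an over-long `filters_meta` to the `filters` length (with a
# warning); a too-short `filters_meta` raises ValueError instead.
cp = blosc2.CParams(filters=[blosc2.Filter.SHUFFLE], filters_meta=[0, 0])
assert cp.filters_meta == [0]
```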