diff --git a/docs/release.rst b/docs/release.rst index 148a9b00..8d36c444 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -17,9 +17,10 @@ Maintenance Enhancements ~~~~~~~~~~~~ - +* Add a checksum flag to zstd and set the default level to 0. + By :user:`Norman Rzepka <normanrz>`, :issue:`519`. * Add PCodec - By :user:`Ryan Abernathey <rabernat>`. + By :user:`Ryan Abernathey <rabernat>`, :issue:`501`. * Use PyData theme for docs By :user:`John Kirkham <jakirkham>`, :issue:`485`. diff --git a/fixture/zstd/codec.00/config.json b/fixture/zstd/codec.00/config.json index dd42c2e1..4dd5847e 100644 --- a/fixture/zstd/codec.00/config.json +++ b/fixture/zstd/codec.00/config.json @@ -1,4 +1,4 @@ { "id": "zstd", - "level": 1 + "level": 0 } \ No newline at end of file diff --git a/fixture/zstd/codec.07/config.json b/fixture/zstd/codec.07/config.json new file mode 100644 index 00000000..8cc79f9b --- /dev/null +++ b/fixture/zstd/codec.07/config.json @@ -0,0 +1,5 @@ +{ + "checksum": true, + "id": "zstd", + "level": 0 +} \ No newline at end of file diff --git a/fixture/zstd/codec.07/encoded.00.dat b/fixture/zstd/codec.07/encoded.00.dat new file mode 100644 index 00000000..1ad2e8d7 Binary files /dev/null and b/fixture/zstd/codec.07/encoded.00.dat differ diff --git a/fixture/zstd/codec.07/encoded.01.dat b/fixture/zstd/codec.07/encoded.01.dat new file mode 100644 index 00000000..fd3607be Binary files /dev/null and b/fixture/zstd/codec.07/encoded.01.dat differ diff --git a/fixture/zstd/codec.07/encoded.02.dat b/fixture/zstd/codec.07/encoded.02.dat new file mode 100644 index 00000000..eb4ba7bf Binary files /dev/null and b/fixture/zstd/codec.07/encoded.02.dat differ diff --git a/fixture/zstd/codec.07/encoded.03.dat b/fixture/zstd/codec.07/encoded.03.dat new file mode 100644 index 00000000..7f5a154b Binary files /dev/null and b/fixture/zstd/codec.07/encoded.03.dat differ diff --git a/fixture/zstd/codec.07/encoded.04.dat b/fixture/zstd/codec.07/encoded.04.dat new file mode 100644 index 00000000..7ee553b7 Binary files 
/dev/null and b/fixture/zstd/codec.07/encoded.04.dat differ diff --git a/fixture/zstd/codec.07/encoded.05.dat b/fixture/zstd/codec.07/encoded.05.dat new file mode 100644 index 00000000..c11f1b32 Binary files /dev/null and b/fixture/zstd/codec.07/encoded.05.dat differ diff --git a/fixture/zstd/codec.07/encoded.06.dat b/fixture/zstd/codec.07/encoded.06.dat new file mode 100644 index 00000000..3c692161 Binary files /dev/null and b/fixture/zstd/codec.07/encoded.06.dat differ diff --git a/fixture/zstd/codec.07/encoded.07.dat b/fixture/zstd/codec.07/encoded.07.dat new file mode 100644 index 00000000..c83875ba Binary files /dev/null and b/fixture/zstd/codec.07/encoded.07.dat differ diff --git a/fixture/zstd/codec.07/encoded.08.dat b/fixture/zstd/codec.07/encoded.08.dat new file mode 100644 index 00000000..0cec36fb Binary files /dev/null and b/fixture/zstd/codec.07/encoded.08.dat differ diff --git a/fixture/zstd/codec.07/encoded.09.dat b/fixture/zstd/codec.07/encoded.09.dat new file mode 100644 index 00000000..f05134ea Binary files /dev/null and b/fixture/zstd/codec.07/encoded.09.dat differ diff --git a/fixture/zstd/codec.07/encoded.10.dat b/fixture/zstd/codec.07/encoded.10.dat new file mode 100644 index 00000000..4c2d7e2d Binary files /dev/null and b/fixture/zstd/codec.07/encoded.10.dat differ diff --git a/fixture/zstd/codec.07/encoded.11.dat b/fixture/zstd/codec.07/encoded.11.dat new file mode 100644 index 00000000..443547c7 Binary files /dev/null and b/fixture/zstd/codec.07/encoded.11.dat differ diff --git a/fixture/zstd/codec.07/encoded.12.dat b/fixture/zstd/codec.07/encoded.12.dat new file mode 100644 index 00000000..7dd61e18 Binary files /dev/null and b/fixture/zstd/codec.07/encoded.12.dat differ diff --git a/fixture/zstd/codec.08/config.json b/fixture/zstd/codec.08/config.json new file mode 100644 index 00000000..8cc79f9b --- /dev/null +++ b/fixture/zstd/codec.08/config.json @@ -0,0 +1,5 @@ +{ + "checksum": true, + "id": "zstd", + "level": 0 +} \ No newline at end 
of file diff --git a/fixture/zstd/codec.08/encoded.00.dat b/fixture/zstd/codec.08/encoded.00.dat new file mode 100644 index 00000000..1ad2e8d7 Binary files /dev/null and b/fixture/zstd/codec.08/encoded.00.dat differ diff --git a/fixture/zstd/codec.08/encoded.01.dat b/fixture/zstd/codec.08/encoded.01.dat new file mode 100644 index 00000000..fd3607be Binary files /dev/null and b/fixture/zstd/codec.08/encoded.01.dat differ diff --git a/fixture/zstd/codec.08/encoded.02.dat b/fixture/zstd/codec.08/encoded.02.dat new file mode 100644 index 00000000..eb4ba7bf Binary files /dev/null and b/fixture/zstd/codec.08/encoded.02.dat differ diff --git a/fixture/zstd/codec.08/encoded.03.dat b/fixture/zstd/codec.08/encoded.03.dat new file mode 100644 index 00000000..7f5a154b Binary files /dev/null and b/fixture/zstd/codec.08/encoded.03.dat differ diff --git a/fixture/zstd/codec.08/encoded.04.dat b/fixture/zstd/codec.08/encoded.04.dat new file mode 100644 index 00000000..7ee553b7 Binary files /dev/null and b/fixture/zstd/codec.08/encoded.04.dat differ diff --git a/fixture/zstd/codec.08/encoded.05.dat b/fixture/zstd/codec.08/encoded.05.dat new file mode 100644 index 00000000..c11f1b32 Binary files /dev/null and b/fixture/zstd/codec.08/encoded.05.dat differ diff --git a/fixture/zstd/codec.08/encoded.06.dat b/fixture/zstd/codec.08/encoded.06.dat new file mode 100644 index 00000000..3c692161 Binary files /dev/null and b/fixture/zstd/codec.08/encoded.06.dat differ diff --git a/fixture/zstd/codec.08/encoded.07.dat b/fixture/zstd/codec.08/encoded.07.dat new file mode 100644 index 00000000..c83875ba Binary files /dev/null and b/fixture/zstd/codec.08/encoded.07.dat differ diff --git a/fixture/zstd/codec.08/encoded.08.dat b/fixture/zstd/codec.08/encoded.08.dat new file mode 100644 index 00000000..0cec36fb Binary files /dev/null and b/fixture/zstd/codec.08/encoded.08.dat differ diff --git a/fixture/zstd/codec.08/encoded.09.dat b/fixture/zstd/codec.08/encoded.09.dat new file mode 100644 index 
00000000..f05134ea Binary files /dev/null and b/fixture/zstd/codec.08/encoded.09.dat differ diff --git a/fixture/zstd/codec.08/encoded.10.dat b/fixture/zstd/codec.08/encoded.10.dat new file mode 100644 index 00000000..4c2d7e2d Binary files /dev/null and b/fixture/zstd/codec.08/encoded.10.dat differ diff --git a/fixture/zstd/codec.08/encoded.11.dat b/fixture/zstd/codec.08/encoded.11.dat new file mode 100644 index 00000000..443547c7 Binary files /dev/null and b/fixture/zstd/codec.08/encoded.11.dat differ diff --git a/fixture/zstd/codec.08/encoded.12.dat b/fixture/zstd/codec.08/encoded.12.dat new file mode 100644 index 00000000..7dd61e18 Binary files /dev/null and b/fixture/zstd/codec.08/encoded.12.dat differ diff --git a/fixture/zstd/codec.09/config.json b/fixture/zstd/codec.09/config.json new file mode 100644 index 00000000..3e7715bf --- /dev/null +++ b/fixture/zstd/codec.09/config.json @@ -0,0 +1,5 @@ +{ + "checksum": true, + "id": "zstd", + "level": 22 +} \ No newline at end of file diff --git a/fixture/zstd/codec.09/encoded.00.dat b/fixture/zstd/codec.09/encoded.00.dat new file mode 100644 index 00000000..e68bcb84 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.00.dat differ diff --git a/fixture/zstd/codec.09/encoded.01.dat b/fixture/zstd/codec.09/encoded.01.dat new file mode 100644 index 00000000..1943a845 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.01.dat differ diff --git a/fixture/zstd/codec.09/encoded.02.dat b/fixture/zstd/codec.09/encoded.02.dat new file mode 100644 index 00000000..de473eae Binary files /dev/null and b/fixture/zstd/codec.09/encoded.02.dat differ diff --git a/fixture/zstd/codec.09/encoded.03.dat b/fixture/zstd/codec.09/encoded.03.dat new file mode 100644 index 00000000..7e8439ad Binary files /dev/null and b/fixture/zstd/codec.09/encoded.03.dat differ diff --git a/fixture/zstd/codec.09/encoded.04.dat b/fixture/zstd/codec.09/encoded.04.dat new file mode 100644 index 00000000..15c7e048 Binary files /dev/null and 
b/fixture/zstd/codec.09/encoded.04.dat differ diff --git a/fixture/zstd/codec.09/encoded.05.dat b/fixture/zstd/codec.09/encoded.05.dat new file mode 100644 index 00000000..275444f0 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.05.dat differ diff --git a/fixture/zstd/codec.09/encoded.06.dat b/fixture/zstd/codec.09/encoded.06.dat new file mode 100644 index 00000000..ef4a750b Binary files /dev/null and b/fixture/zstd/codec.09/encoded.06.dat differ diff --git a/fixture/zstd/codec.09/encoded.07.dat b/fixture/zstd/codec.09/encoded.07.dat new file mode 100644 index 00000000..cb6236c0 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.07.dat differ diff --git a/fixture/zstd/codec.09/encoded.08.dat b/fixture/zstd/codec.09/encoded.08.dat new file mode 100644 index 00000000..40c26e00 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.08.dat differ diff --git a/fixture/zstd/codec.09/encoded.09.dat b/fixture/zstd/codec.09/encoded.09.dat new file mode 100644 index 00000000..902dd2c0 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.09.dat differ diff --git a/fixture/zstd/codec.09/encoded.10.dat b/fixture/zstd/codec.09/encoded.10.dat new file mode 100644 index 00000000..2b4e66b9 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.10.dat differ diff --git a/fixture/zstd/codec.09/encoded.11.dat b/fixture/zstd/codec.09/encoded.11.dat new file mode 100644 index 00000000..8f770e96 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.11.dat differ diff --git a/fixture/zstd/codec.09/encoded.12.dat b/fixture/zstd/codec.09/encoded.12.dat new file mode 100644 index 00000000..8c49d686 Binary files /dev/null and b/fixture/zstd/codec.09/encoded.12.dat differ diff --git a/numcodecs/tests/test_zstd.py b/numcodecs/tests/test_zstd.py index d69acdbf..d5646c2d 100644 --- a/numcodecs/tests/test_zstd.py +++ b/numcodecs/tests/test_zstd.py @@ -29,6 +29,9 @@ Zstd(level=10), Zstd(level=22), Zstd(level=100), + Zstd(checksum=True), + Zstd(level=0, 
checksum=True), + Zstd(level=22, checksum=True), ] @@ -36,8 +39,8 @@ # mix of shapes: 1D, 2D, 3D # mix of orders: C, F arrays = [ - np.arange(1000, dtype='i4'), - np.linspace(1000, 1001, 1000, dtype='f8'), + np.arange(1000, dtype="i4"), + np.linspace(1000, 1001, 1000, dtype="f8"), np.random.normal(loc=1000, scale=1, size=(100, 10)), np.random.randint(0, 2, size=1000, dtype=bool).reshape(100, 10, order='F'), np.random.choice([b'a', b'bb', b'ccc'], size=1000).reshape(10, 10, 10), @@ -76,3 +79,17 @@ def test_err_decode_object_buffer(): def test_err_encode_object_buffer(): check_err_encode_object_buffer(Zstd()) + + +def test_checksum(): + data = np.arange(0, 64, dtype="uint8") + assert len(Zstd(level=0, checksum=False).encode(data)) + 4 == len( + Zstd(level=0, checksum=True).encode(data) + ) + + +def test_native_functions(): + # Note, these assertions might need to be changed for new versions of zstd + assert Zstd.default_level == 3 + assert Zstd.min_level == -131072 + assert Zstd.max_level == 22 diff --git a/numcodecs/zstd.pyx b/numcodecs/zstd.pyx index f96be4f4..fab6ed2b 100644 --- a/numcodecs/zstd.pyx +++ b/numcodecs/zstd.pyx @@ -19,21 +19,38 @@ cdef extern from "zstd.h": unsigned ZSTD_versionNumber() nogil - size_t ZSTD_compress(void* dst, - size_t dstCapacity, - const void* src, - size_t srcSize, - int compressionLevel) nogil + struct ZSTD_CCtx_s: + pass + ctypedef ZSTD_CCtx_s ZSTD_CCtx + cdef enum ZSTD_cParameter: + ZSTD_c_compressionLevel=100 + ZSTD_c_checksumFlag=201 + + ZSTD_CCtx* ZSTD_createCCtx() nogil + size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) nogil + size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, + ZSTD_cParameter param, + int value) nogil + + size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, + size_t dstCapacity, + const void* src, + size_t srcSize) nogil size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t compressedSize) nogil - unsigned long long ZSTD_getDecompressedSize(const void* src, + cdef long ZSTD_CONTENTSIZE_UNKNOWN + 
cdef long ZSTD_CONTENTSIZE_ERROR + unsigned long long ZSTD_getFrameContentSize(const void* src, size_t srcSize) nogil + int ZSTD_minCLevel() nogil int ZSTD_maxCLevel() nogil + int ZSTD_defaultCLevel() nogil size_t ZSTD_compressBound(size_t srcSize) nogil @@ -51,11 +68,11 @@ MICRO_VERSION_NUMBER = ( (MINOR_VERSION_NUMBER * 100) ) __version__ = '%s.%s.%s' % (MAJOR_VERSION_NUMBER, MINOR_VERSION_NUMBER, MICRO_VERSION_NUMBER) -DEFAULT_CLEVEL = 1 +DEFAULT_CLEVEL = 0 MAX_CLEVEL = ZSTD_maxCLevel() -def compress(source, int level=DEFAULT_CLEVEL): +def compress(source, int level=DEFAULT_CLEVEL, bint checksum=False): """Compress data. Parameters @@ -64,7 +81,9 @@ def compress(source, int level=DEFAULT_CLEVEL): Data to be compressed. Can be any object supporting the buffer protocol. level : int - Compression level (1-22). + Compression level (-131072 to 22). + checksum : bool + Flag to enable checksums. The default is False. Returns ------- @@ -80,8 +99,6 @@ def compress(source, int level=DEFAULT_CLEVEL): bytes dest # check level - if level <= 0: - level = DEFAULT_CLEVEL if level > MAX_CLEVEL: level = MAX_CLEVEL @@ -90,6 +107,19 @@ def compress(source, int level=DEFAULT_CLEVEL): source_ptr = source_buffer.ptr source_size = source_buffer.nbytes + cctx = ZSTD_createCCtx() + param_set_result = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level) + + if ZSTD_isError(param_set_result): + error = ZSTD_getErrorName(param_set_result) + raise RuntimeError('Could not set zstd compression level: %s' % error) + + param_set_result = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1 if checksum else 0) + + if ZSTD_isError(param_set_result): + error = ZSTD_getErrorName(param_set_result) + raise RuntimeError('Could not set zstd checksum flag: %s' % error) + try: # setup destination @@ -99,10 +129,11 @@ def compress(source, int level=DEFAULT_CLEVEL): # perform compression with nogil: - compressed_size = ZSTD_compress(dest_ptr, dest_size, source_ptr, source_size, level) + 
compressed_size = ZSTD_compress2(cctx, dest_ptr, dest_size, source_ptr, source_size) finally: - + if cctx: + ZSTD_freeCCtx(cctx) # release buffers source_buffer.release() @@ -148,8 +179,8 @@ def decompress(source, dest=None): try: # determine uncompressed size - dest_size = ZSTD_getDecompressedSize(source_ptr, source_size) - if dest_size == 0: + dest_size = ZSTD_getFrameContentSize(source_ptr, source_size) + if dest_size == 0 or dest_size == ZSTD_CONTENTSIZE_UNKNOWN or dest_size == ZSTD_CONTENTSIZE_ERROR: raise RuntimeError('Zstd decompression error: invalid input data') # setup destination buffer @@ -193,7 +224,9 @@ class Zstd(Codec): Parameters ---------- level : int - Compression level (1-22). + Compression level (-131072 to 22). + checksum : bool + Flag to enable checksums. The default is False. See Also -------- @@ -207,12 +240,13 @@ class Zstd(Codec): # practical limit on the size of buffers that Zstd can process and so we don't # enforce a max_buffer_size option here. - def __init__(self, level=DEFAULT_CLEVEL): + def __init__(self, level=DEFAULT_CLEVEL, checksum=False): self.level = level + self.checksum = checksum def encode(self, buf): buf = ensure_contiguous_ndarray(buf) - return compress(buf, self.level) + return compress(buf, self.level, self.checksum) def decode(self, buf, out=None): buf = ensure_contiguous_ndarray(buf) @@ -223,3 +257,21 @@ class Zstd(Codec): (type(self).__name__, self.level) return r + + @classmethod + @property + def default_level(cls): + """Returns the default compression level of the underlying zstd library.""" + return ZSTD_defaultCLevel() + + @classmethod + @property + def min_level(cls): + """Returns the minimum compression level of the underlying zstd library.""" + return ZSTD_minCLevel() + + @classmethod + @property + def max_level(cls): + """Returns the maximum compression level of the underlying zstd library.""" + return ZSTD_maxCLevel()