From 7417fee7547d94625fec3e64c8f843ad9a073afe Mon Sep 17 00:00:00 2001 From: Miles Granger Date: Sat, 5 Oct 2024 14:21:59 +0200 Subject: [PATCH] Add zlib, izlib and ideflate (#184) * Add zlib, izlib and ideflate * Bump minor version -> 2.9.0-rc1 --- Cargo.toml | 17 +++++-- README.md | 6 ++- src/experimental.rs | 14 ++++++ src/ideflate.rs | 107 +++++++++++++++++++++++++++++++++++++++++ src/izlib.rs | 107 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 24 +++++++-- src/zlib.rs | 106 ++++++++++++++++++++++++++++++++++++++++ tests/test_variants.py | 8 +-- 8 files changed, 378 insertions(+), 11 deletions(-) create mode 100644 src/ideflate.rs create mode 100644 src/izlib.rs create mode 100644 src/zlib.rs diff --git a/Cargo.toml b/Cargo.toml index 20ca0db9..6b3a571b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cramjam-python" -version = "2.8.5-rc1" +version = "2.9.0-rc1" authors = ["Miles Granger "] edition = "2021" license = "MIT" @@ -14,7 +14,7 @@ name = "cramjam" crate-type = ["cdylib"] [features] -default = ["extension-module", "snappy", "lz4", "bzip2", "brotli", "xz", "zstd", "gzip", "deflate", "blosc2", "igzip"] +default = ["extension-module", "snappy", "lz4", "bzip2", "brotli", "xz", "zstd", "gzip", "zlib", "deflate", "blosc2", "igzip", "ideflate", "izlib"] extension-module = ["pyo3/extension-module"] generate-import-lib = ["pyo3/generate-import-lib"] # needed for Windows PyPy builds @@ -28,9 +28,16 @@ xz = ["xz-static"] xz-static = ["libcramjam/xz-static"] xz-shared = ["libcramjam/xz-shared"] +# ISA-L stuff igzip = ["igzip-static"] igzip-static = ["libcramjam/igzip-static"] igzip-shared = ["libcramjam/igzip-shared"] +ideflate = ["ideflate-static"] +ideflate-static = ["libcramjam/ideflate-static"] +ideflate-shared = ["libcramjam/ideflate-shared"] +izlib = ["izlib-static"] +izlib-static = ["libcramjam/izlib-static"] +izlib-shared = ["libcramjam/izlib-shared"] use-system-isal-static = ["libcramjam/use-system-isal", "libcramjam/igzip-static"] use-system-isal-shared = ["libcramjam/use-system-isal", "libcramjam/igzip-shared"] @@ -38,6 +45,10 @@ gzip = ["gzip-static"] gzip-static = ["libcramjam/gzip-static"] gzip-shared = ["libcramjam/gzip-shared"] +zlib = ["zlib-static"] +zlib-static = ["libcramjam/zlib-static"] +zlib-shared = ["libcramjam/zlib-shared"] + deflate = ["deflate-static"] deflate-static = ["libcramjam/deflate-static"] deflate-shared = ["libcramjam/deflate-shared"] @@ -53,7 +64,7 @@ wasm32-compat = ["libcramjam/wasm32-compat"] [dependencies] pyo3 = { version = "^0.22", default-features = false, features = ["macros"] } -libcramjam = { version = "^0.5", default-features = false } +libcramjam = { version = "^0.6", default-features = false } [build-dependencies] pyo3-build-config = "^0.22" diff --git a/README.md b/README.md index fa482fa7..8ae5f7c0 100644 --- a/README.md +++ b/README.md @@ -41,11 +41,15 @@ Available algorithms: - [X] Bzip2          `cramjam.bzip2` - [X] Lz4              `cramjam.lz4` - [X] Gzip            `cramjam.gzip` +- [X] Zlib            `cramjam.zlib` - [X] Deflate       `cramjam.deflate` - [X] ZSTD           `cramjam.zstd` - [X] XZ / LZMA  `cramjam.xz` - [X] Blosc2        `cramjam.experimental.blosc2` -- [X] IGzip          `cramjam.experimental.igzip` (only on 64-bit targets) +- [X] ISA-L backend + - [X] igzip          `cramjam.experimental.igzip` (only on 64-bit targets) + - [X] ideflate          `cramjam.experimental.ideflate` (only on 64-bit targets) + - [X] izlib          `cramjam.experimental.izlib` (only on 64-bit targets) All available for use as: diff --git a/src/experimental.rs b/src/experimental.rs index 998d81ee..774e8121 100644 --- a/src/experimental.rs +++ b/src/experimental.rs @@ -13,10 +13,24 @@ pub mod experimental { #[pymodule_export] use crate::blosc2::blosc2; + #[cfg(all( + any(feature = "ideflate", feature = "ideflate-static", feature = "ideflate-shared"), + target_pointer_width = "64" + ))] + #[pymodule_export] + use crate::ideflate::ideflate; + #[cfg(all( any(feature = "igzip", feature = "igzip-static", feature = "igzip-shared"), target_pointer_width = "64" ))] #[pymodule_export] use crate::igzip::igzip; + + #[cfg(all( + any(feature = "izlib", feature = "izlib-static", feature = "izlib-shared"), + target_pointer_width = "64" + ))] + #[pymodule_export] + use crate::izlib::izlib; } diff --git a/src/ideflate.rs b/src/ideflate.rs new file mode 100644 index 00000000..85f7d1ef --- /dev/null +++ b/src/ideflate.rs @@ -0,0 +1,107 @@ +//! ideflate de/compression interface +use pyo3::prelude::*; + +/// ideflate de/compression interface +#[pymodule] +pub mod ideflate { + + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::{AsBytes, RustyBuffer}; + use crate::BytesType; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; + + const DEFAULT_COMPRESSION_LEVEL: u32 = 6; + + /// ideflate decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.gzip.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::ideflate::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } + + /// ideflate compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.gzip.compress(b'some bytes here', level=2, output_len=Optional[int]) # Level defaults to 6 + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::ideflate::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } + + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::ideflate::compress[input, output], level).map_err(CompressionError::from_err) + } + + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::ideflate::decompress[input, output]).map_err(DecompressionError::from_err) + } + + /// ideflate Compressor object for streaming compression + #[pyclass(unsendable)] // TODO: make sendable + pub struct Compressor { + inner: Option>>>, + } + + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (level=None))] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or(DEFAULT_COMPRESSION_LEVEL); + let inner = libcramjam::ideflate::isal::write::GzipEncoder::new( + Cursor::new(vec![]), + libcramjam::ideflate::isal::CompressionLevel::try_from(level as isize) + .map_err(CompressionError::from_err)?, + ); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| e.get_ref_mut()) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } + } + + mod _decompressor { + use super::*; + crate::make_decompressor!(ideflate); + } + #[pymodule_export] + use _decompressor::Decompressor; +} diff --git a/src/izlib.rs b/src/izlib.rs new file mode 100644 index 00000000..bda37ea1 --- /dev/null +++ b/src/izlib.rs @@ -0,0 +1,107 @@ +//! izlib de/compression interface +use pyo3::prelude::*; + +/// izlib de/compression interface +#[pymodule] +pub mod izlib { + + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::{AsBytes, RustyBuffer}; + use crate::BytesType; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; + + const DEFAULT_COMPRESSION_LEVEL: u32 = 6; + + /// izlib decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.gzip.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::izlib::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } + + /// izlib compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.gzip.compress(b'some bytes here', level=2, output_len=Optional[int]) # Level defaults to 6 + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::izlib::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } + + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::izlib::compress[input, output], level).map_err(CompressionError::from_err) + } + + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::izlib::decompress[input, output]).map_err(DecompressionError::from_err) + } + + /// izlib Compressor object for streaming compression + #[pyclass(unsendable)] // TODO: make sendable + pub struct Compressor { + inner: Option>>>, + } + + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (level=None))] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or(DEFAULT_COMPRESSION_LEVEL); + let inner = libcramjam::izlib::isal::write::GzipEncoder::new( + Cursor::new(vec![]), + libcramjam::izlib::isal::CompressionLevel::try_from(level as isize) + .map_err(CompressionError::from_err)?, + ); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| e.get_ref_mut()) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } + } + + mod _decompressor { + use super::*; + crate::make_decompressor!(izlib); + } + #[pymodule_export] + use _decompressor::Decompressor; +} diff --git a/src/lib.rs b/src/lib.rs index 27eba0f1..6b5e6058 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,17 +65,29 @@ pub mod bzip2; pub mod deflate; #[cfg(any(feature = "gzip", feature = "gzip-static", feature = "gzip-shared"))] pub mod gzip; +#[cfg(all( + any(feature = "ideflate", feature = "ideflate-static", feature = "ideflate-shared"), + target_pointer_width = "64" +))] +pub mod ideflate; #[cfg(all( any(feature = "igzip", feature = "igzip-static", feature = "igzip-shared"), target_pointer_width = "64" ))] pub mod igzip; +#[cfg(all( + any(feature = "izlib", feature = "izlib-static", feature = "izlib-shared"), + target_pointer_width = "64" +))] +pub mod izlib; #[cfg(feature = "lz4")] pub mod lz4; #[cfg(feature = "snappy")] pub mod snappy; #[cfg(any(feature = "xz", feature = "xz-static", feature = "xz-shared"))] pub mod xz; +#[cfg(any(feature = "zlib", feature = "zlib-static", feature = "zlib-shared"))] +pub mod zlib; #[cfg(feature = "zstd")] pub mod zstd; @@ -416,10 +428,6 @@ mod cramjam { #[pymodule_export] use crate::brotli::brotli; - #[cfg(any(feature = "deflate", feature = "deflate-static", feature = "deflate-shared"))] - #[pymodule_export] - use crate::deflate::deflate; - #[cfg(any(feature = "xz", feature = "xz-static", feature = "xz-shared"))] #[pymodule_export] use crate::xz::xz; @@ -432,6 +440,14 @@ mod cramjam { #[pymodule_export] use crate::gzip::gzip; + #[cfg(any(feature = "zlib", feature = "zlib-static", feature = "zlib-shared"))] + #[pymodule_export] + use crate::zlib::zlib; + + #[cfg(any(feature = "deflate", feature = "deflate-static", feature = "deflate-shared"))] + #[pymodule_export] + use crate::deflate::deflate; + #[pymodule_export] use crate::experimental::experimental; } diff --git a/src/zlib.rs b/src/zlib.rs new file mode 100644 index 00000000..a5031058 --- /dev/null +++ b/src/zlib.rs @@ -0,0 +1,106 @@ +//! zlib de/compression interface +use pyo3::prelude::*; + +/// zlib de/compression interface +#[pymodule] +pub mod zlib { + + use crate::exceptions::{CompressionError, DecompressionError}; + use crate::io::{AsBytes, RustyBuffer}; + use crate::BytesType; + use pyo3::prelude::*; + use pyo3::PyResult; + use std::io::Cursor; + + const DEFAULT_COMPRESSION_LEVEL: u32 = 6; + + /// zlib decompression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.zlib.decompress(compressed_bytes, output_len=Optional[int]) + /// ``` + #[pyfunction] + #[pyo3(signature = (data, output_len=None))] + pub fn decompress(py: Python, data: BytesType, output_len: Option) -> PyResult { + crate::generic!(py, libcramjam::zlib::decompress[data], output_len = output_len) + .map_err(DecompressionError::from_err) + } + + /// zlib compression. + /// + /// Python Example + /// -------------- + /// ```python + /// >>> cramjam.zlib.compress(b'some bytes here', level=2, output_len=Optional[int]) # Level defaults to 6 + /// ``` + #[pyfunction] + #[pyo3(signature = (data, level=None, output_len=None))] + pub fn compress( + py: Python, + data: BytesType, + level: Option, + output_len: Option, + ) -> PyResult { + crate::generic!(py, libcramjam::zlib::compress[data], output_len = output_len, level) + .map_err(CompressionError::from_err) + } + + /// Compress directly into an output buffer + #[pyfunction] + #[pyo3(signature = (input, output, level=None))] + pub fn compress_into(py: Python, input: BytesType, mut output: BytesType, level: Option) -> PyResult { + crate::generic!(py, libcramjam::zlib::compress[input, output], level).map_err(CompressionError::from_err) + } + + /// Decompress directly into an output buffer + #[pyfunction] + pub fn decompress_into(py: Python, input: BytesType, mut output: BytesType) -> PyResult { + crate::generic!(py, libcramjam::zlib::decompress[input, output]).map_err(DecompressionError::from_err) + } + + /// zlib Compressor object for streaming compression + #[pyclass] + pub struct Compressor { + inner: Option>>>, + } + + #[pymethods] + impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + #[pyo3(signature = (level=None))] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or(DEFAULT_COMPRESSION_LEVEL); + let inner = libcramjam::zlib::flate2::write::GzEncoder::new( + Cursor::new(vec![]), + libcramjam::zlib::flate2::Compression::new(level), + ); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Flush and return current compressed stream + pub fn flush(&mut self) -> PyResult { + crate::io::stream_flush(&mut self.inner, |e| e.get_mut()) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } + } + + mod _decompressor { + use super::*; + crate::make_decompressor!(zlib); + } + #[pymodule_export] + use _decompressor::Decompressor; +} diff --git a/tests/test_variants.py b/tests/test_variants.py index d62f338b..d65a0972 100644 --- a/tests/test_variants.py +++ b/tests/test_variants.py @@ -16,10 +16,11 @@ "gzip", "deflate", "zstd", + "zlib", "xz", ) -for experimental_feat in ("blosc2", "igzip"): +for experimental_feat in ("blosc2", "igzip", "ideflate", "izlib"): if not hasattr(cramjam, experimental_feat) and hasattr(cramjam, "experimental"): mod = getattr(cramjam.experimental, experimental_feat) setattr(cramjam, experimental_feat, mod) @@ -154,7 +155,6 @@ def test_variants_compress_into( ) n_bytes = variant.compress_into(input, output) - assert n_bytes == compressed_len if hasattr(output, "read"): output.seek(0) @@ -163,7 +163,9 @@ def test_variants_compress_into( output = output.tobytes() else: output = bytes(output) - assert same_same(output, compressed) + + decompressed = variant.decompress(output[:n_bytes]) + assert same_same(raw_data, decompressed) @pytest.mark.parametrize(