From 466ce00c8f4e2637b016ac2901f2341b9025c8fd Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sat, 13 Nov 2021 15:40:40 +0100 Subject: [PATCH] Add ZstdEncoder::with_quality_and_params to enable compression parameters This allows enabling parameters such as long-distance matching mode or rsyncable mode. Add a type supporting a subset of zstd compression parameters, to insulate against future changes to zstd. --- src/codec/zstd/encoder.rs | 12 ++++- src/lib.rs | 107 ++++++++++++++++++++++++++++++++++++++ src/macros.rs | 11 ++++ 3 files changed, 129 insertions(+), 1 deletion(-) diff --git a/src/codec/zstd/encoder.rs b/src/codec/zstd/encoder.rs index 34407abc..eec162a5 100644 --- a/src/codec/zstd/encoder.rs +++ b/src/codec/zstd/encoder.rs @@ -1,5 +1,5 @@ use crate::{codec::Encode, unshared::Unshared, util::PartialBuffer}; -use libzstd::stream::raw::{Encoder, Operation}; +use libzstd::stream::raw::{CParameter, Encoder, Operation}; use std::io::Result; #[derive(Debug)] @@ -13,6 +13,16 @@ impl ZstdEncoder { encoder: Unshared::new(Encoder::new(level).unwrap()), } } + + pub(crate) fn new_with_params(level: i32, params: &[crate::zstd::CParameter]) -> Self { + let mut encoder = Encoder::new(level).unwrap(); + for param in params { + encoder.set_parameter(param.as_zstd()).unwrap(); + } + Self { + encoder: Unshared::new(encoder), + } + } } impl Encode for ZstdEncoder { diff --git a/src/lib.rs b/src/lib.rs index 108cb859..77fd32eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -258,3 +258,110 @@ impl Level { } } } + +#[cfg(feature = "zstd")] +/// This module contains zstd-specific types for async-compression. +pub mod zstd { + use libzstd::stream::raw::CParameter::*; + + /// A compression parameter for zstd. This is a stable wrapper around zstd's own `CParameter` + /// type, to abstract over different versions of the zstd library. + /// + /// See the [zstd documentation](https://facebook.github.io/zstd/zstd_manual.html) for more + /// information on these parameters. + #[derive(Copy, Clone, Debug, PartialEq, Eq)] + pub struct CParameter(libzstd::stream::raw::CParameter); + + impl CParameter { + /// Window size in bytes (as a power of two) + pub fn window_log(value: u32) -> Self { + Self(WindowLog(value)) + } + + /// Size of the initial probe table in 4-byte entries (as a power of two) + pub fn hash_log(value: u32) -> Self { + Self(HashLog(value)) + } + + /// Size of the multi-probe table in 4-byte entries (as a power of two) + pub fn chain_log(value: u32) -> Self { + Self(ChainLog(value)) + } + + /// Number of search attempts (as a power of two) + pub fn search_log(value: u32) -> Self { + Self(SearchLog(value)) + } + + /// Minimum size of matches searched for + pub fn min_match(value: u32) -> Self { + Self(MinMatch(value)) + } + + /// Strategy-dependent length modifier + pub fn target_length(value: u32) -> Self { + Self(TargetLength(value)) + } + + /// Enable long-distance matching mode to look for and emit long-distance references. + /// + /// This increases the default window size. + pub fn enable_long_distance_matching(value: bool) -> Self { + Self(EnableLongDistanceMatching(value)) + } + + /// Size of the long-distance matching table (as a power of two) + pub fn ldm_hash_log(value: u32) -> Self { + Self(LdmHashLog(value)) + } + + /// Minimum size of long-distance matches searched for + pub fn ldm_min_match(value: u32) -> Self { + Self(LdmMinMatch(value)) + } + + /// Size of each bucket in the LDM hash table for collision resolution (as a power of two) + pub fn ldm_bucket_size_log(value: u32) -> Self { + Self(LdmBucketSizeLog(value)) + } + + /// Frequency of using the LDM hash table (as a power of two) + pub fn ldm_hash_rate_log(value: u32) -> Self { + Self(LdmHashRateLog(value)) + } + + /// Emit the size of the content (default: true). + pub fn content_size_flag(value: bool) -> Self { + Self(ContentSizeFlag(value)) + } + + /// Emit a checksum (default: false). + pub fn checksum_flag(value: bool) -> Self { + Self(ChecksumFlag(value)) + } + + /// Emit a dictionary ID when using a custom dictionary (default: true). + pub fn dict_id_flag(value: bool) -> Self { + Self(DictIdFlag(value)) + } + + /// Number of threads to spawn. + /// + /// If set to 0, compression functions will block; if set to 1 or more, compression will + /// run in background threads and `flush` pushes bytes through the compressor. + pub fn nb_workers(value: u32) -> Self { + Self(NbWorkers(value)) + } + + /// Number of bytes given to each worker. + /// + /// If set to 0, zstd selects a job size based on compression parameters. + pub fn job_size(value: u32) -> Self { + Self(JobSize(value)) + } + + pub(crate) fn as_zstd(&self) -> libzstd::stream::raw::CParameter { + self.0 + } + } +} diff --git a/src/macros.rs b/src/macros.rs index a9fbbc6b..7270251d 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -89,6 +89,17 @@ macro_rules! algos { ), } } + + /// Creates a new encoder, using the specified compression level and parameters, which + /// will read uncompressed data from the given stream and emit a compressed stream. + pub fn with_quality_and_params(inner: $inner, level: crate::Level, params: &[crate::zstd::CParameter]) -> Self { + Self { + inner: crate::$($mod::)+generic::Encoder::new( + inner, + crate::codec::ZstdEncoder::new_with_params(level.into_zstd(), params), + ), + } + } }); algos!(@algo xz ["xz"] XzDecoder XzEncoder<$inner> {