Skip to content

Commit

Permalink
Add ZstdEncoder::with_quality_and_params to enable compression parameters
Browse files Browse the repository at this point in the history

This allows enabling parameters such as long-distance matching mode or
rsyncable mode.

Add a type supporting a subset of zstd compression parameters, to
insulate against future changes to zstd.
  • Loading branch information
joshtriplett committed May 10, 2023
1 parent 79a36e4 commit 466ce00
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 1 deletion.
12 changes: 11 additions & 1 deletion src/codec/zstd/encoder.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{codec::Encode, unshared::Unshared, util::PartialBuffer};
use libzstd::stream::raw::{Encoder, Operation};
use libzstd::stream::raw::{CParameter, Encoder, Operation};
use std::io::Result;

#[derive(Debug)]
Expand All @@ -13,6 +13,16 @@ impl ZstdEncoder {
encoder: Unshared::new(Encoder::new(level).unwrap()),
}
}

/// Creates an encoder at the given compression `level` and then applies every entry of
/// `params` to it, in slice order.
///
/// # Panics
///
/// Panics if the raw zstd encoder cannot be created for `level`, or if setting any of
/// the parameters returns an error.
pub(crate) fn new_with_params(level: i32, params: &[crate::zstd::CParameter]) -> Self {
    let mut raw = Encoder::new(level).unwrap();
    // Each wrapper is converted back to the underlying zstd parameter before it is applied.
    params
        .iter()
        .for_each(|p| raw.set_parameter(p.as_zstd()).unwrap());
    Self {
        encoder: Unshared::new(raw),
    }
}
}

impl Encode for ZstdEncoder {
Expand Down
107 changes: 107 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,3 +258,110 @@ impl Level {
}
}
}

#[cfg(feature = "zstd")]
/// Types that are specific to the zstd codec of async-compression.
pub mod zstd {
    use libzstd::stream::raw::CParameter as RawParam;

    /// A single zstd compression parameter.
    ///
    /// This is a stable wrapper around zstd's own `CParameter` type, so that callers are
    /// insulated from differences between versions of the zstd library. Values are created
    /// through the constructor functions below, one per supported parameter.
    ///
    /// See the [zstd documentation](https://facebook.github.io/zstd/zstd_manual.html) for more
    /// information on these parameters.
    #[derive(Copy, Clone, Debug, PartialEq, Eq)]
    pub struct CParameter(RawParam);

    impl CParameter {
        /// Window size in bytes, expressed as a power of two.
        pub fn window_log(value: u32) -> Self {
            Self(RawParam::WindowLog(value))
        }

        /// Size of the initial probe table in 4-byte entries, expressed as a power of two.
        pub fn hash_log(value: u32) -> Self {
            Self(RawParam::HashLog(value))
        }

        /// Size of the multi-probe table in 4-byte entries, expressed as a power of two.
        pub fn chain_log(value: u32) -> Self {
            Self(RawParam::ChainLog(value))
        }

        /// Number of search attempts, expressed as a power of two.
        pub fn search_log(value: u32) -> Self {
            Self(RawParam::SearchLog(value))
        }

        /// Minimum size of the matches that are searched for.
        pub fn min_match(value: u32) -> Self {
            Self(RawParam::MinMatch(value))
        }

        /// Length modifier whose meaning depends on the compression strategy.
        pub fn target_length(value: u32) -> Self {
            Self(RawParam::TargetLength(value))
        }

        /// Turns long-distance matching mode on or off; when on, zstd looks for and emits
        /// long-distance references.
        ///
        /// Enabling this increases the default window size.
        pub fn enable_long_distance_matching(value: bool) -> Self {
            Self(RawParam::EnableLongDistanceMatching(value))
        }

        /// Size of the long-distance matching table, expressed as a power of two.
        pub fn ldm_hash_log(value: u32) -> Self {
            Self(RawParam::LdmHashLog(value))
        }

        /// Minimum size of the long-distance matches that are searched for.
        pub fn ldm_min_match(value: u32) -> Self {
            Self(RawParam::LdmMinMatch(value))
        }

        /// Size of each bucket in the LDM hash table, used for collision resolution and
        /// expressed as a power of two.
        pub fn ldm_bucket_size_log(value: u32) -> Self {
            Self(RawParam::LdmBucketSizeLog(value))
        }

        /// How frequently the LDM hash table is used, expressed as a power of two.
        pub fn ldm_hash_rate_log(value: u32) -> Self {
            Self(RawParam::LdmHashRateLog(value))
        }

        /// Whether to emit the size of the content (default: true).
        pub fn content_size_flag(value: bool) -> Self {
            Self(RawParam::ContentSizeFlag(value))
        }

        /// Whether to emit a checksum (default: false).
        pub fn checksum_flag(value: bool) -> Self {
            Self(RawParam::ChecksumFlag(value))
        }

        /// Whether to emit a dictionary ID when a custom dictionary is in use (default: true).
        pub fn dict_id_flag(value: bool) -> Self {
            Self(RawParam::DictIdFlag(value))
        }

        /// Number of worker threads to spawn.
        ///
        /// With a value of 0 the compression functions block; with 1 or more, compression
        /// runs in background threads and `flush` pushes bytes through the compressor.
        //
        // NOTE(review): presumably this only takes effect when the underlying zstd library is
        // built with multithreading support (e.g. the zstd crate's `zstdmt` feature) — confirm
        // and document what happens otherwise.
        pub fn nb_workers(value: u32) -> Self {
            Self(RawParam::NbWorkers(value))
        }

        /// Number of bytes handed to each worker.
        ///
        /// With a value of 0, zstd picks a job size based on the compression parameters.
        pub fn job_size(value: u32) -> Self {
            Self(RawParam::JobSize(value))
        }

        /// Returns a copy of the wrapped zstd parameter for use with the raw encoder.
        pub(crate) fn as_zstd(&self) -> RawParam {
            let Self(raw) = *self;
            raw
        }
    }
}
11 changes: 11 additions & 0 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@ macro_rules! algos {
),
}
}

/// Creates a new encoder, using the specified compression level and parameters, which
/// will read uncompressed data from the given stream and emit a compressed stream.
pub fn with_quality_and_params(inner: $inner, level: crate::Level, params: &[crate::zstd::CParameter]) -> Self {
Self {
inner: crate::$($mod::)+generic::Encoder::new(
inner,
crate::codec::ZstdEncoder::new_with_params(level.into_zstd(), params),
),
}
}
});

algos!(@algo xz ["xz"] XzDecoder XzEncoder<$inner> {
Expand Down

0 comments on commit 466ce00

Please sign in to comment.