Skip to content

Commit

Permalink
Optimize match_len == 3 (#146)
Browse files Browse the repository at this point in the history
* Add throughput to benches

* Trim debug/error fmt

* Amortize bounds checks in apply_match fast path

* Avoid panic in tree_lookup
  • Loading branch information
kornelski authored Jan 29, 2024
1 parent 201ef39 commit 10ff5a0
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 8 deletions.
2 changes: 2 additions & 0 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ macro_rules! decompress_bench {
let compressed = compress_to_vec(input.as_slice(), $level);

let mut out_len: usize = 0;
b.bytes = input.len() as _;
b.iter(|| unsafe {
w($decompress_func(
compressed.as_ptr() as *mut c_void,
Expand All @@ -70,6 +71,7 @@ macro_rules! compress_bench {

let mut out_len: usize = 0;
let flags = create_comp_flags_from_zip_params($level, -15, 0) as i32;
b.bytes = input.len() as _;
b.iter(|| unsafe {
w($compress_func(
input.as_ptr() as *mut c_void,
Expand Down
30 changes: 24 additions & 6 deletions miniz_oxide/src/inflate/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
use super::*;
use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER};
use ::core::cell::Cell;

use ::core::convert::TryInto;
use ::core::{cmp, slice};
Expand Down Expand Up @@ -52,7 +53,12 @@ impl HuffmanTable {
loop {
// symbol here indicates the position of the left (0) node; if the next bit is 1,
// we add 1 to the lookup position to get the right node.
symbol = i32::from(self.tree[(!symbol + ((bit_buf >> code_len) & 1) as i32) as usize]);
let tree_index = (!symbol + ((bit_buf >> code_len) & 1) as i32) as usize;
debug_assert!(tree_index < self.tree.len());
if tree_index >= self.tree.len() {
break;
}
symbol = i32::from(self.tree[tree_index]);
code_len += 1;
if symbol >= 0 {
break;
Expand Down Expand Up @@ -896,15 +902,27 @@ fn apply_match(
match_len: usize,
out_buf_size_mask: usize,
) {
debug_assert!(out_pos + match_len <= out_slice.len());
debug_assert!(out_pos.checked_add(match_len).unwrap() <= out_slice.len());

let source_pos = out_pos.wrapping_sub(dist) & out_buf_size_mask;

if match_len == 3 {
// Fast path for match len 3.
out_slice[out_pos] = out_slice[source_pos];
out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask];
let out_slice = Cell::from_mut(out_slice).as_slice_of_cells();
if let Some(dst) = out_slice.get(out_pos..out_pos + 3) {
// Moving bounds checks before any memory mutation allows the optimizer
// to combine them together.
let src = out_slice
.get(source_pos)
.zip(out_slice.get((source_pos + 1) & out_buf_size_mask))
.zip(out_slice.get((source_pos + 2) & out_buf_size_mask));
if let Some(((a, b), c)) = src {
// For correctness, the memory reads and writes have to be interleaved.
// Cells make it possible for read and write references to overlap.
dst[0].set(a.get());
dst[1].set(b.get());
dst[2].set(c.get());
}
}
return;
}

Expand Down
3 changes: 2 additions & 1 deletion miniz_oxide/src/inflate/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,14 @@ pub struct DecompressError {

#[cfg(feature = "with-alloc")]
impl alloc::fmt::Display for DecompressError {
#[cold]
fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
f.write_str(match self.status {
TINFLStatus::FailedCannotMakeProgress => "Truncated input stream",
TINFLStatus::BadParam => "Invalid output buffer size",
TINFLStatus::Adler32Mismatch => "Adler32 checksum mismatch",
TINFLStatus::Failed => "Invalid input data",
TINFLStatus::Done => unreachable!(),
TINFLStatus::Done => "", // Unreachable
TINFLStatus::NeedsMoreInput => "Truncated input stream",
TINFLStatus::HasMoreOutput => "Output size exceeded the specified limit",
})
Expand Down
3 changes: 2 additions & 1 deletion src/lib_oxide.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,13 @@ pub enum InternalState {
}

impl fmt::Debug for InternalState {
#[cold]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let name = match &self {
InternalState::Inflate(_) => "Decompressor",
InternalState::Deflate(_) => "Compressor",
};
write!(f, "{}", name)
f.write_str(name)
}
}

Expand Down

0 comments on commit 10ff5a0

Please sign in to comment.