fix(inflate): don't use lookup table on aarch64 and loong since we have bit rev instruction there, fix clippy warnings and fix conditional in tree_lookup that seemed to break perf
oyvindln committed Dec 1, 2024
1 parent c5f8f76 commit 083e4b3
Showing 2 changed files with 25 additions and 11 deletions.
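
For context on the main change: on aarch64, arm64ec, and loongarch64, u32::reverse_bits lowers to a single bit-reverse instruction (RBIT on ARM, BITREV on LoongArch), so a precomputed lookup table only costs binary size there, while on targets without such an instruction the table can beat the longer shift-and-mask expansion. A rough sketch of the two strategies the diff below toggles between (simplified standalone code, not the crate's exact functions):

// Sketch only: simplified stand-ins for the two cfg-gated variants in the diff.

// On targets with a hardware bit-reverse instruction this is a single
// instruction, so no table is needed.
fn reverse_bits_instr(n: u32) -> u32 {
    n.reverse_bits()
}

// On other targets, a 512-entry table built at compile time; callers must
// keep n below 512, or the index panics.
fn reverse_bits_table(n: u32) -> u32 {
    static REVERSED: [u32; 512] = {
        let mut table = [0u32; 512];
        let mut i = 0;
        while i < 512 {
            table[i] = (i as u32).reverse_bits();
            i += 1;
        }
        table
    };
    REVERSED[n as usize]
}

fn main() {
    // Both strategies agree wherever the table is defined.
    for n in 0..512u32 {
        assert_eq!(reverse_bits_instr(n), reverse_bits_table(n));
    }
    println!("ok");
}
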
8 changes: 4 additions & 4 deletions miniz_oxide/src/deflate/core.rs
@@ -556,7 +556,7 @@ pub struct CallbackFunc<'a> {
     pub put_buf_func: &'a mut dyn FnMut(&[u8]) -> bool,
 }

-impl<'a> CallbackFunc<'a> {
+impl CallbackFunc<'_> {
     fn flush_output(
         &mut self,
         saved_output: SavedOutputBufferOxide,
@@ -580,7 +580,7 @@ struct CallbackBuf<'a> {
     pub out_buf: &'a mut [u8],
 }

-impl<'a> CallbackBuf<'a> {
+impl CallbackBuf<'_> {
     fn flush_output(
         &mut self,
         saved_output: SavedOutputBufferOxide,
@@ -609,7 +609,7 @@ enum CallbackOut<'a> {
     Buf(CallbackBuf<'a>),
 }

-impl<'a> CallbackOut<'a> {
+impl CallbackOut<'_> {
     fn new_output_buffer<'b>(
         &'b mut self,
         local_buf: &'b mut [u8],
@@ -700,7 +700,7 @@ struct OutputBufferOxide<'a> {
     pub bits_in: u32,
 }

-impl<'a> OutputBufferOxide<'a> {
+impl OutputBufferOxide<'_> {
     fn put_bits(&mut self, bits: u32, len: u32) {
         // TODO: Removing this assertion worsens performance
         // Need to figure out why
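
The deflate/core.rs changes above are the clippy part of the commit: the named lifetime on each impl block exists only to repeat the struct's lifetime, which newer clippy flags (most likely the needless_lifetimes lint), and eliding it with '_ keeps the same meaning. A minimal before/after sketch on a hypothetical type:

struct Wrapper<'a> {
    data: &'a mut [u8],
}

// Before: a lifetime parameter declared on the impl just to name the struct's.
// impl<'a> Wrapper<'a> {
//     fn len(&self) -> usize { self.data.len() }
// }

// After: the anonymous lifetime says the same thing with less noise.
impl Wrapper<'_> {
    fn len(&self) -> usize {
        self.data.len()
    }
}

fn main() {
    let mut buf = [0u8; 4];
    let w = Wrapper { data: &mut buf };
    println!("{}", w.len());
}
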
28 changes: 21 additions & 7 deletions miniz_oxide/src/inflate/core.rs
@@ -52,11 +52,13 @@ impl HuffmanTable {
             // symbol here indicates the position of the left (0) node, if the next bit is 1
             // we add 1 to the lookup position to get the right node.
             let tree_index = (!symbol + ((bit_buf >> code_len) & 1) as i32) as usize;
+
+            // Use get here to avoid generating panic code.
+            // The init_tree code should prevent this from actually going out of bounds
+            // but if there were somehow a bug with that
+            // we would at worst end up with corrupted output in release mode.
             debug_assert!(tree_index < self.tree.len());
-            if tree_index >= self.tree.len() {
-                break;
-            }
-            symbol = i32::from(self.tree[tree_index]);
+            symbol = i32::from(self.tree.get(tree_index).copied().unwrap_or(i16::MAX));
             code_len += 1;
             if symbol >= 0 {
                 break;
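
The hunk above is the tree_lookup fix from the commit message: the explicit bounds-check branch (which seemed to hurt performance) is dropped in favour of a get-based read that degrades to a sentinel instead of panicking. A standalone sketch of that pattern, with illustrative names rather than the crate's:

// Hypothetical stand-ins: `table` and SENTINEL are not miniz_oxide names.
const SENTINEL: i16 = i16::MAX;

fn lookup(table: &[i16], index: usize) -> i32 {
    // `get` returns None for an out-of-range index instead of panicking,
    // so no panic/unwind path has to be generated for this hot loop;
    // a bad index just yields the sentinel value.
    i32::from(table.get(index).copied().unwrap_or(SENTINEL))
}

fn main() {
    let table = [3i16, -7, 42];
    assert_eq!(lookup(&table, 2), 42);
    assert_eq!(lookup(&table, 9), i32::from(SENTINEL));
}
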
@@ -694,16 +696,29 @@ fn start_static_table(r: &mut DecompressorOxide) {
     memset(&mut r.code_size_dist[0..32], 5);
 }

-#[cfg(feature = "rustc-dep-of-std")]
+#[cfg(any(
+    feature = "rustc-dep-of-std",
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_arch = "loongarch64"
+))]
 fn reverse_bits(n: u32) -> u32 {
     // Lookup is not used when building as part of std to avoid wasting space
     // for lookup table in every rust binary
     // as it's only used for backtraces in the cold path
     // - see #152
+
+    // armv7 and newer, and loongarch have a cpu instruction for bit reversal so
+    // it's preferable to just use that on those architectures.
     n.reverse_bits()
 }

-#[cfg(not(feature = "rustc-dep-of-std"))]
+#[cfg(not(any(
+    feature = "rustc-dep-of-std",
+    target_arch = "aarch64",
+    target_arch = "arm64ec",
+    target_arch = "loongarch64"
+)))]
 fn reverse_bits(n: u32) -> u32 {
     static REVERSED_BITS_LOOKUP: [u32; 512] = {
         let mut table = [0; 512];
@@ -716,7 +731,6 @@ fn reverse_bits(n: u32) -> u32 {

         table
     };
-
     REVERSED_BITS_LOOKUP[n as usize]
 }

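As background on why inflate reverses bits at all (a general DEFLATE detail, hedged here rather than quoted from the crate's call sites): canonical Huffman codes are defined most-significant-bit first, but the compressed stream is read least-significant-bit first, so a code has to be bit-reversed before it can index a lookup table. A small illustrative helper:

// Hypothetical helper, not from miniz_oxide: reverse the low `len` bits of
// a canonical Huffman code so it matches the LSB-first bit order of the stream.
fn reverse_code(code: u32, len: u32) -> u32 {
    debug_assert!((1..=15).contains(&len));
    // Reverse all 32 bits (instruction or table), then shift the result down
    // so only the `len` significant bits remain.
    code.reverse_bits() >> (32 - len)
}

fn main() {
    // The 3-bit code 0b110, read LSB-first from the stream, appears as 0b011.
    assert_eq!(reverse_code(0b110, 3), 0b011);
    println!("ok");
}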
