diff --git a/src/decoder/zlib.rs b/src/decoder/zlib.rs index 2e183bee..d9eba517 100644 --- a/src/decoder/zlib.rs +++ b/src/decoder/zlib.rs @@ -12,8 +12,12 @@ pub(super) struct ZlibStream { /// The decoder sometimes wants inspect some already finished bytes for further decoding. So we /// keep a total of 32KB of decoded data available as long as more data may be appended. out_buffer: Vec, - /// The cursor position in the output stream as a buffer index. + /// The first index of `out_buffer` where new data can be written. out_pos: usize, + /// The first index of `out_buffer` that hasn't yet been passed to our client + /// (i.e. not yet appended to the `image_data` parameter of `fn decompress` or `fn + /// finish_compressed_chunks`). + read_pos: usize, /// Limit on how many bytes can be decompressed in total. This field is mostly used for /// performance optimizations (e.g. to avoid allocating and zeroing out large buffers when only /// a small image is being decoded). @@ -33,6 +37,7 @@ impl ZlibStream { started: false, out_buffer: Vec::new(), out_pos: 0, + read_pos: 0, max_total_output: usize::MAX, ignore_adler32: true, } @@ -42,6 +47,7 @@ impl ZlibStream { self.started = false; self.out_buffer.clear(); self.out_pos = 0; + self.read_pos = 0; self.max_total_output = usize::MAX; *self.state = Decompressor::new(); } @@ -94,6 +100,7 @@ impl ZlibStream { self.started = true; self.out_pos += out_consumed; self.transfer_finished_data(image_data); + self.compact_out_buffer_if_needed(); Ok(in_consumed) } @@ -128,11 +135,12 @@ impl ZlibStream { transferred > 0 || out_consumed > 0, "No more forward progress made in stream decoding." 
); + self.compact_out_buffer_if_needed(); } } - self.out_buffer.truncate(self.out_pos); - image_data.append(&mut self.out_buffer); + self.transfer_finished_data(image_data); + self.out_buffer.clear(); Ok(()) } @@ -179,10 +187,37 @@ impl ZlibStream { } fn transfer_finished_data(&mut self, image_data: &mut Vec) -> usize { - let safe = self.out_pos.saturating_sub(CHUNCK_BUFFER_SIZE); - // TODO: allocation limits. - image_data.extend(self.out_buffer.drain(..safe)); - self.out_pos -= safe; - safe + let transferred = &self.out_buffer[self.read_pos..self.out_pos]; + image_data.extend_from_slice(transferred); + self.read_pos = self.out_pos; + transferred.len() + } + + fn compact_out_buffer_if_needed(&mut self) { + // [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) says that + // "deflate/inflate compression with a sliding window (which is an upper bound on the + // distances appearing in the deflate stream) of at most 32768 bytes". + // + // `fdeflate` requires that we keep this many most recently decompressed bytes in the + // `out_buffer` (this allows referring back to them when handling "length and distance + // codes" in the deflate stream). + const LOOKBACK_SIZE: usize = 32768; + + // Compact `self.out_buffer` when "needed". Doing this conditionally helps to put an upper + // bound on the amortized cost of copying the data within `self.out_buffer`. + // + // TODO: The factor of 4 is an ad-hoc heuristic. Consider measuring and using a different + // factor. (Early experiments seem to indicate that factor of 4 is faster than a factor of + // 2 and 4 * `LOOKBACK_SIZE` seems like an acceptable memory trade-off. Higher factors + // result in higher memory usage, but the compaction cost is lower - factor of 4 means + // that 1 byte gets copied during compaction for 3 decompressed bytes.) + if self.out_pos > LOOKBACK_SIZE * 4 { + // Only preserve the `lookback_buffer` and "throw away" the earlier prefix. 
+ let lookback_buffer = self.out_pos.saturating_sub(LOOKBACK_SIZE)..self.out_pos; + let preserved_len = lookback_buffer.len(); + self.out_buffer.copy_within(lookback_buffer, 0); + self.read_pos = preserved_len; + self.out_pos = preserved_len; + } } }