Skip to content

Commit

Permalink
Copy 4 bytes at a time in create_rgba_palette
Browse files Browse the repository at this point in the history
This improves performance for large palettes, as measured by the following benchmarks:

- expand_paletted(ctor)/plte=256/trns=256
  [-40.581% -40.396% -40.211%] (p = 0.00 < 0.05)
- expand_paletted(ctor)/plte=224/trns=32
  [-24.070% -23.840% -23.592%] (p = 0.00 < 0.05)

Small palettes are essentially unaffected (the measured change is not statistically significant):

- expand_paletted(ctor)/plte=16/trns=1
  [-0.2525% +0.0338% +0.3239%] (p = 0.81 > 0.05)
  • Loading branch information
anforowicz authored and fintelia committed Feb 2, 2024
1 parent 72aecc3 commit b13388f
Showing 1 changed file with 27 additions and 6 deletions.
33 changes: 27 additions & 6 deletions src/decoder/transform/palette.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,37 @@ fn create_rgba_palette(info: &Info) -> [[u8; 4]; 256] {
// Default to black, opaque entries.
let mut rgba_palette = [[0, 0, 0, 0xFF]; 256];

// Replace missing `trns` entry with 100%-opaque alpha.
let trns = trns.iter().copied().chain(std::iter::repeat(0xFF));
// Copy `palette` (RGB) entries into `rgba_palette`. This may clobber alpha
// values in `rgba_palette` - we need to fix this later.
{
let mut palette_iter = palette;
let mut rgba_iter = &mut rgba_palette[..];
while palette_iter.len() >= 4 {
// Copying 4 bytes at a time is more efficient than copying 3.
// OTOH, this clobbers the alpha value in `rgba_iter[0][3]` - we
// need to fix this later.
rgba_iter[0].copy_from_slice(&palette_iter[0..4]);

palette_iter = &palette_iter[3..];
rgba_iter = &mut rgba_iter[1..];
}
if palette_iter.len() > 0 {
rgba_iter[0][0..3].copy_from_slice(&palette_iter[0..3]);
}
}

// Combine `palette` and `trns` into a single lookup table: `rgba_palette`.
let rgba_and_alpha_iter = palette.chunks_exact(3).zip(trns);
for (rgba, (rgb, alpha)) in rgba_palette.iter_mut().zip(rgba_and_alpha_iter) {
rgba[0..3].copy_from_slice(rgb);
// Copy `trns` (alpha) entries into `rgba_palette`. `trns.len()` may be
// smaller than `palette.len()` and therefore this is not sufficient to fix
// all the clobbered alpha values.
for (alpha, rgba) in trns.iter().copied().zip(rgba_palette.iter_mut()) {
rgba[3] = alpha;
}

// Unclobber the remaining alpha values.
for rgba in rgba_palette[trns.len()..(palette.len() / 3)].iter_mut() {
rgba[3] = 0xFF;
}

rgba_palette
}

Expand Down

0 comments on commit b13388f

Please sign in to comment.