Skip to content

Commit

Permalink
Merge pull request #8628 from rouault/tif_jxl_memcpy
Browse files Browse the repository at this point in the history
GTiff JXL codec: fix wrong use of memcpy() in decoding, and add memcpy() optimizations
  • Loading branch information
rouault authored Oct 29, 2023
2 parents 4de5a34 + e46be1c commit a28eb0e
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 14 deletions.
1 change: 1 addition & 0 deletions autotest/gcore/tiff_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -10375,6 +10375,7 @@ def test_tiff_write_jpegxl_band_combinations():
types = [
gdal.GDT_Byte,
gdal.GDT_UInt16,
gdal.GDT_Float32,
]

creationOptions = [
Expand Down
134 changes: 120 additions & 14 deletions frmts/gtiff/tif_jxl.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,33 +514,139 @@ static int JXLPreDecode(TIFF *tif, uint16_t s)
if (nFirstExtraChannel < info.num_extra_channels)
{
// first reorder the main buffer
int nMainChannels = bAlphaEmbedded ? info.num_color_channels + 1
: info.num_color_channels;
unsigned int mainPixSize = nMainChannels * nBytesPerSample;
unsigned int fullPixSize = td->td_samplesperpixel * nBytesPerSample;
const int nMainChannels = bAlphaEmbedded ? info.num_color_channels + 1
: info.num_color_channels;
const unsigned int mainPixSize = nMainChannels * nBytesPerSample;
const unsigned int fullPixSize =
td->td_samplesperpixel * nBytesPerSample;
assert(fullPixSize > mainPixSize);

/* Find min value of k such that k * fullPixSize >= (k + 1) * mainPixSize:
* ==> k = ceil(mainPixSize / (fullPixSize - mainPixSize))
* ==> k = (mainPixSize + (fullPixSize - mainPixSize) - 1) / (fullPixSize - mainPixSize)
* ==> k = (fullPixSize - 1) / (fullPixSize - mainPixSize)
*/
const unsigned int nNumPixels = info.xsize * info.ysize;
unsigned int outOff = sp->uncompressed_size - fullPixSize;
unsigned int inOff = main_buffer_size - mainPixSize;
while (TRUE)
const unsigned int kThreshold =
(fullPixSize - 1) / (fullPixSize - mainPixSize);
if (mainPixSize == 1)
{
memcpy(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, mainPixSize);
if (inOff < mainPixSize)
break;
for (unsigned int k = kThreshold; k < nNumPixels; ++k)
{
memcpy(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, /*mainPixSize=*/1);
inOff -= /*mainPixSize=*/1;
outOff -= fullPixSize;
}
}
else if (mainPixSize == 2)
{
for (unsigned int k = kThreshold; k < nNumPixels; ++k)
{
memcpy(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, /*mainPixSize=*/2);
inOff -= /*mainPixSize=*/2;
outOff -= fullPixSize;
}
}
else if (mainPixSize == 3)
{
for (unsigned int k = kThreshold; k < nNumPixels; ++k)
{
memcpy(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, /*mainPixSize=*/3);
inOff -= /*mainPixSize=*/3;
outOff -= fullPixSize;
}
}
else if (mainPixSize == 4)
{
for (unsigned int k = kThreshold; k < nNumPixels; ++k)
{
memcpy(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, /*mainPixSize=*/4);
inOff -= /*mainPixSize=*/4;
outOff -= fullPixSize;
}
}
else if (mainPixSize == 3 * 2)
{
for (unsigned int k = kThreshold; k < nNumPixels; ++k)
{
memcpy(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, /*mainPixSize=*/3 * 2);
inOff -= /*mainPixSize=*/3 * 2;
outOff -= fullPixSize;
}
}
else if (mainPixSize == 4 * 2)
{
for (unsigned int k = kThreshold; k < nNumPixels; ++k)
{
memcpy(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, /*mainPixSize=*/4 * 2);
inOff -= /*mainPixSize=*/4 * 2;
outOff -= fullPixSize;
}
}
else
{
for (unsigned int k = kThreshold; k < nNumPixels; ++k)
{
memcpy(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, mainPixSize);
inOff -= mainPixSize;
outOff -= fullPixSize;
}
}
/* Last iterations need memmove() because of overlapping between */
/* source and target regions. */
for (unsigned int k = kThreshold; k > 1;)
{
--k;
memmove(sp->uncompressed_buffer + outOff,
sp->uncompressed_buffer + inOff, mainPixSize);
inOff -= mainPixSize;
outOff -= fullPixSize;
}
// then copy over the data from the extra_channel_buffer
int nExtraChannelsToExtract =
const int nExtraChannelsToExtract =
info.num_extra_channels - nFirstExtraChannel;
for (int i = 0; i < nExtraChannelsToExtract; ++i)
{
outOff = (i + nMainChannels) * nBytesPerSample;
uint8_t *channel_buffer = extra_channel_buffer + i * channel_size;
for (; outOff < sp->uncompressed_size;
outOff += fullPixSize, channel_buffer += nBytesPerSample)
if (nBytesPerSample == 1)
{
for (; outOff < sp->uncompressed_size;
outOff += fullPixSize,
channel_buffer += /*nBytesPerSample=*/1)
{
memcpy(sp->uncompressed_buffer + outOff, channel_buffer,
/*nBytesPerSample=*/1);
}
}
else if (nBytesPerSample == 2)
{
for (; outOff < sp->uncompressed_size;
outOff += fullPixSize,
channel_buffer += /*nBytesPerSample=*/2)
{
memcpy(sp->uncompressed_buffer + outOff, channel_buffer,
/*nBytesPerSample=*/2);
}
}
else
{
memcpy(sp->uncompressed_buffer + outOff, channel_buffer,
nBytesPerSample);
assert(nBytesPerSample == 4);
for (; outOff < sp->uncompressed_size;
outOff += fullPixSize, channel_buffer += nBytesPerSample)
{
memcpy(sp->uncompressed_buffer + outOff, channel_buffer,
nBytesPerSample);
}
}
}
_TIFFfreeExt(tif, extra_channel_buffer);
Expand Down

0 comments on commit a28eb0e

Please sign in to comment.