From e46be1cc385abd64c296e09f4d26713c879c345b Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sun, 29 Oct 2023 14:42:19 +0100 Subject: [PATCH] GTiff JXL codec: fix wrong use of memcpy() in decoding, and add memcpy() optimizations --- autotest/gcore/tiff_write.py | 1 + frmts/gtiff/tif_jxl.c | 134 +++++++++++++++++++++++++++++++---- 2 files changed, 121 insertions(+), 14 deletions(-) diff --git a/autotest/gcore/tiff_write.py b/autotest/gcore/tiff_write.py index 0d8c4ab5bb8b..99fcf5c86f9a 100755 --- a/autotest/gcore/tiff_write.py +++ b/autotest/gcore/tiff_write.py @@ -10375,6 +10375,7 @@ def test_tiff_write_jpegxl_band_combinations(): types = [ gdal.GDT_Byte, gdal.GDT_UInt16, + gdal.GDT_Float32, ] creationOptions = [ diff --git a/frmts/gtiff/tif_jxl.c b/frmts/gtiff/tif_jxl.c index 601c162c7944..be74b4170482 100644 --- a/frmts/gtiff/tif_jxl.c +++ b/frmts/gtiff/tif_jxl.c @@ -514,33 +514,139 @@ static int JXLPreDecode(TIFF *tif, uint16_t s) if (nFirstExtraChannel < info.num_extra_channels) { // first reorder the main buffer - int nMainChannels = bAlphaEmbedded ? info.num_color_channels + 1 - : info.num_color_channels; - unsigned int mainPixSize = nMainChannels * nBytesPerSample; - unsigned int fullPixSize = td->td_samplesperpixel * nBytesPerSample; + const int nMainChannels = bAlphaEmbedded ? info.num_color_channels + 1 + : info.num_color_channels; + const unsigned int mainPixSize = nMainChannels * nBytesPerSample; + const unsigned int fullPixSize = + td->td_samplesperpixel * nBytesPerSample; + assert(fullPixSize > mainPixSize); + + /* Find min value of k such that k * fullPixSize >= (k + 1) * mainPixSize: + * ==> k = ceil(mainPixSize / (fullPixSize - mainPixSize)) + * ==> k = (mainPixSize + (fullPixSize - mainPixSize) - 1) / (fullPixSize - mainPixSize) + * ==> k = (fullPixSize - 1) / (fullPixSize - mainPixSize) + */ + const unsigned int nNumPixels = info.xsize * info.ysize; unsigned int outOff = sp->uncompressed_size - fullPixSize; unsigned int inOff = main_buffer_size - mainPixSize; - while (TRUE) + const unsigned int kThreshold = + (fullPixSize - 1) / (fullPixSize - mainPixSize); + if (mainPixSize == 1) { - memcpy(sp->uncompressed_buffer + outOff, - sp->uncompressed_buffer + inOff, mainPixSize); - if (inOff < mainPixSize) - break; + for (unsigned int k = kThreshold; k < nNumPixels; ++k) + { + memcpy(sp->uncompressed_buffer + outOff, + sp->uncompressed_buffer + inOff, /*mainPixSize=*/1); + inOff -= /*mainPixSize=*/1; + outOff -= fullPixSize; + } + } + else if (mainPixSize == 2) + { + for (unsigned int k = kThreshold; k < nNumPixels; ++k) + { + memcpy(sp->uncompressed_buffer + outOff, + sp->uncompressed_buffer + inOff, /*mainPixSize=*/2); + inOff -= /*mainPixSize=*/2; + outOff -= fullPixSize; + } + } + else if (mainPixSize == 3) + { + for (unsigned int k = kThreshold; k < nNumPixels; ++k) + { + memcpy(sp->uncompressed_buffer + outOff, + sp->uncompressed_buffer + inOff, /*mainPixSize=*/3); + inOff -= /*mainPixSize=*/3; + outOff -= fullPixSize; + } + } + else if (mainPixSize == 4) + { + for (unsigned int k = kThreshold; k < nNumPixels; ++k) + { + memcpy(sp->uncompressed_buffer + outOff, + sp->uncompressed_buffer + inOff, /*mainPixSize=*/4); + inOff -= /*mainPixSize=*/4; + outOff -= fullPixSize; + } + } + else if (mainPixSize == 3 * 2) + { + for (unsigned int k = kThreshold; k < nNumPixels; ++k) + { + memcpy(sp->uncompressed_buffer + outOff, + sp->uncompressed_buffer + inOff, /*mainPixSize=*/3 * 2); + inOff -= /*mainPixSize=*/3 * 2; + outOff -= fullPixSize; + } + } + else if (mainPixSize == 4 * 2) + { + for (unsigned int k = kThreshold; k < nNumPixels; ++k) + { + memcpy(sp->uncompressed_buffer + outOff, + sp->uncompressed_buffer + inOff, /*mainPixSize=*/4 * 2); + inOff -= /*mainPixSize=*/4 * 2; + outOff -= fullPixSize; + } + } + else + { + for (unsigned int k = kThreshold; k < nNumPixels; ++k) + { + memcpy(sp->uncompressed_buffer + outOff, + sp->uncompressed_buffer + inOff, mainPixSize); + inOff -= mainPixSize; + outOff -= fullPixSize; + } + } + /* Last iterations need memmove() because of overlapping between */ + /* source and target regions. */ + for (unsigned int k = kThreshold; k > 1;) + { + --k; + memmove(sp->uncompressed_buffer + outOff, + sp->uncompressed_buffer + inOff, mainPixSize); inOff -= mainPixSize; outOff -= fullPixSize; } // then copy over the data from the extra_channel_buffer - int nExtraChannelsToExtract = + const int nExtraChannelsToExtract = info.num_extra_channels - nFirstExtraChannel; for (int i = 0; i < nExtraChannelsToExtract; ++i) { outOff = (i + nMainChannels) * nBytesPerSample; uint8_t *channel_buffer = extra_channel_buffer + i * channel_size; - for (; outOff < sp->uncompressed_size; - outOff += fullPixSize, channel_buffer += nBytesPerSample) + if (nBytesPerSample == 1) + { + for (; outOff < sp->uncompressed_size; + outOff += fullPixSize, + channel_buffer += /*nBytesPerSample=*/1) + { + memcpy(sp->uncompressed_buffer + outOff, channel_buffer, + /*nBytesPerSample=*/1); + } + } + else if (nBytesPerSample == 2) + { + for (; outOff < sp->uncompressed_size; + outOff += fullPixSize, + channel_buffer += /*nBytesPerSample=*/2) + { + memcpy(sp->uncompressed_buffer + outOff, channel_buffer, + /*nBytesPerSample=*/2); + } + } + else { - memcpy(sp->uncompressed_buffer + outOff, channel_buffer, - nBytesPerSample); + assert(nBytesPerSample == 4); + for (; outOff < sp->uncompressed_size; + outOff += fullPixSize, channel_buffer += nBytesPerSample) + { + memcpy(sp->uncompressed_buffer + outOff, channel_buffer, + nBytesPerSample); + } } } _TIFFfreeExt(tif, extra_channel_buffer);