diff --git a/crengine/include/lvdrawbuf.h b/crengine/include/lvdrawbuf.h index ee0d2dd31..5de108322 100644 --- a/crengine/include/lvdrawbuf.h +++ b/crengine/include/lvdrawbuf.h @@ -99,6 +99,10 @@ class LVDrawBuf : public CacheableObject virtual void setHidePartialGlyphs( bool hide ) = 0; /// set to true to invert images only (so they get inverted back to normal by nightmode) virtual void setInvertImages( bool invert ) = 0; + /// set to true to enforce dithering (only relevant for 8bpp Gray drawBuf) + virtual void setDitherImages( bool dither ) = 0; + /// set to true to switch to a more costly smooth scaler instead of nearest neighbor + virtual void setSmoothScalingImages( bool smooth ) = 0; /// invert image virtual void Invert() = 0; /// get buffer width, pixels @@ -231,6 +235,8 @@ class LVBaseDrawBuf : public LVDrawBuf lUInt32 _textColor; bool _hidePartialGlyphs; bool _invertImages; + bool _ditherImages; + bool _smoothImages; int _drawnImagesCount; int _drawnImagesSurface; public: @@ -238,6 +244,10 @@ class LVBaseDrawBuf : public LVDrawBuf virtual void setHidePartialGlyphs( bool hide ) { _hidePartialGlyphs = hide; } /// set to true to invert images only (so they get inverted back to normal by nightmode) virtual void setInvertImages( bool invert ) { _invertImages = invert; } + /// set to true to enforce dithering (only relevant for 8bpp Gray drawBuf) + virtual void setDitherImages( bool dither ) { _ditherImages = dither; } + /// set to true to switch to a more costly smooth scaler instead of nearest neighbor + virtual void setSmoothScalingImages( bool smooth ) { _smoothImages = smooth; } /// returns current background color virtual lUInt32 GetBackgroundColor() { return _backgroundColor; } /// sets current background color @@ -277,7 +287,8 @@ class LVBaseDrawBuf : public LVDrawBuf int getDrawnImagesSurface() { return _drawnImagesSurface; } LVBaseDrawBuf() : _dx(0), _dy(0), _rowsize(0), _data(NULL), _hidePartialGlyphs(true), - _invertImages(false), 
_drawnImagesCount(0), _drawnImagesSurface(0) { } + _invertImages(false), _ditherImages(false), _smoothImages(false), + _drawnImagesCount(0), _drawnImagesSurface(0) { } virtual ~LVBaseDrawBuf() { } }; @@ -399,11 +410,12 @@ class LVGrayDrawBuf : public LVBaseDrawBuf // c.f., https://github.com/koreader/koreader-base/pull/878#issuecomment-476723747 #ifdef CR_RENDER_32BPP_RGB_PXFMT inline lUInt32 RevRGB( lUInt32 cl ) { - return ((cl>>16)&0x0000FF) | ((cl<<16)&0xFF0000) | (cl&0x00FF00); + return ((cl<<16)&0xFF0000) | ((cl>>16)&0x0000FF) | (cl&0x00FF00); } inline lUInt32 RevRGBA( lUInt32 cl ) { - return (cl&0xFF000000) | ((cl>>16)&0x0000FF) | ((cl<<16)&0xFF0000) | (cl&0x00FF00); + // Swap B <-> R, keep G & A + return ((cl<<16)&0x00FF0000) | ((cl>>16)&0x000000FF) | (cl&0xFF00FF00); } #else inline lUInt32 RevRGB( lUInt32 cl ) { @@ -423,6 +435,65 @@ inline lUInt16 rgb888to565( lUInt32 cl ) { return (lUInt16)(((cl>>8)& 0xF800) | ((cl>>5 )& 0x07E0) | ((cl>>3 )& 0x001F)); } +#define DIV255(V) \ +({ \ + auto _v = (V) + 128; \ + (((_v >> 8U) + _v) >> 8U); \ +}) + +// Quantize an 8-bit color value down to a palette of 16 evenly spaced colors, using an ordered 8x8 dithering pattern. +// With a grayscale input, this happens to match the eInk palette perfectly ;). +// If the input is not grayscale, and the output fb is not grayscale either, +// this usually still happens to match the eInk palette after the EPDC's own quantization pass. +// c.f., https://en.wikipedia.org/wiki/Ordered_dithering +// & https://github.com/ImageMagick/ImageMagick/blob/ecfeac404e75f304004f0566557848c53030bad6/MagickCore/threshold.c#L1627 +// NOTE: As the references imply, this is straight from ImageMagick, +// with only minor simplifications to enforce Q8 & avoid fp maths. 
+static inline lUInt8 dither_o8x8(int x, int y, lUInt8 v)
+{
+    // Ordered-dither one 8-bit sample down to 16 evenly spaced levels (multiples of 17).
+    //   x, y: pixel coordinates; only their low 3 bits are used, to pick a cell of the 8x8 map.
+    //   v:    input sample, 0..255.
+    // Returns the quantized sample, one of 0x00, 0x11, ..., 0xFF.
+
+    // 8x8 threshold map (values 1..64), stored row by row.
+    // c.f., https://github.com/ImageMagick/ImageMagick/blob/ecfeac404e75f304004f0566557848c53030bad6/config/thresholds.xml#L107
+    static const lUInt8 threshold_map_o8x8[] = {
+         1, 49, 13, 61,  4, 52, 16, 64,
+        33, 17, 45, 29, 36, 20, 48, 32,
+         9, 57,  5, 53, 12, 60,  8, 56,
+        41, 25, 37, 21, 44, 28, 40, 24,
+         3, 51, 15, 63,  2, 50, 14, 62,
+        35, 19, 47, 31, 34, 18, 46, 30,
+        11, 59,  7, 55, 10, 58,  6, 54,
+        43, 27, 39, 23, 42, 26, 38, 22
+    };
+
+    // ImageMagick's formula with Quantum = 8 bits, Levels L = 16 and map divisor D = 65:
+    //   threshold = v / 255 * ((L-1) * (D-1) + 1)
+    // The product handed to DIV255 does not fit in 8 bits, hence the (signed) 16-bit locals.
+    const lInt16 scaled = (lInt16) DIV255(v * ((15U << 6) + 1U));
+    // level = threshold / (D-1)
+    const lInt16 level = (lInt16)(scaled >> 6);
+    // remainder = threshold - level * (D-1)
+    const lInt16 remainder = (lInt16)(scaled - (level << 6));
+
+    // Step up to the next level when the remainder reaches this pixel's threshold,
+    // then stretch the level back to 8 bits: QuantumRange / (L-1) = 255 / 15 = 17.
+    const bool bump = remainder >= threshold_map_o8x8[(x & 7U) + 8U * (y & 7U)];
+    const lInt16 q = (lInt16)((level + bump) * 17);
+
+    // Clamp to 0..255 with plain branches; the original author measured this form
+    // as faster on ARM than a CLIP8-style macro.
+    if (q > 0xFF) {
+        return 0xFF;
+    }
+    if (q < 0) {
+        return 0U;
+    }
+    return (lUInt8) q;
+}
+
+// Declare our bit of scaler ripped from Qt5...
+namespace CRe { +lUInt8* qSmoothScaleImage(const lUInt8* src, int sw, int sh, bool ignore_alpha, int dw, int dh); +} + /// 32-bit RGB buffer class LVColorDrawBuf : public LVBaseDrawBuf { diff --git a/crengine/qimagescale/QIMAGETRANSFORM_LICENSE.txt b/crengine/qimagescale/QIMAGETRANSFORM_LICENSE.txt new file mode 100644 index 000000000..67c910826 --- /dev/null +++ b/crengine/qimagescale/QIMAGETRANSFORM_LICENSE.txt @@ -0,0 +1,60 @@ +qimagetransform.cpp was contributed by Daniel M. Duley based on code from Imlib2. + +Copyright (C) 2004, 2005 Daniel M. Duley + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + + +Imlib2 License + +Copyright (C) 2000 Carsten Haitzler and various contributors (see +AUTHORS) + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies of the Software and its Copyright notices. In addition +publicly documented acknowledgment must be given that this software has +been used if no source code of this software is made available publicly. +This includes acknowledgments in either Copyright notices, Manuals, +Publicity and Marketing documents or any documentation provided with any +product containing this software. This License does not apply to any +software that links to the libraries provided by this software +(statically or dynamically), but only to the software provided. + +Please see the COPYING.PLAIN for a plain-english explanation of this +notice and it's intent. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/crengine/qimagescale/qdrawhelper_p.h b/crengine/qimagescale/qdrawhelper_p.h new file mode 100644 index 000000000..205ee3a1f --- /dev/null +++ b/crengine/qimagescale/qdrawhelper_p.h @@ -0,0 +1,182 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. 
Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QDRAWHELPER_P_H
+#define QDRAWHELPER_P_H
+
+#include "qglobal.h"
+// NOTE(review): the header names of the three system includes below were missing
+// (bare "#include"). They are restored from the intrinsics used in this file:
+// uint32x2_t / vmovl_u8 / ... come from <arm_neon.h>, __m128i / _mm_*_epi16 from
+// <emmintrin.h> (SSE2), which builds on <xmmintrin.h> (SSE). Confirm against upstream.
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+#if defined(__SSE2__)
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#endif
+
+namespace CRe {
+
+// Calling-convention helper: only expands to something on 32-bit x86 GCC and on MSVC.
+#if defined(__GNUC__)
+#  if (defined(__i386) || defined(__i386__) || defined(_M_IX86)) && defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
+#    define Q_DECL_VECTORCALL __attribute__((sseregparm,regparm(3)))
+#  else
+#    define Q_DECL_VECTORCALL
+#  endif
+#elif defined(_MSC_VER)
+#  define Q_DECL_VECTORCALL __vectorcall
+#else
+#  define Q_DECL_VECTORCALL
+#endif
+
+// INTERPOLATE_PIXEL_256(x, a, y, b):
+// Blend two packed 32-bit pixels (four 8-bit channels each) and return, for every channel c,
+// the low 8 bits of (x_c * a + y_c * b) >> 8. Every caller in this code base passes weights
+// with a + b == 256, which keeps each channel within 0..255.
+#if __SIZEOF_POINTER__ == 8 // 64-bit versions
+
+static inline __attribute__((always_inline)) uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+    // Spread the four bytes into four 16-bit lanes of a 64-bit word, so that a single
+    // multiplication scales all channels at once.
+    quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
+    t += (((quint64(y)) | ((quint64(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
+    t >>= 8;
+    t &= 0x00ff00ff00ff00ff;
+    // Fold the lanes back into a packed 32-bit pixel.
+    return (uint(t)) | (uint(t >> 24));
+}
+
+#else // 32-bit versions
+
+static inline __attribute__((always_inline)) uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+    // Bytes 0 and 2 first...
+    uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
+    t >>= 8;
+    t &= 0xff00ff;
+
+    // ...then bytes 1 and 3; they are shifted down before the multiply so the products
+    // land directly in their final position.
+    x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
+    x &= 0xff00ff00;
+    x |= t;
+    return x;
+}
+
+#endif
+
+// interpolate_4_pixels(): bilinear blend of a 2x2 pixel neighbourhood.
+//   tl/tr/bl/br (or t[0], t[1], b[0], b[1]): top-left, top-right, bottom-left, bottom-right pixels.
+//   distx, disty: weights of the right column / bottom row; the callers visible in this
+//                 patch pass values in 1..255 (the opposite side gets 256 - dist).
+// Three interchangeable implementations follow (SSE2, NEON, portable).
+// NOTE(review): <arm_neon.h> is included when either __ARM_NEON or __ARM_NEON__ is defined,
+// but the NEON implementation is only selected on __ARM_NEON__; a toolchain that defines
+// only __ARM_NEON silently gets the portable version. Confirm whether that is intended.
+//
+// NOTE: Unlike the SIMD qimagescale_* routines, these ones seem to offer a very small performance gain.
+#if defined(__SSE2__)
+static inline __attribute__((always_inline)) uint interpolate_4_pixels_sse2(__m128i vt, __m128i vb, uint distx, uint disty)
+{
+    // First interpolate top and bottom pixels in parallel.
+    vt = _mm_unpacklo_epi8(vt, _mm_setzero_si128());
+    vb = _mm_unpacklo_epi8(vb, _mm_setzero_si128());
+    vt = _mm_mullo_epi16(vt, _mm_set1_epi16(256 - disty));
+    vb = _mm_mullo_epi16(vb, _mm_set1_epi16(disty));
+    __m128i vlr = _mm_add_epi16(vt, vb);
+    vlr = _mm_srli_epi16(vlr, 8);
+    // vlr now contains the result of the first two interpolate calls vlr = unpacked((xright << 64) | xleft)
+
+    // Now the last interpolate between left and right..
+    const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(256 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+    const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
+    const __m128i vmulx = _mm_unpacklo_epi16(vidistx, vdistx);
+    vlr = _mm_unpacklo_epi16(vlr, _mm_srli_si128(vlr, 8));
+    // vlr now contains the colors of left and right interleaved { la, ra, lr, rr, lg, rg, lb, rb }
+    vlr = _mm_madd_epi16(vlr, vmulx); // Multiply and horizontal add.
+    vlr = _mm_srli_epi32(vlr, 8);
+    vlr = _mm_packs_epi32(vlr, vlr);
+    vlr = _mm_packus_epi16(vlr, vlr);
+    return _mm_cvtsi128_si32(vlr);
+}
+
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+    // Pack each row as { left, right } in the low 64 bits of a vector.
+    __m128i vt = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tl), _mm_cvtsi32_si128(tr));
+    __m128i vb = _mm_unpacklo_epi32(_mm_cvtsi32_si128(bl), _mm_cvtsi32_si128(br));
+    return interpolate_4_pixels_sse2(vt, vb, distx, disty);
+}
+
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    // Reads exactly two pixels (64 bits) from each row pointer.
+    __m128i vt = _mm_loadl_epi64((const __m128i*)t);
+    __m128i vb = _mm_loadl_epi64((const __m128i*)b);
+    return interpolate_4_pixels_sse2(vt, vb, distx, disty);
+}
+
+#elif defined(__ARM_NEON__)
+static inline __attribute__((always_inline)) uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
+{
+    // Widen both rows to 16 bits per channel, then blend top/bottom.
+    uint16x8_t vt16 = vmovl_u8(vreinterpret_u8_u32(vt32));
+    uint16x8_t vb16 = vmovl_u8(vreinterpret_u8_u32(vb32));
+    vt16 = vmulq_n_u16(vt16, 256 - disty);
+    vt16 = vmlaq_n_u16(vt16, vb16, disty);
+    vt16 = vshrq_n_u16(vt16, 8);
+    // Low half holds the left pixel, high half the right one: blend them.
+    uint16x4_t vl16 = vget_low_u16(vt16);
+    uint16x4_t vr16 = vget_high_u16(vt16);
+    vl16 = vmul_n_u16(vl16, 256 - distx);
+    vl16 = vmla_n_u16(vl16, vr16, distx);
+    vl16 = vshr_n_u16(vl16, 8);
+    // Narrow back to 8 bits per channel and extract the packed pixel.
+    uint8x8_t vr = vmovn_u16(vcombine_u16(vl16, vl16));
+    return vget_lane_u32(vreinterpret_u32_u8(vr), 0);
+}
+
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+    // Lane 0 = left pixel, lane 1 = right pixel.
+    uint32x2_t vt32 = vmov_n_u32(tl);
+    uint32x2_t vb32 = vmov_n_u32(bl);
+    vt32 = vset_lane_u32(tr, vt32, 1);
+    vb32 = vset_lane_u32(br, vb32, 1);
+    return interpolate_4_pixels_neon(vt32, vb32, distx, disty);
+}
+
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    // Reads exactly two pixels from each row pointer.
+    uint32x2_t vt32 = vld1_u32(t);
+    uint32x2_t vb32 = vld1_u32(b);
+    return interpolate_4_pixels_neon(vt32, vb32, distx, disty);
+}
+
+#else
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+    // Portable path: blend each row horizontally, then blend the two results vertically.
+    uint idistx = 256 - distx;
+    uint idisty = 256 - disty;
+    uint xtop = INTERPOLATE_PIXEL_256(tl, idistx, tr, distx);
+    uint xbot = INTERPOLATE_PIXEL_256(bl, idistx, br, distx);
+    return INTERPOLATE_PIXEL_256(xtop, idisty, xbot, disty);
+}
+
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    return interpolate_4_pixels(t[0], t[1], b[0], b[1], distx, disty);
+}
+#endif
+
+}
+
+#endif // QDRAWHELPER_P_H
diff --git a/crengine/qimagescale/qglobal.h b/crengine/qimagescale/qglobal.h
new file mode 100644
index 000000000..bce56d481
--- /dev/null
+++ b/crengine/qimagescale/qglobal.h
@@ -0,0 +1,81 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2016 Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QGLOBAL_H
+#define QGLOBAL_H
+
+namespace CRe {
+
+// 64-bit integer aliases, kept inside the CRe namespace.
+#if defined(_WIN32) && !defined(__GNUC__)
+typedef __int64 qint64; /* 64 bit signed */
+typedef unsigned __int64 quint64; /* 64 bit unsigned */
+#else
+typedef long long qint64; /* 64 bit signed */
+typedef unsigned long long quint64; /* 64 bit unsigned */
+#endif
+
+}
+
+/*
+ Useful type definitions for Qt
+ (declared at global scope, unlike the 64-bit aliases above)
+*/
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+namespace CRe {
+
+/*
+ Utility macros and inline functions
+*/
+
+// NOTE: the "<typename T>" parameter lists below had been stripped, leaving a bare
+// "template" keyword that does not compile; they are restored here.
+
+/// Absolute value of t (t itself when t >= 0, -t otherwise).
+template <typename T>
+constexpr inline T qAbs(const T &t) { return t >= 0 ? t : -t; }
+
+/// Smaller of a and b; returns b when they compare equal. Both arguments must have the same type.
+template <typename T>
+constexpr inline const T &qMin(const T &a, const T &b) { return (a < b) ? a : b; }
+/// Larger of a and b; returns a when they compare equal. Both arguments must have the same type.
+template <typename T>
+constexpr inline const T &qMax(const T &a, const T &b) { return (a < b) ? b : a; }
+
+}
+
+#endif /* QGLOBAL_H */
diff --git a/crengine/qimagescale/qimagescale.cpp b/crengine/qimagescale/qimagescale.cpp
new file mode 100644
index 000000000..46da8f888
--- /dev/null
+++ b/crengine/qimagescale/qimagescale.cpp
@@ -0,0 +1,789 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#include "qglobal.h" +#include "qrgb.h" +#include "qimagescale_p.h" +#include "qdrawhelper_p.h" + +#include +#include + +#ifndef FBINK_QIS_NO_SIMD +#if defined(__ARM_NEON__) +#include "qimagescale_neon.cpp" +#endif +#if defined(__SSE4_1__) +#include "qimagescale_sse4.cpp" +#endif +#endif + +namespace CRe { + +/* + * Copyright (C) 2004, 2005 Daniel M. Duley + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* OTHER CREDITS: + * + * This is the normal smoothscale method, based on Imlib2's smoothscale. + * + * Originally I took the algorithm used in NetPBM and Qt and added MMX/3dnow + * optimizations. It ran in about 1/2 the time as Qt. Then I ported Imlib's + * C algorithm and it ran at about the same speed as my MMX optimized one... + * Finally I ported Imlib's MMX version and it ran in less than half the + * time as my MMX algorithm, (taking only a quarter of the time Qt does). + * After further optimization it seems to run at around 1/6th. + * + * Changes include formatting, namespaces and other C++'ings, removal of old + * #ifdef'ed code, and removal of unneeded border calculation code. + * Later the code has been refactored, an SSE4.1 optimizated path have been + * added instead of the removed MMX assembler, and scaling of clipped area + * removed, and an RGBA64 version written + * + * Imlib2 is (C) Carsten Haitzler and various contributors. The MMX code + * is by Willem Monsuwe . 
All other modifications are
+ * (C) Daniel M. Duley.
+ */
+
+
+namespace QImageScale {
+    static const unsigned int** qimageCalcYPoints(const unsigned int *src, int sw, int sh, int dh);
+    static int* qimageCalcXPoints(int sw, int dw);
+    static int* qimageCalcApoints(int s, int d, int up);
+    static QImageScaleInfo* qimageFreeScaleInfo(QImageScaleInfo *isi);
+    static QImageScaleInfo *qimageCalcScaleInfo(const unsigned char* img, int sw, int sh, int dw, int dh, char aa);
+}
+
+using namespace QImageScale;
+
+//
+// Code ported from Imlib...
+//
+
+// Build the per-destination-row table of source row pointers.
+//   src:    first pixel of the source image (32-bit pixels, sw pixels per row).
+//   sw, sh: source width / height in pixels.
+//   dh:     destination height; a negative value requests a vertically mirrored table.
+// Returns a new[]-allocated array of |dh| + 1 slots of which the first |dh| are filled;
+// it is released with delete[] by qimageFreeScaleInfo().
+// Positions are tracked in 16.16 fixed point; when upscaling, sampling starts half a
+// destination pixel early and negative positions are clamped to row 0.
+// dh must not be 0 (it is used as a divisor).
+static const unsigned int** QImageScale::qimageCalcYPoints(const unsigned int *src,
+                                                            int sw, int sh, int dh)
+{
+    const unsigned int **p;
+    int j = 0, rv = 0;
+    qint64 val, inc;
+
+    if (dh < 0) {
+        dh = -dh;
+        rv = 1;
+    }
+    p = new const unsigned int* [dh+1];
+
+    int up = qAbs(dh) >= sh;
+    // Widen before multiplying: 0x8000 * sh in plain int overflows for sh >= 65536.
+    val = up ? (qint64)0x8000 * sh / dh - 0x8000 : 0;
+    inc = (((qint64)sh) << 16) / dh;
+    for (int i = 0; i < dh; i++) {
+        p[j++] = src + qMax(0LL, val >> 16) * sw;
+        val += inc;
+    }
+    if (rv) {
+        // Mirror the table in place.
+        for (int i = dh / 2; --i >= 0; ) {
+            const unsigned int *tmp = p[i];
+            p[i] = p[dh - i - 1];
+            p[dh - i - 1] = tmp;
+        }
+    }
+    return(p);
+}
+
+// Build the per-destination-column table of source column indices.
+// Same scheme as qimageCalcYPoints(), but stores indices instead of pointers.
+// Returns a new[]-allocated array of |dw| + 1 ints (first |dw| filled); dw must not be 0.
+static int* QImageScale::qimageCalcXPoints(int sw, int dw)
+{
+    int *p, j = 0, rv = 0;
+    qint64 val, inc;
+
+    if (dw < 0) {
+        dw = -dw;
+        rv = 1;
+    }
+    p = new int[dw+1];
+
+    int up = qAbs(dw) >= sw;
+    // Widen before multiplying: 0x8000 * sw in plain int overflows for sw >= 65536.
+    val = up ? (qint64)0x8000 * sw / dw - 0x8000 : 0;
+    inc = (((qint64)sw) << 16) / dw;
+    for (int i = 0; i < dw; i++) {
+        p[j++] = qMax(0LL, val >> 16);
+        val += inc;
+    }
+
+    if (rv) {
+        // Mirror the table in place.
+        for (int i = dw / 2; --i >= 0; ) {
+            int tmp = p[i];
+            p[i] = p[dw - i - 1];
+            p[dw - i - 1] = tmp;
+        }
+    }
+    return p;
+}
+
+// Build the per-destination-pixel weight table for one axis.
+//   s:  source size along the axis, d: destination size (negative = mirrored), up: non-zero when upscaling.
+// Upscaling:   each entry is the 0..255 blend weight of the *next* source sample; it is forced
+//              to 0 before the first and from the last source sample on, so that the scalers
+//              never blend with a neighbour outside the image.
+// Downscaling: each entry packs two 14-bit fixed-point weights (1 << 14 == one destination pixel):
+//              low 16 bits  = weight of the first covered source sample,
+//              high 16 bits = Cp, the weight of one fully covered source sample.
+// Returns a new[]-allocated array of |d| ints; d must not be 0, and s must not be 0 when downscaling.
+static int* QImageScale::qimageCalcApoints(int s, int d, int up)
+{
+    int *p, j = 0, rv = 0;
+
+    if (d < 0) {
+        rv = 1;
+        d = -d;
+    }
+    p = new int[d];
+
+    if (up) {
+        /* scaling up */
+        // Widen before multiplying: 0x8000 * s in plain int overflows for s >= 65536.
+        qint64 val = (qint64)0x8000 * s / d - 0x8000;
+        qint64 inc = (((qint64)s) << 16) / d;
+        for (int i = 0; i < d; i++) {
+            int pos = val >> 16;
+            if (pos < 0)
+                p[j++] = 0;
+            else if (pos >= (s - 1))
+                p[j++] = 0;
+            else
+                // Bits 8..15 of the 16.16 position, i.e. the fractional part scaled to 0..255.
+                p[j++] = (val >> 8) - ((val >> 8) & 0xffffff00);
+            val += inc;
+        }
+    } else {
+        /* scaling down */
+        qint64 val = 0;
+        qint64 inc = (((qint64)s) << 16) / d;
+        // ceil((d << 14) / s)
+        int Cp = (((d << 14) + s - 1) / s);
+        for (int i = 0; i < d; i++) {
+            int ap = ((0x10000 - (val & 0xffff)) * Cp) >> 16;
+            p[j] = ap | (Cp << 16);
+            j++;
+            val += inc;
+        }
+    }
+    if (rv) {
+        // Mirror the table in place.
+        int tmp;
+        for (int i = d / 2; --i >= 0; ) {
+            tmp = p[i];
+            p[i] = p[d - i - 1];
+            p[d - i - 1] = tmp;
+        }
+    }
+    return p;
+}
+
+// Release a QImageScaleInfo and its four tables; accepts a null pointer.
+// Always returns 0 so callers can write "return qimageFreeScaleInfo(isi);" on error paths.
+static QImageScaleInfo* QImageScale::qimageFreeScaleInfo(QImageScaleInfo *isi)
+{
+    if (isi) {
+        delete[] isi->xpoints;
+        delete[] isi->ypoints;
+        delete[] isi->xapoints;
+        delete[] isi->yapoints;
+        delete isi;
+    }
+    return 0;
+}
+
+// Precompute everything the scalers need to go from a sw x sh image to a dw x dh one.
+//   img: source pixels, treated as packed 32-bit pixels with a stride of sw pixels.
+//   aa:  non-zero to also build the weight tables (xapoints / yapoints).
+// Sets xup_yup to a bit mask: bit 0 = upscaling horizontally, bit 1 = upscaling vertically.
+// The result must be released with qimageFreeScaleInfo().
+// NOTE(review): when aa is 0 the weight tables are not assigned here, so releasing the result
+// is only safe if QImageScaleInfo (declared in qimagescale_p.h, not visible here) initializes
+// its pointers - confirm.
+static QImageScaleInfo* QImageScale::qimageCalcScaleInfo(const unsigned char* img,
+                                                         int sw, int sh,
+                                                         int dw, int dh, char aa)
+{
+    QImageScaleInfo *isi;
+    int scw, sch;
+
+    scw = dw;
+    sch = dh;
+
+    // NOTE: a plain new-expression reports failure by throwing, so this null check and the
+    // ones on the tables below are purely defensive.
+    isi = new QImageScaleInfo;
+    if (!isi)
+        return 0;
+
+    isi->xup_yup = (qAbs(dw) >= sw) + ((qAbs(dh) >= sh) << 1);
+
+    isi->xpoints = qimageCalcXPoints(sw, scw);
+    if (!isi->xpoints)
+        return qimageFreeScaleInfo(isi);
+    // NOTE: We use sw directly as a simplification. Technically, it's img bytes-per-lines / bytes-per-pixel
+    //       (i.e., img's width * number of color components / sizeof(uint32_t) for unpadded packed pixels).
+    //       As we enforce 32bpp input, n is 4, as is sizeof(uint32_t), hence using width directly ;).
+    // NOTE: Qt's Rgba64 codepath *still* divides by 4, so, err, double-check that?
+    isi->ypoints = qimageCalcYPoints((const unsigned int *)img,
+                                     sw, sh, sch);
+    if (!isi->ypoints)
+        return qimageFreeScaleInfo(isi);
+    if (aa) {
+        isi->xapoints = qimageCalcApoints(sw, scw, isi->xup_yup & 1);
+        if (!isi->xapoints)
+            return qimageFreeScaleInfo(isi);
+        isi->yapoints = qimageCalcApoints(sh, sch, isi->xup_yup & 2);
+        if (!isi->yapoints)
+            return qimageFreeScaleInfo(isi);
+    }
+    return isi;
+}
+
+
+// Common parameters of every qt_qimageScaleAA* routine below:
+//   isi:    tables built by qimageCalcScaleInfo() with aa != 0.
+//   dest:   destination buffer of 32-bit pixels.
+//   dw, dh: number of destination columns / rows to produce.
+//   dow:    destination stride, in pixels.
+//   sow:    source stride, in pixels.
+static void qt_qimageScaleAARGBA_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest,
+                                             int dw, int dh, int dow, int sow);
+
+static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
+                                             int dw, int dh, int dow, int sow);
+
+static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *dest,
+                                         int dw, int dh, int dow, int sow);
+
+// SIMD variants. The boolean template parameter cannot be deduced from the arguments, so
+// every call names it explicitly.
+// NOTE(review): the "<bool RGB>" parameter lists (and the "<false>" arguments at the call
+// sites in qt_qimageScaleAARGBA) had been stripped, leaving a bare "template" keyword; they
+// are restored to the form used by upstream Qt. Confirm they match the definitions in
+// qimagescale_sse4.cpp / qimagescale_neon.cpp.
+#ifndef FBINK_QIS_NO_SIMD
+#if defined(__SSE4_1__)
+template<bool RGB>
+void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
+                                           int dw, int dh, int dow, int sow);
+template<bool RGB>
+void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
+                                           int dw, int dh, int dow, int sow);
+template<bool RGB>
+void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
+                                       int dw, int dh, int dow, int sow);
+#endif
+
+#if defined(__ARM_NEON__)
+template<bool RGB>
+void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *dest,
+                                           int dw, int dh, int dow, int sow);
+template<bool RGB>
+void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *dest,
+                                           int dw, int dh, int dow, int sow);
+template<bool RGB>
+void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest,
+                                       int dw, int dh, int dow, int sow);
+#endif
+#endif
+
+// Upscaling on both axes: bilinear interpolation between up to four neighbouring source pixels.
+// A zero weight on an axis means "no blend on that axis", which also keeps reads inside the
+// image at the right / bottom edge (see qimageCalcApoints()).
+static void qt_qimageScaleAARGBA_up_xy(QImageScaleInfo *isi, unsigned int *dest,
+                                       int dw, int dh, int dow, int sow)
+{
+    const unsigned int **ypoints = isi->ypoints;
+    int *xpoints = isi->xpoints;
+    int *xapoints = isi->xapoints;
+    int *yapoints = isi->yapoints;
+
+    /* go through every scanline in the output buffer */
+    for (int y = 0; y < dh; y++) {
+        /* calculate the source line we'll scan from */
+        const unsigned int *sptr = ypoints[y];
+        unsigned int *dptr = dest + (y * dow);
+        const int yap = yapoints[y];
+        if (yap > 0) {
+            // Blend with the source row below.
+            for (int x = 0; x < dw; x++) {
+                const unsigned int *pix = sptr + xpoints[x];
+                const int xap = xapoints[x];
+                if (xap > 0)
+                    *dptr = interpolate_4_pixels(pix, pix + sow, xap, yap);
+                else
+                    *dptr = INTERPOLATE_PIXEL_256(pix[0], 256 - yap, pix[sow], yap);
+                dptr++;
+            }
+        } else {
+            // Single source row: horizontal blend only, or a straight copy.
+            for (int x = 0; x < dw; x++) {
+                const unsigned int *pix = sptr + xpoints[x];
+                const int xap = xapoints[x];
+                if (xap > 0)
+                    *dptr = INTERPOLATE_PIXEL_256(pix[0], 256 - xap, pix[1], xap);
+                else
+                    *dptr = pix[0];
+                dptr++;
+            }
+        }
+    }
+}
+
+/* scale by area sampling - with alpha */
+// Dispatches on isi->xup_yup (bit 0 = upscaling in x, bit 1 = upscaling in y) and, unless
+// FBINK_QIS_NO_SIMD is defined, prefers the SSE4.1 or NEON variant selected at compile time.
+static void qt_qimageScaleAARGBA(QImageScaleInfo *isi, unsigned int *dest,
+                                 int dw, int dh, int dow, int sow)
+{
+    /* scaling up both ways */
+    if (isi->xup_yup == 3) {
+        qt_qimageScaleAARGBA_up_xy(isi, dest, dw, dh, dow, sow);
+    }
+    /* if we're scaling down vertically */
+    else if (isi->xup_yup == 1) {
+#ifndef FBINK_QIS_NO_SIMD
+#if defined(__SSE4_1__)
+        qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(isi, dest, dw, dh, dow, sow);
+#elif defined(__ARM_NEON__)
+        qt_qimageScaleAARGBA_up_x_down_y_neon<false>(isi, dest, dw, dh, dow, sow);
+#else
+        qt_qimageScaleAARGBA_up_x_down_y(isi, dest, dw, dh, dow, sow);
+#endif
+#else
+        qt_qimageScaleAARGBA_up_x_down_y(isi, dest, dw, dh, dow, sow);
+#endif
+    }
+    /* if we're scaling down horizontally */
+    else if (isi->xup_yup == 2) {
+#ifndef FBINK_QIS_NO_SIMD
+#if defined(__SSE4_1__)
+        qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(isi, dest, dw, dh, dow, sow);
+#elif defined(__ARM_NEON__)
+        qt_qimageScaleAARGBA_down_x_up_y_neon<false>(isi, dest, dw, dh, dow, sow);
+#else
+        qt_qimageScaleAARGBA_down_x_up_y(isi, dest, dw, dh, dow, sow);
+#endif
+#else
+        qt_qimageScaleAARGBA_down_x_up_y(isi, dest, dw, dh, dow, sow);
+#endif
+    }
+    /* if we're scaling down horizontally & vertically */
+    else {
+#ifndef FBINK_QIS_NO_SIMD
+#if defined(__SSE4_1__)
+        qt_qimageScaleAARGBA_down_xy_sse4<false>(isi, dest, dw, dh, dow, sow);
+#elif defined(__ARM_NEON__)
+        qt_qimageScaleAARGBA_down_xy_neon<false>(isi, dest, dw, dh, dow, sow);
+#else
+        qt_qimageScaleAARGBA_down_xy(isi, dest, dw, dh, dow, sow);
+#endif
+#else
+        qt_qimageScaleAARGBA_down_xy(isi, dest, dw, dh, dow, sow);
+#endif
+    }
+}
+
+// Area-sample one run of source pixels into per-channel weighted sums.
+//   pix:  first source pixel of the run; successive pixels are `step` pixels apart
+//         (1 to walk along a row, the source stride to walk down a column).
+//   xyap: weight of the first pixel, Cxy: weight of every fully covered pixel
+//         (both as produced by the downscaling branch of qimageCalcApoints()).
+//   r, g, b, a: outputs; each receives sum(channel * weight). The weights handed out add up
+//         to 1 << 14, the last pixel taking whatever remains.
+inline static void qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, int &r, int &g, int &b, int &a)
+{
+    r = qRed(*pix)   * xyap;
+    g = qGreen(*pix) * xyap;
+    b = qBlue(*pix)  * xyap;
+    a = qAlpha(*pix) * xyap;
+    int j;
+    for (j = (1 << 14) - xyap; j > Cxy; j -= Cxy) {
+        pix += step;
+        r += qRed(*pix)   * Cxy;
+        g += qGreen(*pix) * Cxy;
+        b += qBlue(*pix)  * Cxy;
+        a += qAlpha(*pix) * Cxy;
+    }
+    // Last pixel of the run: remaining weight.
+    pix += step;
+    r += qRed(*pix)   * j;
+    g += qGreen(*pix) * j;
+    b += qBlue(*pix)  * j;
+    a += qAlpha(*pix) * j;
+}
+
+// Upscaling horizontally, downscaling vertically: area-sample a column of source pixels,
+// then (when xap > 0) blend it with the area-sampled column to its right.
+static void qt_qimageScaleAARGBA_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest,
+                                             int dw, int dh, int dow, int sow)
+{
+    const unsigned int **ypoints = isi->ypoints;
+    int *xpoints = isi->xpoints;
+    int *xapoints = isi->xapoints;
+    int *yapoints = isi->yapoints;
+
+    /* go through every scanline in the output buffer */
+    for (int y = 0; y < dh; y++) {
+        // Unpack the two vertical weights (see qimageCalcApoints()).
+        int Cy = yapoints[y] >> 16;
+        int yap = yapoints[y] & 0xffff;
+
+        unsigned int *dptr = dest + (y * dow);
+        for (int x = 0; x < dw; x++) {
+            const unsigned int *sptr = ypoints[y] + xpoints[x];
+            int r, g, b, a;
+            qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, r, g, b, a);
+
+            int xap = xapoints[x];
+            if (xap > 0) {
+                int rr, gg, bb, aa;
+                qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, rr, gg, bb, aa);
+
+                r = r * (256 - xap);
+                g = g * (256 - xap);
+                b = b * (256 - xap);
+                a = a * (256 - xap);
+                r = (r + (rr * xap)) >> 8;
+                g = (g + (gg * xap)) >> 8;
+                b = (b + (bb * xap)) >> 8;
+                a = (a + (aa * xap)) >> 8;
+            }
+            // Drop the 14 fractional bits of the area-sampling weights.
+            *dptr++ = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
+        }
+    }
+}
+
+// Downscaling horizontally, upscaling vertically: area-sample a row segment, then
+// (when yap > 0) blend it with the area-sampled segment of the row below.
+static void qt_qimageScaleAARGBA_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
+                                             int dw, int dh, int dow, int sow)
+{
+    const unsigned int **ypoints = isi->ypoints;
+    int *xpoints = isi->xpoints;
+    int *xapoints = isi->xapoints;
+    int *yapoints = isi->yapoints;
+
+    /* go through every scanline in the output buffer */
+    for (int y = 0; y < dh; y++) {
+        unsigned int *dptr = dest + (y * dow);
+        for (int x = 0; x < dw; x++) {
+            // Unpack the two horizontal weights (see qimageCalcApoints()).
+            int Cx = xapoints[x] >> 16;
+            int xap = xapoints[x] & 0xffff;
+
+            const unsigned int *sptr = ypoints[y] + xpoints[x];
+            int r, g, b, a;
+            qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, r, g, b, a);
+
+            int yap = yapoints[y];
+            if (yap > 0) {
+                int rr, gg, bb, aa;
+                qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, rr, gg, bb, aa);
+
+                r = r * (256 - yap);
+                g = g * (256 - yap);
+                b = b * (256 - yap);
+                a = a * (256 - yap);
+                r = (r + (rr * yap)) >> 8;
+                g = (g + (gg * yap)) >> 8;
+                b = (b + (bb * yap)) >> 8;
+                a = (a + (aa * yap)) >> 8;
+            }
+            // Drop the 14 fractional bits of the area-sampling weights.
+            *dptr = qRgba(r >> 14, g >> 14, b >> 14, a >> 14);
+            dptr++;
+        }
+    }
+}
+
+// Downscaling on both axes: area-sample each covered row horizontally, then accumulate the
+// rows with the vertical weights. Row sums are shifted right by 4 before the second
+// weighting, so the final shift is 14 + 14 - 4 = 24 bits.
+static void qt_qimageScaleAARGBA_down_xy(QImageScaleInfo *isi, unsigned int *dest,
+                                         int dw, int dh, int dow, int sow)
+{
+    const unsigned int **ypoints = isi->ypoints;
+    int *xpoints = isi->xpoints;
+    int *xapoints = isi->xapoints;
+    int *yapoints = isi->yapoints;
+
+    for (int y = 0; y < dh; y++) {
+        int Cy = (yapoints[y]) >> 16;
+        int yap = (yapoints[y]) & 0xffff;
+
+        unsigned int *dptr = dest + (y * dow);
+        for (int x = 0; x < dw; x++) {
+            int Cx = xapoints[x] >> 16;
+            int xap = xapoints[x] & 0xffff;
+
+            const unsigned int *sptr = ypoints[y] + xpoints[x];
+            int rx, gx, bx, ax;
+            qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
+
+            // First covered row.
+            int r = ((rx>>4) * yap);
+            int g = ((gx>>4) * yap);
+            int b = ((bx>>4) * yap);
+            int a = ((ax>>4) * yap);
+
+            // Fully covered rows.
+            int j;
+            for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
+                sptr += sow;
+                qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
+                r += ((rx>>4) * Cy);
+                g += ((gx>>4) * Cy);
+                b += ((bx>>4) * Cy);
+                a += ((ax>>4) * Cy);
+            }
+            // Last covered row: remaining weight.
+            sptr += sow;
+            qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, rx, gx, bx, ax);
+
+            r += ((rx>>4) * j);
+            g += ((gx>>4) * j);
+            b += ((bx>>4) * j);
+            a += ((ax>>4) * j);
+
+            *dptr = qRgba(r >> 24, g >> 24, b >> 24, a >> 24);
+            dptr++;
+        }
+    }
+}
+
+static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest,
+                                            int dw, int dh, int dow, int sow);
+
+static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest,
+                                            int dw, int dh, int dow, int sow);
+
+static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest,
+                                        int dw, int dh, int dow, int sow);
+
+// NOTE(review): the SIMD calls in the function below also appear to have lost their explicit
+// template argument. The function continues past the end of this excerpt, so it is left
+// exactly as found - fix it together with the rest of its body.
+/* scale by area sampling - IGNORE the ALPHA byte*/
+static void qt_qimageScaleAARGB(QImageScaleInfo *isi, unsigned int *dest,
+                                int dw, int dh, int dow, int sow)
+{
+    /* scaling up both ways */
+    if (isi->xup_yup == 3) {
+        qt_qimageScaleAARGBA_up_xy(isi, dest, dw, dh, dow, sow);
+    }
+    /* if we're scaling down vertically */
+    else if (isi->xup_yup == 1) {
+#ifndef FBINK_QIS_NO_SIMD
+#if defined(__SSE4_1__)
+        qt_qimageScaleAARGBA_up_x_down_y_sse4(isi, dest, dw, dh, dow, sow);
+#elif defined(__ARM_NEON__)
+        qt_qimageScaleAARGBA_up_x_down_y_neon(isi, dest, dw, dh, dow, sow);
+#else
+        qt_qimageScaleAARGB_up_x_down_y(isi, dest, dw, dh, dow, sow);
+#endif
+#else
+        qt_qimageScaleAARGB_up_x_down_y(isi, dest, dw, dh, dow, sow);
+#endif
+    }
+    /* if we're scaling down horizontally */
+    else if (isi->xup_yup == 2) {
+#ifndef FBINK_QIS_NO_SIMD
+#if defined(__SSE4_1__)
+        qt_qimageScaleAARGBA_down_x_up_y_sse4(isi, dest, dw, dh, dow, sow);
+#elif defined(__ARM_NEON__)
+
qt_qimageScaleAARGBA_down_x_up_y_neon(isi, dest, dw, dh, dow, sow); +#else + qt_qimageScaleAARGB_down_x_up_y(isi, dest, dw, dh, dow, sow); +#endif +#else + qt_qimageScaleAARGB_down_x_up_y(isi, dest, dw, dh, dow, sow); +#endif + } + /* if we're scaling down horizontally & vertically */ + else { +#ifndef FBINK_QIS_NO_SIMD +#if defined(__SSE4_1__) + qt_qimageScaleAARGBA_down_xy_sse4(isi, dest, dw, dh, dow, sow); +#elif defined(__ARM_NEON__) + qt_qimageScaleAARGBA_down_xy_neon(isi, dest, dw, dh, dow, sow); +#else + qt_qimageScaleAARGB_down_xy(isi, dest, dw, dh, dow, sow); +#endif +#else + qt_qimageScaleAARGB_down_xy(isi, dest, dw, dh, dow, sow); +#endif + } +} + + +inline static void qt_qimageScaleAARGB_helper(const unsigned int *pix, int xyap, int Cxy, int step, int &r, int &g, int &b) +{ + r = qRed(*pix) * xyap; + g = qGreen(*pix) * xyap; + b = qBlue(*pix) * xyap; + int j; + for (j = (1 << 14) - xyap; j > Cxy; j -= Cxy) { + pix += step; + r += qRed(*pix) * Cxy; + g += qGreen(*pix) * Cxy; + b += qBlue(*pix) * Cxy; + } + pix += step; + r += qRed(*pix) * j; + g += qGreen(*pix) * j; + b += qBlue(*pix) * j; +} + +static void qt_qimageScaleAARGB_up_x_down_y(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + /* go through every scanline in the output buffer */ + for (int y = 0; y < dh; y++) { + int Cy = yapoints[y] >> 16; + int yap = yapoints[y] & 0xffff; + + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + const unsigned int *sptr = ypoints[y] + xpoints[x]; + int r, g, b; + qt_qimageScaleAARGB_helper(sptr, yap, Cy, sow, r, g, b); + + int xap = xapoints[x]; + if (xap > 0) { + int rr, bb, gg; + qt_qimageScaleAARGB_helper(sptr + 1, yap, Cy, sow, rr, gg, bb); + + r = r * (256 - xap); + g = g * (256 - xap); + b = b * (256 - xap); + r = (r + (rr * xap)) >> 8; + g = (g + (gg * 
xap)) >> 8; + b = (b + (bb * xap)) >> 8; + } + *dptr++ = qRgb(r >> 14, g >> 14, b >> 14); + } + } +} + +static void qt_qimageScaleAARGB_down_x_up_y(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + /* go through every scanline in the output buffer */ + for (int y = 0; y < dh; y++) { + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + int Cx = xapoints[x] >> 16; + int xap = xapoints[x] & 0xffff; + + const unsigned int *sptr = ypoints[y] + xpoints[x]; + int r, g, b; + qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, r, g, b); + + int yap = yapoints[y]; + if (yap > 0) { + int rr, bb, gg; + qt_qimageScaleAARGB_helper(sptr + sow, xap, Cx, 1, rr, gg, bb); + + r = r * (256 - yap); + g = g * (256 - yap); + b = b * (256 - yap); + r = (r + (rr * yap)) >> 8; + g = (g + (gg * yap)) >> 8; + b = (b + (bb * yap)) >> 8; + } + *dptr++ = qRgb(r >> 14, g >> 14, b >> 14); + } + } +} + +static void qt_qimageScaleAARGB_down_xy(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + for (int y = 0; y < dh; y++) { + int Cy = yapoints[y] >> 16; + int yap = yapoints[y] & 0xffff; + + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + int Cx = xapoints[x] >> 16; + int xap = xapoints[x] & 0xffff; + + const unsigned int *sptr = ypoints[y] + xpoints[x]; + int rx, gx, bx; + qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, rx, gx, bx); + + int r = (rx >> 4) * yap; + int g = (gx >> 4) * yap; + int b = (bx >> 4) * yap; + + int j; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) { + sptr += sow; + qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, rx, gx, bx); + + r += (rx >> 4) * Cy; + g += (gx >> 4) * Cy; + b += (bx >> 4) * 
Cy; + } + sptr += sow; + qt_qimageScaleAARGB_helper(sptr, xap, Cx, 1, rx, gx, bx); + + r += (rx >> 4) * j; + g += (gx >> 4) * j; + b += (bx >> 4) * j; + + *dptr = qRgb(r >> 24, g >> 24, b >> 24); + dptr++; + } + } +} + +unsigned char* qSmoothScaleImage(const unsigned char* src, int sw, int sh, bool ignore_alpha, int dw, int dh) +{ + unsigned char* buffer = nullptr; + if (src == nullptr || dw <= 0 || dh <= 0) + return buffer; + + // NOTE: We enforce 32bpp input buffers, because that's what Qt uses, even for RGB with no alpha. + // (the pixelformat constant is helpfully named RGB32 to remind you of that ;)). + QImageScaleInfo *scaleinfo = + qimageCalcScaleInfo(src, sw, sh, dw, dh, true); + if (!scaleinfo) + return buffer; + + // SSE/NEON friendly alignment, just in case... + void *ptr; + // NOTE: Output format is always RGBA! So make enough room for 4 bytes per pixel ;). + if (posix_memalign(&ptr, 16, dw * dh * 4) != 0) { + std::cerr << "qSmoothScaleImage: out of memory, returning null!" << std::endl; + qimageFreeScaleInfo(scaleinfo); + return nullptr; + } else { + buffer = (unsigned char*) ptr; + } + + // NOTE: See comment in qimageCalcScaleInfo regarding our simplification of using sw directly. + // Here, the Rgba64 codepath *does* divide by 8, because it casts buffer to QRgba64 *, + // which I imagine is an uint64_t ;). + if (!ignore_alpha) { + qt_qimageScaleAARGBA(scaleinfo, (unsigned int *)buffer, + dw, dh, dw, sw); + } else { + // NOTE: Input buffer is still 32bpp, we just skip *processing* of the alpha channel. 
+ qt_qimageScaleAARGB(scaleinfo, (unsigned int *)buffer, + dw, dh, dw, sw); + } + + qimageFreeScaleInfo(scaleinfo); + return buffer; +} + +} diff --git a/crengine/qimagescale/qimagescale_neon.cpp b/crengine/qimagescale/qimagescale_neon.cpp new file mode 100644 index 000000000..bfde5e858 --- /dev/null +++ b/crengine/qimagescale/qimagescale_neon.cpp @@ -0,0 +1,212 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. 
The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qimagescale_p.h" + +#if defined(__ARM_NEON__) + +namespace CRe { + +using namespace QImageScale; + +inline static uint32x4_t qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step) +{ + uint32x2_t vpix32 = vmov_n_u32(*pix); + uint16x4_t vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32))); + uint32x4_t vx = vmull_n_u16(vpix16, xyap); + int i; + for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) { + pix += step; + vpix32 = vmov_n_u32(*pix); + vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32))); + vx = vaddq_u32(vx, vmull_n_u16(vpix16, Cxy)); + } + pix += step; + vpix32 = vmov_n_u32(*pix); + vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32))); + vx = vaddq_u32(vx, vmull_n_u16(vpix16, i)); + return vx; +} + +template +void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + /* go through every scanline in the output buffer */ + for (int y = 0; y < dh; y++) { + int Cy = yapoints[y] >> 16; + int yap = yapoints[y] & 0xffff; + + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + const unsigned int *sptr = ypoints[y] + xpoints[x]; + uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow); + + int xap = xapoints[x]; + if (xap > 0) { + uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, 
sow); + + vx = vmulq_n_u32(vx, 256 - xap); + vr = vmulq_n_u32(vr, xap); + vx = vaddq_u32(vx, vr); + vx = vshrq_n_u32(vx, 8); + } + vx = vshrq_n_u32(vx, 14); + const uint16x4_t vx16 = vmovn_u32(vx); + const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); + *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); + if (RGB) + *dptr |= 0xff000000; + dptr++; + } + } +} + +template +void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + /* go through every scanline in the output buffer */ + for (int y = 0; y < dh; y++) { + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + int Cx = xapoints[x] >> 16; + int xap = xapoints[x] & 0xffff; + + const unsigned int *sptr = ypoints[y] + xpoints[x]; + uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); + + int yap = yapoints[y]; + if (yap > 0) { + uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1); + + vx = vmulq_n_u32(vx, 256 - yap); + vr = vmulq_n_u32(vr, yap); + vx = vaddq_u32(vx, vr); + vx = vshrq_n_u32(vx, 8); + } + vx = vshrq_n_u32(vx, 14); + const uint16x4_t vx16 = vmovn_u32(vx); + const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); + *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); + if (RGB) + *dptr |= 0xff000000; + dptr++; + } + } +} + +template +void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + for (int y = 0; y < dh; y++) { + int Cy = yapoints[y] >> 16; + int yap = yapoints[y] & 0xffff; + + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + const int Cx = xapoints[x] >> 16; + const int xap = xapoints[x] & 
0xffff; + + const unsigned int *sptr = ypoints[y] + xpoints[x]; + uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); + vx = vshrq_n_u32(vx, 4); + uint32x4_t vr = vmulq_n_u32(vx, yap); + + int j; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) { + sptr += sow; + vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); + vx = vshrq_n_u32(vx, 4); + vx = vmulq_n_u32(vx, Cy); + vr = vaddq_u32(vr, vx); + } + sptr += sow; + vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1); + vx = vshrq_n_u32(vx, 4); + vx = vmulq_n_u32(vx, j); + vr = vaddq_u32(vr, vx); + + vx = vshrq_n_u32(vr, 24); + const uint16x4_t vx16 = vmovn_u32(vx); + const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16)); + *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0); + if (RGB) + *dptr |= 0xff000000; + dptr++; + } + } +} + +template void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); +} + +#endif diff --git a/crengine/qimagescale/qimagescale_p.h b/crengine/qimagescale/qimagescale_p.h new file mode 100644 index 000000000..aa8d8fbc7 --- /dev/null +++ b/crengine/qimagescale/qimagescale_p.h @@ -0,0 +1,69 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. 
/*
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/
#ifndef QIMAGESCALE_P_H
#define QIMAGESCALE_P_H

//
//  W A R N I N G
//  -------------
//
// This file is not part of the Qt API.  It exists purely as an
// implementation detail.  This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
//

namespace CRe {

namespace QImageScale {
    /// Lookup tables shared by the scaling kernels; everything starts out empty.
    struct QImageScaleInfo {
        /// Per output column: offset of the first contributing source pixel within its row.
        int *xpoints = nullptr;
        /// Per output row: pointer to the first contributing source row.
        const unsigned int **ypoints = nullptr;
        /// Per output column: either a 0..256 blend weight, or a packed (C << 16 | ap) pair
        /// when shrinking along x.
        int *xapoints = nullptr;
        /// Per output row: same encoding as xapoints, for the y axis.
        int *yapoints = nullptr;
        /// Scaling direction: 3 = enlarging both ways, 1 = shrinking vertically,
        /// 2 = shrinking horizontally, anything else = shrinking both ways.
        int xup_yup = 0;
    };
}

/// Smooth-scales a 32bpp image of sw x sh pixels to dw x dh pixels.
/// The result is a newly allocated RGBA buffer (4 bytes per pixel), or a null pointer on failure;
/// with ignore_alpha set, the source alpha channel is not processed.
unsigned char* qSmoothScaleImage(const unsigned char* src, int sw, int sh, bool ignore_alpha, int dw, int dh);

}

#endif

// Patch boundary, kept verbatim:
// diff --git a/crengine/qimagescale/qimagescale_sse4.cpp b/crengine/qimagescale/qimagescale_sse4.cpp
// new file mode 100644
// index 000000000..e61c969e7
// --- /dev/null
// +++ b/crengine/qimagescale/qimagescale_sse4.cpp
// @@ -0,0 +1,222 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
*/
+** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qimagescale_p.h" + +#if defined(__SSE4_1__) + +namespace CRe { + +using namespace QImageScale; + +inline static __m128i Q_DECL_VECTORCALL +qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy) +{ + __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); + __m128i vx = _mm_mullo_epi32(vpix, vxyap); + int i; + for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) { + pix += step; + vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); + vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy)); + } + pix += step; + vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix)); + vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i))); + return vx; +} + +template +void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + const __m128i v256 = _mm_set1_epi32(256); + + /* go through every scanline in the output buffer */ + for (int y = 0; y < dh; y++) { + int Cy = yapoints[y] >> 16; + int yap = yapoints[y] & 0xffff; + const __m128i vCy = 
_mm_set1_epi32(Cy); + const __m128i vyap = _mm_set1_epi32(yap); + + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + const unsigned int *sptr = ypoints[y] + xpoints[x]; + __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy); + + int xap = xapoints[x]; + if (xap > 0) { + const __m128i vxap = _mm_set1_epi32(xap); + const __m128i vinvxap = _mm_sub_epi32(v256, vxap); + __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy); + + vx = _mm_mullo_epi32(vx, vinvxap); + vr = _mm_mullo_epi32(vr, vxap); + vx = _mm_add_epi32(vx, vr); + vx = _mm_srli_epi32(vx, 8); + } + vx = _mm_srli_epi32(vx, 14); + vx = _mm_packus_epi32(vx, _mm_setzero_si128()); + vx = _mm_packus_epi16(vx, _mm_setzero_si128()); + *dptr = _mm_cvtsi128_si32(vx); + if (RGB) + *dptr |= 0xff000000; + dptr++; + } + } +} + +template +void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + const __m128i v256 = _mm_set1_epi32(256); + + /* go through every scanline in the output buffer */ + for (int y = 0; y < dh; y++) { + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + int Cx = xapoints[x] >> 16; + int xap = xapoints[x] & 0xffff; + const __m128i vCx = _mm_set1_epi32(Cx); + const __m128i vxap = _mm_set1_epi32(xap); + + const unsigned int *sptr = ypoints[y] + xpoints[x]; + __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); + + int yap = yapoints[y]; + if (yap > 0) { + const __m128i vyap = _mm_set1_epi32(yap); + const __m128i vinvyap = _mm_sub_epi32(v256, vyap); + __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx); + + vx = _mm_mullo_epi32(vx, vinvyap); + vr = _mm_mullo_epi32(vr, vyap); + vx = _mm_add_epi32(vx, vr); + vx = _mm_srli_epi32(vx, 8); + } + vx = _mm_srli_epi32(vx, 
14); + vx = _mm_packus_epi32(vx, _mm_setzero_si128()); + vx = _mm_packus_epi16(vx, _mm_setzero_si128()); + *dptr = _mm_cvtsi128_si32(vx); + if (RGB) + *dptr |= 0xff000000; + dptr++; + } + } +} + +template +void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow) +{ + const unsigned int **ypoints = isi->ypoints; + int *xpoints = isi->xpoints; + int *xapoints = isi->xapoints; + int *yapoints = isi->yapoints; + + for (int y = 0; y < dh; y++) { + int Cy = yapoints[y] >> 16; + int yap = yapoints[y] & 0xffff; + const __m128i vCy = _mm_set1_epi32(Cy); + const __m128i vyap = _mm_set1_epi32(yap); + + unsigned int *dptr = dest + (y * dow); + for (int x = 0; x < dw; x++) { + const int Cx = xapoints[x] >> 16; + const int xap = xapoints[x] & 0xffff; + const __m128i vCx = _mm_set1_epi32(Cx); + const __m128i vxap = _mm_set1_epi32(xap); + + const unsigned int *sptr = ypoints[y] + xpoints[x]; + __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); + __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap); + + int j; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) { + sptr += sow; + vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); + vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy)); + } + sptr += sow; + vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx); + vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j))); + + vr = _mm_srli_epi32(vr, 24); + vr = _mm_packus_epi32(vr, _mm_setzero_si128()); + vr = _mm_packus_epi16(vr, _mm_setzero_si128()); + *dptr = _mm_cvtsi128_si32(vr); + if (RGB) + *dptr |= 0xff000000; + dptr++; + } + } +} + +template void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void 
qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +template void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest, + int dw, int dh, int dow, int sow); + +} + +#endif diff --git a/crengine/qimagescale/qrgb.h b/crengine/qimagescale/qrgb.h new file mode 100644 index 000000000..fc90d3527 --- /dev/null +++ b/crengine/qimagescale/qrgb.h @@ -0,0 +1,67 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. 
/*
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#ifndef QRGB_H
#define QRGB_H

namespace CRe {

/// A colour packed into 32 bits as 0xAARRGGBB.
using QRgb = unsigned int;

/// Red channel: bits 16..23.
constexpr inline int qRed(QRgb rgb)
{
    return static_cast<int>((rgb >> 16) & 0xffu);
}

/// Green channel: bits 8..15.
constexpr inline int qGreen(QRgb rgb)
{
    return static_cast<int>((rgb >> 8) & 0xffu);
}

/// Blue channel: bits 0..7.
constexpr inline int qBlue(QRgb rgb)
{
    return static_cast<int>(rgb & 0xffu);
}

/// Alpha channel: bits 24..31.
constexpr inline int qAlpha(QRgb rgb)
{
    return static_cast<int>(rgb >> 24);
}

/// Packs four channels into a QRgb; only the low 8 bits of each argument are used.
constexpr inline QRgb qRgba(int r, int g, int b, int a)
{
    return ((static_cast<QRgb>(a) & 0xffu) << 24)
         | ((static_cast<QRgb>(r) & 0xffu) << 16)
         | ((static_cast<QRgb>(g) & 0xffu) << 8)
         |  (static_cast<QRgb>(b) & 0xffu);
}

/// Packs three channels into a fully opaque QRgb (alpha = 0xff).
constexpr inline QRgb qRgb(int r, int g, int b)
{
    return qRgba(r, g, b, 0xff);
}

}

#endif // QRGB_H

// Patch boundary, kept verbatim:
// diff --git a/crengine/src/lvdrawbuf.cpp b/crengine/src/lvdrawbuf.cpp
// index 6417b1724..1f2177df5 100644
// --- a/crengine/src/lvdrawbuf.cpp
// +++ b/crengine/src/lvdrawbuf.cpp
// @@ -33,6 +33,9 @@ void LVDrawBuf::RoundRect( int x0, int y0, int x1, int y1, int borderWidth, int
//      // TODO: draw rounded corners
//  }
// +// NOTE: For more accurate (but slightly more costly) conversions, see:
// +// stb does (lUInt8) (((r*77) + (g*150) + (b*29))
>> 8) (That's roughly the Rec601Luma algo) +// Qt5 does (lUInt8) (((r*11) + (g*16) + (b*5)) >> 5) (That's closer to Rec601Luminance or Rec709Luminance IIRC) static lUInt32 rgbToGray( lUInt32 color ) { lUInt32 r = (0xFF0000 & color) >> 16; @@ -400,6 +403,9 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback int * xmap; int * ymap; bool dither; + bool invert; + bool smoothscale; + lUInt8 * decoded; bool isNinePatch; public: static int * GenMap( int src_len, int dst_len ) @@ -446,8 +452,8 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback } return map; } - LVImageScaledDrawCallback(LVBaseDrawBuf * dstbuf, LVImageSourceRef img, int x, int y, int width, int height, bool dith ) - : src(img), dst(dstbuf), dst_x(x), dst_y(y), dst_dx(width), dst_dy(height), xmap(0), ymap(0), dither(dith) + LVImageScaledDrawCallback(LVBaseDrawBuf * dstbuf, LVImageSourceRef img, int x, int y, int width, int height, bool dith, bool inv, bool smooth ) + : src(img), dst(dstbuf), dst_x(x), dst_y(y), dst_dx(width), dst_dy(height), xmap(0), ymap(0), dither(dith), invert(inv), smoothscale(smooth), decoded(0) { src_dx = img->GetWidth(); src_dy = img->GetHeight(); @@ -458,18 +464,28 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback isNinePatch = true; ninePatch = np->frame; } + // If smoothscaling was requested, but no scaling was needed, disable the post-processing pass + if (smoothscale && src_dx == dst_dx && src_dy == dst_dy) { + smoothscale = false; + //fprintf( stderr, "Disabling smoothscale because no scaling was needed (%dx%d -> %dx%d)\n", src_dx, src_dy, dst_dx, dst_dy ); + } if ( src_dx != dst_dx || isNinePatch) { if (isNinePatch) xmap = GenNinePatchMap(src_dx, dst_dx, ninePatch.left, ninePatch.right); - else + else if (!smoothscale) xmap = GenMap( src_dx, dst_dx ); } if ( src_dy != dst_dy || isNinePatch) { if (isNinePatch) ymap = GenNinePatchMap(src_dy, dst_dy, ninePatch.top, ninePatch.bottom); - else + else if (!smoothscale) ymap = GenMap( 
src_dy, dst_dy ); } + // If we have a smoothscale post-processing pass, we'll need to build a buffer of the *full* decoded image. + if (smoothscale) { + // Byte-sized buffer, we're 32bpp, so, 4 bytes per pixel. + decoded = new lUInt8[src_dy * (src_dx * 4)]; + } } virtual ~LVImageScaledDrawCallback() { @@ -477,6 +493,8 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback delete[] xmap; if (ymap) delete[] ymap; + if (decoded) + delete[] decoded; } virtual void OnStartDecode( LVImageSource * ) { @@ -488,8 +506,15 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback if (y == 0 || y == src_dy-1) // ignore first and last lines return true; } + // Defer everything to the post-process pass for smooth scaling, we just have to store the line in our decoded buffer + if (smoothscale) { + //fprintf( stderr, "Smoothscale l_%d pass\n", y ); + memcpy(decoded + (y * (src_dx * 4)), data, (src_dx * 4)); + return true; + } int yy = -1; int yy2 = -1; + const lUInt32 rgba_invert = invert ? 
0x00FFFFFF : 0; if (ymap) { for (int i = 0; i < dst_dy; i++) { if (ymap[i] == y) { @@ -530,7 +555,7 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback row += dst_x; for (int x=0; x> 24)&0xFF; if ( xx=clip.right || alpha==0xFF ) @@ -551,7 +576,7 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback row += dst_x; for (int x=0; x> 24)&0xFF; if ( xx=clip.right || alpha==0xFF ) @@ -572,7 +597,7 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback for (int x=0; x> 24)&0xFF; if ( xx=clip.right || alpha==0xFF ) @@ -592,12 +617,15 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback } lUInt8 dcl; - if ( dither && bpp < 8) { + if ( dither && bpp < 8 ) { #if (GRAY_INVERSE==1) dcl = (lUInt8)DitherNBitColor( cl^0xFFFFFF, x, yy, bpp ); #else dcl = (lUInt8)DitherNBitColor( cl, x, yy, bpp ); #endif + } else if ( dither && bpp == 8 ) { + dcl = rgbToGray( cl ); + dcl = dither_o8x8( x, yy, dcl ); } else { dcl = rgbToGray( cl, bpp ); } @@ -612,7 +640,7 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback //row += dst_x; for (int x=0; x> 24)&0xFF; if ( xx=clip.right || alpha==0xFF ) @@ -652,7 +680,7 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback //row += dst_x; for (int x=0; x> 24)&0xFF; if ( xx=clip.right || (alpha&0x80) ) @@ -681,8 +709,41 @@ class LVImageScaledDrawCallback : public LVImageDecoderCallback } return true; } - virtual void OnEndDecode( LVImageSource *, bool ) + virtual void OnEndDecode( LVImageSource * obj, bool ) { + // If we're not smooth scaling, we're done! + if (!smoothscale) { + return; + } + + // Scale our decoded data... + lUInt8 * sdata = nullptr; + //fprintf( stderr, "Requesting smooth scaling (%dx%d -> %dx%d)\n", src_dx, src_dy, dst_dx, dst_dy ); + sdata = CRe::qSmoothScaleImage(decoded, src_dx, src_dy, false, dst_dx, dst_dy); + if (sdata == nullptr) { + // Hu oh... Scaling failed! Return *without* drawing anything! 
+ // We skipped map generation, so we can't easily fallback to nearest-neighbor... + //fprintf( stderr, "Smooth scaling failed :(\n" ); + return; + } + + // Process as usual, with a bit of a hack to avoid code duplication... + smoothscale = false; + for (int y=0; y < dst_dy; y++) { + lUInt8 * row = sdata + (y * (dst_dx * 4)); + this->OnLineDecoded( obj, y, (lUInt32 *) row ); + } + + // This prints the unscaled decoded buffer, for debugging purposes ;). + /* + for (int y=0; y < src_dy; y++) { + lUInt8 * row = decoded + (y * (src_dx * 4)); + this->OnLineDecoded( obj, y, (lUInt32 *) row ); + } + */ + + // And now that it's been rendered we can free the scaled buffer (it was allocated by CRe::qSmoothScaleImage). + free(sdata); } }; @@ -707,10 +768,8 @@ void LVGrayDrawBuf::Draw( LVImageSourceRef img, int x, int y, int width, int hei //fprintf( stderr, "LVGrayDrawBuf::Draw( img(%d, %d), %d, %d, %d, %d\n", img->GetWidth(), img->GetHeight(), x, y, width, height ); if ( width<=0 || height<=0 ) return; - LVImageScaledDrawCallback drawcb( this, img, x, y, width, height, dither ); + LVImageScaledDrawCallback drawcb( this, img, x, y, width, height, _ditherImages, _invertImages, _smoothImages ); img->Decode( &drawcb ); - if ( _invertImages ) - InvertRect(x, y, x+width, y+height); _drawnImagesCount++; _drawnImagesSurface += width*height; @@ -929,7 +988,7 @@ void LVGrayDrawBuf::InvertRect(int x0, int y0, int x1, int y1) lUInt8 * line = GetScanLine(y0); for (int y=y0; yGetWidth(), img->GetHeight(), x, y, width, height ); - LVImageScaledDrawCallback drawcb( this, img, x, y, width, height, dither ); + LVImageScaledDrawCallback drawcb( this, img, x, y, width, height, dither, _invertImages, _smoothImages ); img->Decode( &drawcb ); - if ( _invertImages ) - InvertRect(x, y, x+width, y+height); _drawnImagesCount++; _drawnImagesSurface += width*height; } diff --git a/crengine/src/lvimg.cpp b/crengine/src/lvimg.cpp index 3f0f58525..cbc3d27a3 100644 --- a/crengine/src/lvimg.cpp +++ 
b/crengine/src/lvimg.cpp @@ -810,7 +810,7 @@ class LVJpegImageSource : public LVNodeImageSource } callback->OnLineDecoded( this, y, row ); } - callback->OnEndDecode(this, true); + callback->OnEndDecode(this, false); } if ( buffer ) @@ -939,11 +939,11 @@ bool LVPngImageSource::Decode( LVImageDecoderCallback * callback ) { callback->OnLineDecoded( this, y, (lUInt32*) image[y] ); } - png_read_end(png_ptr, info_ptr); callback->OnEndDecode(this, false); - for (lUInt32 i=0; i