koreader · NiLuJe · Apr 15, 2019 · Apr 9, 2019 · Apr 11, 2019 · Apr 11, 2019
diff --git a/crengine/include/lvdrawbuf.h b/crengine/include/lvdrawbuf.h
@@ -99,6 +99,10 @@ class LVDrawBuf : public CacheableObject
     virtual void setHidePartialGlyphs( bool hide ) = 0;
     /// set to true to invert images only (so they get inverted back to normal by nightmode)
     virtual void setInvertImages( bool invert ) = 0;
+    /// set to true to enforce dithering (only relevant for 8bpp Gray drawBuf)
+    virtual void setDitherImages( bool dither ) = 0;
+    /// set to true to switch to a more costly smooth scaler instead of nearest neighbor
+    virtual void setSmoothScalingImages( bool smooth ) = 0;
     /// invert image
     virtual void  Invert() = 0;
     /// get buffer width, pixels
@@ -231,13 +235,19 @@ class LVBaseDrawBuf : public LVDrawBuf
     lUInt32 _textColor;
     bool _hidePartialGlyphs;
     bool _invertImages;
+    bool _ditherImages;
+    bool _smoothImages;
     int _drawnImagesCount;
     int _drawnImagesSurface;
 public:
     /// set to true for drawing in Paged mode, false for Scroll mode
     virtual void setHidePartialGlyphs( bool hide ) { _hidePartialGlyphs = hide; }
     /// set to true to invert images only (so they get inverted back to normal by nightmode)
     virtual void setInvertImages( bool invert ) { _invertImages = invert; }
+    /// set to true to enforce dithering (only relevant for 8bpp Gray drawBuf)
+    virtual void setDitherImages( bool dither ) { _ditherImages = dither; }
+    /// set to true to switch to a more costly smooth scaler instead of nearest neighbor
+    virtual void setSmoothScalingImages( bool smooth ) { _smoothImages = smooth; }
     /// returns current background color
     virtual lUInt32 GetBackgroundColor() { return _backgroundColor; }
     /// sets current background color
@@ -277,7 +287,8 @@ class LVBaseDrawBuf : public LVDrawBuf
     int getDrawnImagesSurface() { return _drawnImagesSurface; }
 
     LVBaseDrawBuf() : _dx(0), _dy(0), _rowsize(0), _data(NULL), _hidePartialGlyphs(true),
-                        _invertImages(false), _drawnImagesCount(0), _drawnImagesSurface(0) { }
+                        _invertImages(false), _ditherImages(false), _smoothImages(false),
+                        _drawnImagesCount(0), _drawnImagesSurface(0) { }
     virtual ~LVBaseDrawBuf() { }
 };
 
@@ -399,11 +410,12 @@ class LVGrayDrawBuf : public LVBaseDrawBuf
 //       c.f., https://github.com/koreader/koreader-base/pull/878#issuecomment-476723747
 #ifdef CR_RENDER_32BPP_RGB_PXFMT
 inline lUInt32 RevRGB( lUInt32 cl ) {
-    return ((cl>>16)&0x0000FF) | ((cl<<16)&0xFF0000) | (cl&0x00FF00);
+    return ((cl<<16)&0xFF0000) | ((cl>>16)&0x0000FF) | (cl&0x00FF00); // Swap B <-> R, keep G; the top (alpha) byte is cleared
 }
 
 inline lUInt32 RevRGBA( lUInt32 cl ) {
-    return (cl&0xFF000000) | ((cl>>16)&0x0000FF) | ((cl<<16)&0xFF0000) | (cl&0x00FF00);
+    // Swap B <-> R (bits 0-7 <-> bits 16-23); G & A stay in place thanks to the single 0xFF00FF00 mask
+    return ((cl<<16)&0x00FF0000) | ((cl>>16)&0x000000FF) | (cl&0xFF00FF00);
 }
 #else
 inline lUInt32 RevRGB( lUInt32 cl ) {
@@ -423,6 +435,65 @@ inline lUInt16 rgb888to565( lUInt32 cl ) {
     return (lUInt16)(((cl>>8)& 0xF800) | ((cl>>5 )& 0x07E0) | ((cl>>3 )& 0x001F));
 }
 
+#define DIV255(V) /* Fast rounded division by 255 using shifts & adds; V is evaluated once (GNU statement expression + C++11 auto) */ \
+({                                                                                                       \
+	auto _v = (V) + 128;                                                                             \
+	(((_v >> 8U) + _v) >> 8U);                                                                       \
+})
+
+// Quantize an 8-bit color value down to a palette of 16 evenly spaced colors, using an ordered 8x8 dithering pattern.
+// With a grayscale input, this happens to match the eInk palette perfectly ;).
+// If the input is not grayscale, and the output fb is not grayscale either,
+// this usually still happens to match the eInk palette after the EPDC's own quantization pass.
+// c.f., https://en.wikipedia.org/wiki/Ordered_dithering
+// & https://github.com/ImageMagick/ImageMagick/blob/ecfeac404e75f304004f0566557848c53030bad6/MagickCore/threshold.c#L1627
+// NOTE: As the references imply, this is straight from ImageMagick,
+//       with only minor simplifications to enforce Q8 & avoid fp maths.
+static inline lUInt8 dither_o8x8(int x, int y, lUInt8 v)
+{
+	// Ordered 8x8 threshold map (row-major, values 1..64),
+	// c.f., https://github.com/ImageMagick/ImageMagick/blob/ecfeac404e75f304004f0566557848c53030bad6/config/thresholds.xml#L107
+	static const lUInt8 threshold_map_o8x8[] = { 1,  49, 13, 61, 4,  52, 16, 64, 33, 17, 45, 29, 36, 20, 48, 32,
+						      9,  57, 5,  53, 12, 60, 8,  56, 41, 25, 37, 21, 44, 28, 40, 24,
+						      3,  51, 15, 63, 2,  50, 14, 62, 35, 19, 47, 31, 34, 18, 46, 30,
+						      11, 59, 7,  55, 10, 58, 6,  54, 43, 27, 39, 23, 42, 26, 38, 22 };
+
+	// ImageMagick constants, specialized for 8-bit quanta:
+	//   Levels (L) = 16; map Divisor (D) = 65; QuantumRange = 0xFF; QuantumScale = 1.0 / QuantumRange
+	//
+	// scaled = QuantumScale * v * ((L-1) * (D-1) + 1)
+	// NOTE: The product handed to DIV255 does not fit in a uint8_t, hence the wider type.
+	//       It is kept signed out of caution, even though no underflow is expected here.
+	const lInt16 scaled = (lInt16) DIV255(v * ((15U << 6) + 1U));
+	// level = scaled / (D-1);
+	const lInt16 level = (lInt16)(scaled >> 6);
+	// remainder = scaled - level * (D-1);
+	const lInt16 remainder = (lInt16)(scaled - (level << 6));
+
+	// The map is 8 pixels wide & high, so it tiles via (x % 8) + 8 * (y % 8).
+	const lUInt8 threshold = threshold_map_o8x8[(x & 7U) + 8U * (y & 7U)];
+	// c = ClampToQuantum((level + (remainder >= threshold)) * QuantumRange / (L-1));  (0xFF / 15 == 17)
+	const lInt16 q = (lInt16)((level + (remainder >= threshold)) * 17);
+
+	// NOTE: Per the original notes, clamping with plain comparisons is noticeably faster on ARM (at least)
+	//       than Pillow's CLIP8 macro, most likely because of the < 256 part of that macro.
+	if (q > 0xFF) {
+		return 0xFF;
+	}
+	if (q < 0) {
+		return 0U;
+	}
+	return (lUInt8) q;
+}
+
+// Declare our bit of scaler ripped from Qt5 (defined elsewhere; presumably the smooth scaler selected via setSmoothScalingImages -- TODO confirm).
+namespace CRe {
+lUInt8* qSmoothScaleImage(const lUInt8* src, int sw, int sh, bool ignore_alpha, int dw, int dh); // NOTE(review): looks like sw x sh source -> dw x dh result; buffer ownership is not visible here -- confirm in the implementation
+}
+
 /// 32-bit RGB buffer
 class LVColorDrawBuf : public LVBaseDrawBuf
 {

diff --git a/crengine/qimagescale/QIMAGETRANSFORM_LICENSE.txt b/crengine/qimagescale/QIMAGETRANSFORM_LICENSE.txt
@@ -0,0 +1,60 @@
+qimagetransform.cpp was contributed by Daniel M. Duley based on code from Imlib2.
+
+Copyright (C) 2004, 2005 Daniel M. Duley
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+
+Imlib2 License
+
+Copyright (C) 2000 Carsten Haitzler and various contributors (see
+AUTHORS)
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies of the Software and its Copyright notices. In addition
+publicly documented acknowledgment must be given that this software has
+been used if no source code of this software is made available publicly.
+This includes acknowledgments in either Copyright notices, Manuals,
+Publicity and Marketing documents or any documentation provided with any
+product containing this software. This License does not apply to any
+software that links to the libraries provided by this software
+(statically or dynamically), but only to the software provided.
+
+Please see the COPYING.PLAIN for a plain-english explanation of this
+notice and it's intent.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/crengine/qimagescale/qdrawhelper_p.h b/crengine/qimagescale/qdrawhelper_p.h
@@ -0,0 +1,182 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QDRAWHELPER_P_H
+#define QDRAWHELPER_P_H
+
+#include "qglobal.h"
+#if defined(__ARM_NEON) || defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+#if defined(__SSE2__)
+#include <immintrin.h>
+#include <x86intrin.h>
+#endif
+
+namespace CRe {
+
+#if defined(__GNUC__)
+#  if (defined(__i386) || defined(__i386__) || defined(_M_IX86)) && defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
+#    define Q_DECL_VECTORCALL __attribute__((sseregparm,regparm(3)))
+#  else
+#    define Q_DECL_VECTORCALL
+#  endif
+#elif defined(_MSC_VER)
+#  define Q_DECL_VECTORCALL __vectorcall
+#else
+#  define Q_DECL_VECTORCALL
+#endif
+
+#if __SIZEOF_POINTER__ == 8 // 64-bit versions
+
+// Blends two packed 32-bit pixels channel-wise: result = (x * a + y * b) >> 8 for each 8-bit channel.
+// The four channels are spread over the 16-bit lanes of a 64-bit word so a single multiply handles all of them.
+// NOTE(review): assumes a + b <= 256 so that no channel overflows its 16-bit lane -- confirm at the call sites.
+static inline __attribute__((always_inline)) uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+    const quint64 lanes = 0x00ff00ff00ff00ff;
+    const quint64 xs = (quint64(x) | (quint64(x) << 24)) & lanes;
+    const quint64 ys = (quint64(y) | (quint64(y) << 24)) & lanes;
+    // Weighted sum, then drop the 8 fractional bits and discard what spilled into the neighbouring lanes.
+    const quint64 blended = ((xs * a + ys * b) >> 8) & lanes;
+    // Fold the lanes back into a packed 32-bit pixel.
+    return uint(blended) | uint(blended >> 24);
+}
+
+#else // 32-bit versions
+
+// Blends two packed 32-bit pixels channel-wise: result = (x * a + y * b) >> 8 for each 8-bit channel.
+// Channels are processed two at a time (even bytes, then odd bytes) so the 16-bit products do not collide.
+// NOTE(review): assumes a + b <= 256 so that no channel overflows its 16-bit slot -- confirm at the call sites.
+static inline __attribute__((always_inline)) uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+    const uint pair_mask = 0xff00ff;
+    // Bytes 0 & 2: multiply in place, then shift the result back down.
+    const uint even = (((x & pair_mask) * a + (y & pair_mask) * b) >> 8) & pair_mask;
+    // Bytes 1 & 3: shift down first, so the weighted sum lands directly in the right position.
+    const uint odd = (((x >> 8) & pair_mask) * a + ((y >> 8) & pair_mask) * b) & 0xff00ff00;
+    return odd | even;
+}
+
+#endif
+
+// NOTE: Unlike the SIMD qimagescale_* routines, these ones seem to offer a very small performance gain.
+#if defined(__SSE2__)
+static inline __attribute__((always_inline)) uint interpolate_4_pixels_sse2(__m128i vt, __m128i vb, uint distx, uint disty)
+{ // Bilinear blend of a 2x2 pixel block; vt/vb carry the top/bottom pixel pairs in their low 64 bits (left pixel first).
+    // First interpolate top and bottom pixels in parallel (vertical pass, weighted by disty).
+    vt = _mm_unpacklo_epi8(vt, _mm_setzero_si128()); // widen each 8-bit channel to a 16-bit lane
+    vb = _mm_unpacklo_epi8(vb, _mm_setzero_si128());
+    vt = _mm_mullo_epi16(vt, _mm_set1_epi16(256 - disty));
+    vb = _mm_mullo_epi16(vb, _mm_set1_epi16(disty));
+    __m128i vlr = _mm_add_epi16(vt, vb);
+    vlr = _mm_srli_epi16(vlr, 8); // drop the 8 fractional bits of the weights
+    // vlr now contains the result of the first two interpolate calls vlr = unpacked((xright << 64) | xleft)
+
+    // Now the last interpolate between left and right (horizontal pass, weighted by distx).
+    const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(256 - distx), _MM_SHUFFLE(0, 0, 0, 0));
+    const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
+    const __m128i vmulx = _mm_unpacklo_epi16(vidistx, vdistx); // interleaved { 256 - distx, distx } weight pairs
+    vlr = _mm_unpacklo_epi16(vlr, _mm_srli_si128(vlr, 8));
+    // vlr now contains the colors of left and right interleaved { la, ra, lr, rr, lg, rg, lb, rb }
+    vlr = _mm_madd_epi16(vlr, vmulx); // Multiply and horizontal add.
+    vlr = _mm_srli_epi32(vlr, 8);
+    vlr = _mm_packs_epi32(vlr, vlr); // narrow 32 -> 16 bits...
+    vlr = _mm_packus_epi16(vlr, vlr); // ...then 16 -> 8 bits, repacking the channels into one 32-bit pixel
+    return _mm_cvtsi128_si32(vlr);
+}
+
+// SSE2 flavour: packs the four corner pixels of a 2x2 block into two vectors (left pixel in the lowest lane)
+// and hands them to interpolate_4_pixels_sse2.
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+    const __m128i top_pair = _mm_unpacklo_epi32(_mm_cvtsi32_si128(tl), _mm_cvtsi32_si128(tr));
+    const __m128i bottom_pair = _mm_unpacklo_epi32(_mm_cvtsi32_si128(bl), _mm_cvtsi32_si128(br));
+    return interpolate_4_pixels_sse2(top_pair, bottom_pair, distx, disty);
+}
+
+// SSE2 flavour taking two-pixel rows: t[0..1] and b[0..1] are each loaded as one 64-bit chunk
+// and handed to interpolate_4_pixels_sse2.
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    const __m128i top_pair = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(t));
+    const __m128i bottom_pair = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(b));
+    return interpolate_4_pixels_sse2(top_pair, bottom_pair, distx, disty);
+}
+
+#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
+// NOTE: Both spellings of the NEON feature macro are tested, matching the <arm_neon.h> include guard at the top of this header:
+//       a toolchain that only defines __ARM_NEON would otherwise pull in the header yet silently use the scalar fallback.
+//
+// Bilinear blend of a 2x2 pixel block: vt32/vb32 hold the top/bottom pixel pairs (left pixel in lane 0).
+// The result is one packed 32-bit pixel, each channel weighted by distx/disty out of 256.
+static inline __attribute__((always_inline)) uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
+{
+    // Widen every 8-bit channel to 16 bits so the weighted sums fit.
+    uint16x8_t vt16 = vmovl_u8(vreinterpret_u8_u32(vt32));
+    uint16x8_t vb16 = vmovl_u8(vreinterpret_u8_u32(vb32));
+    // Vertical pass: top * (256 - disty) + bottom * disty, then drop the 8 fractional bits.
+    vt16 = vmulq_n_u16(vt16, 256 - disty);
+    vt16 = vmlaq_n_u16(vt16, vb16, disty);
+    vt16 = vshrq_n_u16(vt16, 8);
+    // Horizontal pass: the low half holds the left pixel, the high half the right one.
+    uint16x4_t vl16 = vget_low_u16(vt16);
+    uint16x4_t vr16 = vget_high_u16(vt16);
+    vl16 = vmul_n_u16(vl16, 256 - distx);
+    vl16 = vmla_n_u16(vl16, vr16, distx);
+    vl16 = vshr_n_u16(vl16, 8);
+    // Narrow back to 8 bits per channel and extract the packed pixel.
+    uint8x8_t vr = vmovn_u16(vcombine_u16(vl16, vl16));
+    return vget_lane_u32(vreinterpret_u32_u8(vr), 0);
+}
+
+// NEON flavour: packs the four corner pixels of a 2x2 block into two 2-lane vectors (left pixel in lane 0)
+// and hands them to interpolate_4_pixels_neon.
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+    const uint32x2_t top_pair = vset_lane_u32(tr, vmov_n_u32(tl), 1);
+    const uint32x2_t bottom_pair = vset_lane_u32(br, vmov_n_u32(bl), 1);
+    return interpolate_4_pixels_neon(top_pair, bottom_pair, distx, disty);
+}
+
+// NEON flavour taking two-pixel rows: t[0..1] and b[0..1] are each loaded as one 2-lane vector
+// and handed to interpolate_4_pixels_neon.
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    const uint32x2_t top_pair = vld1_u32(t);
+    const uint32x2_t bottom_pair = vld1_u32(b);
+    return interpolate_4_pixels_neon(top_pair, bottom_pair, distx, disty);
+}
+
+#else
+// Portable fallback: blends the four corner pixels of a 2x2 block,
+// first along x for the top and bottom rows, then along y between the two results.
+static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
+{
+    const uint inv_distx = 256 - distx;
+    const uint top_row = INTERPOLATE_PIXEL_256(tl, inv_distx, tr, distx);
+    const uint bottom_row = INTERPOLATE_PIXEL_256(bl, inv_distx, br, distx);
+    return INTERPOLATE_PIXEL_256(top_row, 256 - disty, bottom_row, disty);
+}
+
+// Portable fallback taking two-pixel rows: unpacks t[0..1] / b[0..1] and defers to the four-pixel overload.
+static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
+{
+    const uint top_left = t[0], top_right = t[1];
+    const uint bottom_left = b[0], bottom_right = b[1];
+    return interpolate_4_pixels(top_left, top_right, bottom_left, bottom_right, distx, disty);
+}
+#endif
+
+}
+
+#endif // QDRAWHELPER_P_H