Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement optional 8bpp dithering & image smooth scaling #282

Merged
merged 28 commits into from
Apr 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9373606
Simplify RevRGBA
NiLuJe Apr 9, 2019
2282c77
Switch to XOR for Gray InvertRect, too
NiLuJe Apr 11, 2019
f5e0be9
Do the image inversion *during* decoding, instead of in another pass.
NiLuJe Apr 11, 2019
adc9e31
Allow dithering @ 8bpp
NiLuJe Apr 11, 2019
381599e
And add a flag to dynamically toggle SW dithering on grayscale buffers
NiLuJe Apr 12, 2019
728138b
Yay, typo!
NiLuJe Apr 12, 2019
8fab6fc
And another stupid typo ;)
NiLuJe Apr 12, 2019
61fcd8d
Import Qt's SmoothScale algorithm
NiLuJe Apr 12, 2019
6f81e7e
Switch the namespace from FBInk to CRe
NiLuJe Apr 12, 2019
273067e
Drop C linkage
NiLuJe Apr 12, 2019
94daa8b
Add a flag to dynamically toggle the smooth scaler
NiLuJe Apr 12, 2019
a19691d
Pass decoded data to OnEndDecode
NiLuJe Apr 12, 2019
c90363d
Okay, try to implement the post-process pass without code
NiLuJe Apr 12, 2019
44ff307
Tweak that, as CRe buffers are always uint32_t
NiLuJe Apr 12, 2019
4be7cc3
Better with the right typedef
NiLuJe Apr 12, 2019
ccd20a1
Sigh.
NiLuJe Apr 12, 2019
6cab218
Well, that builds, at least.
NiLuJe Apr 12, 2019
f3915c8
Go back to byte-sized pointers
NiLuJe Apr 12, 2019
3f19db1
Duh.
NiLuJe Apr 12, 2019
634f2f2
Revert that bit of insanity
NiLuJe Apr 12, 2019
db143d9
Don't allocate a buffer if we don't need it
NiLuJe Apr 12, 2019
1f6160b
Disable smoothscale earlier when no scaling is needed
NiLuJe Apr 12, 2019
e518840
Review pass
NiLuJe Apr 12, 2019
4071526
Default to the legacy scaler
NiLuJe Apr 13, 2019
68194d0
We know bpp is 8, use the simpler rgbToGray variant
NiLuJe Apr 13, 2019
e9e71f6
Jot down more accurate grayscaling formulas
NiLuJe Apr 13, 2019
564a12d
Always honor our own SW dithering setting
NiLuJe Apr 14, 2019
1a344ed
Yep, as expected, a proper grayscale is a tiny bit more expensive.
NiLuJe Apr 14, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 74 additions & 3 deletions crengine/include/lvdrawbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ class LVDrawBuf : public CacheableObject
virtual void setHidePartialGlyphs( bool hide ) = 0;
/// set to true to invert images only (so they get inverted back to normal by nightmode)
virtual void setInvertImages( bool invert ) = 0;
/// set to true to enforce dithering (only relevant for 8bpp Gray drawBuf)
virtual void setDitherImages( bool dither ) = 0;
/// set to true to switch to a more costly smooth scaler instead of nearest neighbor
virtual void setSmoothScalingImages( bool smooth ) = 0;
/// invert image
virtual void Invert() = 0;
/// get buffer width, pixels
Expand Down Expand Up @@ -231,13 +235,19 @@ class LVBaseDrawBuf : public LVDrawBuf
lUInt32 _textColor;
bool _hidePartialGlyphs;
bool _invertImages;
bool _ditherImages;
bool _smoothImages;
int _drawnImagesCount;
int _drawnImagesSurface;
public:
/// set to true for drawing in Paged mode, false for Scroll mode
virtual void setHidePartialGlyphs( bool hide ) { _hidePartialGlyphs = hide; }
/// set to true to invert images only (so they get inverted back to normal by nightmode)
virtual void setInvertImages( bool invert ) { _invertImages = invert; }
/// set to true to enforce dithering (only relevant for 8bpp Gray drawBuf)
virtual void setDitherImages( bool dither ) { _ditherImages = dither; }
/// set to true to switch to a more costly smooth scaler instead of nearest neighbor
virtual void setSmoothScalingImages( bool smooth ) { _smoothImages = smooth; }
/// returns current background color
virtual lUInt32 GetBackgroundColor() { return _backgroundColor; }
/// sets current background color
Expand Down Expand Up @@ -277,7 +287,8 @@ class LVBaseDrawBuf : public LVDrawBuf
int getDrawnImagesSurface() { return _drawnImagesSurface; }

LVBaseDrawBuf() : _dx(0), _dy(0), _rowsize(0), _data(NULL), _hidePartialGlyphs(true),
_invertImages(false), _drawnImagesCount(0), _drawnImagesSurface(0) { }
_invertImages(false), _ditherImages(false), _smoothImages(false),
_drawnImagesCount(0), _drawnImagesSurface(0) { }
virtual ~LVBaseDrawBuf() { }
};

Expand Down Expand Up @@ -399,11 +410,12 @@ class LVGrayDrawBuf : public LVBaseDrawBuf
// c.f., https://github.com/koreader/koreader-base/pull/878#issuecomment-476723747
#ifdef CR_RENDER_32BPP_RGB_PXFMT
inline lUInt32 RevRGB( lUInt32 cl ) {
return ((cl>>16)&0x0000FF) | ((cl<<16)&0xFF0000) | (cl&0x00FF00);
return ((cl<<16)&0xFF0000) | ((cl>>16)&0x0000FF) | (cl&0x00FF00);
}

inline lUInt32 RevRGBA( lUInt32 cl ) {
return (cl&0xFF000000) | ((cl>>16)&0x0000FF) | ((cl<<16)&0xFF0000) | (cl&0x00FF00);
// Swap B <-> R, keep G & A
return ((cl<<16)&0x00FF0000) | ((cl>>16)&0x000000FF) | (cl&0xFF00FF00);
}
#else
inline lUInt32 RevRGB( lUInt32 cl ) {
Expand All @@ -423,6 +435,65 @@ inline lUInt16 rgb888to565( lUInt32 cl ) {
return (lUInt16)(((cl>>8)& 0xF800) | ((cl>>5 )& 0x07E0) | ((cl>>3 )& 0x001F));
}

// Rounded integer division by 255 using only shifts and adds:
//   DIV255(V) == ((V + 128) + ((V + 128) >> 8)) >> 8
// The +128 bias provides the rounding; the second term corrects for dividing by 256 instead of 255.
// NOTE: Implemented with a GNU statement expression and `auto`, so V is evaluated exactly once
//       and the arithmetic is carried out in the type of (V) + 128.
// NOTE(review): This shortcut is presumably only exact over a bounded input range — confirm before
//               feeding it values much larger than the ones used in this header.
#define DIV255(V) \
({ \
auto _v = (V) + 128; \
(((_v >> 8U) + _v) >> 8U); \
})

// Quantize an 8-bit color value down to a palette of 16 evenly spaced colors, using an ordered 8x8 dithering pattern.
// With a grayscale input, this happens to match the eInk palette perfectly ;).
// If the input is not grayscale, and the output fb is not grayscale either,
// this usually still happens to match the eInk palette after the EPDC's own quantization pass.
// c.f., https://en.wikipedia.org/wiki/Ordered_dithering
// & https://github.com/ImageMagick/ImageMagick/blob/ecfeac404e75f304004f0566557848c53030bad6/MagickCore/threshold.c#L1627
// NOTE: As the references imply, this is straight from ImageMagick,
// with only minor simplifications to enforce Q8 & avoid fp maths.
// x, y: pixel coordinates; only their low 3 bits are used, to pick a cell of the 8x8 threshold map.
// v:    8-bit input value to quantize.
// Returns one of the 16 output levels (a multiple of 17, clamped to 0xFF).
static inline lUInt8 dither_o8x8(int x, int y, lUInt8 v)
{
    // Ordered 8x8 threshold map, laid out one map row per source line.
    // c.f., https://github.com/ImageMagick/ImageMagick/blob/ecfeac404e75f304004f0566557848c53030bad6/config/thresholds.xml#L107
    static const lUInt8 threshold_map_o8x8[] = {
        1,  49, 13, 61, 4,  52, 16, 64,
        33, 17, 45, 29, 36, 20, 48, 32,
        9,  57, 5,  53, 12, 60, 8,  56,
        41, 25, 37, 21, 44, 28, 40, 24,
        3,  51, 15, 63, 2,  50, 14, 62,
        35, 19, 47, 31, 34, 18, 46, 30,
        11, 59, 7,  55, 10, 58, 6,  54,
        43, 27, 39, 23, 42, 26, 38, 22
    };

    // ImageMagick constants, specialized for our use:
    //   Quantum = 8; Levels (L) = 16; map Divisor (D) = 65
    //   QuantumRange = 0xFF; QuantumScale = 1.0 / QuantumRange
    //
    // threshold = QuantumScale * v * ((L-1) * (D-1) + 1)
    // NOTE: The product handed to DIV255 does not fit in 8 bits, hence the (signed) 16-bit intermediates.
    const lInt16 scaled = (lInt16) DIV255(v * ((15U << 6) + 1U));
    // level = threshold / (D-1)
    const lInt16 level = (lInt16)(scaled >> 6);
    // remainder = threshold - level * (D-1)
    const lInt16 remainder = (lInt16)(scaled - (level << 6));

    // The map is 8 wide and 8 high, so masking with 7 is the modulo.
    const lUInt8 cell = threshold_map_o8x8[(x & 7U) + 8U * (y & 7U)];
    // c = ClampToQuantum((level + (remainder >= cell)) * QuantumRange / (L-1)), with 0xFF / 15 == 17
    const lInt16 quantized = (lInt16)((level + (remainder >= cell)) * 17);

    // NOTE: Plain comparisons are kept on purpose for the clamp: the original author measured them
    //       as noticeably faster than Pillow's CLIP8 macro on ARM.
    if (quantized > 0xFF) {
        return 0xFF;
    }
    if (quantized < 0) {
        return 0U;
    }
    return (lUInt8) quantized;
}

// Declare our bit of scaler ripped from Qt5...
namespace CRe {
// Smooth image scaler entry point (declaration only; the definition is not in this header).
// src:          source pixel data, sw x sh pixels.
// ignore_alpha: NOTE(review): presumably tells the scaler it may skip alpha handling — confirm in the implementation.
// dw, dh:       requested output dimensions in pixels.
// Returns a pointer to lUInt8 data.
// NOTE(review): presumably a newly allocated dw x dh buffer owned by the caller, and possibly NULL on
//               failure — confirm ownership and error behavior against the implementation.
lUInt8* qSmoothScaleImage(const lUInt8* src, int sw, int sh, bool ignore_alpha, int dw, int dh);
}

/// 32-bit RGB buffer
class LVColorDrawBuf : public LVBaseDrawBuf
{
Expand Down
60 changes: 60 additions & 0 deletions crengine/qimagescale/QIMAGETRANSFORM_LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
qimagetransform.cpp was contributed by Daniel M. Duley based on code from Imlib2.

Copyright (C) 2004, 2005 Daniel M. Duley

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.



Imlib2 License

Copyright (C) 2000 Carsten Haitzler and various contributors (see
AUTHORS)

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies of the Software and its Copyright notices. In addition
publicly documented acknowledgment must be given that this software has
been used if no source code of this software is made available publicly.
This includes acknowledgments in either Copyright notices, Manuals,
Publicity and Marketing documents or any documentation provided with any
product containing this software. This License does not apply to any
software that links to the libraries provided by this software
(statically or dynamically), but only to the software provided.

Please see the COPYING.PLAIN for a plain-english explanation of this
notice and it's intent.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
182 changes: 182 additions & 0 deletions crengine/qimagescale/qdrawhelper_p.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 3 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL3 included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 3 requirements
** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 2.0 or (at your option) the GNU General
** Public license version 3 or any later version approved by the KDE Free
** Qt Foundation. The licenses are as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-2.0.html and
** https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#ifndef QDRAWHELPER_P_H
#define QDRAWHELPER_P_H

#include "qglobal.h"
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include <arm_neon.h>
#endif
#if defined(__SSE2__)
#include <immintrin.h>
#include <x86intrin.h>
#endif

namespace CRe {

#if defined(__GNUC__)
# if (defined(__i386) || defined(__i386__) || defined(_M_IX86)) && defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
# define Q_DECL_VECTORCALL __attribute__((sseregparm,regparm(3)))
# else
# define Q_DECL_VECTORCALL
# endif
#elif defined(_MSC_VER)
# define Q_DECL_VECTORCALL __vectorcall
#else
# define Q_DECL_VECTORCALL
#endif

#if __SIZEOF_POINTER__ == 8 // 64-bit versions

// Blend two packed 4x8-bit pixels: each channel becomes (x_channel * a + y_channel * b) >> 8.
// 64-bit flavor: the four channels of each pixel are spread over the four 16-bit lanes of a
// 64-bit word, so a single multiply handles all of them at once.
static inline __attribute__((always_inline)) uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
    // Keeps the low byte of every 16-bit lane.
    const quint64 lane_mask = 0x00ff00ff00ff00ff;

    const quint64 wide_x = quint64(x);
    const quint64 wide_y = quint64(y);
    // Channels 0 and 2 stay in the low half; channels 1 and 3 land in the high half (shifted up by 24).
    const quint64 spread_x = (wide_x | (wide_x << 24)) & lane_mask;
    const quint64 spread_y = (wide_y | (wide_y << 24)) & lane_mask;

    // Weighted sum per lane, then drop the 8 fractional bits.
    const quint64 blended = ((spread_x * a + spread_y * b) >> 8) & lane_mask;

    // Fold the high half back into place to rebuild the packed 32-bit pixel.
    return uint(blended) | uint(blended >> 24);
}

#else // 32-bit versions

// Blend two packed 4x8-bit pixels: each channel becomes (x_channel * a + y_channel * b) >> 8.
// 32-bit flavor: the channels are processed two at a time (even bytes, then odd bytes),
// so that each product has 8 spare bits to grow into.
static inline __attribute__((always_inline)) uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
    const uint even_mask = 0xff00ff;

    // Bytes 0 and 2: multiply in place, then shift the result back down by 8.
    const uint even = (((x & even_mask) * a + (y & even_mask) * b) >> 8) & even_mask;

    // Bytes 1 and 3: shift down before multiplying, so the product is already in its final position.
    const uint odd = (((x >> 8) & even_mask) * a + ((y >> 8) & even_mask) * b) & 0xff00ff00;

    return odd | even;
}

#endif

// NOTE: Unlike the SIMD qimagescale_* routines, these ones seem to offer a very small performance gain.
#if defined(__SSE2__)
// Bilinear blend of a 2x2 pixel neighbourhood using SSE2.
// vt: top row, with the left and right 32-bit pixels in the low 64 bits (see the two callers below).
// vb: bottom row, same layout.
// distx, disty: horizontal / vertical weights out of 256 (the complementary weights are 256 - dist*).
// Returns the blended pixel as a packed 32-bit value.
static inline __attribute__((always_inline)) uint interpolate_4_pixels_sse2(__m128i vt, __m128i vb, uint distx, uint disty)
{
// First interpolate top and bottom pixels in parallel.
// Widen every 8-bit channel to 16 bits so the weighted products fit.
vt = _mm_unpacklo_epi8(vt, _mm_setzero_si128());
vb = _mm_unpacklo_epi8(vb, _mm_setzero_si128());
vt = _mm_mullo_epi16(vt, _mm_set1_epi16(256 - disty));
vb = _mm_mullo_epi16(vb, _mm_set1_epi16(disty));
__m128i vlr = _mm_add_epi16(vt, vb);
vlr = _mm_srli_epi16(vlr, 8);
// vlr now contains the result of the first two interpolate calls vlr = unpacked((xright << 64) | xleft)

// Now the last interpolate between left and right..
// Build the interleaved weight vector { 256 - distx, distx, ... } that matches the interleaved colors below.
const __m128i vidistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(256 - distx), _MM_SHUFFLE(0, 0, 0, 0));
const __m128i vdistx = _mm_shufflelo_epi16(_mm_cvtsi32_si128(distx), _MM_SHUFFLE(0, 0, 0, 0));
const __m128i vmulx = _mm_unpacklo_epi16(vidistx, vdistx);
vlr = _mm_unpacklo_epi16(vlr, _mm_srli_si128(vlr, 8));
// vlr now contains the colors of left and right interleaved { la, ra, lr, rr, lg, rg, lb, rb }
vlr = _mm_madd_epi16(vlr, vmulx); // Multiply and horizontal add.
vlr = _mm_srli_epi32(vlr, 8);
// Narrow the four 32-bit channel results back down to bytes (32 -> 16 -> 8 bits).
vlr = _mm_packs_epi32(vlr, vlr);
vlr = _mm_packus_epi16(vlr, vlr);
// The low 32 bits now hold the packed pixel.
return _mm_cvtsi128_si32(vlr);
}

// Bilinear blend of four individually passed pixels (top-left, top-right, bottom-left, bottom-right).
// distx / disty are the horizontal / vertical weights forwarded to the SSE2 kernel.
static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    // Move each pixel into the low lane of its own register...
    const __m128i top_left = _mm_cvtsi32_si128(tl);
    const __m128i top_right = _mm_cvtsi32_si128(tr);
    const __m128i bottom_left = _mm_cvtsi32_si128(bl);
    const __m128i bottom_right = _mm_cvtsi32_si128(br);

    // ...then pair them up per row: { left, right } in the low 64 bits.
    const __m128i top_row = _mm_unpacklo_epi32(top_left, top_right);
    const __m128i bottom_row = _mm_unpacklo_epi32(bottom_left, bottom_right);

    return interpolate_4_pixels_sse2(top_row, bottom_row, distx, disty);
}

// Bilinear blend of two adjacent pixel pairs: t[0], t[1] form the top row, b[0], b[1] the bottom row.
// distx / disty are the horizontal / vertical weights forwarded to the SSE2 kernel.
static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
{
    // A single 64-bit load fetches both pixels of a row.
    const __m128i top_row = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(t));
    const __m128i bottom_row = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(b));
    return interpolate_4_pixels_sse2(top_row, bottom_row, distx, disty);
}

// NOTE: Check both spellings, exactly like the <arm_neon.h> include guard at the top of this header does:
//       some toolchains only define the ACLE name (__ARM_NEON) and not the legacy __ARM_NEON__,
//       in which case the header would be included but the scalar fallback would be compiled.
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
// Bilinear blend of a 2x2 pixel neighbourhood using NEON.
// vt32: top row, lane 0 = left pixel, lane 1 = right pixel (see the two callers below).
// vb32: bottom row, same layout.
// distx, disty: horizontal / vertical weights out of 256 (the complementary weights are 256 - dist*).
// Returns the blended pixel as a packed 32-bit value.
static inline __attribute__((always_inline)) uint interpolate_4_pixels_neon(uint32x2_t vt32, uint32x2_t vb32, uint distx, uint disty)
{
    // Widen every 8-bit channel to 16 bits so the weighted products fit.
    uint16x8_t vt16 = vmovl_u8(vreinterpret_u8_u32(vt32));
    uint16x8_t vb16 = vmovl_u8(vreinterpret_u8_u32(vb32));
    // Vertical pass: top * (256 - disty) + bottom * disty, then drop the 8 fractional bits.
    vt16 = vmulq_n_u16(vt16, 256 - disty);
    vt16 = vmlaq_n_u16(vt16, vb16, disty);
    vt16 = vshrq_n_u16(vt16, 8);
    // The low half now holds the left column's channels, the high half the right column's.
    uint16x4_t vl16 = vget_low_u16(vt16);
    uint16x4_t vr16 = vget_high_u16(vt16);
    // Horizontal pass: left * (256 - distx) + right * distx, then drop the 8 fractional bits.
    vl16 = vmul_n_u16(vl16, 256 - distx);
    vl16 = vmla_n_u16(vl16, vr16, distx);
    vl16 = vshr_n_u16(vl16, 8);
    // Narrow back to 8-bit channels; lane 0 of the result is the packed pixel.
    uint8x8_t vr = vmovn_u16(vcombine_u16(vl16, vl16));
    return vget_lane_u32(vreinterpret_u32_u8(vr), 0);
}

// Bilinear blend of four individually passed pixels (top-left, top-right, bottom-left, bottom-right).
// distx / disty are the horizontal / vertical weights forwarded to the NEON kernel.
static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    // Build each row as { left, right }: broadcast the left pixel, then overwrite lane 1 with the right one.
    const uint32x2_t top_row = vset_lane_u32(tr, vmov_n_u32(tl), 1);
    const uint32x2_t bottom_row = vset_lane_u32(br, vmov_n_u32(bl), 1);
    return interpolate_4_pixels_neon(top_row, bottom_row, distx, disty);
}

// Bilinear blend of two adjacent pixel pairs: t[0], t[1] form the top row, b[0], b[1] the bottom row.
// distx / disty are the horizontal / vertical weights forwarded to the NEON kernel.
static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
{
    // One 64-bit load per row fetches both of its pixels.
    const uint32x2_t top_row = vld1_u32(t);
    const uint32x2_t bottom_row = vld1_u32(b);
    return interpolate_4_pixels_neon(top_row, bottom_row, distx, disty);
}

#else
// Portable bilinear blend of four pixels (top-left, top-right, bottom-left, bottom-right).
// distx / disty are the horizontal / vertical weights out of 256.
static inline uint interpolate_4_pixels(uint tl, uint tr, uint bl, uint br, uint distx, uint disty)
{
    // Horizontal pass: blend left and right on each row.
    const uint top = INTERPOLATE_PIXEL_256(tl, 256 - distx, tr, distx);
    const uint bottom = INTERPOLATE_PIXEL_256(bl, 256 - distx, br, distx);
    // Vertical pass: blend the two row results.
    return INTERPOLATE_PIXEL_256(top, 256 - disty, bottom, disty);
}

// Bilinear blend of two adjacent pixel pairs: t[0], t[1] form the top row, b[0], b[1] the bottom row.
// Simply unpacks the rows and defers to the four-pixel overload.
static inline uint interpolate_4_pixels(const uint t[], const uint b[], uint distx, uint disty)
{
    const uint top_left = t[0];
    const uint top_right = t[1];
    const uint bottom_left = b[0];
    const uint bottom_right = b[1];
    return interpolate_4_pixels(top_left, top_right, bottom_left, bottom_right, distx, disty);
}
#endif

}

#endif // QDRAWHELPER_P_H
Loading