diff --git a/build/unix/Makefile.am b/build/unix/Makefile.am index cc23cab..e311ca1 100644 --- a/build/unix/Makefile.am +++ b/build/unix/Makefile.am @@ -1,10 +1,54 @@ ACLOCAL_AMFLAGS = -I m4 -warningflags = -Wall -Wextra -Wshadow -Wno-expansion-to-defined -Wno-missing-field-initializers +warningflags = \ + -Wall \ + -Wextra \ + -Wshadow \ + -Wunused \ + -Wnull-dereference \ + -Wvla \ + -Wstrict-aliasing \ + -Wuninitialized \ + -Wunused-parameter \ + -Wreorder \ + -Wsign-compare \ + -Wunreachable-code \ + -Wconversion \ + -Wno-sign-conversion \ + $(COMPWARNFLAGS) + +if CLG + +# Clang +warningflags += \ + -Wshadow-all \ + -Wshorten-64-to-32 \ + -Wint-conversion \ + -Wconditional-uninitialized \ + -Wconstant-conversion \ + -Wunused-private-field \ + -Wbool-conversion \ + -Wextra-semi \ + -Wnullable-to-nonnull-conversion \ + -Wno-unused-private-field \ + -Wno-unused-command-line-argument +# -Wzero-as-null-pointer-constant + +else + +# GCC +warningflags += \ + -Wredundant-decls \ + -Wno-ignored-attributes \ + -Wno-expansion-to-defined + +endif + +warnflagscpp = includeflags = -I$(srcdir)/../../src -commonflags = $(DEBUGCFLAGS) $(MFLAGS) $(warningflags) $(includeflags) -AM_CXXFLAGS = -std=$(CXXSTD) $(commonflags) -AM_LDFLAGS = $(PLUGINLDFLAGS) +commoncflags = $(DEBUGCFLAGS) $(MFLAGS) $(warningflags) $(includeflags) $(STACKREALIGN) +AM_CXXFLAGS = -std=$(CXXSTD) $(commoncflags) $(warnflagscpp) $(EXTRA_CXXFLAGS) +AM_LDFLAGS = $(PLUGINLDFLAGS) lib_LTLIBRARIES = libfmtconv.la @@ -52,8 +96,6 @@ libfmtconv_la_SOURCES = \ ../../src/conc/ObjPool.h \ ../../src/conc/ObjPool.hpp \ ../../src/ffft/def.h \ - ../../src/ffft/DynArray.h \ - ../../src/ffft/DynArray.hpp \ ../../src/ffft/FFTReal.h \ ../../src/ffft/FFTReal.hpp \ ../../src/ffft/OscSinCos.h \ @@ -129,6 +171,8 @@ libfmtconv_la_SOURCES = \ ../../src/fmtcl/DiscreteFirCustom.h \ ../../src/fmtcl/DiscreteFirInterface.cpp \ ../../src/fmtcl/DiscreteFirInterface.h \ + ../../src/fmtcl/Dither.cpp \ + ../../src/fmtcl/Dither.h \ 
../../src/fmtcl/ErrDifBuf.cpp \ ../../src/fmtcl/ErrDifBuf.h \ ../../src/fmtcl/ErrDifBuf.hpp \ @@ -138,6 +182,7 @@ libfmtconv_la_SOURCES = \ ../../src/fmtcl/FilterResize.h \ ../../src/fmtcl/fnc.cpp \ ../../src/fmtcl/fnc.h \ + ../../src/fmtcl/InterlacingType.h \ ../../src/fmtcl/KernelData.cpp \ ../../src/fmtcl/KernelData.h \ ../../src/fmtcl/Mat3.h \ @@ -150,9 +195,14 @@ libfmtconv_la_SOURCES = \ ../../src/fmtcl/MatrixProc.cpp \ ../../src/fmtcl/MatrixProc.h \ ../../src/fmtcl/MatrixProc_macro.h \ + ../../src/fmtcl/MatrixUtil.cpp \ + ../../src/fmtcl/MatrixUtil.h \ ../../src/fmtcl/MatrixWrap.h \ ../../src/fmtcl/MatrixWrap.hpp \ + ../../src/fmtcl/PicFmt.h \ ../../src/fmtcl/PrimariesPreset.h \ + ../../src/fmtcl/PrimUtil.cpp \ + ../../src/fmtcl/PrimUtil.h \ ../../src/fmtcl/Proxy.h \ ../../src/fmtcl/Proxy.hpp \ ../../src/fmtcl/ProxyRwCpp.h \ @@ -163,8 +213,11 @@ libfmtconv_la_SOURCES = \ ../../src/fmtcl/ReadWrapperFlt.hpp \ ../../src/fmtcl/ReadWrapperInt.h \ ../../src/fmtcl/ReadWrapperInt.hpp \ + ../../src/fmtcl/ResamplePlaneData.h \ ../../src/fmtcl/ResampleSpecPlane.cpp \ ../../src/fmtcl/ResampleSpecPlane.h \ + ../../src/fmtcl/ResampleUtil.cpp \ + ../../src/fmtcl/ResampleUtil.h \ ../../src/fmtcl/ResizeData.cpp \ ../../src/fmtcl/ResizeData.h \ ../../src/fmtcl/ResizeData.hpp \ @@ -212,6 +265,8 @@ libfmtconv_la_SOURCES = \ ../../src/fmtcl/TransOpSLog.h \ ../../src/fmtcl/TransOpSLog3.cpp \ ../../src/fmtcl/TransOpSLog3.h \ + ../../src/fmtcl/TransUtil.cpp \ + ../../src/fmtcl/TransUtil.h \ ../../src/fmtcl/Vec3.h \ ../../src/fmtcl/Vec3.hpp \ ../../src/fmtcl/VoidAndCluster.cpp \ @@ -223,7 +278,7 @@ libfmtconv_la_SOURCES = \ ../../src/fstb/CpuId.cpp \ ../../src/fstb/CpuId.h \ ../../src/fstb/def.h \ - ../../src/fstb/fnc.cpp \ + ../../src/fstb/fnc_fstb.cpp \ ../../src/fstb/fnc.h \ ../../src/fstb/fnc.hpp \ ../../src/fstb/SingleObj.h \ diff --git a/build/unix/configure.ac b/build/unix/configure.ac index 0a52132..65676fb 100644 --- a/build/unix/configure.ac +++ b/build/unix/configure.ac 
@@ -1,4 +1,4 @@ -AC_INIT([fmtconv], [r22], [http://forum.doom9.org/showthread.php?t=166504], [fmtconv], [http://forum.doom9.org/showthread.php?t=166504]) +AC_INIT([fmtconv], [r23], [http://forum.doom9.org/showthread.php?t=166504], [fmtconv], [http://forum.doom9.org/showthread.php?t=166504]) AC_CONFIG_MACRO_DIR([m4]) AM_INIT_AUTOMAKE([foreign no-dist-gzip dist-xz subdir-objects no-define]) @@ -6,32 +6,26 @@ AM_SILENT_RULES([yes]) LT_INIT([win32-dll disable-static]) +: ${CXXFLAGS=""} +: ${CFLAGS=""} + AC_PROG_CXX +AC_PROG_CC AC_CANONICAL_HOST AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug], [Compilation options required for debugging. [default=no]])) +AC_ARG_ENABLE([clang], AS_HELP_STRING([--enable-clang], [Use Clang as compiler along with libc++. [default=no]])) -AC_LANG_PUSH([C++]) -AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++17], [CXXSTD="c++17"])) -AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++11], [CXXSTD="c++11"])) -AS_IF([test "x$CXXSTD" = "x"], AC_MSG_ERROR([Minimum requirement: C++11])) -AX_CHECK_COMPILE_FLAG([-Wunused-private-field] , [CXXFLAGS="$CXXFLAGS -Wno-unused-private-field"] , , [-Werror]) -AX_CHECK_COMPILE_FLAG([-Wunused-command-line-argument], [CXXFLAGS="$CXXFLAGS -Wno-unused-command-line-argument"], , [-Werror]) -AC_LANG_POP([C++]) -# It seems that -latomic is needed only for some versions of GCC < 5.3 -AX_CHECK_LINK_FLAG([-latomic], [LIBS="$LIBS -latomic"]) - -AS_IF( - [test "x$enable_debug" = "xyes"], - [DEBUGCFLAGS="-O0 -g3 -ggdb"], - [DEBUGCFLAGS="-O3 -g3 -DNDEBUG"] -) X86="false" PPC="false" ARM="false" +WIN="false" +UNX="false" +MAC="false" +CLG="false" AS_CASE( [$host_cpu], @@ -44,21 +38,62 @@ AS_CASE( AS_CASE( [$host_os], - [cygwin*|mingw*], - [AS_IF( - [test "x$BITS" = "x32"], - [ - PLUGINLDFLAGS="-Wl,--kill-at" - STACKREALIGN="-mstackrealign" - ] - )] + [cygwin*|mingw*], [WIN="true"], + [darwin*], [MAC="true"], + [*linux*|gnu*|dragonfly*|*bsd*], [UNX="true"] ) AS_IF( - [test "x$X86" = "xtrue"], + 
[test "x$enable_debug" = "xyes"], + [ + DEBUGCFLAGS="-O0 -g3 -ggdb" + AC_MSG_NOTICE([Debug mode enabled.]) + ], + [DEBUGCFLAGS="-O3 -g3 -DNDEBUG"] +) + +AS_IF( + [test "x$enable_clang" = "xyes"], + [ + CLG="true" + CXX="clang++" + CC="clang" + LD="clang++" + MFLAGS="$MFLAGS -fexperimental-new-pass-manager -mllvm -inline-threshold=1000" + COMPWARNFLAGS="" + AC_MSG_NOTICE([Using clang as compiler.]) + ], + [COMPWARNFLAGS="-Wduplicated-cond -Wduplicated-branches -Wlogical-op"] +) + +AC_LANG_PUSH([C++]) +#AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++20], [CXXSTD="c++20"])) +#AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++17], [CXXSTD="c++17"])) +AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++14], [CXXSTD="c++14"])) +AS_IF([test "x$CXXSTD" = "x"], AC_MSG_ERROR([Minimum requirement: C++14])) +AC_LANG_POP([C++]) + +# It seems that -latomic is needed only for some versions of GCC < 5.3 +AX_CHECK_LINK_FLAG([-latomic], [LIBS="$LIBS -latomic"]) + +AS_IF( + [test "x$WIN" = "xtrue"], [ - MFLAGS="-mfpmath=sse -msse2 -Wno-ignored-attributes" + AS_IF( + [test "x$BITS" = "x32"], + [ + PLUGINLDFLAGS="-Wl,--kill-at" + STACKREALIGN="-mstackrealign" + ] + ) + ] +) +AS_IF( + [test "x$X86" = "xtrue"], + [ + MFLAGS="$MFLAGS -mfpmath=sse -msse2" + COMPWARNFLAGS="$COMPWARNFLAGS -Wno-ignored-attributes" # We need this to use CMPXCHG16B for 2x64-bit CAS (compare and swap) AS_IF([test "x$BITS" = "x64"], [MFLAGS="$MFLAGS -mcx16"]) ] @@ -72,19 +107,27 @@ AS_IF( AX_CHECK_COMPILE_FLAG([-mfpu=neon], [MFLAGS="$MFLAGS -mfpu=neon"]) # GCC 7 emits some warnings about ABI changes when using std::vector - AX_CHECK_COMPILE_FLAG([-Wpsabi -Werror], [MFLAGS="$MFLAGS -Wno-psabi"]) + AX_CHECK_COMPILE_FLAG([-Wpsabi], [COMPWARNFLAGS="$COMPWARNFLAGS -Wno-psabi"], , [-Werror]) ] ) +AX_CHECK_COMPILE_FLAG([-Wmisleading-indentation] , [COMPWARNFLAGS="$COMPWARNFLAGS -Wmisleading-indentation"] , , [-Werror]) +# Clang only 
+AX_CHECK_COMPILE_FLAG([-Wno-implicit-int-float-conversion], [COMPWARNFLAGS="$COMPWARNFLAGS -Wno-implicit-int-float-conversion"], , [-Werror]) + +AC_SUBST([CXXSTD]) +AC_SUBST([EXTRA_CXXFLAGS]) +AC_SUBST([LDFLAGS]) AC_SUBST([MFLAGS]) AC_SUBST([DEBUGCFLAGS]) -AC_SUBST([CXXSTD]) AC_SUBST([PLUGINLDFLAGS]) AC_SUBST([STACKREALIGN]) AM_CONDITIONAL([X86], [test "x$X86" = "xtrue"]) AM_CONDITIONAL([ARM], [test "x$ARM" = "xtrue"]) - +AM_CONDITIONAL([UNX], [test "x$UNX" = "xtrue"]) +AM_CONDITIONAL([WIN], [test "x$WIN" = "xtrue"]) +AM_CONDITIONAL([CLG], [test "x$CLG" = "xtrue"]) AC_CONFIG_FILES([Makefile]) AC_OUTPUT diff --git a/build/win/fmtconv.vcxproj b/build/win/fmtconv.vcxproj index 6190cf5..c8f1bed 100644 --- a/build/win/fmtconv.vcxproj +++ b/build/win/fmtconv.vcxproj @@ -32,10 +32,11 @@ Unicode - true + true + false - false + false true @@ -45,8 +46,13 @@ + + true + + + false + - <_ProjectFileVersion>10.0.30319.1 $(ProjectDir)$(Configuration)$(Platform)\ $(ProjectDir)$(Configuration)$(Platform)\ AllRules.ruleset @@ -57,11 +63,13 @@ Disabled _DEBUG;%(PreprocessorDefinitions) - true EnableFastChecks MultiThreadedDebug true + + true + @@ -74,6 +82,7 @@ true + true true true @@ -102,8 +111,11 @@ Level4 - 4127;4505 + 4127 ProgramDatabase + true + true + /Zc:__cplusplus %(AdditionalOptions) true @@ -136,11 +148,13 @@ + + @@ -150,9 +164,12 @@ + + + @@ -165,7 +182,9 @@ + + @@ -197,6 +216,7 @@ + @@ -269,8 +289,6 @@ - - @@ -311,6 +329,7 @@ + @@ -327,7 +346,10 @@ AdvancedVectorExtensions2 + + + @@ -353,6 +375,7 @@ + @@ -378,14 +401,15 @@ - - $(IntDir)%(Filename)1.obj - $(IntDir)%(Filename)1.xdc - + + + + + diff --git a/build/win/fmtconv.vcxproj.filters b/build/win/fmtconv.vcxproj.filters index 93e2181..1bf331d 100644 --- a/build/win/fmtconv.vcxproj.filters +++ b/build/win/fmtconv.vcxproj.filters @@ -177,12 +177,6 @@ ffft - - ffft - - - ffft - ffft @@ -532,6 +526,30 @@ fmtcl + + fmtcl + + + fmtcl + + + fmtcl + + + fmtcl + + + fmtcl + + + fmtcl + + + fmtcl + + + fmtcl + @@ -561,7 
+579,7 @@ vsutl - + fstb @@ -756,5 +774,24 @@ fmtcl + + fmtcl + + + fmtcl + + + fmtcl + + + fmtcl + + + fmtcl + + + + + \ No newline at end of file diff --git a/build/win/toolset.props b/build/win/toolset.props index 4227e63..8e24a0c 100644 --- a/build/win/toolset.props +++ b/build/win/toolset.props @@ -4,9 +4,9 @@ <_ProjectFileVersion>12.0.30501.0 - v141_xp + v142 - v141 + v142 \ No newline at end of file diff --git a/doc/fmtconv.html b/doc/fmtconv.html index 8cc575a..2a0b9fe 100644 --- a/doc/fmtconv.html +++ b/doc/fmtconv.html @@ -15,7 +15,7 @@

Abstract

- + @@ -205,7 +205,7 @@

Compiling from the source code

Visual C++

-

Visual Studio 2013 or later is required, previous versions are +

Visual Studio 2019 or later is required, previous versions are not supported anymore. Just load build/win/fmtconv.sln and run the compiler.

@@ -215,7 +215,6 @@

Visual C++

  • Add . (the src directory) as include path.
  • -
  • Use the v120_xp toolset for the 32-bit version.
  • For the whole project, enable the SS2 instruction set.
  • Enable the AVX2 instruction set for the *.cpp files containing avx2 in their name, and the AVX set for the avx files.
  • Enable optimizations maximizing speed and “any suitable” functions for inlining.
  • @@ -223,13 +222,18 @@

    Visual C++

    GNU/Linux and other Unix-like systems

    -

    On Linux and similar GNU-based systems, the build directory -contains autotools settings:

    +

    On Linux and similar GNU-based systems (including MSYS2 and Cygwin), the +build directory contains autotools settings:

    cd build/unix
     ./autogen.sh
     ./configure
     make
     make install
    +

    You can add some options to the configure command:

    +
      +
    • --enable-debug to activate debugging code
    • +
    • --enable-clang to use Clang instead of the default compiler, usually GCC
    • +

    GCC

    @@ -247,7 +251,7 @@

    GCC

    Add AvstpFinder.cpp on Windows. Use the following options (on a single line):

    -
    -std=c++11 -shared -fabi-version=6 -msse2 -mcx16 -O3 -DNDEBUG -I.
    +
    -std=c++14 -shared -fabi-version=6 -msse2 -mcx16 -O3 -DNDEBUG -I.
     -Wall -Wextra -Wno-unused-parameter -Wno-unused-result -Wno-missing-field-initializers -Wshadow
     -Wno-unused-private-field
    @@ -276,7 +280,7 @@

    GCC

    Link with -latomic -lpthread.

    With MinGW, it seems you will need a specific MinGW-64 build supporting -C++11 threading (not tested here yet, please report if you find something +C++14 threading (not tested here yet, please report if you find something useful).

    III) Filters description

    @@ -298,6 +302,9 @@

    bitdepth

    staticnoise: int : opt; (False) cpuopt : int : opt; (-1) patsize : int : opt; (32) + tpdfo : int : opt; (0) + tpdfn : int : opt; (0) + corplane : int : opt; (0) )
    @@ -309,14 +316,14 @@

    bitdepth

  • Doing a full-range ↔ TV-range conversion between integer formats, because the resulting values haven’t an exact representation.
-

Pure ordered dithering seems to be retained better than noise or error -diffusion by video compression in 8 bits. -Therefore this is the recommended method to avoid color banding, unless you -encode at very high bitrates. -If you don’t care about video compression, error diffusion gives the most -accurate results. -To avoid discontinuities between purely flat areas and dithered areas, you -can add a bit of noise.

+

Video compression seems to retain pure ordered (Bayer) dithering better. +Therefore this is the recommended method to avoid color banding in 8 bit +signals, unless you encode at high bitrates. +If you don’t care about video compression, error diffusion, void and cluster +and quasirandom sequence methods give the most accurate results. +To avoid discontinuities between purely flat areas and dithered areas (also +called noise modulation), you can add a bit of noise, ideally in triangular +distribution.

The internal noise generator is deterministic and will give the same result each run.

@@ -381,10 +388,11 @@

Parameters

- - + + - + +
Authors:  Firesledge (aka Cretindesalpes)
Version:  r22
Version:  r23
Download:  http://ldesoras.free.fr/prod.html
Category:  Format tools
Requirements: Vapoursynth
2Round, may be a bit faster but possibly less accurate.
3Sierra-2-4A error diffusion, aka “Filter Lite”. Quick and excellent quality, similar to Floyd-Steinberg.
4Stucki error diffusion. Preserves delicate edges better but distorts gradients.
5Atkinson error diffusion. Generates distinct patterns but keeps clean the flat areas.
6Floyd-Steinberg error diffusion. Classic.
5Atkinson error diffusion. Generates distinct patterns but keeps the flat areas clean (noise modulation).
6Classic Floyd-Steinberg error diffusion, modified for serpentine scan (avoids worm artefacts).
7Ostromoukhov error diffusion. Slow, available only for integer input at the moment. Avoids usual F-S artefacts.
8Void and cluster halftone dithering. Better visual aspect than ordered dithering.
8Void and cluster halftone dithering. This is a way to generate blue-noise dither and has a much better visual aspect than ordered dithering.
9Dither using quasirandom sequences. Good intermediate between Void and cluster and error diffusion algorithms.

When using error-diffusion dithering on interlaced content, you should @@ -408,7 +416,7 @@

Parameters

dyn

Indicates if the ordered dither pattern is dynamic (True) or static (False). -If dynamic, the pattern is rotated each frame.

+If dynamic, the pattern is changed or rotated each frame.

staticnoise

If set to 1, the noise generated with ampn is static @@ -425,6 +433,27 @@

Parameters

Width of the pattern used in the Void and cluster algorithm. The only valid values are 4, 8, 16 and 32.

+

tpdfo

+

Set it to 1 to enable the triangular probability distribution function +(TPDF) for halftone-based dithering algorithms. +It has no effect on error diffusion methods. +0 is the standard rectangular distribution (RPDF). +Note that when triangular distribution is enabled, the maximum halftone +amplitude is multiplied by 1.414 at constant ampo.

+ +

tpdfn

+

Same as tpdfo, but for the additive noise part. +TPDF noise looks more natural than RPDF noise, and is a crude approximation of +a gaussian noise, with a bounded amplitude. +Maximum noise amplitude is multiplied by 1.414 at constant ampn, +so the introduced noise power is kept approximately constant.

+ +

corplane

+

Set it to 1 to keep the dither and noise patterns correlated for all the +planes. +When processing an RGB picture, it helps to prevent colored noise on grey +features.

+

convert

@@ -502,7 +531,7 @@

convert

cplaced : data : opt; (cplace) matd : data : opt; - # Transfert curve parameters + # Transfer curve parameters transs : data[] : opt; transd : data[] : opt; cont : float : opt; @@ -1361,6 +1390,8 @@

transfer

flt : int : opt; fulls : int : opt; (True) fulld : int : opt; (True) + logceis : int : opt; (800) + logceid : int : opt; (800) cpuopt : int : opt; (-1) blacklvl : float : opt; (0) ) @@ -1427,8 +1458,8 @@

Parameters

"slog" −0.006…10Sony S-Log
Linear 1.0 is the reference white, peak white is at 10.0. "slog2" −0.0085…14.13Sony S-Log 2
Linear 1.0 is the reference white, peak white is at 14.13. "slog3" 0…38.421Sony S-Log3. -"logc2" UnspecifiedArri Log C Alexa 2.x (800 EI), linear scene exposure
Peak white is 57.45 linear. The negative part of the range allows coding sensor noise. -"logc3" UnspecifiedArri Log C Alexa 3.x (800 EI), linear scene exposure
Peak white is 55.08 linear. The negative part of the range allows coding sensor noise. +"logc2" UnspecifiedArri Log C Alexa 2.x, linear scene exposure
Peak white is 57.45 linear. The negative part of the range allows coding sensor noise. logceis and logceid set the Exposure Index (EI). +"logc3" UnspecifiedArri Log C Alexa 3.x, linear scene exposure
Peak white is 55.08 linear. The negative part of the range allows coding sensor noise. logceis and logceid set the Exposure Index (EI). "canonlog" 0…8.00903Canon-Log
Peak white is 8.00903 in linear scale and 1.08676 in compressed scale. "adobergb" 0…1Adobe RGB (1998 and Wide Gamut) "romm" 0…1ProPhoto, ROMM @@ -1468,6 +1499,11 @@

Parameters

in TV-range (16 to 240 for the Y’Cb’Cr’ chroma planes). This value has no meaning for float data.

+

logceis, logceid

+

Exposure index (EI) for the Arri Log C Alexa 2.x and 3.x curves. +Allowed values are: +160, 200, 250, 320, 400, 500, 640, 800 (default), 1000, 1280 and 1600.

+

cpuopt

Limits the CPU instruction set. −1: automatic (no limitation), @@ -1524,7 +1560,23 @@

IV) Troubleshooting

V) Changelog

-

r22, 2019.12.11

+

r24, 202?-??-??

+
    +
  • bitdepth: added dithering mode 9: quasirandom sequences.
  • +
  • bitdepth: added a triangular probability distribution function (TPDF) for the dithering patterns and noises, along with the associated parameters tpdfo and tpdfn.
  • +
  • bitdepth: added corplane parameter to prevent colored noise in RGB processing.
  • +
+ +

r23, 2021-07-14

+
    +
  • transfer: added an Exposure Index (EI) parameter for the Arri Log C Alexa 2.x and 3.x curves.
  • +
  • bitdepth: properly sets the _ColorRange attribute.
  • +
  • Doesn’t output a debug message when AVSTP is not found.
  • +
  • Fixed a concurrency issue by using a more recent toolkit when compiling with MSVC.
  • +
  • Windows XP is not supported any more.
  • +
+ +

r22, 2019-12-11

  • bitdepth: upconversions for full range data now scale to the maximum value instead of shifting bits. Thanks to Z4ST1N for the report.
  • matrix: added support for the YDzDx, ICtCp-PQ and ICtCp-HLG colorspaces.
  • @@ -1543,25 +1595,25 @@

    V) Changelog

  • Fixed compilation for Linux on ARM or aarch64. Binaries not tested yet.
-

r21, 2019.12.08

+

r21, 2019-12-08

  • transfer: fixed highlight clipping for several high dynamic range transfer curves, thanks to groucho86 for the report.
-

r20, 2016.03.25

+

r20, 2016-03-25

  • primaries: fixed a bug preventing to set all primaries individually without specifying any preset.
  • primaries: fixed a bug in the color conversion, thanks to J1Man for having spotted it.
-

r19, 2016.03.19

+

r19, 2016-03-19

  • primaries: refined the values for the Adobe Wide gamut and BT.2020 primaries.
  • primaries: added DCI-P3, ACES AP0/AP1, S-Gamut, S-Gamut3.Cine, ALEXA and V-Gamut presets.
  • transfer: added ACEScc, ERIMM, S-Log2, S-Log3 and V-Log curves.
-

r18, 2016.03.08

+

r18, 2016-03-08

  • Added the primaries function to convert between gamuts.
  • The “full” range is now closer to what is specified in the standards.
  • @@ -1569,7 +1621,7 @@

    V) Changelog

  • transfer: added the Adobe RGB and ProPhoto / ROMM curves.
-

r17, 2015.07.08

+

r17, 2015-07-08

  • bitdepth: added “Void and cluster” dithering method and its patsize parameter.
  • bitdepth: added floating point implementation for the Ostromoukhov dithering
  • @@ -1577,7 +1629,7 @@

    V) Changelog

  • bitdepth: fixed incorrect conversion from float to 8-bit integer using the “fast” modes with SSE2 instruction set.
-

r16, 2015.07.01

+

r16, 2015-07-01

  • bitdepth: added support for 11-bit and 14-bit integer input.
  • bitdepth: fixed a slight plane inconsistency when dithering grey multi-plane pictures using an error diffusion algorithm.
  • @@ -1586,18 +1638,18 @@

    V) Changelog

  • transfer: added the blacklvl parameter.
-

r15, 2015.05.22

+

r15, 2015-05-22

  • resample and bitdepth: fixed a bug creating dark lines or weird patterns. Was introduced in r13 while trying to fix the buffer overflow problem. Thanks to feisty2 for spotting it.
  • resample: fixed the non-SIMD code path, causing crashes.
-

r14, 2015.05.20

+

r14, 2015-05-20

  • matrix: fixed a bug introducing wrong offsets in custom matrix coefficients, thanks to mawen1250 for the report.
-

r13, 2015.05.18

+

r13, 2015-05-18

  • matrix: optimized the SSE2 and AVX2 paths for integer data.
  • Added cpuopt to some functions, to manually limit the instruction set optimizations.
  • @@ -1606,23 +1658,23 @@

    V) Changelog

  • Removed the int16tofloat and floattoint16 temporary functions.
-

r12, 2015.05.08

+

r12, 2015-05-08

  • resample: fixed a crash in the AVX2 code path, thanks to HolyWu for spotting it.
-

r11, 2015.05.07

+

r11, 2015-05-07

  • transfer: fixed a bug in the SSE2 code path.
-

r10, 2015.05.06

+

r10, 2015-05-06

  • fmtconv is compatible with the older Vapoursynth versions again until API 3.2 is out.
  • Source code: fixed compilation problems.
-

r9, 2015.05.06

+

r9, 2015-05-06

  • Added the transfer function.
  • resample: Most kernel-related parameters are now arrays, allowing to specify different values for each plane.
  • @@ -1632,25 +1684,25 @@

    V) Changelog

  • bitdepth: SSE2 optimizations for the “fast” algorithm.
-

r8, 2013.11.30

+

r8, 2013-11-30

  • resample: Fixed bugs introduced in r7.
  • Fixed a range conversion issue in “plane copy” modes with source and destination formats are the same.
-

r7, 2013.11.27

+

r7, 2013-11-27

  • 64-bit windows version.
  • resample: A few optimizations for special cases.
  • resample: fixed the coefficients used in integer resizing, whose sum was sometimes off by a few units.
-

r6, 2013.08.24

+

r6, 2013-08-24

  • matrix: single-plane output now works correctly.
-

r5, 2013.08.18

+

r5, 2013-08-18

  • Added 12-bit support for all the functions.
  • Added matrix2020cl to convert between linear RGB and Y’Cb’Cr’ colorspaces using the BT.2020 constant luminance matrix.
  • @@ -1660,7 +1712,7 @@

    V) Changelog

  • resample: added SSE2 integer calculations for slight speed improvement. Activated by default, use flt=1 to compute everything in float (previous operating mode).
-

r4, 2012.12.09

+

r4, 2012-12-09

  • Added a documentation.
  • Filters now write some frame properties when known.
  • @@ -1677,7 +1729,7 @@

    V) Changelog

  • Added nativetostack16.
-

r3, 2012.11.23

+

r3, 2012-11-23

  • bitdepth: changed the bitdepth parameter to bits.
  • bitdepth: added SSE2 optimizations for upconversions.
  • @@ -1686,7 +1738,7 @@

    V) Changelog

  • resample: fixed the planes parameter previously interpreted as 0 (black or green screen).
-

r2, 2012.11.18

+

r2, 2012-11-18

  • bitdepth: implemented fast dither mode (but not in SSE2 yet).
  • bitdepth: optimized float-to-integer path.
  • @@ -1696,7 +1748,7 @@

    V) Changelog

  • resample: fixed white/magenta screen with 8-bit input and float output.
-

r1, 2012.11.16

+

r1, 2012-11-16

  • Initial release.
diff --git a/src/AvstpWrapper.cpp b/src/AvstpWrapper.cpp index b4f7d09..c7ced4c 100644 --- a/src/AvstpWrapper.cpp +++ b/src/AvstpWrapper.cpp @@ -22,6 +22,11 @@ To Public License, Version 2, as published by Sam Hocevar. See +// Define this macro to output error messages +#undef AvstpWrapper_DEBUG_VERBOSE + + + /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ #if defined (_MSC_VER) @@ -159,10 +164,12 @@ AvstpWrapper::AvstpWrapper () #if defined (_MSC_VER) && defined (USE_AVSTP) if (_dll_hnd == 0) { +#if defined (AvstpWrapper_DEBUG_VERBOSE) ::OutputDebugStringW ( L"AvstpWrapper: cannot find avstp.dll." L"Usage restricted to single threading.\n" ); +#endif // throw std::runtime_error ("Cannot find avstp.dll."); #endif assign_fallback (); diff --git a/src/VapourSynth.h b/src/VapourSynth.h index fec53ad..b4e7ff0 100644 --- a/src/VapourSynth.h +++ b/src/VapourSynth.h @@ -461,10 +461,6 @@ typedef const VSFrameRef *(VS_CC *VSFilterGetFrame)(int n, int activationReason, -typedef int (VS_CC *VSGetOutputIndex)(VSFrameContext *frameCtx); - - - /* ============================================================================== Name: *VSFilterFree diff --git a/src/conc/AioAdd.h b/src/conc/AioAdd.h index 45fa578..ad59ef9 100644 --- a/src/conc/AioAdd.h +++ b/src/conc/AioAdd.h @@ -46,7 +46,7 @@ class AioAdd AioAdd (T operand); virtual ~AioAdd () = default; - inline T operator () (T old_val) const; + inline T operator () (T old_val) const noexcept; @@ -70,7 +70,9 @@ class AioAdd AioAdd () = delete; AioAdd (const AioAdd &other) = delete; + AioAdd (const AioAdd &&other) = delete; AioAdd & operator = (const AioAdd &other) = delete; + AioAdd & operator = (const AioAdd &&other) = delete; bool operator == (const AioAdd &other) const = delete; bool operator != (const AioAdd &other) const = delete; diff --git a/src/conc/AioAdd.hpp b/src/conc/AioAdd.hpp index 37be677..289b15e 100644 --- a/src/conc/AioAdd.hpp +++ b/src/conc/AioAdd.hpp @@ -43,9 +43,9 @@ AioAdd 
::AioAdd (T operand) template -T AioAdd ::operator () (T old_val) const +T AioAdd ::operator () (T old_val) const noexcept { - return (old_val + _operand); + return old_val + _operand; } diff --git a/src/conc/AioMax.h b/src/conc/AioMax.h index 850a55e..054b6ff 100644 --- a/src/conc/AioMax.h +++ b/src/conc/AioMax.h @@ -44,9 +44,8 @@ class AioMax explicit inline AioMax (T operand); - virtual ~AioMax () = default; - inline T operator () (T old_val) const; + inline T operator () (T old_val) const noexcept; @@ -70,7 +69,9 @@ class AioMax AioMax () = delete; AioMax (const AioMax &other) = delete; + AioMax (const AioMax &&other) = delete; AioMax & operator = (const AioMax &other) = delete; + AioMax & operator = (const AioMax &&other) = delete; bool operator == (const AioMax &other) const = delete; bool operator != (const AioMax &other) const = delete; diff --git a/src/conc/AioMax.hpp b/src/conc/AioMax.hpp index 1ac6a29..1ea8ddf 100644 --- a/src/conc/AioMax.hpp +++ b/src/conc/AioMax.hpp @@ -45,9 +45,9 @@ AioMax ::AioMax (T operand) template -T AioMax ::operator () (T old_val) const +T AioMax ::operator () (T old_val) const noexcept { - return (std::max (old_val, _operand)); + return std::max (old_val, _operand); } diff --git a/src/conc/AioSub.h b/src/conc/AioSub.h index cce1528..64e5ed3 100644 --- a/src/conc/AioSub.h +++ b/src/conc/AioSub.h @@ -47,9 +47,8 @@ class AioSub explicit inline AioSub (T operand); - virtual ~AioSub () = default; - inline T operator () (T old_val) const; + inline T operator () (T old_val) const noexcept; @@ -73,7 +72,9 @@ class AioSub AioSub () = delete; AioSub (const AioSub &other) = delete; + AioSub (const AioSub &&other) = delete; AioSub & operator = (const AioSub &other) = delete; + AioSub & operator = (const AioSub &&other) = delete; bool operator == (const AioSub &other) const = delete; bool operator != (const AioSub &other) const = delete; diff --git a/src/conc/AioSub.hpp b/src/conc/AioSub.hpp index 2208095..ebf99d8 100644 --- 
a/src/conc/AioSub.hpp +++ b/src/conc/AioSub.hpp @@ -43,9 +43,9 @@ AioSub ::AioSub (T operand) template -T AioSub ::operator () (T old_val) const +T AioSub ::operator () (T old_val) const noexcept { - return (old_val - _operand); + return old_val - _operand; } diff --git a/src/conc/AtomicInt.h b/src/conc/AtomicInt.h index 2117805..db88c17 100644 --- a/src/conc/AtomicInt.h +++ b/src/conc/AtomicInt.h @@ -41,6 +41,8 @@ To Public License, Version 2, as published by Sam Hocevar. See #include "conc/def.h" +#include + #include #if (conc_ARCHI == conc_ARCHI_X86) @@ -59,6 +61,14 @@ namespace conc template class AtomicInt { + static_assert ( + ( std::is_trivially_copyable ::value + && std::is_copy_constructible ::value + && std::is_move_constructible ::value + && std::is_copy_assignable ::value + && std::is_move_assignable ::value), + "Requirements on T" + ); /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ @@ -66,30 +76,30 @@ class AtomicInt typedef T DataType; - inline AtomicInt (); + inline AtomicInt () noexcept; inline explicit - AtomicInt (T val); - inline AtomicInt (const AtomicInt &other); + AtomicInt (T val) noexcept; + inline AtomicInt (const AtomicInt &other) noexcept; inline AtomicInt & - operator = (T other); + operator = (T other) noexcept; - inline operator T () const; + inline operator T () const noexcept; - inline T swap (T other); - inline T cas (T other, T comp); + inline T swap (T other) noexcept; + inline T cas (T other, T comp) noexcept; // Beware while using the result of these operators, modification and // read is not atomic. Use directly the AtomicIntOp instead. 
inline AtomicInt & - operator += (const T &other); + operator += (const T &other) noexcept; inline AtomicInt & - operator -= (const T &other); + operator -= (const T &other) noexcept; inline AtomicInt & - operator ++ (); - inline T operator ++ (int); + operator ++ () noexcept; + inline T operator ++ (int) noexcept; inline AtomicInt & - operator -- (); - inline T operator -- (int); + operator -- () noexcept; + inline T operator -- (int) noexcept; @@ -105,13 +115,14 @@ class AtomicInt #if (conc_ARCHI == conc_ARCHI_X86) - enum { SZ = sizeof (T) }; - enum { SL2 = (SZ > 16) ? -1 - : ((SZ > 8) ? 4 - : ((SZ > 4) ? 3 - : ((SZ > 2) ? 2 - : ((SZ > 1) ? 1 - : 0)))) }; + static constexpr int SZ = int (sizeof (T)); + static constexpr int SL2 = + (SZ > 16) ? -1 + : ((SZ > 8) ? 4 + : ((SZ > 4) ? 3 + : ((SZ > 2) ? 2 + : ((SZ > 1) ? 1 + : 0)))); typedef AtomicMem StoredTypeWrapper; typedef typename StoredTypeWrapper::DataType StoredType; diff --git a/src/conc/AtomicInt.hpp b/src/conc/AtomicInt.hpp index 490a4a7..a5f7e43 100644 --- a/src/conc/AtomicInt.hpp +++ b/src/conc/AtomicInt.hpp @@ -52,7 +52,7 @@ namespace conc template -AtomicInt ::AtomicInt () +AtomicInt ::AtomicInt () noexcept : _val () { #if (conc_ARCHI == conc_ARCHI_X86) @@ -63,7 +63,7 @@ AtomicInt ::AtomicInt () template -AtomicInt ::AtomicInt (T val) +AtomicInt ::AtomicInt (T val) noexcept : _val (val) { #if (conc_ARCHI == conc_ARCHI_X86) @@ -74,19 +74,18 @@ AtomicInt ::AtomicInt (T val) template -AtomicInt ::AtomicInt (const AtomicInt &other) +AtomicInt ::AtomicInt (const AtomicInt &other) noexcept : _val (T (other)) { #if (conc_ARCHI == conc_ARCHI_X86) assert (is_ptr_aligned_nz ((const void *) (&_val), sizeof (_val))); #endif // conc_ARCHI - assert (&other != 0); } template -AtomicInt & AtomicInt ::operator = (T other) +AtomicInt & AtomicInt ::operator = (T other) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) StoredTypeWrapper::swap (_val, other); @@ -100,7 +99,7 @@ AtomicInt & AtomicInt ::operator = (T other) 
template -AtomicInt ::operator T () const +AtomicInt ::operator T () const noexcept { #if (conc_ARCHI == conc_ARCHI_X86) return (T (_val)); @@ -112,7 +111,7 @@ AtomicInt ::operator T () const template -T AtomicInt ::swap (T other) +T AtomicInt ::swap (T other) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) return (T (StoredTypeWrapper::swap (_val, other))); @@ -124,12 +123,14 @@ T AtomicInt ::swap (T other) template -T AtomicInt ::cas (T other, T comp) +T AtomicInt ::cas (T other, T comp) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) return (T (StoredTypeWrapper::cas (_val, other, comp))); #else // conc_ARCHI - _val.compare_exchange_weak (comp, other); + // Some algorithms do something specific upon failure, so we need to + // use the strong version. + _val.compare_exchange_strong (comp, other); return (comp); #endif // conc_ARCHI } @@ -137,7 +138,7 @@ T AtomicInt ::cas (T other, T comp) template -AtomicInt & AtomicInt ::operator += (const T &other) +AtomicInt & AtomicInt ::operator += (const T &other) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) AioAdd ftor (other); @@ -152,7 +153,7 @@ AtomicInt & AtomicInt ::operator += (const T &other) template -AtomicInt & AtomicInt ::operator -= (const T &other) +AtomicInt & AtomicInt ::operator -= (const T &other) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) AioSub ftor (other); @@ -167,7 +168,7 @@ AtomicInt & AtomicInt ::operator -= (const T &other) template -AtomicInt & AtomicInt ::operator ++ () +AtomicInt & AtomicInt ::operator ++ () noexcept { #if (conc_ARCHI == conc_ARCHI_X86) return ((*this) += 1); @@ -180,7 +181,7 @@ AtomicInt & AtomicInt ::operator ++ () template -T AtomicInt ::operator ++ (int) +T AtomicInt ::operator ++ (int) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) const T prev = _val; @@ -194,7 +195,7 @@ T AtomicInt ::operator ++ (int) template -AtomicInt & AtomicInt ::operator -- () +AtomicInt & AtomicInt ::operator -- () noexcept { #if (conc_ARCHI == conc_ARCHI_X86) return ((*this) -= 1); @@ -207,7 +208,7 
@@ AtomicInt & AtomicInt ::operator -- () template -T AtomicInt ::operator -- (int) +T AtomicInt ::operator -- (int) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) const T prev = _val; diff --git a/src/conc/AtomicIntOp.h b/src/conc/AtomicIntOp.h index b17811b..93f23ed 100644 --- a/src/conc/AtomicIntOp.h +++ b/src/conc/AtomicIntOp.h @@ -61,16 +61,16 @@ class AtomicIntOp template static inline void - exec (AtomicInt &atom, F &ftor); + exec (AtomicInt &atom, F &ftor) noexcept; template static inline T - exec_old (AtomicInt &atom, F &ftor); + exec_old (AtomicInt &atom, F &ftor) noexcept; template static inline T - exec_new (AtomicInt &atom, F &ftor); + exec_new (AtomicInt &atom, F &ftor) noexcept; template static inline void - exec_both (AtomicInt &atom, F &ftor, T &val_old, T &val_new); + exec_both (AtomicInt &atom, F &ftor, T &val_old, T &val_new) noexcept; diff --git a/src/conc/AtomicIntOp.hpp b/src/conc/AtomicIntOp.hpp index bb3030a..673531a 100644 --- a/src/conc/AtomicIntOp.hpp +++ b/src/conc/AtomicIntOp.hpp @@ -34,7 +34,7 @@ namespace conc template -void AtomicIntOp::exec (AtomicInt &atom, F &ftor) +void AtomicIntOp::exec (AtomicInt &atom, F &ftor) noexcept { T val_new; T val_old; @@ -44,7 +44,7 @@ void AtomicIntOp::exec (AtomicInt &atom, F &ftor) template -T AtomicIntOp::exec_old (AtomicInt &atom, F &ftor) +T AtomicIntOp::exec_old (AtomicInt &atom, F &ftor) noexcept { T val_new; T val_old; @@ -56,7 +56,7 @@ T AtomicIntOp::exec_old (AtomicInt &atom, F &ftor) template -T AtomicIntOp::exec_new (AtomicInt &atom, F &ftor) +T AtomicIntOp::exec_new (AtomicInt &atom, F &ftor) noexcept { T val_new; T val_old; @@ -68,7 +68,7 @@ T AtomicIntOp::exec_new (AtomicInt &atom, F &ftor) template -void AtomicIntOp::exec_both (AtomicInt &atom, F &ftor, T &val_old, T &val_new) +void AtomicIntOp::exec_both (AtomicInt &atom, F &ftor, T &val_old, T &val_new) noexcept { T val_cur; do diff --git a/src/conc/AtomicMem.h b/src/conc/AtomicMem.h index 2975666..ad985ec 100644 --- 
a/src/conc/AtomicMem.h +++ b/src/conc/AtomicMem.h @@ -53,9 +53,9 @@ class AtomicMem conc_TYPEDEF_ALIGN (4, DataType, DataTypeAlign); conc_FORCEINLINE static DataType - swap (volatile DataType &dest, DataType excg); + swap (volatile DataType &dest, DataType excg) noexcept; conc_FORCEINLINE static DataType - cas (volatile DataType &dest, DataType excg, DataType comp); + cas (volatile DataType &dest, DataType excg, DataType comp) noexcept; }; // class AtomicMem @@ -68,9 +68,9 @@ class AtomicMem <3> conc_TYPEDEF_ALIGN (8, DataType, DataTypeAlign); conc_FORCEINLINE static DataType - swap (volatile DataType &dest, DataType excg); + swap (volatile DataType &dest, DataType excg) noexcept; conc_FORCEINLINE static DataType - cas (volatile DataType &dest, DataType excg, DataType comp); + cas (volatile DataType &dest, DataType excg, DataType comp) noexcept; }; // class AtomicMem <3> @@ -85,9 +85,9 @@ class AtomicMem <4> conc_TYPEDEF_ALIGN (16, DataType, DataTypeAlign); conc_FORCEINLINE static DataType - swap (volatile DataType &dest, DataType excg); + swap (volatile DataType &dest, DataType excg) noexcept; conc_FORCEINLINE static DataType - cas (volatile DataType &dest, DataType excg, DataType comp); + cas (volatile DataType &dest, DataType excg, DataType comp) noexcept; }; // class AtomicMem <4> #endif // conc_HAS_CAS_128 diff --git a/src/conc/AtomicMem.hpp b/src/conc/AtomicMem.hpp index e639a6b..dbdfa24 100644 --- a/src/conc/AtomicMem.hpp +++ b/src/conc/AtomicMem.hpp @@ -36,7 +36,7 @@ namespace conc template -typename AtomicMem ::DataType AtomicMem ::swap (volatile DataType &dest, DataType excg) +typename AtomicMem ::DataType AtomicMem ::swap (volatile DataType &dest, DataType excg) noexcept { static_assert ((SL2 >= 0 && SL2 <= 2), ""); @@ -46,7 +46,7 @@ typename AtomicMem ::DataType AtomicMem ::swap (volatile DataType &des template -typename AtomicMem ::DataType AtomicMem ::cas (volatile DataType &dest, DataType excg, DataType comp) +typename AtomicMem ::DataType AtomicMem 
::cas (volatile DataType &dest, DataType excg, DataType comp) noexcept { static_assert ((SL2 >= 0 && SL2 <= 2), ""); @@ -55,14 +55,14 @@ typename AtomicMem ::DataType AtomicMem ::cas (volatile DataType &dest -AtomicMem <3>::DataType AtomicMem <3>::swap (volatile DataType &dest, DataType excg) +AtomicMem <3>::DataType AtomicMem <3>::swap (volatile DataType &dest, DataType excg) noexcept { return (Interlocked::swap (dest, excg)); } -AtomicMem <3>::DataType AtomicMem <3>::cas (volatile DataType &dest, DataType excg, DataType comp) +AtomicMem <3>::DataType AtomicMem <3>::cas (volatile DataType &dest, DataType excg, DataType comp) noexcept { return (Interlocked::cas (dest, excg, comp)); } @@ -73,7 +73,7 @@ AtomicMem <3>::DataType AtomicMem <3>::cas (volatile DataType &dest, DataType ex -AtomicMem <4>::DataType AtomicMem <4>::swap (volatile DataType &dest, DataType excg) +AtomicMem <4>::DataType AtomicMem <4>::swap (volatile DataType &dest, DataType excg) noexcept { Interlocked::Data128 old; @@ -90,7 +90,7 @@ AtomicMem <4>::DataType AtomicMem <4>::swap (volatile DataType &dest, DataType e -AtomicMem <4>::DataType AtomicMem <4>::cas (volatile DataType &dest, DataType excg, DataType comp) +AtomicMem <4>::DataType AtomicMem <4>::cas (volatile DataType &dest, DataType excg, DataType comp) noexcept { Interlocked::Data128 old; diff --git a/src/conc/AtomicPtr.h b/src/conc/AtomicPtr.h index 293431a..85bdca8 100644 --- a/src/conc/AtomicPtr.h +++ b/src/conc/AtomicPtr.h @@ -48,18 +48,18 @@ class AtomicPtr public: - inline AtomicPtr (); - inline AtomicPtr (T *ptr); + inline AtomicPtr () noexcept; + inline AtomicPtr (T *ptr) noexcept; inline AtomicPtr & - operator = (T *other_ptr); + operator = (T *other_ptr) noexcept; - inline operator T * () const; + inline operator T * () const noexcept; - bool operator == (T *other_ptr) const; - bool operator != (T *other_ptr) const; + bool operator == (T *other_ptr) const noexcept; + bool operator != (T *other_ptr) const noexcept; - inline T * 
swap (T *other_ptr); - inline T * cas (T *other_ptr, T *comp_ptr); + inline T * swap (T *other_ptr) noexcept; + inline T * cas (T *other_ptr, T *comp_ptr) noexcept; @@ -73,7 +73,7 @@ class AtomicPtr private: - inline T * read_ptr () const; + inline T * read_ptr () const noexcept; #if (conc_ARCHI == conc_ARCHI_X86) diff --git a/src/conc/AtomicPtr.hpp b/src/conc/AtomicPtr.hpp index c6f83bc..f20c63e 100644 --- a/src/conc/AtomicPtr.hpp +++ b/src/conc/AtomicPtr.hpp @@ -41,7 +41,7 @@ namespace conc template -AtomicPtr ::AtomicPtr () +AtomicPtr ::AtomicPtr () noexcept : _ptr () { #if (conc_ARCHI == conc_ARCHI_X86) @@ -52,18 +52,23 @@ AtomicPtr ::AtomicPtr () template -AtomicPtr ::AtomicPtr (T *ptr) +AtomicPtr ::AtomicPtr (T *ptr) noexcept +#if (conc_ARCHI == conc_ARCHI_X86) +: _ptr () +#else // conc_ARCHI : _ptr (ptr) +#endif // conc_ARCHI { #if (conc_ARCHI == conc_ARCHI_X86) assert (is_ptr_aligned_nz ((const void *) (&_ptr), sizeof (_ptr))); + _ptr._void_ptr = ptr; #endif // conc_ARCHI } template -AtomicPtr & AtomicPtr ::operator = (T *other_ptr) +AtomicPtr & AtomicPtr ::operator = (T *other_ptr) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) Interlocked::swap (_ptr._void_ptr, other_ptr); @@ -77,7 +82,7 @@ AtomicPtr & AtomicPtr ::operator = (T *other_ptr) template -AtomicPtr ::operator T * () const +AtomicPtr ::operator T * () const noexcept { return (read_ptr ()); } @@ -85,7 +90,7 @@ AtomicPtr ::operator T * () const template -bool AtomicPtr ::operator == (T *other_ptr) const +bool AtomicPtr ::operator == (T *other_ptr) const noexcept { const T * ptr = read_ptr (); @@ -95,7 +100,7 @@ bool AtomicPtr ::operator == (T *other_ptr) const template -bool AtomicPtr ::operator != (T *other_ptr) const +bool AtomicPtr ::operator != (T *other_ptr) const noexcept { return (! 
((*this) == other_ptr)); } @@ -103,7 +108,7 @@ bool AtomicPtr ::operator != (T *other_ptr) const template -T * AtomicPtr ::swap (T *other_ptr) +T * AtomicPtr ::swap (T *other_ptr) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) return (static_cast (Interlocked::swap ( @@ -118,7 +123,7 @@ T * AtomicPtr ::swap (T *other_ptr) template -T * AtomicPtr ::cas (T *other_ptr, T *comp_ptr) +T * AtomicPtr ::cas (T *other_ptr, T *comp_ptr) noexcept { #if (conc_ARCHI == conc_ARCHI_X86) return (static_cast (Interlocked::cas ( @@ -127,7 +132,9 @@ T * AtomicPtr ::cas (T *other_ptr, T *comp_ptr) comp_ptr ))); #else // conc_ARCHI - _ptr.compare_exchange_weak (comp_ptr, other_ptr); + // Some algorithms do something specific upon failure, so we need to + // use the strong version. + _ptr.compare_exchange_strong (comp_ptr, other_ptr); return (comp_ptr); #endif // conc_ARCHI } @@ -143,12 +150,12 @@ T * AtomicPtr ::cas (T *other_ptr, T *comp_ptr) template -T * AtomicPtr ::read_ptr () const +T * AtomicPtr ::read_ptr () const noexcept { #if (conc_ARCHI == conc_ARCHI_X86) - return (static_cast (_ptr._t_ptr)); + return _ptr._t_ptr; #else // conc_ARCHI - return (_ptr.load ()); + return _ptr.load (); #endif // conc_ARCHI } diff --git a/src/conc/AtomicPtrIntPair.h b/src/conc/AtomicPtrIntPair.h index b466ade..293575b 100644 --- a/src/conc/AtomicPtrIntPair.h +++ b/src/conc/AtomicPtrIntPair.h @@ -60,13 +60,13 @@ class AtomicPtrIntPair public: - AtomicPtrIntPair (); + AtomicPtrIntPair () noexcept; - void set (T * ptr, ptrdiff_t val); - void get (T * &ptr, ptrdiff_t &val) const; - T * get_ptr () const; - ptrdiff_t get_val () const; - bool cas2 (T *new_ptr, ptrdiff_t new_val, T *comp_ptr, ptrdiff_t comp_val); + void set (T * ptr, intptr_t val) noexcept; + void get (T * &ptr, intptr_t &val) const noexcept; + T * get_ptr () const noexcept; + intptr_t get_val () const noexcept; + bool cas2 (T *new_ptr, intptr_t new_val, T *comp_ptr, intptr_t comp_val) noexcept; @@ -101,9 +101,8 @@ class AtomicPtrIntPair 
class RealContent { public: - T * volatile _ptr; - volatile intptr_t - _val; + T * _ptr; + intptr_t _val; }; static_assert (sizeof (RealContent) <= sizeof (DataType), ""); @@ -113,7 +112,7 @@ class AtomicPtrIntPair RealContent _content; }; - static void cas_combi (Combi &old, Combi &dest, const Combi &excg, const Combi &comp); + static void cas_combi (Combi &old, Combi &dest, const Combi &excg, const Combi &comp) noexcept; Combi _data; diff --git a/src/conc/AtomicPtrIntPair.hpp b/src/conc/AtomicPtrIntPair.hpp index 850894d..0ea5cfc 100644 --- a/src/conc/AtomicPtrIntPair.hpp +++ b/src/conc/AtomicPtrIntPair.hpp @@ -36,24 +36,24 @@ namespace conc template -AtomicPtrIntPair ::AtomicPtrIntPair () +AtomicPtrIntPair ::AtomicPtrIntPair () noexcept : _data () { - set (0, 0); + set (nullptr, 0); } template -void AtomicPtrIntPair ::set (T * ptr, ptrdiff_t val) +void AtomicPtrIntPair ::set (T * ptr, intptr_t val) noexcept { + const RealContent content = { ptr, val }; + #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS) - _data._content._ptr = ptr; - _data._content._val = val; + _data._content = content; #else // conc_ARCHI - const RealContent content = { ptr, val }; _data.store (content); #endif // conc_ARCHI @@ -62,7 +62,7 @@ void AtomicPtrIntPair ::set (T * ptr, ptrdiff_t val) template -void AtomicPtrIntPair ::get (T * &ptr, ptrdiff_t &val) const +void AtomicPtrIntPair ::get (T * &ptr, intptr_t &val) const noexcept { #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS) @@ -90,7 +90,7 @@ void AtomicPtrIntPair ::get (T * &ptr, ptrdiff_t &val) const template -T * AtomicPtrIntPair ::get_ptr () const +T * AtomicPtrIntPair ::get_ptr () const noexcept { #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS) @@ -108,7 +108,7 @@ T * AtomicPtrIntPair ::get_ptr () const template -ptrdiff_t AtomicPtrIntPair ::get_val () const +intptr_t AtomicPtrIntPair ::get_val () const noexcept { #if (conc_ARCHI == conc_ARCHI_X86 || ! 
conc_USE_STD_ATOMIC_128BITS) @@ -126,7 +126,7 @@ ptrdiff_t AtomicPtrIntPair ::get_val () const template -bool AtomicPtrIntPair ::cas2 (T *new_ptr, ptrdiff_t new_val, T *comp_ptr, ptrdiff_t comp_val) +bool AtomicPtrIntPair ::cas2 (T *new_ptr, intptr_t new_val, T *comp_ptr, intptr_t comp_val) noexcept { #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS) @@ -148,7 +148,9 @@ bool AtomicPtrIntPair ::cas2 (T *new_ptr, ptrdiff_t new_val, T *comp_ptr, ptr const RealContent val = { new_ptr , new_val }; RealContent expected = { comp_ptr, comp_val }; - return (_data.compare_exchange_weak (expected, val)); + // Some algorithms do something specific upon failure, so we need to + // use the strong version. + return (_data.compare_exchange_strong (expected, val)); #endif // conc_ARCHI } @@ -166,7 +168,7 @@ bool AtomicPtrIntPair ::cas2 (T *new_ptr, ptrdiff_t new_val, T *comp_ptr, ptr #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS) template -void AtomicPtrIntPair ::cas_combi (Combi &old, Combi &dest, const Combi &excg, const Combi &comp) +void AtomicPtrIntPair ::cas_combi (Combi &old, Combi &dest, const Combi &excg, const Combi &comp) noexcept { #if (conc_WORD_SIZE == 64) diff --git a/src/conc/CellPool.h b/src/conc/CellPool.h index ce14cb1..1d38cda 100644 --- a/src/conc/CellPool.h +++ b/src/conc/CellPool.h @@ -65,7 +65,7 @@ class CellPool inline CellType * take_cell (bool autogrow_flag = false); - inline void return_cell (CellType &cell); + inline void return_cell (CellType &cell) noexcept; @@ -126,7 +126,9 @@ class CellPool private: CellPool (const CellPool &other) = delete; + CellPool (CellPool &&other) = delete; CellPool & operator = (const CellPool &other) = delete; + CellPool & operator = (CellPool &&other) = delete; bool operator == (const CellPool &other) const = delete; bool operator != (const CellPool &other) const = delete; diff --git a/src/conc/CellPool.hpp b/src/conc/CellPool.hpp index 5bd6152..821b4fe 100644 --- 
a/src/conc/CellPool.hpp +++ b/src/conc/CellPool.hpp @@ -53,7 +53,7 @@ CellPool ::CellPool () for (int zone_index = 0; zone_index < MAX_NBR_ZONES; ++zone_index) { - _m_ptr->_zone_list [zone_index] = 0; + _m_ptr->_zone_list [zone_index] = nullptr; } } @@ -73,13 +73,13 @@ template void CellPool ::clear_all () { #if !defined (NDEBUG) - size_t nbr_total_cells = + const size_t nbr_total_cells = compute_total_size_for_zones (_m_ptr->_nbr_zones); assert (_m_ptr->_nbr_avail_cells == nbr_total_cells); #endif - while (_cell_stack.pop () != 0) + while (_cell_stack.pop () != nullptr) { continue; } @@ -89,10 +89,10 @@ void CellPool ::clear_all () { AtomicPtr & zone_ptr_ref = _m_ptr->_zone_list [zone_index]; CellType * zone_ptr = zone_ptr_ref; - if (zone_ptr != 0) + if (zone_ptr != nullptr) { dealloc_cells (zone_ptr); - zone_ptr_ref = 0; + zone_ptr_ref = nullptr; } } _m_ptr->_nbr_zones = 0; @@ -113,8 +113,8 @@ void CellPool ::expand_to (size_t nbr_cells) while (total_size < nbr_cells && zone_index < MAX_NBR_ZONES) { AtomicPtr & zone_ptr_ref = _m_ptr->_zone_list [zone_index]; - CellType * zone_ptr = zone_ptr_ref; - if (zone_ptr == 0) + const CellType * zone_ptr = zone_ptr_ref; + if (zone_ptr == nullptr) { allocate_zone (cur_size, zone_ptr_ref); } @@ -134,7 +134,7 @@ void CellPool ::expand_to (size_t nbr_cells) template typename CellPool ::CellType * CellPool ::take_cell (bool autogrow_flag) { - CellType * cell_ptr = 0; + CellType * cell_ptr = nullptr; const int nbr_zones = _m_ptr->_nbr_zones; @@ -142,27 +142,32 @@ typename CellPool ::CellType * CellPool ::take_cell (bool autogrow_flag) { cell_ptr = _cell_stack.pop (); - if ((cell_ptr == 0) && autogrow_flag && (nbr_zones < MAX_NBR_ZONES)) + if ( cell_ptr == nullptr + && autogrow_flag + && nbr_zones < MAX_NBR_ZONES) { - const size_t new_size = compute_total_size_for_zones (nbr_zones + 1); + const size_t new_size = + compute_total_size_for_zones (nbr_zones + 1); expand_to (new_size); } } - while ((cell_ptr == 0) && autogrow_flag && 
(nbr_zones < MAX_NBR_ZONES)); + while ( cell_ptr == nullptr + && autogrow_flag + && nbr_zones < MAX_NBR_ZONES); - if (cell_ptr != 0) + if (cell_ptr != nullptr) { -- _m_ptr->_nbr_avail_cells; } - return (cell_ptr); + return cell_ptr; } // Thread-safe template -void CellPool ::return_cell (CellType &cell) +void CellPool ::return_cell (CellType &cell) noexcept { _cell_stack.push (cell); @@ -186,7 +191,7 @@ void CellPool ::allocate_zone (size_t cur_size, AtomicPtr & zone_p CellType * zone_ptr = alloc_cells (cur_size); - if (zone_ptr_ref.cas (zone_ptr, 0) != (CellType *)0) + if (zone_ptr_ref.cas (zone_ptr, nullptr) != static_cast (nullptr)) { // CAS has failed, meaning that another thread is allocating this zone. dealloc_cells (zone_ptr); @@ -235,7 +240,7 @@ size_t CellPool ::compute_total_size_for_zones (int nbr_zones) ++ zone_index; } - return (total_size); + return total_size; } @@ -275,7 +280,7 @@ typename CellPool ::CellType * CellPool ::alloc_cells (size_t n) throw; } - return (cell_ptr); + return cell_ptr; } diff --git a/src/conc/Interlocked.h b/src/conc/Interlocked.h index 1ad2d0a..f7cff4c 100644 --- a/src/conc/Interlocked.h +++ b/src/conc/Interlocked.h @@ -46,14 +46,14 @@ class Interlocked public: static conc_FORCEINLINE int32_t - swap (int32_t volatile &dest, int32_t excg); + swap (int32_t volatile &dest, int32_t excg) noexcept; static conc_FORCEINLINE int32_t - cas (int32_t volatile &dest, int32_t excg, int32_t comp); + cas (int32_t volatile &dest, int32_t excg, int32_t comp) noexcept; static conc_FORCEINLINE int64_t - swap (int64_t volatile &dest, int64_t excg); + swap (int64_t volatile &dest, int64_t excg) noexcept; static conc_FORCEINLINE int64_t - cas (int64_t volatile &dest, int64_t excg, int64_t comp); + cas (int64_t volatile &dest, int64_t excg, int64_t comp) noexcept; #if defined (conc_HAS_CAS_128) @@ -67,9 +67,9 @@ class Interlocked { public: conc_FORCEINLINE bool - operator == (const Data128 & other) const; + operator == (const Data128 & other) 
const noexcept; conc_FORCEINLINE bool - operator != (const Data128 & other) const; + operator != (const Data128 & other) const noexcept; int64_t _data [2]; }; @@ -82,16 +82,16 @@ class Interlocked #endif static conc_FORCEINLINE void - swap (Data128 &old, volatile Data128 &dest, const Data128 &excg); + swap (Data128 &old, volatile Data128 &dest, const Data128 &excg) noexcept; static conc_FORCEINLINE void - cas (Data128 &old, volatile Data128 &dest, const Data128 &excg, const Data128 &comp); + cas (Data128 &old, volatile Data128 &dest, const Data128 &excg, const Data128 &comp) noexcept; #endif static conc_FORCEINLINE void * - swap (void * volatile &dest_ptr, void *excg_ptr); + swap (void * volatile &dest_ptr, void *excg_ptr) noexcept; static conc_FORCEINLINE void * - cas (void * volatile &dest_ptr, void *excg_ptr, void *comp_ptr); + cas (void * volatile &dest_ptr, void *excg_ptr, void *comp_ptr) noexcept; diff --git a/src/conc/Interlocked.hpp b/src/conc/Interlocked.hpp index 957e933..8a14af4 100644 --- a/src/conc/Interlocked.hpp +++ b/src/conc/Interlocked.hpp @@ -47,7 +47,7 @@ namespace conc -int32_t Interlocked::swap (int32_t volatile &dest, int32_t excg) +int32_t Interlocked::swap (int32_t volatile &dest, int32_t excg) noexcept { assert (is_ptr_aligned_nz (&dest)); @@ -80,7 +80,7 @@ int32_t Interlocked::swap (int32_t volatile &dest, int32_t excg) -int32_t Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp) +int32_t Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp) noexcept { assert (is_ptr_aligned_nz (&dest)); @@ -100,6 +100,10 @@ int32_t Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp) ::LONG (comp) )); +#elif defined (__GNUC__) + + return (__sync_val_compare_and_swap (&dest, comp, excg)); + #elif defined (__APPLE__) return (::OSAtomicCompareAndSwap32Barrier ( @@ -108,10 +112,6 @@ int32_t Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp) const_cast (reinterpret_cast (&dest)) ) ? 
comp : excg); -#elif defined (__GNUC__) - - return (__sync_val_compare_and_swap (&dest, comp, excg)); - #else #error Unknown platform @@ -121,7 +121,7 @@ int32_t Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp) -int64_t Interlocked::swap (int64_t volatile &dest, int64_t excg) +int64_t Interlocked::swap (int64_t volatile &dest, int64_t excg) noexcept { assert (is_ptr_aligned_nz (&dest)); @@ -178,7 +178,7 @@ int64_t Interlocked::swap (int64_t volatile &dest, int64_t excg) -int64_t Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp) +int64_t Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp) noexcept { assert (is_ptr_aligned_nz (&dest)); @@ -214,6 +214,10 @@ int64_t Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp) return (old); +#elif defined (__GNUC__) + + return (__sync_val_compare_and_swap (&dest, comp, excg)); + #elif defined (__APPLE__) return (::OSAtomicCompareAndSwap64Barrier ( @@ -222,10 +226,6 @@ int64_t Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp) const_cast (reinterpret_cast (&dest)) ) ? 
comp : excg); -#elif defined (__GNUC__) - - return (__sync_val_compare_and_swap (&dest, comp, excg)); - #else #error Unknown platform @@ -239,7 +239,7 @@ int64_t Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp) -void Interlocked::swap (Data128 &old, volatile Data128 &dest, const Data128 &excg) +void Interlocked::swap (Data128 &old, volatile Data128 &dest, const Data128 &excg) noexcept { assert (is_ptr_aligned_nz (&dest)); @@ -254,7 +254,7 @@ void Interlocked::swap (Data128 &old, volatile Data128 &dest, const Data128 &exc -void Interlocked::cas (Data128 &old, volatile Data128 &dest, const Data128 &excg, const Data128 &comp) +void Interlocked::cas (Data128 &old, volatile Data128 &dest, const Data128 &excg, const Data128 &comp) noexcept { assert (is_ptr_aligned_nz (&dest)); @@ -322,13 +322,13 @@ void Interlocked::cas (Data128 &old, volatile Data128 &dest, const Data128 &excg #if defined (_MSC_VER) -bool Interlocked::Data128::operator == (const Data128 & other) const +bool Interlocked::Data128::operator == (const Data128 & other) const noexcept { return ( _data [0] == other._data [0] && _data [1] == other._data [1]); } -bool Interlocked::Data128::operator != (const Data128 & other) const +bool Interlocked::Data128::operator != (const Data128 & other) const noexcept { return ( _data [0] != other._data [0] || _data [1] != other._data [1]); @@ -345,25 +345,41 @@ bool Interlocked::Data128::operator != (const Data128 & other) const #pragma warning (4 : 4311 4312) #endif -void * Interlocked::swap (void * volatile &dest_ptr, void *excg_ptr) +void * Interlocked::swap (void * volatile &dest_ptr, void *excg_ptr) noexcept { + // We cannot just cast void * to IntPtr and relying on it to match + // either int32_t or int64_t, because it's possible that we have + // typedef long IntPtr; + // typedef long long int64_t; + // on a 64-bit system (i.e. macOS) thus making them incompatible. 
return (reinterpret_cast ( swap ( - *reinterpret_cast (&dest_ptr), - reinterpret_cast (excg_ptr) +#if conc_WORD_SIZE == 32 + *reinterpret_cast (&dest_ptr), + reinterpret_cast (excg_ptr) +#else + *reinterpret_cast (&dest_ptr), + reinterpret_cast (excg_ptr) +#endif ) )); } -void * Interlocked::cas (void * volatile &dest_ptr, void *excg_ptr, void *comp_ptr) +void * Interlocked::cas (void * volatile &dest_ptr, void *excg_ptr, void *comp_ptr) noexcept { return (reinterpret_cast ( cas ( - *reinterpret_cast (&dest_ptr), - reinterpret_cast (excg_ptr), - reinterpret_cast (comp_ptr) +#if conc_WORD_SIZE == 32 + *reinterpret_cast (&dest_ptr), + reinterpret_cast (excg_ptr), + reinterpret_cast (comp_ptr) +#else + *reinterpret_cast (&dest_ptr), + reinterpret_cast (excg_ptr), + reinterpret_cast (comp_ptr) +#endif ) )); } diff --git a/src/conc/LockFreeCell.h b/src/conc/LockFreeCell.h index 2d16236..d3ccb2f 100644 --- a/src/conc/LockFreeCell.h +++ b/src/conc/LockFreeCell.h @@ -54,7 +54,7 @@ class LockFreeCell typedef T ValueType; AtomicPtr > - _next_ptr; + _next_ptr { nullptr }; T _val; diff --git a/src/conc/LockFreeQueue.h b/src/conc/LockFreeQueue.h index f80aef0..bee4d49 100644 --- a/src/conc/LockFreeQueue.h +++ b/src/conc/LockFreeQueue.h @@ -75,8 +75,8 @@ class LockFreeQueue LockFreeQueue (); virtual ~LockFreeQueue () = default; - void enqueue (CellType &cell); - CellType * dequeue (); + void enqueue (CellType &cell) noexcept; + CellType * dequeue () noexcept; @@ -115,8 +115,11 @@ class LockFreeQueue private: LockFreeQueue (const LockFreeQueue &other) = delete; + LockFreeQueue (LockFreeQueue &&other) = delete; LockFreeQueue & operator = (const LockFreeQueue &other) = delete; + LockFreeQueue & + operator = (LockFreeQueue &&other) = delete; bool operator == (const LockFreeQueue &other) const = delete; bool operator != (const LockFreeQueue &other) const = delete; diff --git a/src/conc/LockFreeQueue.hpp b/src/conc/LockFreeQueue.hpp index c84b538..ca8b02a 100644 --- 
a/src/conc/LockFreeQueue.hpp +++ b/src/conc/LockFreeQueue.hpp @@ -39,7 +39,7 @@ template LockFreeQueue ::LockFreeQueue () : _m_ptr () { - _m_ptr->_dummy._next_ptr = 0; + _m_ptr->_dummy._next_ptr = nullptr; _m_ptr->_head.set (&_m_ptr->_dummy, 0); _m_ptr->_tail.set (&_m_ptr->_dummy, 0); } @@ -47,23 +47,24 @@ LockFreeQueue ::LockFreeQueue () template -void LockFreeQueue ::enqueue (CellType &cell) +void LockFreeQueue ::enqueue (CellType &cell) noexcept { - cell._next_ptr = 0; // set the cell next pointer to NULL + cell._next_ptr = nullptr; // set the cell next pointer to NULL - CellType * tail_ptr; - ptrdiff_t icount; + CellType * tail_ptr = nullptr; + intptr_t icount = 0; bool cont_flag = true; do // try until enqueue is done { - icount = _m_ptr->_tail.get_val (); // read the tail modification count - tail_ptr = _m_ptr->_tail.get_ptr (); // read the tail cell + // read the tail modification count + // read the tail cell + _m_ptr->_tail.get (tail_ptr, icount); // try to link the cell to the tail cell - void * old_ptr = tail_ptr->_next_ptr.cas (&cell, 0); + void * old_ptr = tail_ptr->_next_ptr.cas (&cell, nullptr); - if (old_ptr == 0) + if (old_ptr == nullptr) { cont_flag = false; // enqueue is done, exit the loop } @@ -82,32 +83,36 @@ void LockFreeQueue ::enqueue (CellType &cell) // Returns 0 if the queue is empty. 
template -typename LockFreeQueue ::CellType * LockFreeQueue ::dequeue () +typename LockFreeQueue ::CellType * LockFreeQueue ::dequeue () noexcept { - ptrdiff_t ocount; - ptrdiff_t icount; - CellType * head_ptr; - CellType * next_ptr; + constexpr int max_loop = 100; + int loop_cnt = 0; + intptr_t ocount = 0; + intptr_t icount = 0; + CellType * head_ptr = nullptr; + CellType * next_ptr = nullptr; do // try until dequeue is done { - ocount = _m_ptr->_head.get_val (); // read the head modification count + // read the head modification count + // read the head cell + _m_ptr->_head.get (head_ptr, ocount); icount = _m_ptr->_tail.get_val (); // read the tail modification count - head_ptr = _m_ptr->_head.get_ptr (); // read the head cell next_ptr = head_ptr->_next_ptr; // read the next cell - if (ocount == _m_ptr->_head.get_val ()) // ensures that next is a valid pointer to avoid failure when reading next value + const intptr_t ocount_tst = _m_ptr->_head.get_val (); + if (ocount == ocount_tst) // ensures that next is a valid pointer to avoid failure when reading next value { if (head_ptr == _m_ptr->_tail.get_ptr ()) // is queue empty or tail falling behind ? { - if (next_ptr == 0) // is queue empty ? + if (next_ptr == nullptr) // is queue empty ? 
{ - return (0); // queue is empty: return NULL + return nullptr; // queue is empty: return NULL } // tail is pointing to head in a non empty queue, try to set tail to the next cell _m_ptr->_tail.cas2 (next_ptr, icount + 1, head_ptr, icount); } - else if (next_ptr != 0) // if we are not competing on the dummy next + else if (next_ptr != nullptr) // if we are not competing on the dummy next { // try to set tail to the next cell if (_m_ptr->_head.cas2 (next_ptr, ocount + 1, head_ptr, ocount)) @@ -116,6 +121,16 @@ typename LockFreeQueue ::CellType * LockFreeQueue ::dequeue () } } } + + ++ loop_cnt; + if (loop_cnt >= max_loop) + { + // This could indicate that the queue is: + // - corrupted + // - or in heavy contention + assert (false); + return nullptr; + } } while (true); diff --git a/src/conc/LockFreeStack.h b/src/conc/LockFreeStack.h index c3fb1f1..f00a8ae 100644 --- a/src/conc/LockFreeStack.h +++ b/src/conc/LockFreeStack.h @@ -67,8 +67,8 @@ class LockFreeStack LockFreeStack (); virtual ~LockFreeStack () = default; - void push (CellType &cell); - CellType * pop (); + void push (CellType &cell) noexcept; + CellType * pop () noexcept; @@ -92,8 +92,11 @@ class LockFreeStack private: LockFreeStack (const LockFreeStack &other) = delete; + LockFreeStack (LockFreeStack &&other) = delete; LockFreeStack & operator = (const LockFreeStack &other) = delete; + LockFreeStack & + operator = (LockFreeStack &&other) = delete; bool operator == (const LockFreeStack &other) const = delete; bool operator != (const LockFreeStack &other) const = delete; diff --git a/src/conc/LockFreeStack.hpp b/src/conc/LockFreeStack.hpp index 11e0ed3..45eeec9 100644 --- a/src/conc/LockFreeStack.hpp +++ b/src/conc/LockFreeStack.hpp @@ -37,16 +37,16 @@ template LockFreeStack ::LockFreeStack () : _head_ptr_ptr () { - _head_ptr_ptr->set (0, 0); + _head_ptr_ptr->set (nullptr, 0); } template -void LockFreeStack ::push (CellType &cell) +void LockFreeStack ::push (CellType &cell) noexcept { - CellType * 
head_ptr; - ptrdiff_t count; + CellType * head_ptr = nullptr; + intptr_t count = 0; do { head_ptr = _head_ptr_ptr->get_ptr (); @@ -60,28 +60,28 @@ void LockFreeStack ::push (CellType &cell) // Returns 0 if the stack is empty. template -typename LockFreeStack ::CellType * LockFreeStack ::pop () +typename LockFreeStack ::CellType * LockFreeStack ::pop () noexcept { - CellType * cell_ptr; + CellType * cell_ptr = nullptr; bool cont_flag = true; do { cell_ptr = _head_ptr_ptr->get_ptr (); - if (cell_ptr == 0) + if (cell_ptr == nullptr) { - cont_flag = false; // Empty stack. + cont_flag = false; // Empty stack. } else { - const ptrdiff_t count = _head_ptr_ptr->get_val (); - if (cell_ptr != 0) + const intptr_t count = _head_ptr_ptr->get_val (); + if (cell_ptr != nullptr) { CellType * next_ptr = cell_ptr->_next_ptr; if (_head_ptr_ptr->cas2 (next_ptr, count + 1, cell_ptr, count)) { - cell_ptr->_next_ptr = 0; + cell_ptr->_next_ptr = nullptr; cont_flag = false; } } diff --git a/src/conc/ObjPool.h b/src/conc/ObjPool.h index 2ce4056..81b14d3 100644 --- a/src/conc/ObjPool.h +++ b/src/conc/ObjPool.h @@ -67,9 +67,9 @@ class ObjPool ObjPool (); virtual ~ObjPool (); - void set_factory (Factory &fact); - Factory & use_factory () const; - void cleanup (); + void set_factory (Factory &fact) noexcept; + Factory & use_factory () const noexcept; + void cleanup () noexcept; T * take_obj (); void return_obj (T &obj); @@ -90,7 +90,7 @@ class ObjPool typedef typename PtrPool::CellType PtrCell; typedef LockFreeStack PtrStack; - int delete_obj_stack (PtrStack &ptr_stack, bool destroy_flag); + int delete_obj_stack (PtrStack &ptr_stack, bool destroy_flag) noexcept; Factory * _factory_ptr = 0; // 0 = not set PtrStack _stack_free; diff --git a/src/conc/ObjPool.hpp b/src/conc/ObjPool.hpp index d808aa6..2504104 100644 --- a/src/conc/ObjPool.hpp +++ b/src/conc/ObjPool.hpp @@ -86,7 +86,7 @@ Throws: Nothing */ template -void ObjPool ::set_factory (Factory &fact) +void ObjPool ::set_factory (Factory 
&fact) noexcept { _factory_ptr = &fact; } @@ -94,7 +94,7 @@ void ObjPool ::set_factory (Factory &fact) template -typename ObjPool ::Factory & ObjPool ::use_factory () const +typename ObjPool ::Factory & ObjPool ::use_factory () const noexcept { assert (_factory_ptr != 0); @@ -103,6 +103,36 @@ typename ObjPool ::Factory & ObjPool ::use_factory () const +/* +============================================================================== +Name: cleanup +Description: + Preliminary deletion of the pool content, also used during the pool + destruction. + Do not call it if some objects are still out of the pool! + Use with care. +Throws: Nothing +============================================================================== +*/ + +template +void ObjPool ::cleanup () noexcept +{ +#if ! defined (NDEBUG) + const int count_free = +#endif + delete_obj_stack (_stack_free, false); +#if ! defined (NDEBUG) + const int count_all = +#endif + delete_obj_stack (_stack_all, true); + + // False would mean that some cells are still out, in use. + assert (count_free == count_all); +} + + + /* ============================================================================== Name: take_obj @@ -113,7 +143,7 @@ Name: take_obj Returns: A pointer on the object, or 0 if no object is available and cannot be created for any reason. -Throws: Nothing +Throws: Depends on the factory ============================================================================== */ @@ -175,7 +205,7 @@ Name: return_obj - Do not return an object you didn't get from take_obj() Input parameters: - obj: Reference on the returned object. -Throws: Nothing +Throws: std::runtime_error ============================================================================== */ @@ -205,36 +235,6 @@ void ObjPool ::return_obj (T &obj) -/* -============================================================================== -Name: cleanup -Description: - Preliminary deletion of the pool content, also used during the pool - destruction. 
- Do not call it if some objects are still out of the pool! - Use with care. -Throws: Nothing -============================================================================== -*/ - -template -void ObjPool ::cleanup () -{ -#if ! defined (NDEBUG) - const int count_free = -#endif - delete_obj_stack (_stack_free, false); -#if ! defined (NDEBUG) - const int count_all = -#endif - delete_obj_stack (_stack_all, true); - - // False would mean that some cells are still out, in use. - assert (count_free == count_all); -} - - - /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ @@ -244,7 +244,7 @@ void ObjPool ::cleanup () template -int ObjPool ::delete_obj_stack (PtrStack &ptr_stack, bool destroy_flag) +int ObjPool ::delete_obj_stack (PtrStack &ptr_stack, bool destroy_flag) noexcept { typename PtrStack::CellType * cell_ptr = 0; int count = 0; diff --git a/src/conc/fnc.h b/src/conc/fnc.h index 83934bd..a51d9ab 100644 --- a/src/conc/fnc.h +++ b/src/conc/fnc.h @@ -35,11 +35,11 @@ namespace conc template -bool is_ptr_aligned (const T *ptr, int align); +bool is_ptr_aligned (const T *ptr, int align) noexcept; template -bool is_ptr_aligned_nz (const T *ptr, int align); +bool is_ptr_aligned_nz (const T *ptr, int align) noexcept; template -bool is_ptr_aligned_nz (const T *ptr); +bool is_ptr_aligned_nz (const T *ptr) noexcept; diff --git a/src/conc/fnc.hpp b/src/conc/fnc.hpp index 573e356..b280e9b 100644 --- a/src/conc/fnc.hpp +++ b/src/conc/fnc.hpp @@ -23,7 +23,7 @@ To Public License, Version 2, as published by Sam Hocevar. 
See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ #include -#include +#include @@ -33,25 +33,25 @@ namespace conc template -bool is_ptr_aligned (const T *ptr, int align) +bool is_ptr_aligned (const T *ptr, int align) noexcept { assert (align > 0); assert ((align & -align) == align); - return ((reinterpret_cast (ptr) & (align - 1)) == 0); + return ((reinterpret_cast (ptr) & (align - 1)) == 0); } template -bool is_ptr_aligned_nz (const T *ptr, int align) +bool is_ptr_aligned_nz (const T *ptr, int align) noexcept { assert (align > 0); assert ((align & -align) == align); - return (ptr != 0 && is_ptr_aligned (ptr, align)); + return (ptr != nullptr && is_ptr_aligned (ptr, align)); } template -bool is_ptr_aligned_nz (const T *ptr) +bool is_ptr_aligned_nz (const T *ptr) noexcept { return (is_ptr_aligned_nz (ptr, sizeof (T))); } diff --git a/src/ffft/DynArray.h b/src/ffft/DynArray.h deleted file mode 100644 index ae36415..0000000 --- a/src/ffft/DynArray.h +++ /dev/null @@ -1,101 +0,0 @@ -/***************************************************************************** - - DynArray.h - By Laurent de Soras - ---- Legal stuff --- - -This program is free software. It comes without any warranty, to -the extent permitted by applicable law. You can redistribute it -and/or modify it under the terms of the Do What The Fuck You Want -To Public License, Version 2, as published by Sam Hocevar. See -http://sam.zoy.org/wtfpl/COPYING for more details. - -*Tab=3***********************************************************************/ - - - -#if ! defined (ffft_DynArray_HEADER_INCLUDED) -#define ffft_DynArray_HEADER_INCLUDED - -#if defined (_MSC_VER) - #pragma once - #pragma warning (4 : 4250) // "Inherits via dominance." 
-#endif - - - -/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - - - -namespace ffft -{ - - - -template -class DynArray -{ - -/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - -public: - - typedef T DataType; - - DynArray (); - explicit DynArray (long sz); - ~DynArray (); - - inline long size () const; - inline void resize (long sz); - - inline const DataType & - operator [] (long pos) const; - inline DataType & - operator [] (long pos); - - - -/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - -protected: - - - -/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - -private: - - DataType * _data_ptr; - long _len; - - - -/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - -private: - - DynArray (const DynArray &other); - DynArray & operator = (const DynArray &other); - bool operator == (const DynArray &other); - bool operator != (const DynArray &other); - -}; // class DynArray - - - -} // namespace ffft - - - -#include "ffft/DynArray.hpp" - - - -#endif // ffft_DynArray_HEADER_INCLUDED - - - -/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/ffft/DynArray.hpp b/src/ffft/DynArray.hpp deleted file mode 100644 index 42d57ba..0000000 --- a/src/ffft/DynArray.hpp +++ /dev/null @@ -1,144 +0,0 @@ -/***************************************************************************** - - DynArray.hpp - By Laurent de Soras - ---- Legal stuff --- - -This program is free software. It comes without any warranty, to -the extent permitted by applicable law. You can redistribute it -and/or modify it under the terms of the Do What The Fuck You Want -To Public License, Version 2, as published by Sam Hocevar. See -http://sam.zoy.org/wtfpl/COPYING for more details. 
- -*Tab=3***********************************************************************/ - - - -#if defined (ffft_DynArray_CURRENT_CODEHEADER) - #error Recursive inclusion of DynArray code header. -#endif -#define ffft_DynArray_CURRENT_CODEHEADER - -#if ! defined (ffft_DynArray_CODEHEADER_INCLUDED) -#define ffft_DynArray_CODEHEADER_INCLUDED - - - -/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - -#include - - - -namespace ffft -{ - - - -/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - - - -template -DynArray ::DynArray () -: _data_ptr (0) -, _len (0) -{ - // Nothing -} - - - -template -DynArray ::DynArray (long sz) -: _data_ptr (0) -, _len (0) -{ - assert (sz >= 0); - if (sz > 0) - { - _data_ptr = new DataType [sz]; - _len = sz; - } -} - - - -template -DynArray ::~DynArray () -{ - delete [] _data_ptr; - _data_ptr = 0; - _len = 0; -} - - - -template -long DynArray ::size () const -{ - return (_len); -} - - - -template -void DynArray ::resize (long sz) -{ - assert (sz >= 0); - if (sz > 0) - { - DataType * old_data_ptr = _data_ptr; - DataType * tmp_data_ptr = new DataType [sz]; - - _data_ptr = tmp_data_ptr; - _len = sz; - - delete [] old_data_ptr; - } -} - - - -template -const typename DynArray ::DataType & DynArray ::operator [] (long pos) const -{ - assert (pos >= 0); - assert (pos < _len); - - return (_data_ptr [pos]); -} - - - -template -typename DynArray ::DataType & DynArray ::operator [] (long pos) -{ - assert (pos >= 0); - assert (pos < _len); - - return (_data_ptr [pos]); -} - - - -/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - - - -/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - - - -} // namespace ffft - - - -#endif // ffft_DynArray_CODEHEADER_INCLUDED - -#undef ffft_DynArray_CURRENT_CODEHEADER - - - -/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/ffft/FFTReal.h b/src/ffft/FFTReal.h 
index d0ca448..b799ebe 100644 --- a/src/ffft/FFTReal.h +++ b/src/ffft/FFTReal.h @@ -27,9 +27,10 @@ To Public License, Version 2, as published by Sam Hocevar. See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ -#include "ffft/def.h" -#include "ffft/DynArray.h" -#include "ffft/OscSinCos.h" +#include "ffft/def.h" +#include "ffft/OscSinCos.h" + +#include @@ -46,18 +47,25 @@ class FFTReal public: - enum { MAX_BIT_DEPTH = 30 }; // So length can be represented as long int + // So length can be represented as long int + static constexpr int MAX_BIT_DEPTH = 30; typedef DT DataType; - explicit FFTReal (long length); - virtual ~FFTReal () {} + explicit FFTReal (long length); + FFTReal (const FFTReal &other) = default; + FFTReal (FFTReal &&other) = default; + + virtual ~FFTReal () = default; + + FFTReal & operator = (const FFTReal &other) = default; + FFTReal & operator = (FFTReal &&other) = default; - long get_length () const; - void do_fft (DataType f [], const DataType x []) const; - void do_ifft (const DataType f [], DataType x []) const; - void rescale (DataType x []) const; - DataType * use_buffer () const; + long get_length () const noexcept; + void do_fft (DataType f [], const DataType x []) const noexcept; + void do_ifft (const DataType f [], DataType x []) const noexcept; + void rescale (DataType x []) const noexcept; + DataType * use_buffer () const noexcept; @@ -72,45 +80,45 @@ class FFTReal private: // Over this bit depth, we use direct calculation for sin/cos - enum { TRIGO_BD_LIMIT = 12 }; + static constexpr int TRIGO_BD_LIMIT = 12; typedef OscSinCos OscType; - void init_br_lut (); - void init_trigo_lut (); - void init_trigo_osc (); + void init_br_lut (); + void init_trigo_lut (); + void init_trigo_osc (); ffft_FORCEINLINE const long * - get_br_ptr () const; + get_br_ptr () const noexcept; ffft_FORCEINLINE const DataType * - get_trigo_ptr (int level) const; + get_trigo_ptr (int level) const noexcept; ffft_FORCEINLINE long - 
get_trigo_level_index (int level) const; - - inline void compute_fft_general (DataType f [], const DataType x []) const; - inline void compute_direct_pass_1_2 (DataType df [], const DataType x []) const; - inline void compute_direct_pass_3 (DataType df [], const DataType sf []) const; - inline void compute_direct_pass_n (DataType df [], const DataType sf [], int pass) const; - inline void compute_direct_pass_n_lut (DataType df [], const DataType sf [], int pass) const; - inline void compute_direct_pass_n_osc (DataType df [], const DataType sf [], int pass) const; - - inline void compute_ifft_general (const DataType f [], DataType x []) const; - inline void compute_inverse_pass_n (DataType df [], const DataType sf [], int pass) const; - inline void compute_inverse_pass_n_osc (DataType df [], const DataType sf [], int pass) const; - inline void compute_inverse_pass_n_lut (DataType df [], const DataType sf [], int pass) const; - inline void compute_inverse_pass_3 (DataType df [], const DataType sf []) const; - inline void compute_inverse_pass_1_2 (DataType x [], const DataType sf []) const; - - const long _length; - const int _nbr_bits; - DynArray - _br_lut; - DynArray - _trigo_lut; - mutable DynArray - _buffer; - mutable DynArray - _trigo_osc; + get_trigo_level_index (int level) const noexcept; + + inline void compute_fft_general (DataType f [], const DataType x []) const noexcept; + inline void compute_direct_pass_1_2 (DataType df [], const DataType x []) const noexcept; + inline void compute_direct_pass_3 (DataType df [], const DataType sf []) const noexcept; + inline void compute_direct_pass_n (DataType df [], const DataType sf [], int pass) const noexcept; + inline void compute_direct_pass_n_lut (DataType df [], const DataType sf [], int pass) const noexcept; + inline void compute_direct_pass_n_osc (DataType df [], const DataType sf [], int pass) const noexcept; + + inline void compute_ifft_general (const DataType f [], DataType x []) const noexcept; + inline 
void compute_inverse_pass_n (DataType df [], const DataType sf [], int pass) const noexcept; + inline void compute_inverse_pass_n_osc (DataType df [], const DataType sf [], int pass) const noexcept; + inline void compute_inverse_pass_n_lut (DataType df [], const DataType sf [], int pass) const noexcept; + inline void compute_inverse_pass_3 (DataType df [], const DataType sf []) const noexcept; + inline void compute_inverse_pass_1_2 (DataType x [], const DataType sf []) const noexcept; + + const long _length; + const int _nbr_bits; + std::vector + _br_lut; + std::vector + _trigo_lut; + mutable std::vector + _buffer; + mutable std::vector + _trigo_osc; @@ -118,11 +126,9 @@ class FFTReal private: - FFTReal (); - FFTReal (const FFTReal &other); - FFTReal & operator = (const FFTReal &other); - bool operator == (const FFTReal &other); - bool operator != (const FFTReal &other); + FFTReal () = delete; + bool operator == (const FFTReal &other) = delete; + bool operator != (const FFTReal &other) = delete; }; // class FFTReal @@ -132,7 +138,7 @@ class FFTReal -#include "ffft/FFTReal.hpp" +#include "ffft/FFTReal.hpp" diff --git a/src/ffft/FFTReal.hpp b/src/ffft/FFTReal.hpp index 6b19cd2..37c0ef0 100644 --- a/src/ffft/FFTReal.hpp +++ b/src/ffft/FFTReal.hpp @@ -27,8 +27,8 @@ To Public License, Version 2, as published by Sam Hocevar. 
See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ -#include -#include +#include +#include @@ -37,7 +37,15 @@ namespace ffft -static inline bool FFTReal_is_pow2 (long x) +#if (__cplusplus >= 201402L) + #define ffft_CONSTEXPR14 constexpr +#else + #define ffft_CONSTEXPR14 +#endif + + + +static inline ffft_CONSTEXPR14 bool FFTReal_is_pow2 (long x) noexcept { assert (x > 0); @@ -46,11 +54,11 @@ static inline bool FFTReal_is_pow2 (long x) -static inline int FFTReal_get_next_pow2 (long x) +static inline ffft_CONSTEXPR14 int FFTReal_get_next_pow2 (long x) noexcept { --x; - int p = 0; + int p = 0; while ((x & ~0xFFFFL) != 0) { p += 16; @@ -116,9 +124,9 @@ Throws: Nothing */ template -long FFTReal
::get_length () const +long FFTReal
::get_length () const noexcept { - return (_length); + return _length; } @@ -140,11 +148,11 @@ Throws: Nothing */ template -void FFTReal
::do_fft (DataType f [], const DataType x []) const +void FFTReal
::do_fft (DataType f [], const DataType x []) const noexcept { - assert (f != 0); + assert (f != nullptr); assert (f != use_buffer ()); - assert (x != 0); + assert (x != nullptr); assert (x != use_buffer ()); assert (x != f); @@ -160,8 +168,8 @@ void FFTReal
::do_fft (DataType f [], const DataType x []) const f [1] = x [0] - x [2]; f [3] = x [1] - x [3]; - const DataType b_0 = x [0] + x [2]; - const DataType b_2 = x [1] + x [3]; + const DataType b_0 = x [0] + x [2]; + const DataType b_2 = x [1] + x [3]; f [0] = b_0 + b_2; f [2] = b_0 - b_2; @@ -201,11 +209,11 @@ Throws: Nothing */ template -void FFTReal
::do_ifft (const DataType f [], DataType x []) const +void FFTReal
::do_ifft (const DataType f [], DataType x []) const noexcept { - assert (f != 0); + assert (f != nullptr); assert (f != use_buffer ()); - assert (x != 0); + assert (x != nullptr); assert (x != use_buffer ()); assert (x != f); @@ -218,8 +226,8 @@ void FFTReal
::do_ifft (const DataType f [], DataType x []) const // 4-point IFFT else if (_nbr_bits == 2) { - const DataType b_0 = f [0] + f [2]; - const DataType b_2 = f [0] - f [2]; + const DataType b_0 = f [0] + f [2]; + const DataType b_2 = f [0] - f [2]; x [0] = b_0 + f [1] * 2; x [2] = b_0 - f [1] * 2; @@ -256,13 +264,13 @@ Throws: Nothing */ template -void FFTReal
::rescale (DataType x []) const +void FFTReal
::rescale (DataType x []) const noexcept { const DataType mul = DataType (1.0 / _length); if (_length < 4) { - long i = _length - 1; + long i = _length - 1; do { x [i] *= mul; @@ -276,7 +284,7 @@ void FFTReal
::rescale (DataType x []) const assert ((_length & 3) == 0); // Could be optimized with SIMD instruction sets (needs alignment check) - long i = _length - 4; + long i = _length - 4; do { x [i + 0] *= mul; @@ -307,13 +315,18 @@ Throws: Nothing */ template -typename FFTReal
::DataType * FFTReal
::use_buffer () const +typename FFTReal
::DataType * FFTReal
::use_buffer () const noexcept { - return (&_buffer [0]); + return _buffer.data (); } +template +constexpr int FFTReal
::MAX_BIT_DEPTH; + + + /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ @@ -325,15 +338,15 @@ typename FFTReal
::DataType * FFTReal
::use_buffer () const template void FFTReal
::init_br_lut () { - const long length = 1L << _nbr_bits; + const long length = 1L << _nbr_bits; _br_lut.resize (length); _br_lut [0] = 0; - long br_index = 0; + long br_index = 0; for (long cnt = 1; cnt < length; ++cnt) { // ++br_index (bit reversed) - long bit = length >> 1; + long bit = length >> 1; while (((br_index ^= bit) & bit) == 0) { bit >>= 1; @@ -348,23 +361,21 @@ void FFTReal
::init_br_lut () template void FFTReal
::init_trigo_lut () { - using namespace std; - if (_nbr_bits > 3) { - const long total_len = (1L << (_nbr_bits - 1)) - 4; + const long total_len = (1L << (_nbr_bits - 1)) - 4; _trigo_lut.resize (total_len); for (int level = 3; level < _nbr_bits; ++level) { - const long level_len = 1L << (level - 1); - DataType * const level_ptr = + const long level_len = 1L << (level - 1); + DataType * const level_ptr = &_trigo_lut [get_trigo_level_index (level)]; - const double mul = PI / (level_len << 1); + const double mul = PI / double (level_len << 1); for (long i = 0; i < level_len; ++ i) { - level_ptr [i] = static_cast (cos (i * mul)); + level_ptr [i] = static_cast (cos (double (i) * mul)); } } } @@ -375,17 +386,17 @@ void FFTReal
::init_trigo_lut () template void FFTReal
::init_trigo_osc () { - const int nbr_osc = _nbr_bits - TRIGO_BD_LIMIT; + const int nbr_osc = _nbr_bits - TRIGO_BD_LIMIT; if (nbr_osc > 0) { _trigo_osc.resize (nbr_osc); for (int osc_cnt = 0; osc_cnt < nbr_osc; ++osc_cnt) { - OscType & osc = _trigo_osc [osc_cnt]; + OscType & osc = _trigo_osc [osc_cnt]; - const long len = 1L << (TRIGO_BD_LIMIT + osc_cnt); - const double mul = (0.5 * PI) / len; + const long len = 1L << (TRIGO_BD_LIMIT + osc_cnt); + const double mul = (0.5 * PI) / double (len); osc.set_step (mul); } } @@ -394,7 +405,7 @@ void FFTReal
::init_trigo_osc () template -const long * FFTReal
::get_br_ptr () const +const long * FFTReal
::get_br_ptr () const noexcept { return (&_br_lut [0]); } @@ -402,7 +413,7 @@ const long * FFTReal
::get_br_ptr () const template -const typename FFTReal
::DataType * FFTReal
::get_trigo_ptr (int level) const +const typename FFTReal
::DataType * FFTReal
::get_trigo_ptr (int level) const noexcept { assert (level >= 3); @@ -412,7 +423,7 @@ const typename FFTReal
::DataType * FFTReal
::get_trigo_ptr (int level) template -long FFTReal
::get_trigo_level_index (int level) const +long FFTReal
::get_trigo_level_index (int level) const noexcept { assert (level >= 3); @@ -423,16 +434,16 @@ long FFTReal
::get_trigo_level_index (int level) const // Transform in several passes template -void FFTReal
::compute_fft_general (DataType f [], const DataType x []) const +void FFTReal
::compute_fft_general (DataType f [], const DataType x []) const noexcept { - assert (f != 0); + assert (f != nullptr); assert (f != use_buffer ()); - assert (x != 0); + assert (x != nullptr); assert (x != use_buffer ()); assert (x != f); - DataType * sf; - DataType * df; + DataType * sf; + DataType * df; if ((_nbr_bits & 1) != 0) { @@ -452,7 +463,7 @@ void FFTReal
::compute_fft_general (DataType f [], const DataType x []) cons { compute_direct_pass_n (df, sf, pass); - DataType * const temp_ptr = df; + DataType * const temp_ptr = df; df = sf; sf = temp_ptr; } @@ -461,27 +472,27 @@ void FFTReal
::compute_fft_general (DataType f [], const DataType x []) cons template -void FFTReal
::compute_direct_pass_1_2 (DataType df [], const DataType x []) const +void FFTReal
::compute_direct_pass_1_2 (DataType df [], const DataType x []) const noexcept { - assert (df != 0); - assert (x != 0); + assert (df != nullptr); + assert (x != nullptr); assert (df != x); - const long* const bit_rev_lut_ptr = get_br_ptr (); - long coef_index = 0; + const long * const bit_rev_lut_ptr = get_br_ptr (); + long coef_index = 0; do { - const long rev_index_0 = bit_rev_lut_ptr [coef_index ]; - const long rev_index_1 = bit_rev_lut_ptr [coef_index + 1]; - const long rev_index_2 = bit_rev_lut_ptr [coef_index + 2]; - const long rev_index_3 = bit_rev_lut_ptr [coef_index + 3]; + const long rev_index_0 = bit_rev_lut_ptr [coef_index]; + const long rev_index_1 = bit_rev_lut_ptr [coef_index + 1]; + const long rev_index_2 = bit_rev_lut_ptr [coef_index + 2]; + const long rev_index_3 = bit_rev_lut_ptr [coef_index + 3]; DataType * const df2 = df + coef_index; df2 [1] = x [rev_index_0] - x [rev_index_1]; df2 [3] = x [rev_index_2] - x [rev_index_3]; - const DataType sf_0 = x [rev_index_0] + x [rev_index_1]; - const DataType sf_2 = x [rev_index_2] + x [rev_index_3]; + const DataType sf_0 = x [rev_index_0] + x [rev_index_1]; + const DataType sf_2 = x [rev_index_2] + x [rev_index_3]; df2 [0] = sf_0 + sf_2; df2 [2] = sf_0 - sf_2; @@ -494,20 +505,20 @@ void FFTReal
::compute_direct_pass_1_2 (DataType df [], const DataType x []) template -void FFTReal
::compute_direct_pass_3 (DataType df [], const DataType sf []) const +void FFTReal
::compute_direct_pass_3 (DataType df [], const DataType sf []) const noexcept { - assert (df != 0); - assert (sf != 0); + assert (df != nullptr); + assert (sf != nullptr); assert (df != sf); - const DataType sqrt2_2 = DataType (SQRT2 * 0.5); - long coef_index = 0; + const DataType sqrt2_2 = DataType (SQRT2 * 0.5); + long coef_index = 0; do { - DataType v; + DataType v; - df [coef_index] = sf [coef_index ] + sf [coef_index + 4]; - df [coef_index + 4] = sf [coef_index ] - sf [coef_index + 4]; + df [coef_index] = sf [coef_index] + sf [coef_index + 4]; + df [coef_index + 4] = sf [coef_index] - sf [coef_index + 4]; df [coef_index + 2] = sf [coef_index + 2]; df [coef_index + 6] = sf [coef_index + 6]; @@ -527,10 +538,10 @@ void FFTReal
::compute_direct_pass_3 (DataType df [], const DataType sf []) template -void FFTReal
::compute_direct_pass_n (DataType df [], const DataType sf [], int pass) const +void FFTReal
::compute_direct_pass_n (DataType df [], const DataType sf [], int pass) const noexcept { - assert (df != 0); - assert (sf != 0); + assert (df != nullptr); + assert (sf != nullptr); assert (df != sf); assert (pass >= 3); assert (pass < _nbr_bits); @@ -548,25 +559,25 @@ void FFTReal
::compute_direct_pass_n (DataType df [], const DataType sf [], template -void FFTReal
::compute_direct_pass_n_lut (DataType df [], const DataType sf [], int pass) const +void FFTReal
::compute_direct_pass_n_lut (DataType df [], const DataType sf [], int pass) const noexcept { - assert (df != 0); - assert (sf != 0); + assert (df != nullptr); + assert (sf != nullptr); assert (df != sf); assert (pass >= 3); assert (pass < _nbr_bits); - const long nbr_coef = 1 << pass; - const long h_nbr_coef = nbr_coef >> 1; - const long d_nbr_coef = nbr_coef << 1; - long coef_index = 0; + const long nbr_coef = 1 << pass; + const long h_nbr_coef = nbr_coef >> 1; + const long d_nbr_coef = nbr_coef << 1; + long coef_index = 0; const DataType * const cos_ptr = get_trigo_ptr (pass); do { - const DataType * const sf1r = sf + coef_index; - const DataType * const sf2r = sf1r + nbr_coef; - DataType * const dfr = df + coef_index; - DataType * const dfi = dfr + nbr_coef; + const DataType * const sf1r = sf + coef_index; + const DataType * const sf2r = sf1r + nbr_coef; + DataType * const dfr = df + coef_index; + DataType * const dfi = dfr + nbr_coef; // Extreme coefficients are always real dfr [0] = sf1r [0] + sf2r [0]; @@ -575,20 +586,20 @@ void FFTReal
::compute_direct_pass_n_lut (DataType df [], const DataType sf dfi [h_nbr_coef] = sf2r [h_nbr_coef]; // Others are conjugate complex numbers - const DataType * const sf1i = sf1r + h_nbr_coef; - const DataType * const sf2i = sf1i + nbr_coef; + const DataType * const sf1i = sf1r + h_nbr_coef; + const DataType * const sf2i = sf1i + nbr_coef; for (long i = 1; i < h_nbr_coef; ++ i) { - const DataType c = cos_ptr [ i]; // cos (i*PI/nbr_coef); - const DataType s = cos_ptr [h_nbr_coef - i]; // sin (i*PI/nbr_coef); - DataType v; + const DataType c = cos_ptr [i]; // cos (i*PI/nbr_coef); + const DataType s = cos_ptr [h_nbr_coef - i]; // sin (i*PI/nbr_coef); + DataType v; v = sf2r [i] * c - sf2i [i] * s; - dfr [ i] = sf1r [i] + v; + dfr [i] = sf1r [i] + v; dfi [-i] = sf1r [i] - v; // dfr [nbr_coef - i] = v = sf2r [i] * s + sf2i [i] * c; - dfi [ i] = v + sf1i [i]; + dfi [i] = v + sf1i [i]; dfi [nbr_coef - i] = v - sf1i [i]; } @@ -600,25 +611,25 @@ void FFTReal
::compute_direct_pass_n_lut (DataType df [], const DataType sf template -void FFTReal
::compute_direct_pass_n_osc (DataType df [], const DataType sf [], int pass) const +void FFTReal
::compute_direct_pass_n_osc (DataType df [], const DataType sf [], int pass) const noexcept { - assert (df != 0); - assert (sf != 0); + assert (df != nullptr); + assert (sf != nullptr); assert (df != sf); assert (pass > TRIGO_BD_LIMIT); assert (pass < _nbr_bits); - const long nbr_coef = 1 << pass; - const long h_nbr_coef = nbr_coef >> 1; - const long d_nbr_coef = nbr_coef << 1; - long coef_index = 0; - OscType & osc = _trigo_osc [pass - (TRIGO_BD_LIMIT + 1)]; + const long nbr_coef = 1 << pass; + const long h_nbr_coef = nbr_coef >> 1; + const long d_nbr_coef = nbr_coef << 1; + long coef_index = 0; + OscType & osc = _trigo_osc [pass - (TRIGO_BD_LIMIT + 1)]; do { - const DataType * const sf1r = sf + coef_index; - const DataType * const sf2r = sf1r + nbr_coef; - DataType * const dfr = df + coef_index; - DataType * const dfi = dfr + nbr_coef; + const DataType * const sf1r = sf + coef_index; + const DataType * const sf2r = sf1r + nbr_coef; + DataType * const dfr = df + coef_index; + DataType * const dfi = dfr + nbr_coef; osc.clear_buffers (); @@ -629,21 +640,21 @@ void FFTReal
::compute_direct_pass_n_osc (DataType df [], const DataType sf dfi [h_nbr_coef] = sf2r [h_nbr_coef]; // Others are conjugate complex numbers - const DataType * const sf1i = sf1r + h_nbr_coef; - const DataType * const sf2i = sf1i + nbr_coef; + const DataType * const sf1i = sf1r + h_nbr_coef; + const DataType * const sf2i = sf1i + nbr_coef; for (long i = 1; i < h_nbr_coef; ++ i) { osc.step (); - const DataType c = osc.get_cos (); - const DataType s = osc.get_sin (); - DataType v; + const DataType c = osc.get_cos (); + const DataType s = osc.get_sin (); + DataType v; v = sf2r [i] * c - sf2i [i] * s; - dfr [ i] = sf1r [i] + v; + dfr [i] = sf1r [i] + v; dfi [-i] = sf1r [i] - v; // dfr [nbr_coef - i] = v = sf2r [i] * s + sf2i [i] * c; - dfi [ i] = v + sf1i [i]; + dfi [i] = v + sf1i [i]; dfi [nbr_coef - i] = v - sf1i [i]; } @@ -656,26 +667,26 @@ void FFTReal
::compute_direct_pass_n_osc (DataType df [], const DataType sf // Transform in several pass template -void FFTReal
::compute_ifft_general (const DataType f [], DataType x []) const +void FFTReal
::compute_ifft_general (const DataType f [], DataType x []) const noexcept { - assert (f != 0); + assert (f != nullptr); assert (f != use_buffer ()); - assert (x != 0); + assert (x != nullptr); assert (x != use_buffer ()); assert (x != f); - DataType * sf = const_cast (f); - DataType * df; - DataType * df_temp; + DataType * sf = const_cast (f); + DataType * df; + DataType * df_temp; if (_nbr_bits & 1) { - df = use_buffer (); + df = use_buffer (); df_temp = x; } else { - df = x; + df = x; df_temp = use_buffer (); } @@ -685,7 +696,7 @@ void FFTReal
::compute_ifft_general (const DataType f [], DataType x []) con if (pass < _nbr_bits - 1) { - DataType * const temp_ptr = df; + DataType * const temp_ptr = df; df = sf; sf = temp_ptr; } @@ -703,10 +714,10 @@ void FFTReal
::compute_ifft_general (const DataType f [], DataType x []) con template -void FFTReal
::compute_inverse_pass_n (DataType df [], const DataType sf [], int pass) const +void FFTReal
::compute_inverse_pass_n (DataType df [], const DataType sf [], int pass) const noexcept { - assert (df != 0); - assert (sf != 0); + assert (df != nullptr); + assert (sf != nullptr); assert (df != sf); assert (pass >= 3); assert (pass < _nbr_bits); @@ -724,25 +735,25 @@ void FFTReal
::compute_inverse_pass_n (DataType df [], const DataType sf [], template -void FFTReal
::compute_inverse_pass_n_lut (DataType df [], const DataType sf [], int pass) const +void FFTReal
::compute_inverse_pass_n_lut (DataType df [], const DataType sf [], int pass) const noexcept { - assert (df != 0); - assert (sf != 0); + assert (df != nullptr); + assert (sf != nullptr); assert (df != sf); assert (pass >= 3); assert (pass < _nbr_bits); - const long nbr_coef = 1 << pass; - const long h_nbr_coef = nbr_coef >> 1; - const long d_nbr_coef = nbr_coef << 1; - long coef_index = 0; + const long nbr_coef = 1 << pass; + const long h_nbr_coef = nbr_coef >> 1; + const long d_nbr_coef = nbr_coef << 1; + long coef_index = 0; const DataType * const cos_ptr = get_trigo_ptr (pass); do { - const DataType * const sfr = sf + coef_index; - const DataType * const sfi = sfr + nbr_coef; - DataType * const df1r = df + coef_index; - DataType * const df2r = df1r + nbr_coef; + const DataType * const sfr = sf + coef_index; + const DataType * const sfi = sfr + nbr_coef; + DataType * const df1r = df + coef_index; + DataType * const df2r = df1r + nbr_coef; // Extreme coefficients are always real df1r [0] = sfr [0] + sfi [0]; // + sfr [nbr_coef] @@ -751,16 +762,16 @@ void FFTReal
::compute_inverse_pass_n_lut (DataType df [], const DataType sf df2r [h_nbr_coef] = sfi [h_nbr_coef] * 2; // Others are conjugate complex numbers - DataType * const df1i = df1r + h_nbr_coef; - DataType * const df2i = df1i + nbr_coef; + DataType * const df1i = df1r + h_nbr_coef; + DataType * const df2i = df1i + nbr_coef; for (long i = 1; i < h_nbr_coef; ++ i) { - df1r [i] = sfr [i] + sfi [ -i]; // + sfr [nbr_coef - i] + df1r [i] = sfr [i] + sfi [-i]; // + sfr [nbr_coef - i] df1i [i] = sfi [i] - sfi [nbr_coef - i]; - const DataType c = cos_ptr [i ]; // cos (i*PI/nbr_coef); - const DataType s = cos_ptr [h_nbr_coef - i]; // sin (i*PI/nbr_coef); - const DataType vr = sfr [i] - sfi [ -i]; // - sfr [nbr_coef - i] + const DataType c = cos_ptr [i]; // cos (i*PI/nbr_coef); + const DataType s = cos_ptr [h_nbr_coef - i]; // sin (i*PI/nbr_coef); + const DataType vr = sfr [i] - sfi [-i]; // - sfr [nbr_coef - i] const DataType vi = sfi [i] + sfi [nbr_coef - i]; df2r [i] = vr * c + vi * s; @@ -775,25 +786,25 @@ void FFTReal
::compute_inverse_pass_n_lut (DataType df [], const DataType sf template -void FFTReal
::compute_inverse_pass_n_osc (DataType df [], const DataType sf [], int pass) const +void FFTReal
::compute_inverse_pass_n_osc (DataType df [], const DataType sf [], int pass) const noexcept { - assert (df != 0); - assert (sf != 0); + assert (df != nullptr); + assert (sf != nullptr); assert (df != sf); assert (pass > TRIGO_BD_LIMIT); assert (pass < _nbr_bits); - const long nbr_coef = 1 << pass; - const long h_nbr_coef = nbr_coef >> 1; - const long d_nbr_coef = nbr_coef << 1; - long coef_index = 0; - OscType & osc = _trigo_osc [pass - (TRIGO_BD_LIMIT + 1)]; + const long nbr_coef = 1 << pass; + const long h_nbr_coef = nbr_coef >> 1; + const long d_nbr_coef = nbr_coef << 1; + long coef_index = 0; + OscType & osc = _trigo_osc [pass - (TRIGO_BD_LIMIT + 1)]; do { - const DataType * const sfr = sf + coef_index; - const DataType * const sfi = sfr + nbr_coef; - DataType * const df1r = df + coef_index; - DataType * const df2r = df1r + nbr_coef; + const DataType * const sfr = sf + coef_index; + const DataType * const sfi = sfr + nbr_coef; + DataType * const df1r = df + coef_index; + DataType * const df2r = df1r + nbr_coef; osc.clear_buffers (); @@ -808,13 +819,13 @@ void FFTReal
::compute_inverse_pass_n_osc (DataType df [], const DataType sf DataType * const df2i = df1i + nbr_coef; for (long i = 1; i < h_nbr_coef; ++ i) { - df1r [i] = sfr [i] + sfi [ -i]; // + sfr [nbr_coef - i] + df1r [i] = sfr [i] + sfi [-i]; // + sfr [nbr_coef - i] df1i [i] = sfi [i] - sfi [nbr_coef - i]; osc.step (); const DataType c = osc.get_cos (); const DataType s = osc.get_sin (); - const DataType vr = sfr [i] - sfi [ -i]; // - sfr [nbr_coef - i] + const DataType vr = sfr [i] - sfi [-i]; // - sfr [nbr_coef - i] const DataType vi = sfi [i] + sfi [nbr_coef - i]; df2r [i] = vr * c + vi * s; @@ -829,26 +840,26 @@ void FFTReal
::compute_inverse_pass_n_osc (DataType df [], const DataType sf template -void FFTReal
::compute_inverse_pass_3 (DataType df [], const DataType sf []) const +void FFTReal
::compute_inverse_pass_3 (DataType df [], const DataType sf []) const noexcept { - assert (df != 0); - assert (sf != 0); + assert (df != nullptr); + assert (sf != nullptr); assert (df != sf); - const DataType sqrt2_2 = DataType (SQRT2 * 0.5); - long coef_index = 0; + const DataType sqrt2_2 = DataType (SQRT2 * 0.5); + long coef_index = 0; do { - df [coef_index ] = sf [coef_index ] + sf [coef_index + 4]; - df [coef_index + 4] = sf [coef_index ] - sf [coef_index + 4]; + df [coef_index] = sf [coef_index] + sf [coef_index + 4]; + df [coef_index + 4] = sf [coef_index] - sf [coef_index + 4]; df [coef_index + 2] = sf [coef_index + 2] * 2; df [coef_index + 6] = sf [coef_index + 6] * 2; df [coef_index + 1] = sf [coef_index + 1] + sf [coef_index + 3]; df [coef_index + 3] = sf [coef_index + 5] - sf [coef_index + 7]; - const DataType vr = sf [coef_index + 1] - sf [coef_index + 3]; - const DataType vi = sf [coef_index + 5] + sf [coef_index + 7]; + const DataType vr = sf [coef_index + 1] - sf [coef_index + 3]; + const DataType vi = sf [coef_index + 5] + sf [coef_index + 7]; df [coef_index + 5] = (vr + vi) * sqrt2_2; df [coef_index + 7] = (vi - vr) * sqrt2_2; @@ -861,22 +872,22 @@ void FFTReal
::compute_inverse_pass_3 (DataType df [], const DataType sf []) template -void FFTReal
::compute_inverse_pass_1_2 (DataType x [], const DataType sf []) const +void FFTReal
::compute_inverse_pass_1_2 (DataType x [], const DataType sf []) const noexcept { - assert (x != 0); - assert (sf != 0); - assert (x != sf); + assert (x != nullptr); + assert (sf != nullptr); + assert (x != sf); - const long * bit_rev_lut_ptr = get_br_ptr (); - const DataType * sf2 = sf; - long coef_index = 0; + const long * bit_rev_lut_ptr = get_br_ptr (); + const DataType * sf2 = sf; + long coef_index = 0; do { { - const DataType b_0 = sf2 [0] + sf2 [2]; - const DataType b_2 = sf2 [0] - sf2 [2]; - const DataType b_1 = sf2 [1] * 2; - const DataType b_3 = sf2 [3] * 2; + const DataType b_0 = sf2 [0] + sf2 [2]; + const DataType b_2 = sf2 [0] - sf2 [2]; + const DataType b_1 = sf2 [1] * 2; + const DataType b_3 = sf2 [3] * 2; x [bit_rev_lut_ptr [0]] = b_0 + b_1; x [bit_rev_lut_ptr [1]] = b_0 - b_1; @@ -884,10 +895,10 @@ void FFTReal
::compute_inverse_pass_1_2 (DataType x [], const DataType sf [] x [bit_rev_lut_ptr [3]] = b_2 - b_3; } { - const DataType b_0 = sf2 [4] + sf2 [6]; - const DataType b_2 = sf2 [4] - sf2 [6]; - const DataType b_1 = sf2 [5] * 2; - const DataType b_3 = sf2 [7] * 2; + const DataType b_0 = sf2 [4] + sf2 [6]; + const DataType b_2 = sf2 [4] - sf2 [6]; + const DataType b_1 = sf2 [5] * 2; + const DataType b_3 = sf2 [7] * 2; x [bit_rev_lut_ptr [4]] = b_0 + b_1; x [bit_rev_lut_ptr [5]] = b_0 - b_1; @@ -895,8 +906,8 @@ void FFTReal
::compute_inverse_pass_1_2 (DataType x [], const DataType sf [] x [bit_rev_lut_ptr [7]] = b_2 - b_3; } - sf2 += 8; - coef_index += 8; + sf2 += 8; + coef_index += 8; bit_rev_lut_ptr += 8; } while (coef_index < _length); @@ -904,6 +915,11 @@ void FFTReal
::compute_inverse_pass_1_2 (DataType x [], const DataType sf [] +template +constexpr int FFTReal
::TRIGO_BD_LIMIT; + + + } // namespace ffft diff --git a/src/ffft/OscSinCos.h b/src/ffft/OscSinCos.h index afaf1d5..6c535b9 100644 --- a/src/ffft/OscSinCos.h +++ b/src/ffft/OscSinCos.h @@ -27,7 +27,7 @@ To Public License, Version 2, as published by Sam Hocevar. See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ -#include "ffft/def.h" +#include "ffft/def.h" @@ -46,19 +46,17 @@ class OscSinCos typedef T DataType; - OscSinCos (); - ffft_FORCEINLINE void - set_step (double angle_rad); + set_step (double angle_rad) noexcept; ffft_FORCEINLINE DataType - get_cos () const; + get_cos () const noexcept; ffft_FORCEINLINE DataType - get_sin () const; + get_sin () const noexcept; ffft_FORCEINLINE void - step (); + step () noexcept; ffft_FORCEINLINE void - clear_buffers (); + clear_buffers () noexcept; @@ -72,10 +70,10 @@ class OscSinCos private: - DataType _pos_cos; // Current phase expressed with sin and cos. [-1 ; 1] - DataType _pos_sin; // - - DataType _step_cos; // Phase increment per step, [-1 ; 1] - DataType _step_sin; // - + DataType _pos_cos { 1 }; // Current phase expressed with sin and cos. [-1 ; 1] + DataType _pos_sin { 0 }; // - + DataType _step_cos { 1 }; // Phase increment per step, [-1 ; 1] + DataType _step_sin { 0 }; // - @@ -83,10 +81,8 @@ class OscSinCos private: - OscSinCos (const OscSinCos &other); - OscSinCos & operator = (const OscSinCos &other); - bool operator == (const OscSinCos &other); - bool operator != (const OscSinCos &other); + bool operator == (const OscSinCos &other) const = delete; + bool operator != (const OscSinCos &other) const = delete; }; // class OscSinCos @@ -96,7 +92,7 @@ class OscSinCos -#include "ffft/OscSinCos.hpp" +#include "ffft/OscSinCos.hpp" diff --git a/src/ffft/OscSinCos.hpp b/src/ffft/OscSinCos.hpp index 04fe703..05f4202 100644 --- a/src/ffft/OscSinCos.hpp +++ b/src/ffft/OscSinCos.hpp @@ -27,7 +27,7 @@ To Public License, Version 2, as published by Sam Hocevar. 
See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ -#include +#include namespace std { } @@ -43,22 +43,8 @@ namespace ffft template -OscSinCos ::OscSinCos () -: _pos_cos (1) -, _pos_sin (0) -, _step_cos (1) -, _step_sin (0) +void OscSinCos ::set_step (double angle_rad) noexcept { - // Nothing -} - - - -template -void OscSinCos ::set_step (double angle_rad) -{ - using namespace std; - _step_cos = static_cast (cos (angle_rad)); _step_sin = static_cast (sin (angle_rad)); } @@ -66,7 +52,7 @@ void OscSinCos ::set_step (double angle_rad) template -typename OscSinCos ::DataType OscSinCos ::get_cos () const +typename OscSinCos ::DataType OscSinCos ::get_cos () const noexcept { return (_pos_cos); } @@ -74,7 +60,7 @@ typename OscSinCos ::DataType OscSinCos ::get_cos () const template -typename OscSinCos ::DataType OscSinCos ::get_sin () const +typename OscSinCos ::DataType OscSinCos ::get_sin () const noexcept { return (_pos_sin); } @@ -82,10 +68,10 @@ typename OscSinCos ::DataType OscSinCos ::get_sin () const template -void OscSinCos ::step () +void OscSinCos ::step () noexcept { - const DataType old_cos = _pos_cos; - const DataType old_sin = _pos_sin; + const DataType old_cos = _pos_cos; + const DataType old_sin = _pos_sin; _pos_cos = old_cos * _step_cos - old_sin * _step_sin; _pos_sin = old_cos * _step_sin + old_sin * _step_cos; @@ -94,7 +80,7 @@ void OscSinCos ::step () template -void OscSinCos ::clear_buffers () +void OscSinCos ::clear_buffers () noexcept { _pos_cos = static_cast (1); _pos_sin = static_cast (0); diff --git a/src/ffft/def.h b/src/ffft/def.h index a1e6ff8..2f7f8ee 100644 --- a/src/ffft/def.h +++ b/src/ffft/def.h @@ -34,8 +34,8 @@ namespace ffft -const double PI = 3.1415926535897932384626433832795; -const double SQRT2 = 1.41421356237309514547462185873883; +constexpr double PI = 3.1415926535897932384626433832795; +constexpr double SQRT2 = 1.41421356237309514547462185873883; #if defined (_MSC_VER) @@ -47,6 +47,20 @@ const 
double SQRT2 = 1.41421356237309514547462185873883; #endif +// Compiler type +#define ffft_COMPILER_UNKNOWN (-1) +#define ffft_COMPILER_GCC (1) +#define ffft_COMPILER_MSVC (2) + +#if defined (__GNUC__) || defined (__clang__) + #define ffft_COMPILER ffft_COMPILER_GCC +#elif defined (_MSC_VER) + #define ffft_COMPILER ffft_COMPILER_MSVC +#else + #define ffft_COMPILER ffft_COMPILER_UNKNOWN +#endif + + } // namespace ffft diff --git a/src/fmtc/Bitdepth.cpp b/src/fmtc/Bitdepth.cpp index 78f284a..2ec282a 100644 --- a/src/fmtc/Bitdepth.cpp +++ b/src/fmtc/Bitdepth.cpp @@ -25,11 +25,8 @@ To Public License, Version 2, as published by Sam Hocevar. See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ #include "fmtc/Bitdepth.h" +#include "fmtc/fnc.h" #include "fmtc/SplFmtUtl.h" -#if (fstb_ARCHI == fstb_ARCHI_X86) - #include "fmtcl/ProxyRwSse2.h" -#endif -#include "fmtcl/VoidAndCluster.h" #include "fstb/def.h" #include "fstb/fnc.h" #include "vsutl/CpuOpt.h" @@ -41,6 +38,7 @@ To Public License, Version 2, as published by Sam Hocevar. 
See #include + namespace fmtc { @@ -52,7 +50,7 @@ namespace fmtc Bitdepth::Bitdepth (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCore &core, const ::VSAPI &vsapi) : vsutl::FilterBase (vsapi, "bitdepth", ::fmParallel, 0) -, _clip_src_sptr (vsapi.propGetNode (&in, "clip", 0, 0), vsapi) +, _clip_src_sptr (vsapi.propGetNode (&in, "clip", 0, nullptr), vsapi) , _vi_in (*_vsapi.getVideoInfo (_clip_src_sptr.get ())) , _vi_out (_vi_in) #if defined (_MSC_VER) @@ -63,40 +61,15 @@ Bitdepth::Bitdepth (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo #if defined (_MSC_VER) #pragma warning (pop) #endif -, _splfmt_src (fmtcl::SplFmt_ILLEGAL) -, _splfmt_dst (fmtcl::SplFmt_ILLEGAL) -, _scale_info_arr () -, _upconv_flag (false) -, _sse2_flag (false) -, _avx2_flag (false) -, _full_range_in_flag (false) -, _full_range_out_flag (false) -, _dmode (get_arg_int (in, out, "dmode", DMode_FILTERLITE)) -, _ampo (get_arg_flt (in, out, "ampo", 1.0)) -, _ampn (get_arg_flt (in, out, "ampn", 0.0)) -, _dyn_flag (get_arg_int (in, out, "dyn", 0) != 0) -, _static_noise_flag (get_arg_int (in, out, "staticnoise", 0) != 0) -, _pat_size (get_arg_int (in, out, "patsize", PAT_WIDTH)) -, _ampo_i (0) -, _ampn_i (0) -, _ampe_i (0) -, _ampe_f (0) -, _ampn_f (0) -, _errdif_flag (false) -, _simple_flag (false) -, _dither_pat_arr () -, _buf_factory_uptr () -, _process_seg_int_int_ptr (0) -, _process_seg_flt_int_ptr (0) { fstb::unused (user_data_ptr); vsutl::CpuOpt cpu_opt (*this, in, out); - _sse2_flag = cpu_opt.has_sse2 (); - _avx2_flag = cpu_opt.has_avx2 (); + const bool sse2_flag = cpu_opt.has_sse2 (); + const bool avx2_flag = cpu_opt.has_avx2 (); // Checks the input clip - if (_vi_in.format == 0) + if (_vi_in.format == nullptr) { throw_inval_arg ("only constant pixel formats are supported."); } @@ -123,7 +96,8 @@ Bitdepth::Bitdepth (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo } } - _splfmt_src = SplFmtUtl::conv_from_vsformat (fmt_src); + const auto splfmt_src = 
SplFmtUtl::conv_from_vsformat (fmt_src); + const auto col_fam = conv_colfam_to_fmtcl (fmt_src); // Destination colorspace const ::VSFormat& fmt_dst = get_output_colorspace (in, out, core, fmt_src); @@ -155,119 +129,71 @@ Bitdepth::Bitdepth (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo // Format is validated _vi_out.format = &fmt_dst; - _splfmt_dst = SplFmtUtl::conv_from_vsformat (fmt_dst); + const auto splfmt_dst = SplFmtUtl::conv_from_vsformat (fmt_dst); + + const int w = _vi_in.width; // May be <= 0 // Conversion-related things + bool range_def_src_flag = false; _full_range_in_flag = (get_arg_int ( - in, out, "fulls" , vsutl::is_full_range_default (fmt_src) ? 1 : 0 + in, out, "fulls" , vsutl::is_full_range_default (fmt_src) ? 1 : 0, + 0, &range_def_src_flag ) != 0); + bool range_def_dst_flag = false; _full_range_out_flag = (get_arg_int ( - in, out, "fulld", (_full_range_in_flag) ? 1 : 0 + in, out, "fulld", (_full_range_in_flag) ? 1 : 0, + 0, &range_def_dst_flag ) != 0); - - // No dithering required - if ( ( fmt_src.sampleType == ::stInteger - && ( fmt_dst.sampleType == ::stFloat - || ( fmt_src.bitsPerSample <= fmt_dst.bitsPerSample - && ! _full_range_in_flag - && ! 
_full_range_out_flag))) - || ( fmt_src.sampleType == ::stFloat - && fmt_dst.sampleType == ::stFloat)) - { - _upconv_flag = true; - } - - for (int plane_index = 0; plane_index < fmt_dst.numPlanes; ++plane_index) - { - SclInf & scl_inf = _scale_info_arr [plane_index]; - vsutl::compute_fmt_mac_cst ( - scl_inf._info._gain, - scl_inf._info._add_cst, - *_vi_out.format, _full_range_out_flag, - fmt_src, _full_range_in_flag, - plane_index - ); - - if ( _upconv_flag - && fmt_src.sampleType == ::stInteger - && fmt_dst.sampleType == ::stFloat) - { - scl_inf._ptr = &scl_inf._info; - } - else - { - scl_inf._ptr = 0; - } - } + _range_def_flag = (range_def_src_flag || range_def_dst_flag); // Dithering parameters - if (_dmode == DMode_ROUND_ALIAS) + fmtcl::Dither::DMode dmode = static_cast ( + get_arg_int (in, out, "dmode", fmtcl::Dither::DMode_FILTERLITE) + ); + if (dmode == fmtcl::Dither::DMode_ROUND_ALIAS) { - _dmode = DMode_ROUND; + dmode = fmtcl::Dither::DMode_ROUND; } - if ( _dmode < 0 - || _dmode >= DMode_NBR_ELT) + if ( dmode < 0 + || dmode >= fmtcl::Dither::DMode_NBR_ELT) { throw_inval_arg ("invalid dmode."); } - if (_ampo < 0) + const double ampo = get_arg_flt (in, out, "ampo", 1.0); + if (ampo < 0) { throw_inval_arg ("ampo cannot be negative."); } - if (_ampn < 0) + + const double ampn = get_arg_flt (in, out, "ampn", 0.0); + if (ampn < 0) { throw_inval_arg ("ampn cannot be negative."); } - if (_pat_size < 4 || PAT_WIDTH % _pat_size != 0) + const int pat_size = + get_arg_int (in, out, "patsize", fmtcl::Dither::_max_pat_width); + if (pat_size < 4 || fmtcl::Dither::_max_pat_width % pat_size != 0) { throw_inval_arg ("Wrong value for patsize."); } - int w = _vi_in.width; - if (_vi_in.width <= 0) - { - w = MAX_UNK_WIDTH; - } - _buf_factory_uptr = - std::unique_ptr (new fmtcl::ErrDifBufFactory (w)); - _buf_pool.set_factory (*_buf_factory_uptr); - - build_dither_pat (); - - const int amp_mul = 1 << AMP_BITS; - const int ampo_i_raw = fstb::round_int (_ampo * amp_mul); - const int 
ampn_i_raw = fstb::round_int (_ampn * amp_mul); - _ampo_i = std::min (ampo_i_raw, 127); - _ampn_i = std::min (ampn_i_raw, 127); - _ampn_f = float (_ampn * (1.0f / 4294967296.0f)); // / (2 ^ 32) - - _simple_flag = (ampo_i_raw == amp_mul && ampn_i_raw == 0); - - if (_errdif_flag) - { - _ampe_i = fstb::limit ( - fstb::round_int ((_ampo - 1) * (128 << AMP_BITS)), - 0, - (2048 << AMP_BITS) - 1 - ); - _ampe_f = fstb::limit (float (_ampo) - 1, 0.0f, 8.0f); - } - - // Processing function initialisation - if (_errdif_flag) - { - init_fnc_errdiff (); - } - else if (_dmode == DMode_FAST) - { - init_fnc_fast (); - } - else - { - init_fnc_ordered (); - } + const bool dyn_flag = (get_arg_int (in, out, "dyn", 0) != 0); + const bool static_noise_flag = (get_arg_int (in, out, "staticnoise", 0) != 0); + const bool correlated_planes_flag = (get_arg_int (in, out, "corplane", 0) != 0); + const bool tpdfo_flag = (get_arg_int (in, out, "tpdfo", 0) != 0); + const bool tpdfn_flag = (get_arg_int (in, out, "tpdfn", 0) != 0); + + _engine_uptr = std::make_unique ( + splfmt_src, fmt_src.bitsPerSample, _full_range_in_flag, + splfmt_dst, fmt_dst.bitsPerSample, _full_range_out_flag, + col_fam, fmt_dst.numPlanes, w, + dmode, pat_size, ampo, ampn, + dyn_flag, static_noise_flag, correlated_planes_flag, + tpdfo_flag, tpdfn_flag, + sse2_flag, avx2_flag + ); } @@ -286,8 +212,8 @@ const ::VSFrameRef * Bitdepth::get_frame (int n, int activation_reason, void * & { assert (n >= 0); - ::VSFrameRef * dst_ptr = 0; - ::VSNodeRef & node = *_clip_src_sptr; + ::VSFrameRef * dst_ptr = nullptr; + ::VSNodeRef & node = *_clip_src_sptr; if (activation_reason == ::arInitial) { @@ -302,8 +228,8 @@ const ::VSFrameRef * Bitdepth::get_frame (int n, int activation_reason, void * & ); const ::VSFrameRef & src = *src_sptr; - const int w = _vsapi.getFrameWidth (&src, 0); - const int h = _vsapi.getFrameHeight (&src, 0); + const int w = _vsapi.getFrameWidth (&src, 0); + const int h = _vsapi.getFrameHeight (&src, 0); dst_ptr = 
_vsapi.newVideoFrame (_vi_out.format, w, h, &src, &core); const int ret_val = _plane_processor.process_frame ( @@ -312,11 +238,19 @@ const ::VSFrameRef * Bitdepth::get_frame (int n, int activation_reason, void * & if (ret_val != 0) { _vsapi.freeFrame (dst_ptr); - dst_ptr = 0; + dst_ptr = nullptr; + } + + // Output frame properties + ::VSMap & dst_prop = *(_vsapi.getFramePropsRW (dst_ptr)); + if (_range_def_flag) + { + const int cr_val = (_full_range_out_flag) ? 0 : 1; + _vsapi.propSetInt (&dst_prop, "_ColorRange", cr_val, ::paReplace); } } - return (dst_ptr); + return dst_ptr; } @@ -328,7 +262,7 @@ const ::VSFrameRef * Bitdepth::get_frame (int n, int activation_reason, void * & int Bitdepth::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void *frame_data_ptr, ::VSFrameContext &frame_ctx, ::VSCore &core, const vsutl::NodeRefSPtr &src_node1_sptr, const vsutl::NodeRefSPtr &src_node2_sptr, const vsutl::NodeRefSPtr &src_node3_sptr) { fstb::unused (frame_data_ptr, core, src_node2_sptr, src_node3_sptr); - assert (src_node1_sptr.get () != 0); + assert (src_node1_sptr.get () != nullptr); int ret_val = 0; @@ -353,43 +287,11 @@ int Bitdepth::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void try { - if (_upconv_flag) - { - fmtcl::BitBltConv blitter (_sse2_flag, _avx2_flag); - blitter.bitblt ( - _splfmt_dst, _vi_out.format->bitsPerSample, - data_dst_ptr, 0, stride_dst, - _splfmt_src, _vi_in.format->bitsPerSample, - data_src_ptr, 0, stride_src, - w, h, - _scale_info_arr [plane_index]._ptr - ); - } - else - { - uint32_t rnd_state = plane_index << 16; - if (_static_noise_flag) - { - rnd_state += 55555; - } - else - { - rnd_state += n; - } - - const int pat_index = (n + plane_index) & (PAT_PERIOD - 1); - const PatData& pattern = _dither_pat_arr [pat_index]; - - dither_plane ( - _splfmt_dst, _vi_out.format->bitsPerSample, - data_dst_ptr, stride_dst, - _splfmt_src, _vi_in.format->bitsPerSample, - data_src_ptr, stride_src, - w, h, - _scale_info_arr 
[plane_index]._info, - pattern, rnd_state - ); - } + _engine_uptr->process_plane ( + data_dst_ptr, stride_dst, + data_src_ptr, stride_src, + w, h, n, plane_index + ); } catch (std::exception &e) @@ -404,7 +306,7 @@ int Bitdepth::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void } } - return (ret_val); + return ret_val; } @@ -433,7 +335,7 @@ const ::VSFormat & Bitdepth::get_output_colorspace (const ::VSMap &in, ::VSMap & if (dst_csp != undef) { fmt_dst_ptr = _vsapi.getFormatPreset (dst_csp, &core); - if (fmt_dst_ptr == 0) + if (fmt_dst_ptr == nullptr) { throw_inval_arg ("unknown output colorspace."); } @@ -492,9 +394,9 @@ const ::VSFormat & Bitdepth::get_output_colorspace (const ::VSMap &in, ::VSMap & } catch (...) { - fmt_dst_ptr = 0; + fmt_dst_ptr = nullptr; } - if (fmt_dst_ptr == 0) + if (fmt_dst_ptr == nullptr) { throw_rt_err ( "couldn\'t get a pixel format identifier for the output clip." @@ -502,1836 +404,10 @@ const ::VSFormat & Bitdepth::get_output_colorspace (const ::VSMap &in, ::VSMap & } } - return (*fmt_dst_ptr); -} - - - -void Bitdepth::build_dither_pat () -{ - _errdif_flag = false; - - switch (_dmode) - { - case DMode_BAYER: - build_dither_pat_bayer (); - break; - - case DMode_FILTERLITE: - case DMode_STUCKI: - case DMode_ATKINSON: - case DMode_FLOYD: - case DMode_OSTRO: - _errdif_flag = true; - break; - - case DMode_ROUND: - case DMode_FAST: - default: - build_dither_pat_round (); - break; - - case DMode_VOIDCLUST: - build_dither_pat_void_and_cluster (_pat_size); - break; - } -} - - - -void Bitdepth::build_dither_pat_round () -{ - PatData & pat_data = _dither_pat_arr [0]; - for (int y = 0; y < PAT_WIDTH; ++y) - { - for (int x = 0; x < PAT_WIDTH; ++x) - { - pat_data [y] [x] = 0; - } - } - - build_next_dither_pat (); -} - - - -void Bitdepth::build_dither_pat_bayer () -{ - assert (fstb::is_pow_2 (int (PAT_WIDTH))); - - PatData & pat_data = _dither_pat_arr [0]; - for (int y = 0; y < PAT_WIDTH; ++y) - { - for (int x = 0; x < PAT_WIDTH; ++x) 
- { - pat_data [y] [x] = -128; - } - } - - for (int dith_size = 2; dith_size <= PAT_WIDTH; dith_size <<= 1) - { - for (int y = 0; y < PAT_WIDTH; y += 2) - { - for (int x = 0; x < PAT_WIDTH; x += 2) - { - const int xx = (x >> 1) + (PAT_WIDTH >> 1); - const int yy = (y >> 1) + (PAT_WIDTH >> 1); - const int val = (pat_data [yy] [xx] + 128) >> 2; - pat_data [y ] [x ] = int16_t (val + 0-128); - pat_data [y ] [x + 1] = int16_t (val + 128-128); - pat_data [y + 1] [x ] = int16_t (val + 192-128); - pat_data [y + 1] [x + 1] = int16_t (val + 64-128); - } - } - } - - build_next_dither_pat (); -} - - - -void Bitdepth::build_dither_pat_void_and_cluster (int w) -{ - assert (PAT_WIDTH % w == 0); - fmtcl::VoidAndCluster vc_gen; - fmtcl::MatrixWrap pat_raw (w, w); - vc_gen.create_matrix (pat_raw); - - PatData & pat_data = _dither_pat_arr [0]; - const int area = w * w; - for (int y = 0; y < PAT_WIDTH; ++y) - { - for (int x = 0; x < PAT_WIDTH; ++x) - { - pat_data [y] [x] = int16_t (pat_raw (x, y) * 256 / area - 128); - } - } - - build_next_dither_pat (); -} - - - -void Bitdepth::build_next_dither_pat () -{ - for (int seq = 1; seq < PAT_PERIOD; ++seq) - { - const int angle = (_dyn_flag) ? 
seq & 3 : 0; - copy_dither_pat_rotate ( - _dither_pat_arr [seq], - _dither_pat_arr [0], - angle - ); - } -} - - - -void Bitdepth::copy_dither_pat_rotate (PatData &dst, const PatData &src, int angle) -{ - assert (angle >= 0); - assert (angle < 4); - - static const int sin_arr [4] = { 0, 1, 0, -1 }; - const int s = sin_arr [ angle ]; - const int c = sin_arr [(angle + 1) & 3]; - - assert (fstb::is_pow_2 (int (PAT_WIDTH))); - const int mask = PAT_WIDTH - 1; - - for (int y = 0; y < PAT_WIDTH; ++y) - { - for (int x = 0; x < PAT_WIDTH; ++x) - { - const int xs = (x * c - y * s) & mask; - const int ys = (x * s + y * c) & mask; - - dst [y] [x] = src [ys] [xs]; - } - } -} - - - -// All possible combinations -#define fmtc_Bitdepth_SPAN_INT(SETP, NAMP, NAMF, simple_flag, dst_res, dst_fmt, src_res, src_fmt) \ - switch ( ((simple_flag) << 30) \ - + ((dst_res) << 24) + ((dst_fmt) << 16) \ - + ((src_res) << 8) + (src_fmt)) \ - { \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 9) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 10) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 11) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 12) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 16) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 10) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 11) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 12) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 16) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, 
fmtcl::SplFmt_INT16, uint16_t, 11) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 12) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 16) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 16) \ - } - -// All possible combinations using float as intermediary data -#define fmtc_Bitdepth_SPAN_FLT(SETP, NAMP, NAMF, simple_flag, dst_res, dst_fmt, src_res, src_fmt) \ - switch ( ((simple_flag) << 30) \ - + ((dst_res) << 24) + ((dst_fmt) << 16) \ - + ((src_res) << 8) + (src_fmt)) \ - { \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT8 , uint8_t , 8) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 9) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 10) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 11) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 12) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_INT16, uint16_t, 16) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t , 8, fmtcl::SplFmt_FLOAT, float , 32) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT8 , uint8_t , 8) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 9) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 10) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 11) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 12) \ - SETP (NAMP, NAMF, 
fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_INT16, uint16_t, 16) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 9, fmtcl::SplFmt_FLOAT, float , 32) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT8 , uint8_t , 8) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 9) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 10) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 11) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 12) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 16) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_FLOAT, float , 32) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT8 , uint8_t , 8) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 9) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 10) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 11) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 12) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 16) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_FLOAT, float , 32) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT8 , uint8_t , 8) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 9) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 10) \ - SETP 
(NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 11) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 12) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 14) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 16) \ - SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_FLOAT, float , 32) \ - } - -#define fmtc_Bitdepth_SET_FNC_INT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ - case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_int_int_ptr = \ - &ThisType::process_seg_##NAMF##_int_int_cpp ; \ - break; \ - case (true << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_int_int_ptr = \ - &ThisType::process_seg_##NAMF##_int_int_cpp ; \ - break; - -#define fmtc_Bitdepth_SET_FNC_FLT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ - case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_flt_int_ptr = \ - &ThisType::process_seg_##NAMF##_flt_int_cpp ; \ - break; \ - case (true << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_flt_int_ptr = \ - &ThisType::process_seg_##NAMF##_flt_int_cpp ; \ - break; - -#define fmtc_Bitdepth_SET_FNC_INT_SSE2(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ - case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_int_int_ptr = \ - &ThisType::process_seg_##NAMF##_int_int_sse2 ; \ - break; \ - case (true << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_int_int_ptr = \ - &ThisType::process_seg_##NAMF##_int_int_sse2 ; \ - break; - -#define fmtc_Bitdepth_SET_FNC_FLT_SSE2(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ - case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_flt_int_ptr = \ - &ThisType::process_seg_##NAMF##_flt_int_sse2 ; \ - break; \ - case (true << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_flt_int_ptr = \ - 
&ThisType::process_seg_##NAMF##_flt_int_sse2 ; \ - break; - -#define fmtc_Bitdepth_SET_FNC_ERRDIF_INT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ - case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_int_int_ptr = \ - &ThisType::process_seg_errdif_int_int_cpp >; \ - break; \ - case (true << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_int_int_ptr = \ - &ThisType::process_seg_errdif_int_int_cpp >; \ - break; - -#define fmtc_Bitdepth_SET_FNC_ERRDIF_FLT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ - case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_flt_int_ptr = \ - &ThisType::process_seg_errdif_flt_int_cpp >; \ - break; \ - case (true << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ - _process_seg_flt_int_ptr = \ - &ThisType::process_seg_errdif_flt_int_cpp >; \ - break; - - - -void Bitdepth::init_fnc_fast () -{ - const fmtcl::SplFmt dst_fmt = _splfmt_dst; - const int dst_res = _vi_out.format->bitsPerSample; - const fmtcl::SplFmt src_fmt = _splfmt_src; - const int src_res = _vi_in.format->bitsPerSample; - - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_INT, fast, fast, false, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_FLT, fast, fast, false, - dst_res, dst_fmt, src_res, src_fmt - ) - -#if (fstb_ARCHI == fstb_ARCHI_X86) - if (_sse2_flag) - { - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_INT_SSE2, fast, fast, false, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_FLT_SSE2, fast, fast, false, - dst_res, dst_fmt, src_res, src_fmt - ) - } -#endif -} - - - -void Bitdepth::init_fnc_ordered () -{ - assert (! 
_errdif_flag); - - const fmtcl::SplFmt dst_fmt = _splfmt_dst; - const int dst_res = _vi_out.format->bitsPerSample; - const fmtcl::SplFmt src_fmt = _splfmt_src; - const int src_res = _vi_in.format->bitsPerSample; - - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_INT, ord, ord, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_FLT, ord, ord, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - -#if (fstb_ARCHI == fstb_ARCHI_X86) - if (_sse2_flag) - { - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_INT_SSE2, ord, ord, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_FLT_SSE2, ord, ord, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - } -#endif -} - - - -void Bitdepth::init_fnc_errdiff () -{ - assert (_errdif_flag); - - const fmtcl::SplFmt dst_fmt = _splfmt_dst; - const int dst_res = _vi_out.format->bitsPerSample; - const fmtcl::SplFmt src_fmt = _splfmt_src; - const int src_res = _vi_in.format->bitsPerSample; - - switch (_dmode) - { - case DMode_FILTERLITE: - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, FilterLite, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, FilterLite, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - break; - - case DMode_STUCKI: - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, Stucki, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, Stucki, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - break; - - case DMode_ATKINSON: - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, Atkinson, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, Atkinson, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - break; - - case 
DMode_FLOYD: - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, FloydSteinberg, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, FloydSteinberg, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - break; - - case DMode_OSTRO: - fmtc_Bitdepth_SPAN_INT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, Ostromoukhov, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - fmtc_Bitdepth_SPAN_FLT ( - fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, Ostromoukhov, _simple_flag, - dst_res, dst_fmt, src_res, src_fmt - ) - break; - - default: - break; - } - -} - - - -#undef fmtc_Bitdepth_SET_FNC_INT -#undef fmtc_Bitdepth_SET_FNC_FLT -#undef fmtc_Bitdepth_SPAN_INT -#undef fmtc_Bitdepth_SPAN_FLT - - - -void Bitdepth::dither_plane (fmtcl::SplFmt dst_fmt, int dst_res, uint8_t *dst_ptr, int dst_stride, fmtcl::SplFmt src_fmt, int src_res, const uint8_t *src_ptr, int src_stride, int w, int h, const fmtcl::BitBltConv::ScaleInfo &scale_info, const PatData &pattern, uint32_t rnd_state) -{ - fstb::unused (dst_fmt); - assert (dst_fmt >= 0); - assert (dst_fmt < fmtcl::SplFmt_NBR_ELT); - assert (dst_res >= 8); - assert (dst_ptr != 0); - assert (src_fmt >= 0); - assert (src_fmt < fmtcl::SplFmt_NBR_ELT); - assert (src_res >= 8); - assert (src_ptr != 0); - assert (w > 0); - assert (h > 0); - - SegContext ctx; - ctx._rnd_state = rnd_state; - ctx._scale_info_ptr = &scale_info; - - const bool sc_flag = - ( src_fmt == fmtcl::SplFmt_FLOAT - || ! fstb::is_eq (scale_info._gain * ((uint64_t (1)) << (src_res - dst_res)), 1.0, 1e-6) - || ! fstb::is_null (scale_info._add_cst, 1e-6)); - - void (ThisType::* process_ptr) (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const = - (sc_flag) - ? 
_process_seg_flt_int_ptr - : _process_seg_int_int_ptr; - assert (process_ptr != 0); - - fmtcl::ErrDifBuf * ed_buf_ptr = 0; - if (_errdif_flag) - { - ed_buf_ptr = _buf_pool.take_obj (); - if (ed_buf_ptr == 0) - { - throw_rt_err ("cannot allocate memory for temporary buffer."); - } - ed_buf_ptr->clear ((sc_flag) ? sizeof (float) : sizeof (int16_t)); - } - - switch (_dmode) - { - case DMode_BAYER: - case DMode_ROUND: - case DMode_VOIDCLUST: - ctx._pattern_ptr = &pattern; - break; - - case DMode_FAST: - // Nothing - break; - - case DMode_FILTERLITE: - case DMode_STUCKI: - case DMode_ATKINSON: - case DMode_FLOYD: - case DMode_OSTRO: - ctx._ed_buf_ptr = ed_buf_ptr; - break; - - default: - assert (false); - throw_logic_err ("unexpected dithering algorithm"); - break; - } - - for (int y = 0; y < h; ++y) - { - ctx._y = y; - - (this->*process_ptr) (dst_ptr, src_ptr, w, ctx); - - src_ptr += src_stride; - dst_ptr += dst_stride; - } - - if (ed_buf_ptr != 0) - { - _buf_pool.return_obj (*ed_buf_ptr); - ed_buf_ptr = 0; - } -} - - - -template -void Bitdepth::process_seg_fast_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &/*ctx*/) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - - enum { DIF_BITS = SRC_BITS - DST_BITS }; - static_assert (DIF_BITS >= 0, "This function cannot increase bidepth."); - - const SRC_TYPE * src_n_ptr = reinterpret_cast (src_ptr); - DST_TYPE * dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); - - for (int pos = 0; pos < w; ++pos) - { - const int s = src_n_ptr [pos]; - const int pix = s >> DIF_BITS; - dst_n_ptr [pos] = static_cast (pix); - } -} - - - -template -void Bitdepth::process_seg_fast_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - assert (ctx._scale_info_ptr != 0); - - const SRC_TYPE * src_n_ptr = reinterpret_cast (src_ptr); - DST_TYPE * dst_n_ptr = reinterpret_cast < DST_TYPE *> 
(dst_ptr); - - const float mul = float (ctx._scale_info_ptr->_gain); - const float add = float (ctx._scale_info_ptr->_add_cst); - const int vmax = (1 << DST_BITS) - 1; - - for (int pos = 0; pos < w; ++pos) - { - float s = float (src_n_ptr [pos]); - s = s * mul + add; - const int quant = fstb::conv_int_fast (s); - const int pix = fstb::limit (quant, 0, vmax); - dst_n_ptr [pos] = static_cast (pix); - } -} - - - -#if (fstb_ARCHI == fstb_ARCHI_X86) - - - -template -void Bitdepth::process_seg_fast_int_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &/*ctx*/) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - - enum { DIF_BITS = SRC_BITS - DST_BITS }; - static_assert (DIF_BITS >= 0, "This function cannot increase bidepth."); - - typedef typename fmtcl::ProxyRwSse2 ::PtrConst::Type SrcPtr; - typedef typename fmtcl::ProxyRwSse2 ::Ptr::Type DstPtr; - SrcPtr src_n_ptr = reinterpret_cast (src_ptr); - DstPtr dst_n_ptr = reinterpret_cast (dst_ptr); - const __m128i zero = _mm_setzero_si128 (); - const __m128i mask_lsb = _mm_set1_epi16 (0x00FF); - - for (int pos = 0; pos < w; pos += 8) - { - const __m128i s = - fmtcl::ProxyRwSse2 ::read_i16 (src_n_ptr + pos, zero); - const __m128i pix = _mm_srli_epi16 (s, DIF_BITS); - fmtcl::ProxyRwSse2 ::write_i16 (dst_n_ptr + pos, pix, mask_lsb); - } -} - - - -template -void Bitdepth::process_seg_fast_flt_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - assert (ctx._scale_info_ptr != 0); - - typedef typename fmtcl::ProxyRwSse2 ::PtrConst::Type SrcPtr; - typedef typename fmtcl::ProxyRwSse2 ::Ptr::Type DstPtr; - SrcPtr src_n_ptr = reinterpret_cast (src_ptr); - DstPtr dst_n_ptr = reinterpret_cast (dst_ptr); - - const __m128 mul = _mm_set1_ps (float (ctx._scale_info_ptr->_gain)); - const __m128 add = _mm_set1_ps (float (ctx._scale_info_ptr->_add_cst)); - const __m128 vmax = _mm_set1_ps (float 
((1 << DST_BITS) - 1)); - const __m128 zero_f = _mm_setzero_ps (); - const __m128i zero_i = _mm_setzero_si128 (); - const __m128i mask_lsb = _mm_set1_epi16 (0x00FF); - const __m128i sign_bit = _mm_set1_epi16 (-0x8000); - const __m128 offset = _mm_set1_ps (-32768); - - for (int pos = 0; pos < w; pos += 8) - { - __m128 s0; - __m128 s1; - fmtcl::ProxyRwSse2 ::read_flt ( - src_n_ptr + pos, s0, s1, zero_i - ); - s0 = _mm_add_ps (_mm_mul_ps (s0, mul), add); - s1 = _mm_add_ps (_mm_mul_ps (s1, mul), add); - s0 = _mm_max_ps (_mm_min_ps (s0, vmax), zero_f); - s1 = _mm_max_ps (_mm_min_ps (s1, vmax), zero_f); - fmtcl::ProxyRwSse2 ::write_flt ( - dst_n_ptr + pos, s0, s1, mask_lsb, sign_bit, offset - ); - } -} - - - -#endif - - - -template -void Bitdepth::process_seg_ord_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - - enum { DIF_BITS = SRC_BITS - DST_BITS }; - static_assert (DIF_BITS >= 1, "This function must reduce bidepth."); - - const PatRow & pattern = ctx.extract_pattern_row (); - uint32_t & rnd_state = ctx._rnd_state; - - const SRC_TYPE * src_n_ptr = reinterpret_cast (src_ptr); - DST_TYPE * dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); - - const int rcst = 1 << (DIF_BITS - 1); - const int vmax = (1 << DST_BITS) - 1; - - const int ao = _ampo_i; // s8 - const int an = _ampn_i; // s8 - - for (int pos = 0; pos < w; ++pos) - { - if (! 
S_FLAG) - { - generate_rnd (rnd_state); - } - - const int s = src_n_ptr [pos]; - - const int dith_o = pattern [pos & (PAT_WIDTH - 1)]; // s8 - int dither; - if (S_FLAG) - { - enum { DIT_SHFT = 8 - DIF_BITS }; - dither = fstb::sshift_r (dith_o); - } - else - { - const int dith_n = int8_t (rnd_state >> 24); // s8 - - enum { DIT_SHFT = AMP_BITS + 8 - DIF_BITS }; - dither = fstb::sshift_r (dith_o * ao + dith_n * an); // s16 = s8 * s8 // s16 = s16 >> cst - } - const int sum = s + dither; // s16+ - const int quant = (sum + rcst) >> DIF_BITS; // s16 - - const int pix = fstb::limit (quant, 0, vmax); - dst_n_ptr [pos] = static_cast (pix); - } - - if (! S_FLAG) - { - generate_rnd_eol (rnd_state); - } + return *fmt_dst_ptr; } -template -void Bitdepth::process_seg_ord_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - - const SRC_TYPE * src_n_ptr = reinterpret_cast (src_ptr); - DST_TYPE * dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); - - const PatRow & pattern = ctx.extract_pattern_row (); - uint32_t & rnd_state = ctx._rnd_state; - - const int ao = _ampo_i; // s8 - const int an = _ampn_i; // s8 - - const float mul = float (ctx._scale_info_ptr->_gain); - const float add = float (ctx._scale_info_ptr->_add_cst); - const float qt = 1.0f / (1 << ((S_FLAG ? 0 : AMP_BITS) + 8)); - const int vmax = (1 << DST_BITS) - 1; - - for (int pos = 0; pos < w; ++pos) - { - if (! 
S_FLAG) - { - generate_rnd (rnd_state); - } - - float s = float (src_n_ptr [pos]); - s = s * mul + add; - - const int dith_o = pattern [pos & (PAT_WIDTH - 1)]; // s8 - float dither; - if (S_FLAG) - { - dither = dith_o * qt; - } - else - { - const int dith_n = int8_t (rnd_state >> 24); // s8 - dither = (dith_o * ao + dith_n * an) * qt; - } - const float sum = s + dither; - const int quant = fstb::round_int (sum); - - const int pix = fstb::limit (quant, 0, vmax); - dst_n_ptr [pos] = static_cast (pix); - } - - if (! S_FLAG) - { - generate_rnd_eol (rnd_state); - } -} - - - -#if (fstb_ARCHI == fstb_ARCHI_X86) - - - -template -void Bitdepth::process_seg_ord_int_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - - enum { DIF_BITS = SRC_BITS - DST_BITS }; - static_assert (DIF_BITS >= 0, "This function cannot increase bidepth."); - - const PatRow & pattern = ctx.extract_pattern_row (); - uint32_t & rnd_state = ctx._rnd_state; - - typedef typename fmtcl::ProxyRwSse2 ::PtrConst::Type SrcPtr; - typedef typename fmtcl::ProxyRwSse2 ::Ptr::Type DstPtr; - SrcPtr src_n_ptr = reinterpret_cast (src_ptr); - DstPtr dst_n_ptr = reinterpret_cast (dst_ptr); - const __m128i zero = _mm_setzero_si128 (); - const __m128i mask_lsb = _mm_set1_epi16 (0x00FF); - const __m128i c128_16 = _mm_set1_epi16 (0x80); - const __m128i sign_bit = _mm_set1_epi16 (-0x8000); - const __m128i rcst = _mm_set1_epi16 (1 << (DIF_BITS - 1)); - const __m128i vmax = _mm_set1_epi16 ((1 << DST_BITS) - 1); - - const __m128i ampo_i = _mm_set1_epi16 (int16_t (_ampo_i)); // 8 ?16 [0 ; 255] - const __m128i ampn_i = _mm_set1_epi16 (int16_t (_ampn_i)); // 8 ?16 [0 ; 255] - - for (int pos = 0; pos < w; pos += 8) - { - const __m128i s = // 8 u16 - fmtcl::ProxyRwSse2 ::read_i16 (src_n_ptr + pos, zero); - - __m128i dith_o = - _mm_load_si128 (reinterpret_cast ( - &pattern [pos & (PAT_WIDTH - 1)] - ) - ); - - __m128i dither; - if 
(S_FLAG) - { - enum { DIT_SHFT = 8 - DIF_BITS }; - dither = _mm_srai_epi16 (dith_o, DIT_SHFT); - } - else - { - // Random generation - generate_rnd (rnd_state); - const uint32_t rnd_03 = rnd_state; - generate_rnd (rnd_state); - const uint32_t rnd_47 = rnd_state; - const __m128i rnd_val = _mm_set_epi32 (0, 0, rnd_47, rnd_03); - - __m128i dith_n = - _mm_unpacklo_epi8 (rnd_val, zero); // 8 ?16 [0 ; 255] - dith_n = _mm_sub_epi16 (dith_n, c128_16); // 8 s16 [-128 ; 127] - - dith_o = _mm_mullo_epi16 (dith_o, ampo_i); // 8 s16 (full range) - dith_n = _mm_mullo_epi16 (dith_n, ampn_i); // 8 s16 (full range) - dither = _mm_adds_epi16 (dith_o, dith_n); // 8 s16 = s8 * s8 - - enum { DIT_SHFT = AMP_BITS + 8 - DIF_BITS }; - dither = _mm_srai_epi16 (dither, DIT_SHFT); // 8 s16 = s16 >> cst - } - - const __m128i dith_rcst = _mm_adds_epi16 (dither, rcst); - - __m128i quant; - if (S_FLAG && SRC_BITS < 16) - { - __m128i sum = _mm_adds_epi16 (s, dith_rcst); - quant = _mm_srai_epi16 (sum, DIF_BITS); - } - else - { - __m128i sum = _mm_xor_si128 (s, sign_bit); // 8 s16 - sum = _mm_adds_epi16 (sum, dith_rcst); - sum = _mm_xor_si128 (sum, sign_bit); // 8 u16 - quant = _mm_srli_epi16 (sum, DIF_BITS); - } - - __m128i pix = quant; - if (SRC_BITS < 16) - { - pix = _mm_max_epi16 (pix, zero); - pix = _mm_min_epi16 (pix, vmax); - } - - fmtcl::ProxyRwSse2 ::write_i16 (dst_n_ptr + pos, pix, mask_lsb); - } - - if (! S_FLAG) - { - generate_rnd_eol (rnd_state); - } -} - - - -template -void Bitdepth::process_seg_ord_flt_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - assert (((_mm_getcsr () >> 13) & 3) == 0); // 00 = Round to nearest (even) - - const PatRow & pattern = ctx.extract_pattern_row (); - uint32_t & rnd_state = ctx._rnd_state; - - const float qt_cst = 1.0f / ( - 65536.0f * float (1 << ((S_FLAG ? 
0 : AMP_BITS) + 8)) - ); - - typedef typename fmtcl::ProxyRwSse2 ::PtrConst::Type SrcPtr; - typedef typename fmtcl::ProxyRwSse2 ::Ptr::Type DstPtr; - SrcPtr src_n_ptr = reinterpret_cast (src_ptr); - DstPtr dst_n_ptr = reinterpret_cast (dst_ptr); - const __m128 zero_f = _mm_setzero_ps (); - const __m128i zero_i = _mm_setzero_si128 (); - const __m128i c128_16 = _mm_set1_epi16 (0x80); - const __m128 mul = _mm_set1_ps (float (ctx._scale_info_ptr->_gain)); - const __m128 add = _mm_set1_ps (float (ctx._scale_info_ptr->_add_cst)); - const __m128 qt = _mm_set1_ps (qt_cst); - const __m128 vmax = _mm_set1_ps ((1 << DST_BITS) - 1); - const __m128 offset = _mm_set1_ps (-32768); - const __m128i mask_lsb = _mm_set1_epi16 (0x00FF); - const __m128i sign_bit = _mm_set1_epi16 (-0x8000); - - const __m128i ampo_i = _mm_set1_epi16 (int16_t (_ampo_i)); // 8 ?16 [0 ; 255] - const __m128i ampn_i = _mm_set1_epi16 (int16_t (_ampn_i)); // 8 ?16 [0 ; 255] - - for (int pos = 0; pos < w; pos += 8) - { - __m128 s0; - __m128 s1; - fmtcl::ProxyRwSse2 ::read_flt ( - src_n_ptr + pos, s0, s1, zero_i - ); - s0 = _mm_add_ps (_mm_mul_ps (s0, mul), add); - s1 = _mm_add_ps (_mm_mul_ps (s1, mul), add); - - __m128i dith_o = - _mm_load_si128 (reinterpret_cast ( - &pattern [pos & (PAT_WIDTH - 1)] - ) - ); - - __m128i dither; - if (S_FLAG) - { - dither = dith_o; - } - else - { - // Random generation - generate_rnd (rnd_state); - const uint32_t rnd_03 = rnd_state; - generate_rnd (rnd_state); - const uint32_t rnd_47 = rnd_state; - const __m128i rnd_val = _mm_set_epi32 (0, 0, rnd_47, rnd_03); - - __m128i dith_n = - _mm_unpacklo_epi8 (rnd_val, zero_i); // 8 ?16 [0 ; 255] - dith_n = _mm_sub_epi16 (dith_n, c128_16); // 8 s16 [-128 ; 127] - - dith_o = _mm_mullo_epi16 (dith_o, ampo_i); // 8 s16 (full range) - dith_n = _mm_mullo_epi16 (dith_n, ampn_i); // 8 s16 (full range) - dither = _mm_adds_epi16 (dith_o, dith_n); // 8 s16 = s8 * s8 - } - - __m128i dither_03i = _mm_unpacklo_epi16 (zero_i, dither); // 4 s32 << 16 - 
__m128i dither_47i = _mm_unpackhi_epi16 (zero_i, dither); // 4 s32 << 16 - __m128 dither_03 = _mm_cvtepi32_ps (dither_03i); - __m128 dither_47 = _mm_cvtepi32_ps (dither_47i); - dither_03 = _mm_mul_ps (dither_03, qt); - dither_47 = _mm_mul_ps (dither_47, qt); - - s0 = _mm_add_ps (s0, dither_03); - s1 = _mm_add_ps (s1, dither_47); - - s0 = _mm_max_ps (_mm_min_ps (s0, vmax), zero_f); - s1 = _mm_max_ps (_mm_min_ps (s1, vmax), zero_f); - - fmtcl::ProxyRwSse2 ::write_flt ( - dst_n_ptr + pos, s0, s1, mask_lsb, sign_bit, offset - ); - } - - if (! S_FLAG) - { - generate_rnd_eol (rnd_state); - } -} - - - -#endif // fstb_ARCHI_X86 - - - -template -void Bitdepth::process_seg_errdif_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - assert (ctx._y >= 0); - - typedef typename ERRDIF::SrcType SRC_TYPE; - typedef typename ERRDIF::DstType DST_TYPE; - enum { SRC_BITS = ERRDIF::SRC_BITS }; - enum { DST_BITS = ERRDIF::DST_BITS }; - - uint32_t & rnd_state = ctx._rnd_state; - fmtcl::ErrDifBuf & ed_buf = *ctx._ed_buf_ptr; - - const SRC_TYPE * src_n_ptr = reinterpret_cast (src_ptr); - DST_TYPE * dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); - - const int ae = _ampe_i; - - // Makes e1 point on the default buffer line for single-line - // error diffusor because we use it in prepare_next_line() - int e0 = 0; - int e1 = 0; - if (ERRDIF::NBR_ERR_LINES == 2) - { - e0 = ctx._y & 1 ; - e1 = 1 - (ctx._y & 1); - } - int16_t * err0_ptr = ed_buf.get_buf (e0); - int16_t * err1_ptr = ed_buf.get_buf (e1); - - int err_nxt0 = ed_buf.use_mem (0); - int err_nxt1 = ed_buf.use_mem (1); - - // Forward - if ((ctx._y & 1) == 0) - { - for (int x = 0; x < w; ++x) - { - int err = err_nxt0; - SRC_TYPE src_raw; - - quantize_pix_int ( - dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, _ampn_i - ); - ERRDIF::template diffuse <1> ( - err, err_nxt0, err_nxt1, - err0_ptr + x, err1_ptr + x, src_raw - ); - 
} - ERRDIF::prepare_next_line (err1_ptr + w); - } - - // Backward - else - { - for (int x = w - 1; x >= 0; --x) - { - int err = err_nxt0; - SRC_TYPE src_raw; - - quantize_pix_int ( - dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, _ampn_i - ); - ERRDIF::template diffuse <-1> ( - err, err_nxt0, err_nxt1, - err0_ptr + x, err1_ptr + x, src_raw - ); - } - ERRDIF::prepare_next_line (err1_ptr - 1); - } - - ed_buf.use_mem (0) = int16_t (err_nxt0); - ed_buf.use_mem (1) = int16_t (err_nxt1); - - if (! S_FLAG) - { - generate_rnd_eol (rnd_state); - } -} - - - -template -void Bitdepth::process_seg_errdif_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const -{ - assert (dst_ptr != 0); - assert (src_ptr != 0); - assert (w > 0); - assert (ctx._y >= 0); - - typedef typename ERRDIF::SrcType SRC_TYPE; - typedef typename ERRDIF::DstType DST_TYPE; - enum { SRC_BITS = ERRDIF::SRC_BITS }; - enum { DST_BITS = ERRDIF::DST_BITS }; - - uint32_t & rnd_state = ctx._rnd_state; - fmtcl::ErrDifBuf & ed_buf = *ctx._ed_buf_ptr; - - const SRC_TYPE * src_n_ptr = reinterpret_cast (src_ptr); - DST_TYPE * dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); - - const float mul = float (ctx._scale_info_ptr->_gain); - const float add = float (ctx._scale_info_ptr->_add_cst); - const float ae = float (_ampe_f); - const float an = float (_ampn_f); - - // Makes e1 point on the default buffer line for single-line - // error diffusor because we use it in prepare_next_line() - int e0 = 0; - int e1 = 0; - if (ERRDIF::NBR_ERR_LINES == 2) - { - e0 = ctx._y & 1 ; - e1 = 1 - (ctx._y & 1); - } - float * err0_ptr = ed_buf.get_buf (e0); - float * err1_ptr = ed_buf.get_buf (e1); - - float err_nxt0 = ed_buf.use_mem (0); - float err_nxt1 = ed_buf.use_mem (1); - - // Forward - if ((ctx._y & 1) == 0) - { - for (int x = 0; x < w; ++x) - { - float err = err_nxt0; - SRC_TYPE src_raw; - - quantize_pix_flt ( - dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, an, mul, add - ); - 
ERRDIF::template diffuse <1> ( - err, err_nxt0, err_nxt1, - err0_ptr + x, err1_ptr + x, src_raw - ); - } - ERRDIF::prepare_next_line (err1_ptr + w); - } - - // Backward - else - { - for (int x = w - 1; x >= 0; --x) - { - float err = err_nxt0; - SRC_TYPE src_raw; - - quantize_pix_flt ( - dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, an, mul, add - ); - ERRDIF::template diffuse <-1> ( - err, err_nxt0, err_nxt1, - err0_ptr + x, err1_ptr + x, src_raw - ); - } - ERRDIF::prepare_next_line (err1_ptr - 1); - } - - ed_buf.use_mem (0) = err_nxt0; - ed_buf.use_mem (1) = err_nxt1; - - if (! S_FLAG) - { - generate_rnd_eol (rnd_state); - } -} - - - -void Bitdepth::generate_rnd (uint32_t &state) -{ - state = state * uint32_t (1664525) + 1013904223; -} - - - -void Bitdepth::generate_rnd_eol (uint32_t &state) -{ - state = state * uint32_t (1103515245) + 12345; - if ((state & 0x2000000) != 0) - { - state = state * uint32_t (134775813) + 1; - } -} - - - -Bitdepth::SegContext::SegContext () -: _pattern_ptr (0) -, _rnd_state (0) -, _scale_info_ptr (0) -, _ed_buf_ptr (0) -, _y (-1) -{ - // Nothing -} - - - -const Bitdepth::PatRow & Bitdepth::SegContext::extract_pattern_row () const -{ - assert (_pattern_ptr != 0); - assert (_y >= 0); - - return ((*_pattern_ptr) [_y & (PAT_WIDTH - 1)]); -} - - - -template -void Bitdepth::quantize_pix_int (DST_TYPE *dst_ptr, const SRC_TYPE *src_ptr, SRC_TYPE &src_raw, int x, int &err, uint32_t &rnd_state, int ampe_i, int ampn_i) -{ - enum { DIF_BITS = SRC_BITS - DST_BITS }; - enum { TMP_BITS = - (DIF_BITS < 6 && SRC_BITS < ERR_RES && DST_BITS < ERR_RES) - ? ERR_RES - : SRC_BITS }; - enum { TMP_SHFT = TMP_BITS - SRC_BITS }; - enum { TMP_INVS = TMP_BITS - DST_BITS }; - - const int rcst = 1 << (TMP_INVS - 1); - const int vmax = (1 << DST_BITS) - 1; - - src_raw = src_ptr [x]; - const int src = src_raw << TMP_SHFT; - const int preq = src + err; - - int sum = preq; - if (! 
S_FLAG) - { - enum { DIT_SHFT = AMP_BITS + 8 - TMP_INVS }; // May be negative - - generate_rnd (rnd_state); - const int rnd_val = int8_t (rnd_state >> 24); // s8 - const int err_add = (err < 0) ? -ampe_i : ampe_i; - const int noise = - fstb::sshift_r (rnd_val * ampn_i + err_add); // s16 = s8 * s8 // s16 = s16 >> cst - - sum += noise; - } - - const int quant = (sum + rcst) >> TMP_INVS; - - err = preq - (quant << TMP_INVS); - const int pix = fstb::limit (quant, 0, vmax); - - dst_ptr [x] = static_cast (pix); -} - - - -template -static inline SRC_TYPE Bitdepth_extract_src (SRC_TYPE src_read, float src) -{ - fstb::unused (src); - - return (src_read); -} - -static inline float Bitdepth_extract_src (float src_read, float src) -{ - fstb::unused (src_read); - - return (src); -} - -template -void Bitdepth::quantize_pix_flt (DST_TYPE *dst_ptr, const SRC_TYPE *src_ptr, SRC_TYPE &src_raw, int x, float &err, uint32_t &rnd_state, float ampe_f, float ampn_f, float mul, float add) -{ - const int vmax = (1 << DST_BITS) - 1; - - const SRC_TYPE src_read = src_ptr [x]; - const float src = float (src_read) * mul + add; - src_raw = Bitdepth_extract_src (src_read, src); - const float preq = src + err; - - float sum = preq; - if (! S_FLAG) - { - generate_rnd (rnd_state); - const int32_t rnd_val = int32_t (rnd_state); // Signed - const float err_add = (err < 0) ? -ampe_f : (err > 0) ? ampe_f : 0; - const float noise = rnd_val * ampn_f + err_add; - - sum += noise; - } - - const int quant = fstb::round_int (sum); - - err = preq - float (quant); - const int pix = fstb::limit (quant, 0, vmax); - - dst_ptr [x] = static_cast (pix); -} - - - -// Original coefficients : 7, 3, 5, 1 -// Optimised coefficients for serpentine scan: 7, 4, 5, 0 -// Source: -// Sam Hocevar and Gary Niger, -// Reinstating Floyd-Steinberg: Improved Metrics for Quality Assessment -// of Error Diffusion Algorithms, -// Lecture Notes in Computer Science LNCS 5099, pp. 
3845, 2008 -// (Proceedings of the International Conference on Image and Signal Processing -// ICISP 2008) ISSN 0302-9743 - -#define fmtc_Bitdepth_FS_OPTIMIZED_SERPENTINE_COEF - -template -template -void Bitdepth::DiffuseFloydSteinberg ::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (err_nxt1, err1_ptr, src_raw); - -#if defined (fmtc_Bitdepth_FS_OPTIMIZED_SERPENTINE_COEF) - const int e1 = 0; - const int e3 = (err * 4 + 8) >> 4; -#else - const int e1 = (err + 8) >> 4; - const int e3 = (err * 3 + 8) >> 4; -#endif - const int e5 = (err * 5 + 8) >> 4; - const int e7 = err - e1 - e3 - e5; - spread_error (e1, e3, e5, e7, err_nxt0, err0_ptr); -} - -template -template -void Bitdepth::DiffuseFloydSteinberg ::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (err_nxt1, err1_ptr, src_raw); - -#if defined (fmtc_Bitdepth_FS_OPTIMIZED_SERPENTINE_COEF) - const float e1 = 0; - const float e3 = err * (4.0f / 16); -#else - const float e1 = err * (1.0f / 16); - const float e3 = err * (3.0f / 16); -#endif - const float e5 = err * (5.0f / 16); - const float e7 = err * (7.0f / 16); - spread_error (e1, e3, e5, e7, err_nxt0, err0_ptr); -} - -template -template -void Bitdepth::DiffuseFloydSteinberg ::prepare_next_line (EB *err_ptr) -{ - // Nothing - fstb::unused (err_ptr); -} - -template -template -void Bitdepth::DiffuseFloydSteinberg ::spread_error (ET e1, ET e3, ET e5, ET e7, ET &err_nxt0, EB *err0_ptr) -{ - err_nxt0 = err0_ptr [DIR]; - err0_ptr [-DIR] += EB (e3); - err0_ptr [ 0] += EB (e5); - err0_ptr [ DIR] = EB (e1); - err_nxt0 += e7; -} - - - -template -template -void Bitdepth::DiffuseFilterLite ::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (err_nxt1, err1_ptr, src_raw); - - const int e1 = (err + 2) >> 2; - const int e2 = err - 2 * e1; - spread_error (e1, e2, 
err_nxt0, err0_ptr); -} - -template -template -void Bitdepth::DiffuseFilterLite ::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (err_nxt1, err1_ptr, src_raw); - - const float e1 = err * (1.0f / 4); - const float e2 = err * (2.0f / 4); - spread_error (e1, e2, err_nxt0, err0_ptr); -} - -template -template -void Bitdepth::DiffuseFilterLite ::prepare_next_line (EB *err_ptr) -{ - err_ptr [0] = EB (0); -} - -template -template -void Bitdepth::DiffuseFilterLite ::spread_error (ET e1, ET e2, ET &err_nxt0, EB *err0_ptr) -{ - err_nxt0 = err0_ptr [DIR]; - err0_ptr [-DIR] += EB (e1); - err0_ptr [ 0] = EB (e1); - err_nxt0 += e2; -} - - - -template -template -void Bitdepth::DiffuseStucki ::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (src_raw); - - const int m = (err << 4) / 42; - const int e1 = (m + 8) >> 4; - const int e2 = (m + 4) >> 3; - const int e4 = (m + 2) >> 2; -// const int e8 = (m + 1) >> 1; - const int sum = (e1 << 1) + ((e2 + e4) << 2); - const int e8 = (err - sum + 1) >> 1; - spread_error (e1, e2, e4, e8, err_nxt0, err_nxt1, err0_ptr, err1_ptr); -} - -template -template -void Bitdepth::DiffuseStucki ::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (src_raw); - - const float e1 = err * (1.0f / 42); - const float e2 = err * (2.0f / 42); - const float e4 = err * (4.0f / 42); - const float e8 = err * (8.0f / 42); - spread_error (e1, e2, e4, e8, err_nxt0, err_nxt1, err0_ptr, err1_ptr); -} - -template -template -void Bitdepth::DiffuseStucki ::prepare_next_line (EB *err_ptr) -{ - // Nothing - fstb::unused (err_ptr); -} - -template -template -void Bitdepth::DiffuseStucki ::spread_error (ET e1, ET e2, ET e4, ET e8, ET &err_nxt0, ET &err_nxt1, EB *err0_ptr, EB *err1_ptr) -{ - err_nxt0 = err_nxt1 + e8; - err_nxt1 = err1_ptr [DIR * 2] + e4; - 
err0_ptr [-DIR * 2] += EB (e2); - err0_ptr [-DIR ] += EB (e4); - err0_ptr [ 0 ] += EB (e8); - err0_ptr [ DIR ] += EB (e4); - err0_ptr [ DIR * 2] += EB (e2); - err1_ptr [-DIR * 2] += EB (e1); - err1_ptr [-DIR ] += EB (e2); - err1_ptr [ 0 ] += EB (e4); - err1_ptr [ DIR ] += EB (e2); - err1_ptr [ DIR * 2] = EB (e1); -} - - - -template -template -void Bitdepth::DiffuseAtkinson ::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (src_raw); - - const int e1 = (err + 4) >> 3; - spread_error (e1, err_nxt0, err_nxt1, err0_ptr, err1_ptr); -} - -template -template -void Bitdepth::DiffuseAtkinson ::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (src_raw); - - const float e1 = err * (1.0f / 8); - spread_error (e1, err_nxt0, err_nxt1, err0_ptr, err1_ptr); -} - -template -template -void Bitdepth::DiffuseAtkinson ::prepare_next_line (EB *err_ptr) -{ - err_ptr [0] = EB (0); -} - -template -template -void Bitdepth::DiffuseAtkinson ::spread_error (ET e1, ET &err_nxt0, ET &err_nxt1, EB *err0_ptr, EB *err1_ptr) -{ - err_nxt0 = err_nxt1 + e1; - err_nxt1 = err1_ptr [2 * DIR] + e1; - err0_ptr [-DIR] += EB (e1); - err0_ptr [ 0] += EB (e1); - err0_ptr [+DIR] += EB (e1); - err1_ptr [ 0] = EB (e1); -} - - - -// Victor Ostromoukhov, -// A Simple and Efficient Error-Diffusion Algorithm -// Proceedings of SIGGRAPH 2001, in ACM Computer Graphics, -// Annual Conference Series, pp. 567-572, 2001. 
-// Not optimised at all -template -template -void Bitdepth::DiffuseOstromoukhov ::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (err_nxt1, err1_ptr); - - enum { DIF_BITS = SRC_BITS - DST_BITS }; - - const int index = fstb::sshift_l < - int, - DiffuseOstromoukhov::T_BITS - DIF_BITS - > (src_raw) & DiffuseOstromoukhov::T_MASK; - const typename DiffuseOstromoukhov ::TableEntry & te = - DiffuseOstromoukhov ::_table [index]; - const int d = te._sum; - - const int e1 = err * te._c0 / d; - const int e2 = err * te._c1 / d; - const int e3 = err - e1 - e2; - - spread_error (e1, e2, e3, err_nxt0, err0_ptr); -} - -template -template -int Bitdepth::DiffuseOstromoukhovBase2 ::get_index (SRC_TYPE src_raw) -{ - enum { DIF_BITS = SRC_BITS - DST_BITS }; - - return (fstb::sshift_l < - int, - DiffuseOstromoukhovBase::T_BITS - DIF_BITS - > (src_raw) & DiffuseOstromoukhovBase::T_MASK); -} - -template -int Bitdepth::DiffuseOstromoukhovBase2 ::get_index (float src_raw) -{ - return ( fstb::round_int (src_raw * DiffuseOstromoukhovBase::T_LEN) - & DiffuseOstromoukhovBase::T_MASK); -} - -template -template -void Bitdepth::DiffuseOstromoukhov ::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw) -{ - fstb::unused (err_nxt1, err1_ptr); - - const int index = DiffuseOstromoukhov::get_index (src_raw); - const typename DiffuseOstromoukhov ::TableEntry & te = - DiffuseOstromoukhov ::_table [index]; - const float invd = te._inv_sum; - - const float e1 = err * te._c0 * invd; - const float e2 = err * te._c1 * invd; - const float e3 = err - e1 - e2; - - spread_error (e1, e2, e3, err_nxt0, err0_ptr); -} - -template -template -void Bitdepth::DiffuseOstromoukhov ::prepare_next_line (EB *err_ptr) -{ - err_ptr [0] = EB (0); -} - -template -template -void Bitdepth::DiffuseOstromoukhov ::spread_error (ET e1, ET e2, ET e3, ET &err_nxt0, EB *err0_ptr) -{ - err_nxt0 = err0_ptr [DIR]; 
- err0_ptr [-DIR] += EB (e2); - err0_ptr [ 0] = EB (e3); - err_nxt0 += e1; -} - - - -const Bitdepth::DiffuseOstromoukhovBase::TableEntry Bitdepth::DiffuseOstromoukhovBase::_table [T_LEN] = -{ - { 13, 0, 5, 18, 1.0f / 18 }, - { 13, 0, 5, 18, 1.0f / 18 }, - { 21, 0, 10, 31, 1.0f / 31 }, - { 7, 0, 4, 11, 1.0f / 11 }, - { 8, 0, 5, 13, 1.0f / 13 }, - { 47, 3, 28, 78, 1.0f / 78 }, - { 23, 3, 13, 39, 1.0f / 39 }, - { 15, 3, 8, 26, 1.0f / 26 }, - { 22, 6, 11, 39, 1.0f / 39 }, - { 43, 15, 20, 78, 1.0f / 78 }, - { 7, 3, 3, 13, 1.0f / 13 }, - { 501, 224, 211, 936, 1.0f / 936 }, - { 249, 116, 103, 468, 1.0f / 468 }, - { 165, 80, 67, 312, 1.0f / 312 }, - { 123, 62, 49, 234, 1.0f / 234 }, - { 489, 256, 191, 936, 1.0f / 936 }, - { 81, 44, 31, 156, 1.0f / 156 }, - { 483, 272, 181, 936, 1.0f / 936 }, - { 60, 35, 22, 117, 1.0f / 117 }, - { 53, 32, 19, 104, 1.0f / 104 }, - { 237, 148, 83, 468, 1.0f / 468 }, - { 471, 304, 161, 936, 1.0f / 936 }, - { 3, 2, 1, 6, 1.0f / 6 }, - { 481, 314, 185, 980, 1.0f / 980 }, - { 354, 226, 155, 735, 1.0f / 735 }, - { 1389, 866, 685, 2940, 1.0f / 2940 }, - { 227, 138, 125, 490, 1.0f / 490 }, - { 267, 158, 163, 588, 1.0f / 588 }, - { 327, 188, 220, 735, 1.0f / 735 }, - { 61, 34, 45, 140, 1.0f / 140 }, - { 627, 338, 505, 1470, 1.0f / 1470 }, - { 1227, 638, 1075, 2940, 1.0f / 2940 }, - - { 20, 10, 19, 49, 1.0f / 49 }, - { 1937, 1000, 1767, 4704, 1.0f / 4704 }, - { 977, 520, 855, 2352, 1.0f / 2352 }, - { 657, 360, 551, 1568, 1.0f / 1568 }, - { 71, 40, 57, 168, 1.0f / 168 }, - { 2005, 1160, 1539, 4704, 1.0f / 4704 }, - { 337, 200, 247, 784, 1.0f / 784 }, - { 2039, 1240, 1425, 4704, 1.0f / 4704 }, - { 257, 160, 171, 588, 1.0f / 588 }, - { 691, 440, 437, 1568, 1.0f / 1568 }, - { 1045, 680, 627, 2352, 1.0f / 2352 }, - { 301, 200, 171, 672, 1.0f / 672 }, - { 177, 120, 95, 392, 1.0f / 392 }, - { 2141, 1480, 1083, 4704, 1.0f / 4704 }, - { 1079, 760, 513, 2352, 1.0f / 2352 }, - { 725, 520, 323, 1568, 1.0f / 1568 }, - { 137, 100, 57, 294, 1.0f / 294 }, - { 2209, 
1640, 855, 4704, 1.0f / 4704 }, - { 53, 40, 19, 112, 1.0f / 112 }, - { 2243, 1720, 741, 4704, 1.0f / 4704 }, - { 565, 440, 171, 1176, 1.0f / 1176 }, - { 759, 600, 209, 1568, 1.0f / 1568 }, - { 1147, 920, 285, 2352, 1.0f / 2352 }, - { 2311, 1880, 513, 4704, 1.0f / 4704 }, - { 97, 80, 19, 196, 1.0f / 196 }, - { 335, 280, 57, 672, 1.0f / 672 }, - { 1181, 1000, 171, 2352, 1.0f / 2352 }, - { 793, 680, 95, 1568, 1.0f / 1568 }, - { 599, 520, 57, 1176, 1.0f / 1176 }, - { 2413, 2120, 171, 4704, 1.0f / 4704 }, - { 405, 360, 19, 784, 1.0f / 784 }, - { 2447, 2200, 57, 4704, 1.0f / 4704 }, - - { 11, 10, 0, 21, 1.0f / 21 }, - { 158, 151, 3, 312, 1.0f / 312 }, - { 178, 179, 7, 364, 1.0f / 364 }, - { 1030, 1091, 63, 2184, 1.0f / 2184 }, - { 248, 277, 21, 546, 1.0f / 546 }, - { 318, 375, 35, 728, 1.0f / 728 }, - { 458, 571, 63, 1092, 1.0f / 1092 }, - { 878, 1159, 147, 2184, 1.0f / 2184 }, - { 5, 7, 1, 13, 1.0f / 13 }, - { 172, 181, 37, 390, 1.0f / 390 }, - { 97, 76, 22, 195, 1.0f / 195 }, - { 72, 41, 17, 130, 1.0f / 130 }, - { 119, 47, 29, 195, 1.0f / 195 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 65, 18, 17, 100, 1.0f / 100 }, - { 95, 29, 26, 150, 1.0f / 150 }, - { 185, 62, 53, 300, 1.0f / 300 }, - { 30, 11, 9, 50, 1.0f / 50 }, - { 35, 14, 11, 60, 1.0f / 60 }, - { 85, 37, 28, 150, 1.0f / 150 }, - { 55, 26, 19, 100, 1.0f / 100 }, - { 80, 41, 29, 150, 1.0f / 150 }, - { 155, 86, 59, 300, 1.0f / 300 }, - { 5, 3, 2, 10, 1.0f / 10 }, - - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 
10, 1.0f / 10 }, - { 305, 176, 119, 600, 1.0f / 600 }, - { 155, 86, 59, 300, 1.0f / 300 }, - { 105, 56, 39, 200, 1.0f / 200 }, - { 80, 41, 29, 150, 1.0f / 150 }, - { 65, 32, 23, 120, 1.0f / 120 }, - { 55, 26, 19, 100, 1.0f / 100 }, - { 335, 152, 113, 600, 1.0f / 600 }, - { 85, 37, 28, 150, 1.0f / 150 }, - { 115, 48, 37, 200, 1.0f / 200 }, - { 35, 14, 11, 60, 1.0f / 60 }, - { 355, 136, 109, 600, 1.0f / 600 }, - { 30, 11, 9, 50, 1.0f / 50 }, - { 365, 128, 107, 600, 1.0f / 600 }, - { 185, 62, 53, 300, 1.0f / 300 }, - { 25, 8, 7, 40, 1.0f / 40 }, - { 95, 29, 26, 150, 1.0f / 150 }, - { 385, 112, 103, 600, 1.0f / 600 }, - { 65, 18, 17, 100, 1.0f / 100 }, - { 395, 104, 101, 600, 1.0f / 600 }, - { 4, 1, 1, 6, 1.0f / 6 }, - - // Symetric - { 4, 1, 1, 6, 1.0f / 6 }, - { 395, 104, 101, 600, 1.0f / 600 }, - { 65, 18, 17, 100, 1.0f / 100 }, - { 385, 112, 103, 600, 1.0f / 600 }, - { 95, 29, 26, 150, 1.0f / 150 }, - { 25, 8, 7, 40, 1.0f / 40 }, - { 185, 62, 53, 300, 1.0f / 300 }, - { 365, 128, 107, 600, 1.0f / 600 }, - { 30, 11, 9, 50, 1.0f / 50 }, - { 355, 136, 109, 600, 1.0f / 600 }, - { 35, 14, 11, 60, 1.0f / 60 }, - { 115, 48, 37, 200, 1.0f / 200 }, - { 85, 37, 28, 150, 1.0f / 150 }, - { 335, 152, 113, 600, 1.0f / 600 }, - { 55, 26, 19, 100, 1.0f / 100 }, - { 65, 32, 23, 120, 1.0f / 120 }, - { 80, 41, 29, 150, 1.0f / 150 }, - { 105, 56, 39, 200, 1.0f / 200 }, - { 155, 86, 59, 300, 1.0f / 300 }, - { 305, 176, 119, 600, 1.0f / 600 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - { 5, 3, 2, 10, 1.0f / 10 }, - - { 5, 3, 2, 10, 1.0f / 10 }, - { 155, 86, 59, 300, 1.0f / 300 }, - { 80, 41, 29, 150, 1.0f / 150 }, - { 55, 26, 19, 100, 1.0f / 100 }, - { 85, 37, 28, 150, 1.0f / 150 }, - { 35, 14, 11, 60, 
1.0f / 60 }, - { 30, 11, 9, 50, 1.0f / 50 }, - { 185, 62, 53, 300, 1.0f / 300 }, - { 95, 29, 26, 150, 1.0f / 150 }, - { 65, 18, 17, 100, 1.0f / 100 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 4, 1, 1, 6, 1.0f / 6 }, - { 119, 47, 29, 195, 1.0f / 195 }, - { 72, 41, 17, 130, 1.0f / 130 }, - { 97, 76, 22, 195, 1.0f / 195 }, - { 172, 181, 37, 390, 1.0f / 390 }, - { 5, 7, 1, 13, 1.0f / 13 }, - { 878, 1159, 147, 2184, 1.0f / 2184 }, - { 458, 571, 63, 1092, 1.0f / 1092 }, - { 318, 375, 35, 728, 1.0f / 728 }, - { 248, 277, 21, 546, 1.0f / 546 }, - { 1030, 1091, 63, 2184, 1.0f / 2184 }, - { 178, 179, 7, 364, 1.0f / 364 }, - { 158, 151, 3, 312, 1.0f / 312 }, - { 11, 10, 0, 21, 1.0f / 21 }, - - { 2447, 2200, 57, 4704, 1.0f / 4704 }, - { 405, 360, 19, 784, 1.0f / 784 }, - { 2413, 2120, 171, 4704, 1.0f / 4704 }, - { 599, 520, 57, 1176, 1.0f / 1176 }, - { 793, 680, 95, 1568, 1.0f / 1568 }, - { 1181, 1000, 171, 2352, 1.0f / 2352 }, - { 335, 280, 57, 672, 1.0f / 672 }, - { 97, 80, 19, 196, 1.0f / 196 }, - { 2311, 1880, 513, 4704, 1.0f / 4704 }, - { 1147, 920, 285, 2352, 1.0f / 2352 }, - { 759, 600, 209, 1568, 1.0f / 1568 }, - { 565, 440, 171, 1176, 1.0f / 1176 }, - { 2243, 1720, 741, 4704, 1.0f / 4704 }, - { 53, 40, 19, 112, 1.0f / 112 }, - { 2209, 1640, 855, 4704, 1.0f / 4704 }, - { 137, 100, 57, 294, 1.0f / 294 }, - { 725, 520, 323, 1568, 1.0f / 1568 }, - { 1079, 760, 513, 2352, 1.0f / 2352 }, - { 2141, 1480, 1083, 4704, 1.0f / 4704 }, - { 177, 120, 95, 392, 1.0f / 392 }, - { 301, 200, 171, 672, 1.0f / 672 }, - { 1045, 680, 627, 2352, 1.0f / 2352 }, - { 691, 440, 437, 1568, 1.0f / 1568 }, - { 257, 160, 171, 588, 1.0f / 588 }, - { 2039, 1240, 1425, 4704, 1.0f / 4704 }, - { 337, 200, 247, 784, 1.0f / 784 }, - { 2005, 1160, 1539, 4704, 1.0f / 4704 }, - { 71, 40, 57, 168, 1.0f / 168 }, - { 657, 360, 
551, 1568, 1.0f / 1568 }, - { 977, 520, 855, 2352, 1.0f / 2352 }, - { 1937, 1000, 1767, 4704, 1.0f / 4704 }, - { 20, 10, 19, 49, 1.0f / 49 }, - - { 1227, 638, 1075, 2940, 1.0f / 2940 }, - { 627, 338, 505, 1470, 1.0f / 1470 }, - { 61, 34, 45, 140, 1.0f / 140 }, - { 327, 188, 220, 735, 1.0f / 735 }, - { 267, 158, 163, 588, 1.0f / 588 }, - { 227, 138, 125, 490, 1.0f / 490 }, - { 1389, 866, 685, 2940, 1.0f / 2940 }, - { 354, 226, 155, 735, 1.0f / 735 }, - { 481, 314, 185, 980, 1.0f / 980 }, - { 3, 2, 1, 6, 1.0f / 6 }, - { 471, 304, 161, 936, 1.0f / 936 }, - { 237, 148, 83, 468, 1.0f / 468 }, - { 53, 32, 19, 104, 1.0f / 104 }, - { 60, 35, 22, 117, 1.0f / 117 }, - { 483, 272, 181, 936, 1.0f / 936 }, - { 81, 44, 31, 156, 1.0f / 156 }, - { 489, 256, 191, 936, 1.0f / 936 }, - { 123, 62, 49, 234, 1.0f / 234 }, - { 165, 80, 67, 312, 1.0f / 312 }, - { 249, 116, 103, 468, 1.0f / 468 }, - { 501, 224, 211, 936, 1.0f / 936 }, - { 7, 3, 3, 13, 1.0f / 13 }, - { 43, 15, 20, 78, 1.0f / 78 }, - { 22, 6, 11, 39, 1.0f / 39 }, - { 15, 3, 8, 26, 1.0f / 26 }, - { 23, 3, 13, 39, 1.0f / 39 }, - { 47, 3, 28, 78, 1.0f / 78 }, - { 8, 0, 5, 13, 1.0f / 13 }, - { 7, 0, 4, 11, 1.0f / 11 }, - { 21, 0, 10, 31, 1.0f / 31 }, - { 13, 0, 5, 18, 1.0f / 18 }, - { 13, 0, 5, 18, 1.0f / 18 } -}; - - } // namespace fmtc diff --git a/src/fmtc/Bitdepth.h b/src/fmtc/Bitdepth.h index bae3667..b6aea37 100644 --- a/src/fmtc/Bitdepth.h +++ b/src/fmtc/Bitdepth.h @@ -27,21 +27,14 @@ To Public License, Version 2, as published by Sam Hocevar. 
See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ -#include "conc/ObjPool.h" -#include "fmtcl/BitBltConv.h" -#include "fmtcl/ErrDifBuf.h" -#include "fmtcl/ErrDifBufFactory.h" -#include "fmtcl/SplFmt.h" -#include "fstb/ArrayAlign.h" +#include "fmtcl/Dither.h" #include "vsutl/FilterBase.h" #include "vsutl/NodeRefSPtr.h" #include "vsutl/PlaneProcCbInterface.h" #include "vsutl/PlaneProcessor.h" #include "VapourSynth.h" -#include #include -#include @@ -84,256 +77,9 @@ class Bitdepth private: - static const int MAX_NBR_PLANES = 3; - static const int PAT_WIDTH = 32; // Number of pixels for halftone dithering - static const int PAT_PERIOD = 4; // Must be a power of 2 (because cycled with & as modulo) - static const int AMP_BITS = 5; // Bit depth of the amplitude fractionnal part. The whole thing is 7 bits, and we need a few bits for the integer part. - static const int ERR_RES = 24; // Resolution (bits) of the temporary data for error diffusion when source bitdepth is not high enough (relative to the destination bitdepth) to guarantee an accurate error diffusion. - static const int MAX_UNK_WIDTH = 65536; // Maximum width (pixels) for variable formats - - enum DMode - { - DMode_ROUND_ALIAS = -1, - DMode_BAYER = 0, - DMode_ROUND, // 1 - DMode_FAST, // 2 - DMode_FILTERLITE, // 3 - DMode_STUCKI, // 4 - DMode_ATKINSON, // 5 - DMode_FLOYD, // 6 - DMode_OSTRO, // 7 - DMode_VOIDCLUST, // 8 - - DMode_NBR_ELT - }; - - class SclInf - { - public: - fmtcl::BitBltConv::ScaleInfo - _info; - fmtcl::BitBltConv::ScaleInfo * // 0 if _info is not used. 
- _ptr = 0; - }; - - typedef int16_t PatRow [PAT_WIDTH]; // Contains data in [-128; +127] - typedef PatRow PatData [PAT_WIDTH]; // [y] [x] - typedef fstb::ArrayAlign PatDataArray; - - class SegContext - { - public: - inline SegContext (); - inline const PatRow & - extract_pattern_row () const; - const PatData* _pattern_ptr; // Ordered dithering - uint32_t _rnd_state; // Anything excepted fast mode - const fmtcl::BitBltConv::ScaleInfo * // Float processing - _scale_info_ptr; - fmtcl::ErrDifBuf * // Error diffusion - _ed_buf_ptr; - int _y; // Ordered dithering and error diffusion - }; - const ::VSFormat & get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src) const; - void build_dither_pat (); - void build_dither_pat_round (); - void build_dither_pat_bayer (); - void build_dither_pat_void_and_cluster (int w); - void build_next_dither_pat (); - void copy_dither_pat_rotate (PatData &dst, const PatData &src, int angle); - void init_fnc_fast (); - void init_fnc_ordered (); - void init_fnc_errdiff (); - - void dither_plane (fmtcl::SplFmt dst_fmt, int dst_res, uint8_t *dst_ptr, int dst_stride, fmtcl::SplFmt src_fmt, int src_res, const uint8_t *src_ptr, int src_stride, int w, int h, const fmtcl::BitBltConv::ScaleInfo &scale_info, const PatData &pattern, uint32_t rnd_state); - - template - void process_seg_fast_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &/*ctx*/) const; - template - void process_seg_fast_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; - -#if (fstb_ARCHI == fstb_ARCHI_X86) - template - void process_seg_fast_int_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &/*ctx*/) const; - template - void process_seg_fast_flt_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; -#endif - - template - void process_seg_ord_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; - 
template - void process_seg_ord_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; - -#if (fstb_ARCHI == fstb_ARCHI_X86) - template - void process_seg_ord_int_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; - template - void process_seg_ord_flt_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; -#endif - - template - void process_seg_errdif_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; - template - void process_seg_errdif_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; - - static inline void - generate_rnd (uint32_t &state); - static inline void - generate_rnd_eol (uint32_t &state); - - template - static inline void - quantize_pix_int (DST_TYPE *dst_ptr, const SRC_TYPE *src_ptr, SRC_TYPE &src_raw, int x, int &err, uint32_t &rnd_state, int ampe_i, int ampn_i); - template - static inline void - quantize_pix_flt (DST_TYPE *dst_ptr, const SRC_TYPE *src_ptr, SRC_TYPE &src_raw, int x, float &err, uint32_t &rnd_state, float ampe_f, float ampn_f, float mul, float add); - - template - class ErrDifAddParam - { - public: - typedef DT DstType; - typedef ST SrcType; - static const int DST_BITS = DB; - static const int SRC_BITS = SB; - static const int NBR_ERR_LINES = EL; - }; - - template - class DiffuseFloydSteinberg - : public ErrDifAddParam - { - public: - template - static fstb_FORCEINLINE void - diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - prepare_next_line (EB *err_ptr); - private: - template - static fstb_FORCEINLINE void - spread_error (ET e1, ET e3, ET e5, ET e7, ET &err_nxt0, EB *err0_ptr); - }; - - template - class DiffuseFilterLite - : 
public ErrDifAddParam - { - public: - template - static fstb_FORCEINLINE void - diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - prepare_next_line (EB *err_ptr); - private: - template - static fstb_FORCEINLINE void - spread_error (ET e1, ET e2, ET &err_nxt0, EB *err0_ptr); - }; - - template - class DiffuseStucki - : public ErrDifAddParam - { - public: - template - static fstb_FORCEINLINE void - diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - prepare_next_line (EB *err_ptr); - private: - template - static fstb_FORCEINLINE void - spread_error (ET e1, ET e2, ET e4, ET e8, ET &err_nxt0, ET &err_nxt1, EB *err0_ptr, EB *err1_ptr); - }; - - template - class DiffuseAtkinson - : public ErrDifAddParam - { - public: - template - static fstb_FORCEINLINE void - diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - prepare_next_line (EB *err_ptr); - private: - template - static fstb_FORCEINLINE void - spread_error (ET e1, ET &err_nxt0, ET &err_nxt1, EB *err0_ptr, EB *err1_ptr); - }; - - class DiffuseOstromoukhovBase - { - public: - struct TableEntry - { - int _c0; - int _c1; - int _c2; // Actually not used - int _sum; - float _inv_sum; // Possible optimization: store 1/_c0 and 1/_c1 instead of this field. 
- }; - static const int T_BITS = 8; - static const int T_LEN = 1 << T_BITS; - static const int T_MASK = T_LEN - 1; - - static const TableEntry - _table [T_LEN]; - }; - - template - class DiffuseOstromoukhovBase2 - : public DiffuseOstromoukhovBase - { - public: - template - static inline int - get_index (SRC_TYPE src_raw); - static inline int - get_index (float src_raw); - }; - - template - class DiffuseOstromoukhov - : public ErrDifAddParam - , public DiffuseOstromoukhovBase2 - { - public: - template - static fstb_FORCEINLINE void - diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw); - template - static fstb_FORCEINLINE void - prepare_next_line (EB *err_ptr); - private: - template - static fstb_FORCEINLINE void - spread_error (ET e1, ET e2, ET e3, ET &err_nxt0, EB *err0_ptr); - }; - vsutl::NodeRefSPtr _clip_src_sptr; const ::VSVideoInfo @@ -342,42 +88,13 @@ class Bitdepth vsutl::PlaneProcessor _plane_processor; - fmtcl::SplFmt _splfmt_src; - fmtcl::SplFmt _splfmt_dst; - - std::array - _scale_info_arr; - bool _upconv_flag; - bool _sse2_flag; - bool _avx2_flag; - bool _full_range_in_flag; - bool _full_range_out_flag; - - int _dmode; - double _ampo; - double _ampn; - bool _dyn_flag; - bool _static_noise_flag; - int _pat_size; // Must be a divisor of PAT_WIDTH - - int _ampo_i; // [0 ; 127], 1.0 = 1 << AMP_BITS - int _ampn_i; // [0 ; 127], 1.0 = 1 << AMP_BITS - int _ampe_i; // [0 ; 2047], 1.0 = 256 - float _ampe_f; - float _ampn_f; - bool _errdif_flag; // Indicates a dithering method using error diffusion. 
- bool _simple_flag; // Simplified implementation for ampo == 1 and ampn == 0 - PatDataArray _dither_pat_arr; // Contains levels for ordered dithering - - conc::ObjPool - _buf_pool; - std::unique_ptr - _buf_factory_uptr; - - void (ThisType::* - _process_seg_int_int_ptr) (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; - void (ThisType::* - _process_seg_flt_int_ptr) (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const; + + bool _full_range_in_flag = false; + bool _full_range_out_flag = false; + bool _range_def_flag = false; + + std::unique_ptr + _engine_uptr; @@ -387,7 +104,9 @@ class Bitdepth Bitdepth () = delete; Bitdepth (const Bitdepth &other) = delete; + Bitdepth (Bitdepth &&other) = delete; Bitdepth & operator = (const Bitdepth &other) = delete; + Bitdepth & operator = (Bitdepth &&other) = delete; bool operator == (const Bitdepth &other) const = delete; bool operator != (const Bitdepth &other) const = delete; diff --git a/src/fmtc/Matrix.cpp b/src/fmtc/Matrix.cpp index 2ec329d..b8d93e4 100644 --- a/src/fmtc/Matrix.cpp +++ b/src/fmtc/Matrix.cpp @@ -31,7 +31,7 @@ To Public License, Version 2, as published by Sam Hocevar. 
See #include "fmtc/Matrix.h" #include "fmtc/fnc.h" -#include "fmtcl/Mat4.h" +#include "fmtcl/MatrixUtil.h" #include "fstb/def.h" #include "fstb/fnc.h" #include "vsutl/CpuOpt.h" @@ -77,9 +77,9 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC _avx_flag = cpu_opt.has_avx (); _avx2_flag = cpu_opt.has_avx2 (); - _proc_uptr = std::unique_ptr (new fmtcl::MatrixProc ( + _proc_uptr = std::make_unique ( _sse_flag, _sse2_flag, _avx_flag, _avx2_flag - )); + ); // Checks the input clip if (_vi_in.format == 0) @@ -93,7 +93,7 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC { throw_inval_arg ("input must be 4:4:4."); } - if (fmt_src.numPlanes != NBR_PLANES) + if (fmt_src.numPlanes != _nbr_planes) { throw_inval_arg ("greyscale format not supported as input."); } @@ -107,7 +107,7 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC throw_inval_arg ("pixel bitdepth not supported."); } - if (_plane_out >= NBR_PLANES) + if (_plane_out >= _nbr_planes) { throw_inval_arg ( "singleout is a plane index and must be -1 or ranging from 0 to 3." 
@@ -172,7 +172,7 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC _vi_out.format = fmt_dst_ptr; const ::VSFormat &fmt_dst = *fmt_dst_ptr; - const int nbr_expected_coef = NBR_PLANES * (NBR_PLANES + 1); + const int nbr_expected_coef = _nbr_planes * (_nbr_planes + 1); bool mat_init_flag = false; @@ -192,8 +192,14 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC fmtcl::Mat4 m2s; fmtcl::Mat4 m2d; - make_mat_from_str (m2s, mats, true); - make_mat_from_str (m2d, matd, false); + if (fmtcl::MatrixUtil::make_mat_from_str (m2s, mats, true) != 0) + { + throw_inval_arg ("unknown source matrix identifier."); + } + if (fmtcl::MatrixUtil::make_mat_from_str (m2d, matd, false) != 0) + { + throw_inval_arg ("unknown destination matrix identifier."); + } _csp_out = find_cs_from_mat_str (*this, matd, false); _mat_main = m2d * m2s; @@ -223,13 +229,13 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC throw_inval_arg ("coef has a wrong number of elements."); } - for (int y = 0; y < NBR_PLANES + 1; ++y) + for (int y = 0; y < _nbr_planes + 1; ++y) { - for (int x = 0; x < NBR_PLANES + 1; ++x) + for (int x = 0; x < _nbr_planes + 1; ++x) { _mat_main [y] [x] = (x == y) ? 1 : 0; - if ( (x < fmt_src.numPlanes || x == NBR_PLANES) + if ( (x < fmt_src.numPlanes || x == _nbr_planes) && y < fmt_dst.numPlanes) { int err = 0; @@ -319,26 +325,26 @@ const ::VSFrameRef * Matrix::get_frame (int n, int activation_reason, void * &fr const int h = _vsapi.getFrameHeight (&src, 0); dst_ptr = _vsapi.newVideoFrame (_vi_out.format, w, h, &src, &core); - uint8_t * const dst_ptr_arr [fmtcl::MatrixProc::NBR_PLANES] = + uint8_t * const dst_ptr_arr [fmtcl::MatrixProc::_nbr_planes] = { _vsapi.getWritePtr (dst_ptr, 0), (_plane_out >= 0) ? 0 : _vsapi.getWritePtr (dst_ptr, 1), (_plane_out >= 0) ? 
0 : _vsapi.getWritePtr (dst_ptr, 2) }; - const int dst_str_arr [fmtcl::MatrixProc::NBR_PLANES] = + const int dst_str_arr [fmtcl::MatrixProc::_nbr_planes] = { _vsapi.getStride (dst_ptr, 0), (_plane_out >= 0) ? 0 : _vsapi.getStride (dst_ptr, 1), (_plane_out >= 0) ? 0 : _vsapi.getStride (dst_ptr, 2) }; const uint8_t * const - src_ptr_arr [fmtcl::MatrixProc::NBR_PLANES] = + src_ptr_arr [fmtcl::MatrixProc::_nbr_planes] = { _vsapi.getReadPtr (&src, 0), _vsapi.getReadPtr (&src, 1), _vsapi.getReadPtr (&src, 2) }; - const int src_str_arr [fmtcl::MatrixProc::NBR_PLANES] = + const int src_str_arr [fmtcl::MatrixProc::_nbr_planes] = { _vsapi.getStride (&src, 0), _vsapi.getStride (&src, 1), @@ -371,7 +377,7 @@ const ::VSFrameRef * Matrix::get_frame (int n, int activation_reason, void * &fr } } - return (dst_ptr); + return dst_ptr; } @@ -406,62 +412,15 @@ void Matrix::select_def_mat (std::string &mat, const ::VSFormat &fmt) // mat should be already converted to lower case fmtcl::ColorSpaceH265 Matrix::find_cs_from_mat_str (const vsutl::FilterBase &flt, const std::string &mat, bool allow_2020cl_flag) { - fmtcl::ColorSpaceH265 cs = fmtcl::ColorSpaceH265_UNSPECIFIED; + const auto cs = + fmtcl::MatrixUtil::find_cs_from_mat_str (mat, allow_2020cl_flag); - if (mat.empty () || mat == "rgb") - { - cs = fmtcl::ColorSpaceH265_RGB; - } - else if (mat == "601") - { - cs = fmtcl::ColorSpaceH265_SMPTE170M; - } - else if (mat == "709") - { - cs = fmtcl::ColorSpaceH265_BT709; - } - else if (mat == "240") - { - cs = fmtcl::ColorSpaceH265_SMPTE240M; - } - else if (mat == "fcc") - { - cs = fmtcl::ColorSpaceH265_FCC; - } - else if (mat == "ycgco" || mat == "ycocg") - { - cs = fmtcl::ColorSpaceH265_YCGCO; - } - else if (mat == "2020") - { - cs = fmtcl::ColorSpaceH265_BT2020NCL; - } - else if (mat == "2020cl" && allow_2020cl_flag) - { - cs = fmtcl::ColorSpaceH265_BT2020CL; - } - else if (mat == "ydzdx") - { - cs = fmtcl::ColorSpaceH265_YDZDX; - } - else if (mat == "lms") - { - cs = 
fmtcl::ColorSpaceH265_LMS; - } - else if (mat == "ictcp_pq") - { - cs = fmtcl::ColorSpaceH265_ICTCP_PQ; - } - else if (mat == "ictcp_hlg") - { - cs = fmtcl::ColorSpaceH265_ICTCP_HLG; - } - else + if (cs == fmtcl::ColorSpaceH265_UNDEF) { flt.throw_inval_arg ("unknown matrix identifier."); } - return (cs); + return cs; } @@ -474,6 +433,10 @@ fmtcl::ColorSpaceH265 Matrix::find_cs_from_mat_str (const vsutl::FilterBase &flt +constexpr int Matrix::_nbr_planes; + + + const ::VSFormat * Matrix::get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src, int &plane_out, bool &force_col_fam_flag) const { force_col_fam_flag = false; @@ -485,9 +448,12 @@ const ::VSFormat * Matrix::get_output_colorspace (const ::VSMap &in, ::VSMap &ou if (csp_dst != ::pfNone) { fmt_dst_ptr = _vsapi.getFormatPreset (csp_dst, &core); - if (fmt_dst_ptr == 0) + if (fmt_dst_ptr == nullptr) { throw_inval_arg ("unknown output colorspace."); + // The following return statement is never reached, it just prevents + // false positive when compiling with -Wnull-dereference + return &fmt_src; } else { @@ -538,17 +504,17 @@ const ::VSFormat * Matrix::get_output_colorspace (const ::VSMap &in, ::VSMap &ou } catch (...) { - fmt_dst_ptr = 0; + fmt_dst_ptr = nullptr; } - if (fmt_dst_ptr == 0) + if (fmt_dst_ptr == nullptr) { throw_rt_err ( "couldn\'t get a pixel format identifier for the output clip." 
); } - return (fmt_dst_ptr); + return fmt_dst_ptr; } @@ -626,238 +592,7 @@ const ::VSFormat * Matrix::find_dst_col_fam (fmtcl::ColorSpaceH265 tmp_csp, cons } } - return (fmt_dst_ptr); -} - - - -void Matrix::make_mat_from_str (fmtcl::Mat4 &m, const std::string &mat, bool to_rgb_flag) const -{ - if (mat.empty () || mat == "rgb") - { - m[0][0] = 1; m[0][1] = 0; m[0][2] = 0; - m[1][0] = 0; m[1][1] = 1; m[1][2] = 0; - m[2][0] = 0; m[2][1] = 0; m[2][2] = 1; - m.clean3 (1); - } - else if (mat == "601") - { - make_mat_yuv (m, 0.299, 0.587, 0.114, to_rgb_flag); - } - else if (mat == "709") - { - make_mat_yuv (m, 0.2126, 0.7152, 0.0722, to_rgb_flag); - } - else if (mat == "240") - { - make_mat_yuv (m, 0.212, 0.701, 0.087, to_rgb_flag); - } - else if (mat == "fcc") - { - make_mat_yuv (m, 0.30, 0.59, 0.11, to_rgb_flag); - } - else if (mat == "ycgco" || mat == "ycocg") - { - make_mat_ycgco (m, to_rgb_flag); - } - else if (mat == "2020") - { - make_mat_yuv (m, 0.2627, 0.678, 0.0593, to_rgb_flag); - } - else if (mat == "ydzdx") - { - make_mat_ydzdx (m, to_rgb_flag); - } - else if (mat == "lms") - { - make_mat_lms (m, to_rgb_flag); - } - else if (mat == "ictcp_pq") - { - make_mat_ictcp (m, false, to_rgb_flag); - } - else if (mat == "ictcp_hlg") - { - make_mat_ictcp (m, true, to_rgb_flag); - } - else - { - throw_inval_arg ("unknown matrix identifier."); - } -} - - - -/* -kr/kg/kb matrix (Rec. ITU-T H.265 2019-06, p. 413): - -R = Y + V*(1-Kr) -G = Y - U*(1-Kb)*Kb/Kg - V*(1-Kr)*Kr/Kg -B = Y + U*(1-Kb) - -Y = R * Kr + G * Kg + B * Kb -U = (B-Y)/(1-Kb) = - R * Kr/(1-Kb) - G * Kg/(1-Kb) + B -V = (R-Y)/(1-Kr) = R - G * Kg/(1-Kr) - B * Kb/(1-Kr) - -The given equations work for R, G, B in range [0 ; 1] and U and V in range -[-1 ; 1]. Scaling must be applied to match the required range for U and V. - -R, G, B, Y range : [0 ; 1] -U, V range : [-0.5 ; 0.5] -*/ - -void Matrix::make_mat_yuv (fmtcl::Mat4 &m, double kr, double kg, double kb, bool to_rgb_flag) -{ - assert (! 
fstb::is_null (kg)); - assert (! fstb::is_eq (kb, 1.0)); - assert (! fstb::is_eq (kr, 1.0)); - - const double r = 0.5; - const double x = 1.0 / r; - if (to_rgb_flag) - { - m[0][0] = 1; m[0][1] = 0; m[0][2] = x*(1-kr) ; - m[1][0] = 1; m[1][1] = x*(kb-1)*kb/kg; m[1][2] = x*(kr-1)*kr/kg; - m[2][0] = 1; m[2][1] = x*(1-kb) ; m[2][2] = 0; - } - - else - { - m[0][0] = kr ; m[0][1] = kg ; m[0][2] = kb ; - m[1][0] = r*kr/(kb-1); m[1][1] = r*kg/(kb-1); m[1][2] = r ; - m[2][0] = r ; m[2][1] = r*kg/(kr-1); m[2][2] = r*kb/(kr-1); - } - - m.clean3 (1); -} - - - -/* -YCgCo matrix (Rec. ITU-T H.265 2019-06, p. 413): - -R = Y - Cg + Co -G = Y + Cg -B = Y - Cg - Co - -Y = 0.25 * R + 0.5 * G + 0.25 * B -Cg = -0.25 * R + 0.5 * G - 0.25 * B -Co = 0.5 * R - 0.5 * B - -R, G, B, Y range : [0 ; 1] -Cg, Co range : [-0.5 ; 0.5] - -Note: this implementation is not exactly the same as specified because the -standard specifies specific steps to apply the RGB-to-YCgCo matrix, leading -to different roundings. -*/ - -void Matrix::make_mat_ycgco (fmtcl::Mat4 &m, bool to_rgb_flag) -{ - if (to_rgb_flag) - { - m[0][0] = 1; m[0][1] = -1; m[0][2] = 1; - m[1][0] = 1; m[1][1] = 1; m[1][2] = 0; - m[2][0] = 1; m[2][1] = -1; m[2][2] = -1; - } - else - { - m[0][0] = 0.25; m[0][1] = 0.5; m[0][2] = 0.25; - m[1][0] = -0.25; m[1][1] = 0.5; m[1][2] = -0.25; - m[2][0] = 0.5 ; m[2][1] = 0 ; m[2][2] = -0.5 ; - } - - m.clean3 (1); -} - - - -/* -YDzDx transform (Rec. ITU-T H.265 2019-06, p. 414) - -Y = G -Dz = 0.5 * (0.986566 * B - Y) -Dx = 0.5 * (R - 0.991902 * Y) - -Y = G -Dz = - 0.5 * G + 0.493283 * B -Dx = 0.5 * R - 0.495951 * G -*/ - -void Matrix::make_mat_ydzdx (fmtcl::Mat4 &m, bool to_rgb_flag) -{ - fmtcl::Mat3 m3; - m3[0][0] = 0 ; m3[0][1] = 1 ; m3[0][2] = 0; - m3[1][0] = 0 ; m3[1][1] = -0.5 ; m3[1][2] = 0.493283; - m3[2][0] = 0.5; m3[2][1] = -0.495951; m3[2][2] = 0; - - if (to_rgb_flag) - { - m3.invert (); - } - - m.insert3 (m3); - m.clean3 (1); -} - - - -/* -LMS transform (Rec. ITU-T H.265 2019-06, p. 
411) - -LMS is an intermediate colorspace for ICtCp transforms. -LMS data are conveyed on RGB planes. -Here, to_rgb_flag indicates real RGB target. -*/ - -void Matrix::make_mat_lms (fmtcl::Mat4 &m, bool to_rgb_flag) -{ - fmtcl::Mat3 m3; - m3[0][0] = 1688; m3[0][1] = 2146; m3[0][2] = 262; - m3[1][0] = 683; m3[1][1] = 2951; m3[1][2] = 462; - m3[2][0] = 99; m3[2][1] = 309; m3[2][2] = 3688; - m3 *= 1.0 / 4096; - - if (to_rgb_flag) - { - m3.invert (); - } - - m.insert3 (m3); - m.clean3 (1); -} - - - -/* -ICtCp transfrom from and to LMS (Rec. ITU-T H.265 2019-06, p. 414) - -LMS data are conveyed on RGB planes. -*/ - -void Matrix::make_mat_ictcp (fmtcl::Mat4 &m, bool hlg_flag, bool to_lms_flag) -{ - fmtcl::Mat3 m3; - m3[0][0] = 2048; m3[0][1] = 2048; m3[0][2] = 0; - if (hlg_flag) - { - m3[1][0] = 3625; m3[1][1] = -7465; m3[1][2] = 3840; - m3[2][0] = 9500; m3[2][1] = -9212; m3[2][2] = -288; - } - else - { - m3[1][0] = 6610; m3[1][1] = -13613; m3[1][2] = 7003; - m3[2][0] = 17933; m3[2][1] = -17390; m3[2][2] = -543; - } - m3 *= 1.0 / 4096; - - if (to_lms_flag) - { - m3.invert (); - } - - m.insert3 (m3); - m.clean3 (1); + return fmt_dst_ptr; } diff --git a/src/fmtc/Matrix.h b/src/fmtc/Matrix.h index 2f70c7d..b53aa8e 100644 --- a/src/fmtc/Matrix.h +++ b/src/fmtc/Matrix.h @@ -30,6 +30,7 @@ To Public License, Version 2, as published by Sam Hocevar. See #include "fstb/def.h" #include "fmtcl/CoefArrInt.h" #include "fmtcl/ColorSpaceH265.h" +#include "fmtcl/Mat4.h" #include "fmtcl/MatrixProc.h" #include "fstb/AllocAlign.h" #include "vsutl/FilterBase.h" @@ -47,11 +48,6 @@ To Public License, Version 2, as published by Sam Hocevar. 
See -namespace fmtcl -{ - class Mat4; -} - namespace fmtc { @@ -91,8 +87,7 @@ class Matrix private: - static const int NBR_PLANES = 3; - static const int SHIFT_INT = 12; // Number of bits for the fractional part + static constexpr int _nbr_planes = 3; enum Dir { @@ -107,13 +102,6 @@ class Matrix const ::VSFormat * find_dst_col_fam (fmtcl::ColorSpaceH265 tmp_csp, const ::VSFormat *fmt_dst_ptr, const ::VSFormat &fmt_src, ::VSCore &core); - void make_mat_from_str (fmtcl::Mat4 &m, const std::string &mat, bool to_rgb_flag) const; - - static void make_mat_yuv (fmtcl::Mat4 &m, double kr, double kg, double kb, bool to_rgb_flag); - static void make_mat_ycgco (fmtcl::Mat4 &m, bool to_rgb_flag); - static void make_mat_ydzdx (fmtcl::Mat4 &m, bool to_rgb_flag); - static void make_mat_lms (fmtcl::Mat4 &m, bool to_rgb_flag); - static void make_mat_ictcp (fmtcl::Mat4 &m, bool hlg_flag, bool to_lms_flag); vsutl::NodeRefSPtr _clip_src_sptr; @@ -146,7 +134,9 @@ class Matrix Matrix () = delete; Matrix (const Matrix &other) = delete; + Matrix (Matrix &&other) = delete; Matrix & operator = (const Matrix &other) = delete; + Matrix & operator = (Matrix &&other) = delete; bool operator == (const Matrix &other) const = delete; bool operator != (const Matrix &other) const = delete; diff --git a/src/fmtc/Matrix2020CL.cpp b/src/fmtc/Matrix2020CL.cpp index 54c88d3..9b86e76 100644 --- a/src/fmtc/Matrix2020CL.cpp +++ b/src/fmtc/Matrix2020CL.cpp @@ -268,7 +268,7 @@ const ::VSFrameRef * Matrix2020CL::get_frame (int n, int activation_reason, void } } - return (dst_ptr); + return dst_ptr; } @@ -350,7 +350,7 @@ const ::VSFormat & Matrix2020CL::get_output_colorspace (const ::VSMap &in, ::VSM ); } - return (*fmt_dst_ptr); + return *fmt_dst_ptr; } diff --git a/src/fmtc/Matrix2020CL.h b/src/fmtc/Matrix2020CL.h index 099dcbc..343c089 100644 --- a/src/fmtc/Matrix2020CL.h +++ b/src/fmtc/Matrix2020CL.h @@ -110,7 +110,9 @@ class Matrix2020CL Matrix2020CL () = delete; Matrix2020CL (const Matrix2020CL &other) = 
delete; + Matrix2020CL (Matrix2020CL &&other) = delete; Matrix2020CL & operator = (const Matrix2020CL &other) = delete; + Matrix2020CL & operator = (Matrix2020CL &&other) = delete; bool operator == (const Matrix2020CL &other) const = delete; bool operator != (const Matrix2020CL &other) const = delete; diff --git a/src/fmtc/Primaries.cpp b/src/fmtc/Primaries.cpp index dd793bf..11a1724 100644 --- a/src/fmtc/Primaries.cpp +++ b/src/fmtc/Primaries.cpp @@ -26,7 +26,9 @@ To Public License, Version 2, as published by Sam Hocevar. See #include "fmtc/fnc.h" #include "fmtc/Primaries.h" +#include "fmtcl/fnc.h" #include "fmtcl/Mat3.h" +#include "fmtcl/PrimUtil.h" #include "fstb/def.h" #include "fstb/fnc.h" #include "vsutl/CpuOpt.h" @@ -90,19 +92,20 @@ Primaries::Primaries (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VS _vi_out.format = &fmt_dst; // Primaries - _prim_s.init (*this, in, out, "prims"); - _prim_s.init (*this, in, out, "rs", "gs", "bs", "ws"); + init (_prim_s, *this, in, out, "prims"); + init (_prim_s, *this, in, out, "rs", "gs", "bs", "ws"); if (! 
_prim_s.is_ready ()) { throw_inval_arg ("input primaries not set."); } _prim_d = _prim_s; - _prim_d.init (*this, in, out, "primd"); - _prim_d.init (*this, in, out, "rd", "gd", "bd", "wd"); + init (_prim_d, *this, in, out, "primd"); + init (_prim_d, *this, in, out, "rd", "gd", "bd", "wd"); assert (_prim_d.is_ready ()); - const fmtcl::Mat3 mat_conv = compute_conversion_matrix (); + const fmtcl::Mat3 mat_conv = + fmtcl::PrimUtil::compute_conversion_matrix (_prim_s, _prim_d); _mat_main.insert3 (mat_conv); _mat_main.clean3 (1); @@ -154,26 +157,26 @@ const ::VSFrameRef * Primaries::get_frame (int n, int activation_reason, void * const int h = _vsapi.getFrameHeight (&src, 0); dst_ptr = _vsapi.newVideoFrame (_vi_out.format, w, h, &src, &core); - uint8_t * const dst_ptr_arr [fmtcl::MatrixProc::NBR_PLANES] = + uint8_t * const dst_ptr_arr [fmtcl::MatrixProc::_nbr_planes] = { _vsapi.getWritePtr (dst_ptr, 0), _vsapi.getWritePtr (dst_ptr, 1), _vsapi.getWritePtr (dst_ptr, 2) }; - const int dst_str_arr [fmtcl::MatrixProc::NBR_PLANES] = + const int dst_str_arr [fmtcl::MatrixProc::_nbr_planes] = { _vsapi.getStride (dst_ptr, 0), _vsapi.getStride (dst_ptr, 1), _vsapi.getStride (dst_ptr, 2) }; const uint8_t * const - src_ptr_arr [fmtcl::MatrixProc::NBR_PLANES] = + src_ptr_arr [fmtcl::MatrixProc::_nbr_planes] = { _vsapi.getReadPtr (&src, 0), _vsapi.getReadPtr (&src, 1), _vsapi.getReadPtr (&src, 2) }; - const int src_str_arr [fmtcl::MatrixProc::NBR_PLANES] = + const int src_str_arr [fmtcl::MatrixProc::_nbr_planes] = { _vsapi.getStride (&src, 0), _vsapi.getStride (&src, 1), @@ -213,92 +216,7 @@ const ::VSFrameRef * Primaries::get_frame (int n, int activation_reason, void * -void Primaries::RgbSystem::init (const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *preset_0) -{ - assert (preset_0 != 0); - - std::string preset_str = filter.get_arg_str (in, out, preset_0, ""); - fstb::conv_to_lower_case (preset_str); - _preset = conv_string_to_primaries (preset_str); - if 
(_preset >= 0) - { - set (_preset); - } -} - - - -void Primaries::RgbSystem::init (const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char r_0 [], const char g_0 [], const char b_0 [], const char w_0 []) -{ - assert (r_0 != 0); - assert (g_0 != 0); - assert (b_0 != 0); - assert (w_0 != 0); - - const bool ready_old_flag = is_ready (); - std::array rgb_old = _rgb; - Vec2 w_old = _white; - - const char * name_0_arr [NBR_PLANES] = { r_0, g_0, b_0 }; - for (int k = 0; k < NBR_PLANES; ++k) - { - _init_flag_arr [k] |= - read_coord_tuple (_rgb [k], filter, in, out, name_0_arr [k]); - } - - _init_flag_arr [NBR_PLANES] |= - read_coord_tuple (_white, filter, in, out, w_0); - - if (ready_old_flag && is_ready () && (rgb_old != _rgb || w_old != _white)) - { - _preset = fmtcl::PrimariesPreset_UNDEF; - } -} - - - -bool Primaries::RgbSystem::read_coord_tuple (Vec2 &c, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *name_0) -{ - bool set_flag = false; - typedef std::vector Vect; - Vect v_def; - - Vect c_v = filter.get_arg_vflt (in, out, name_0, v_def); - if (c_v.size () != 0) - { - if (c_v.size () != c.size ()) - { - fstb::snprintf4all ( - filter._filter_error_msg_0, - filter._max_error_buf_len, - "%s: wrong number of coordinates (expected %d).", - name_0, - int (c.size ()) - ); - filter.throw_inval_arg (filter._filter_error_msg_0); - } - double sum = 0; - for (size_t k = 0; k < c_v.size (); ++k) - { - sum += c_v [k]; - c [k] = c_v [k]; - } - if (c [1] == 0) - { - fstb::snprintf4all ( - filter._filter_error_msg_0, - filter._max_error_buf_len, - "%s: y coordinate cannot be 0.", - name_0 - ); - filter.throw_inval_arg (filter._filter_error_msg_0); - } - - set_flag = true; - } - - return (set_flag); -} +constexpr int Primaries::_nbr_planes; @@ -339,239 +257,97 @@ void Primaries::check_colorspace (const ::VSFormat &fmt, const char *inout_0) co throw_inval_arg (_filter_error_msg_0); } - assert (fmt.numPlanes == NBR_PLANES); + assert 
(fmt.numPlanes == _nbr_planes); } -fmtcl::Mat3 Primaries::compute_conversion_matrix () const +void Primaries::init (fmtcl::RgbSystem &prim, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *preset_0) { - fmtcl::Mat3 rgb2xyz = compute_rgb2xyz (_prim_s); - fmtcl::Mat3 xyz2rgb = compute_rgb2xyz (_prim_d).invert (); - fmtcl::Mat3 adapt = compute_chroma_adapt (_prim_s, _prim_d); + assert (preset_0 != 0); - return xyz2rgb * adapt * rgb2xyz; + std::string preset_str = filter.get_arg_str (in, out, preset_0, ""); + fstb::conv_to_lower_case (preset_str); + prim._preset = fmtcl::PrimUtil::conv_string_to_primaries (preset_str); + if (prim._preset >= 0) + { + prim.set (prim._preset); + } } -// http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html -fmtcl::Mat3 Primaries::compute_rgb2xyz (const RgbSystem &prim) +void Primaries::init (fmtcl::RgbSystem &prim, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char r_0 [], const char g_0 [], const char b_0 [], const char w_0 []) { - fmtcl::Mat3 m; + assert (r_0 != 0); + assert (g_0 != 0); + assert (b_0 != 0); + assert (w_0 != 0); - if (prim._preset == fmtcl::PrimariesPreset_CIEXYZ) - { - m = fmtcl::Mat3 (1, fmtcl::Mat3::Preset_DIAGONAL); - } + const bool ready_old_flag = prim.is_ready (); + std::array rgb_old = prim._rgb; + fmtcl::RgbSystem::Vec2 w_old = prim._white; - else + const char * name_0_arr [_nbr_planes] = { r_0, g_0, b_0 }; + for (int k = 0; k < _nbr_planes; ++k) { - const fmtcl::Vec3 white = conv_xy_to_xyz (prim._white); - - fmtcl::Mat3 xyzrgb; - for (int k = 0; k < NBR_PLANES; ++k) - { - fmtcl::Vec3 comp_xyz = conv_xy_to_xyz (prim._rgb [k]); - xyzrgb.set_col (k, comp_xyz); - } - - fmtcl::Vec3 s = xyzrgb.compute_inverse () * white; - - for (int u = 0; u < NBR_PLANES; ++u) - { - m.set_col (u, xyzrgb.get_col (u) * s [u]); - } + prim._init_flag_arr [k] |= + read_coord_tuple (prim._rgb [k], filter, in, out, name_0_arr [k]); } - return m; -} - - + prim._init_flag_arr 
[_nbr_planes] |= + read_coord_tuple (prim._white, filter, in, out, w_0); -// http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html -fmtcl::Mat3 Primaries::compute_chroma_adapt (const RgbSystem &prim_s, const RgbSystem &prim_d) -{ - fmtcl::Vec3 white_s = conv_xy_to_xyz (prim_s._white); - fmtcl::Vec3 white_d = conv_xy_to_xyz (prim_d._white); - - // Bradford adaptation - const fmtcl::Mat3 ma ({ - fmtcl::Vec3 ( 0.8951, 0.2664, -0.1614), - fmtcl::Vec3 (-0.7502, 1.7135, 0.0367), - fmtcl::Vec3 ( 0.0389, -0.0685, 1.0296) - }); - - fmtcl::Vec3 crd_s = ma * white_s; - fmtcl::Vec3 crd_d = ma * white_d; - fmtcl::Mat3 scale (0.0); - for (int k = 0; k < NBR_PLANES; ++k) + if ( ready_old_flag && prim.is_ready () + && (rgb_old != prim._rgb || w_old != prim._white)) { - assert (crd_s [k] != 0); - scale [k] [k] = crd_d [k] / crd_s [k]; + prim._preset = fmtcl::PrimariesPreset_UNDEF; } - - return ma.compute_inverse () * scale * ma; } -// Obtains X, Y, Z from (x, y) -// Y is assumed to be 1.0 -// X = x / y -// Z = (1 - x - y) / y -// http://www.brucelindbloom.com/index.html?Eqn_xyY_to_XYZ.html -fmtcl::Vec3 Primaries::conv_xy_to_xyz (const RgbSystem::Vec2 &xy) +bool Primaries::read_coord_tuple (fmtcl::RgbSystem::Vec2 &c, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *name_0) { - fmtcl::Vec3 xyz; + bool set_flag = false; + typedef std::vector Vect; + Vect v_def; - // When y is null, X = Y = Z = 0. 
- if (fstb::is_null (xy [1])) - { - xyz [0] = 0; - xyz [1] = 0; - xyz [2] = 0; - } - else + Vect c_v = filter.get_arg_vflt (in, out, name_0, v_def); + if (c_v.size () != 0) { - xyz [0] = xy [0] / xy [1]; - xyz [1] = 1; - xyz [2] = (1 - xy [0] - xy [1]) / xy [1]; - } - - return xyz; -} - - + if (c_v.size () != c.size ()) + { + fstb::snprintf4all ( + filter._filter_error_msg_0, + filter._max_error_buf_len, + "%s: wrong number of coordinates (expected %d).", + name_0, + int (c.size ()) + ); + filter.throw_inval_arg (filter._filter_error_msg_0); + } + double sum = 0; + for (size_t k = 0; k < c_v.size (); ++k) + { + sum += c_v [k]; + c [k] = c_v [k]; + } + if (c [1] == 0) + { + fstb::snprintf4all ( + filter._filter_error_msg_0, + filter._max_error_buf_len, + "%s: y coordinate cannot be 0.", + name_0 + ); + filter.throw_inval_arg (filter._filter_error_msg_0); + } -// str should be already converted to lower case -fmtcl::PrimariesPreset Primaries::conv_string_to_primaries (const std::string &str) -{ - fmtcl::PrimariesPreset preset = fmtcl::PrimariesPreset_UNDEF; - - if ( str == "709" - || str == "1361" - || str == "61966-2-1" - || str == "61966-2-4" - || str == "hdtv" - || str == "srgb") - { - preset = fmtcl::PrimariesPreset_BT709; - } - else if ( str == "470m" - || str == "ntsc") - { - preset = fmtcl::PrimariesPreset_FCC; - } - else if ( str == "470m93" - || str == "ntscj") - { - preset = fmtcl::PrimariesPreset_NTSCJ; - } - else if ( str == "470bg" - || str == "601-625" - || str == "1358-625" - || str == "1700-625" - || str == "pal" - || str == "secam") - { - preset = fmtcl::PrimariesPreset_BT470BG; - } - else if ( str == "170m" - || str == "601-525" - || str == "1358-525" - || str == "1700-525") - { - preset = fmtcl::PrimariesPreset_SMPTE170M; - } - else if ( str == "240m") - { - preset = fmtcl::PrimariesPreset_SMPTE240M; - } - else if ( str == "filmc") - { - preset = fmtcl::PrimariesPreset_GENERIC_FILM; - } - else if ( str == "2020" - || str == "2100" - || str == 
"uhdtv") - { - preset = fmtcl::PrimariesPreset_BT2020; - } - else if ( str == "61966-2-2" - || str == "scrgb") - { - preset = fmtcl::PrimariesPreset_SCRGB; - } - else if ( str == "adobe98") - { - preset = fmtcl::PrimariesPreset_ADOBE_RGB_98; - } - else if ( str == "adobewide") - { - preset = fmtcl::PrimariesPreset_ADOBE_RGB_WIDE; - } - else if ( str == "apple") - { - preset = fmtcl::PrimariesPreset_APPLE_RGB; - } - else if ( str == "photopro" - || str == "romm") - { - preset = fmtcl::PrimariesPreset_ROMM; - } - else if ( str == "ciergb") - { - preset = fmtcl::PrimariesPreset_CIERGB; - } - else if ( str == "ciexyz") - { - preset = fmtcl::PrimariesPreset_CIEXYZ; - } - else if ( str == "p3d65" - || str == "dcip3") - { - preset = fmtcl::PrimariesPreset_P3D65; - } - else if ( str == "aces") - { - preset = fmtcl::PrimariesPreset_ACES; - } - else if ( str == "ap1") - { - preset = fmtcl::PrimariesPreset_ACESAP1; - } - else if ( str == "sgamut" - || str == "sgamut3") - { - preset = fmtcl::PrimariesPreset_SGAMUT; - } - else if ( str == "sgamut3cine") - { - preset = fmtcl::PrimariesPreset_SGAMUT3CINE; - } - else if ( str == "alexa") - { - preset = fmtcl::PrimariesPreset_ALEXA; - } - else if ( str == "vgamut") - { - preset = fmtcl::PrimariesPreset_VGAMUT; - } - else if ( str == "p3dci") - { - preset = fmtcl::PrimariesPreset_P3DCI; - } - else if ( str == "p3d60") - { - preset = fmtcl::PrimariesPreset_P3D60; - } - else if ( str == "3213") - { - preset = fmtcl::PrimariesPreset_EBU3213E; + set_flag = true; } - return preset; + return (set_flag); } diff --git a/src/fmtc/Primaries.h b/src/fmtc/Primaries.h index 5a3546b..63ed96c 100644 --- a/src/fmtc/Primaries.h +++ b/src/fmtc/Primaries.h @@ -70,28 +70,13 @@ class Primaries private: - static const int NBR_PLANES = 3; - - class RgbSystem - : public fmtcl::RgbSystem - { - public: - RgbSystem () = default; - void init (const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *preset_0); - void init (const 
vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char r_0 [], const char g_0 [], const char b_0 [], const char w_0 []); - static bool read_coord_tuple (Vec2 &c, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *name_0); - }; + static constexpr int _nbr_planes = fmtcl::RgbSystem::_nbr_planes; void check_colorspace (const ::VSFormat &fmt, const char *inout_0) const; - fmtcl::Mat3 compute_conversion_matrix () const; - static fmtcl::Mat3 - compute_rgb2xyz (const RgbSystem &prim); - static fmtcl::Mat3 - compute_chroma_adapt (const RgbSystem &prim_s, const RgbSystem &prim_d); - static fmtcl::Vec3 - conv_xy_to_xyz (const RgbSystem::Vec2 &xy); - static fmtcl::PrimariesPreset - conv_string_to_primaries (const std::string &preset); + + static void init (fmtcl::RgbSystem &prim, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *preset_0); + static void init (fmtcl::RgbSystem &prim, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char r_0 [], const char g_0 [], const char b_0 [], const char w_0 []); + static bool read_coord_tuple (fmtcl::RgbSystem::Vec2 &c, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *name_0); vsutl::NodeRefSPtr _clip_src_sptr; @@ -104,8 +89,10 @@ class Primaries bool _avx_flag; bool _avx2_flag; - RgbSystem _prim_s; - RgbSystem _prim_d; + fmtcl::RgbSystem + _prim_s; + fmtcl::RgbSystem + _prim_d; fmtcl::Mat4 _mat_main; @@ -120,7 +107,9 @@ class Primaries Primaries () = delete; Primaries (const Primaries &other) = delete; + Primaries (Primaries &&other) = delete; Primaries & operator = (const Primaries &other) = delete; + Primaries & operator = (Primaries &&other) = delete; bool operator == (const Primaries &other) const = delete; bool operator != (const Primaries &other) const = delete; diff --git a/src/fmtc/Resample.cpp b/src/fmtc/Resample.cpp index db7a62d..637c8e2 100644 --- a/src/fmtc/Resample.cpp +++ b/src/fmtc/Resample.cpp @@ 
-24,8 +24,10 @@ To Public License, Version 2, as published by Sam Hocevar. See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ +#include "fmtc/fnc.h" #include "fmtc/Resample.h" #include "fmtc/SplFmtUtl.h" +#include "fmtcl/ResampleUtil.h" #include "fstb/def.h" #include "vsutl/CpuOpt.h" #include "vsutl/fnc.h" @@ -203,7 +205,7 @@ Resample::Resample (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo for (int plane_index = 0; plane_index < fmt_src.numPlanes; ++plane_index) { - PlaneData & plane_data = _plane_data_arr [plane_index]; + auto & plane_data = _plane_data_arr [plane_index]; vsutl::compute_fmt_mac_cst ( plane_data._gain, plane_data._add_cst, @@ -275,10 +277,10 @@ Resample::Resample (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo const int nbr_sh = _vsapi.propNumElements (&in, "sh"); for (int plane_index = 0; plane_index < fmt_src.numPlanes; ++plane_index) { - PlaneData & plane_data = _plane_data_arr [plane_index]; + auto & plane_data = _plane_data_arr [plane_index]; // Source window - Win & s = plane_data._win; + auto & s = plane_data._win; if (plane_index > 0) { s = _plane_data_arr [plane_index - 1]._win; @@ -430,7 +432,7 @@ Resample::Resample (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo ); } - create_plane_specs (); + create_all_plane_specs (); } @@ -557,7 +559,7 @@ const ::VSFrameRef * Resample::get_frame (int n, int activation_reason, void * & } } - return (dst_ptr); + return dst_ptr; } @@ -567,15 +569,15 @@ fmtcl::ChromaPlacement Resample::conv_str_to_chroma_placement (const vsutl::Filt fmtcl::ChromaPlacement cp_val = fmtcl::ChromaPlacement_MPEG1; fstb::conv_to_lower_case (cplace); - if (strcmp (cplace.c_str (), "mpeg1") == 0) + if (cplace == "mpeg1") { cp_val = fmtcl::ChromaPlacement_MPEG1; } - else if (strcmp (cplace.c_str (), "mpeg2") == 0) + else if (cplace == "mpeg2") { cp_val = fmtcl::ChromaPlacement_MPEG2; } - else if (strcmp (cplace.c_str (), "dv") == 0) + else if (cplace == 
"dv") { cp_val = fmtcl::ChromaPlacement_DV; } @@ -584,7 +586,7 @@ fmtcl::ChromaPlacement Resample::conv_str_to_chroma_placement (const vsutl::Filt flt.throw_inval_arg ("unexpected cplace string."); } - return (cp_val); + return cp_val; } @@ -637,7 +639,7 @@ int Resample::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void _plane_processor.fill_plane (dst, val, plane_index); } - return (ret_val); + return ret_val; } @@ -646,6 +648,10 @@ int Resample::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void +constexpr int Resample::_max_nbr_planes; + + + const ::VSFormat & Resample::get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src) const { const ::VSFormat * fmt_dst_ptr = &fmt_src; @@ -697,7 +703,7 @@ const ::VSFormat & Resample::get_output_colorspace (const ::VSMap &in, ::VSMap & ); } - return (*fmt_dst_ptr); + return *fmt_dst_ptr; } @@ -712,7 +718,7 @@ bool Resample::cumulate_flag (bool flag, const ::VSMap &in, ::VSMap &out, const flag = (val != 0); } - return (flag); + return flag; } @@ -809,10 +815,12 @@ int Resample::process_plane_proc (::VSFrameRef &dst, int n, int plane_index, voi const FrameInfo & frame_info = *reinterpret_cast (frame_data_ptr); - const InterlacingType itl_s = - get_itl_type (frame_info._itl_s_flag, frame_info._top_s_flag); - const InterlacingType itl_d = - get_itl_type (frame_info._itl_d_flag, frame_info._top_d_flag); + const fmtcl::InterlacingType itl_s = fmtcl::InterlacingType_get ( + frame_info._itl_s_flag, frame_info._top_s_flag + ); + const fmtcl::InterlacingType itl_d = fmtcl::InterlacingType_get ( + frame_info._itl_d_flag, frame_info._top_d_flag + ); try { @@ -845,7 +853,7 @@ int Resample::process_plane_proc (::VSFrameRef &dst, int n, int plane_index, voi ret_val = -1; } - return (ret_val); + return ret_val; } @@ -882,7 +890,7 @@ int Resample::process_plane_copy (::VSFrameRef &dst, int n, int plane_index, voi const bool src_flt_flag = (_src_type == 
fmtcl::SplFmt_FLOAT); if (dst_flt_flag != src_flt_flag) { - const PlaneData & plane_data = _plane_data_arr [plane_index]; + const auto & plane_data = _plane_data_arr [plane_index]; scale_info._gain = plane_data._gain; scale_info._add_cst = plane_data._add_cst; @@ -896,20 +904,20 @@ int Resample::process_plane_copy (::VSFrameRef &dst, int n, int plane_index, voi w, h, scale_info_ptr ); - return (ret_val); + return ret_val; } -fmtcl::FilterResize * Resample::create_or_access_plane_filter (int plane_index, InterlacingType itl_d, InterlacingType itl_s) +fmtcl::FilterResize * Resample::create_or_access_plane_filter (int plane_index, fmtcl::InterlacingType itl_d, fmtcl::InterlacingType itl_s) { assert (plane_index >= 0); assert (itl_d >= 0); - assert (itl_d < InterlacingType_NBR_ELT); + assert (itl_d < fmtcl::InterlacingType_NBR_ELT); assert (itl_s >= 0); - assert (itl_s < InterlacingType_NBR_ELT); + assert (itl_s < fmtcl::InterlacingType_NBR_ELT); - const PlaneData & plane_data = _plane_data_arr [plane_index]; + const auto & plane_data = _plane_data_arr [plane_index]; const fmtcl::ResampleSpecPlane & key = plane_data._spec_arr [itl_d] [itl_s]; std::lock_guard autolock (_filter_mutex); @@ -917,7 +925,7 @@ fmtcl::FilterResize * Resample::create_or_access_plane_filter (int plane_index, std::unique_ptr & filter_uptr = _filter_uptr_map [key]; if (filter_uptr.get () == 0) { - filter_uptr = std::unique_ptr (new fmtcl::FilterResize ( + filter_uptr = std::make_unique ( key, *(plane_data._kernel_arr [fmtcl::FilterResize::Dir_H]._k_uptr), *(plane_data._kernel_arr [fmtcl::FilterResize::Dir_V]._k_uptr), @@ -925,98 +933,32 @@ fmtcl::FilterResize * Resample::create_or_access_plane_filter (int plane_index, plane_data._gain, _src_type, _src_res, _dst_type, _dst_res, _int_flag, _sse2_flag, _avx2_flag - )); + ); } - return (filter_uptr.get ()); + return filter_uptr.get (); } -void Resample::create_plane_specs () +void Resample::create_all_plane_specs () { - fmtcl::ResampleSpecPlane spec; 
- - const int src_w = _vi_in.width; - const int src_h = _vi_in.height; - const int dst_w = _vi_out.width; - const int dst_h = _vi_out.height; + const fmtcl::ColorFamily src_cf = fmtc::conv_colfam_to_fmtcl (*_vi_in.format); + const fmtcl::ColorFamily dst_cf = fmtc::conv_colfam_to_fmtcl (*_vi_out.format); + const int src_ss_h = _vi_in.format->subSamplingW; + const int src_ss_v = _vi_in.format->subSamplingH; + const int dst_ss_h = _vi_out.format->subSamplingW; + const int dst_ss_v = _vi_out.format->subSamplingH; const int nbr_planes = _vi_in.format->numPlanes; - for (int plane_index = 0; plane_index < nbr_planes; ++plane_index) { - PlaneData & plane_data = _plane_data_arr [plane_index]; - - spec._src_width = - vsutl::compute_plane_width (*_vi_in.format, plane_index, src_w); - spec._src_height = - vsutl::compute_plane_height (*_vi_in.format, plane_index, src_h); - spec._dst_width = - vsutl::compute_plane_width (*_vi_out.format, plane_index, dst_w); - spec._dst_height = - vsutl::compute_plane_height (*_vi_out.format, plane_index, dst_h); - - const int subspl_h = src_w / spec._src_width; - const int subspl_v = src_h / spec._src_height; - - const Win & s = plane_data._win; - spec._win_x = s._x / subspl_h; - spec._win_y = s._y / subspl_v; - spec._win_w = s._w / subspl_h; - spec._win_h = s._h / subspl_v; - - spec._add_cst = plane_data._add_cst; - spec._kernel_scale_h = plane_data._kernel_scale_h; - spec._kernel_scale_v = plane_data._kernel_scale_v; - spec._kernel_hash_h = plane_data._kernel_arr [fmtcl::FilterResize::Dir_H].get_hash (); - spec._kernel_hash_v = plane_data._kernel_arr [fmtcl::FilterResize::Dir_V].get_hash (); - - for (int itl_d = 0; itl_d < InterlacingType_NBR_ELT; ++itl_d) - { - for (int itl_s = 0; itl_s < InterlacingType_NBR_ELT; ++itl_s) - { - double cp_s_h = 0; - double cp_s_v = 0; - double cp_d_h = 0; - double cp_d_v = 0; - if (plane_data._preserve_center_flag) - { - fmtcl::ChromaPlacement_compute_cplace ( - cp_s_h, cp_s_v, _cplace_s, plane_index, - 
_vi_in.format->subSamplingW, _vi_in.format->subSamplingH, - (_vi_in.format->colorFamily == ::cmRGB), - (itl_s != InterlacingType_FRAME), - (itl_s == InterlacingType_TOP) - ); - fmtcl::ChromaPlacement_compute_cplace ( - cp_d_h, cp_d_v, _cplace_d, plane_index, - _vi_out.format->subSamplingW, _vi_out.format->subSamplingH, - (_vi_out.format->colorFamily == ::cmRGB), - (itl_d != InterlacingType_FRAME), - (itl_d == InterlacingType_TOP) - ); - } - - spec._center_pos_src_h = cp_s_h; - spec._center_pos_src_v = cp_s_v; - spec._center_pos_dst_h = cp_d_h; - spec._center_pos_dst_v = cp_d_v; - - plane_data._spec_arr [itl_d] [itl_s] = spec; - } // for itl_s - } // for itl_d - } // for plane_index -} - - - -Resample::InterlacingType Resample::get_itl_type (bool itl_flag, bool top_flag) -{ - return ( - (itl_flag) ? ((top_flag) ? InterlacingType_TOP - : InterlacingType_BOT) - : InterlacingType_FRAME - ); + auto & plane_data = _plane_data_arr [plane_index]; + fmtcl::ResampleUtil::create_plane_specs ( + plane_data, plane_index, + src_cf, _src_width , src_ss_h, _src_height , src_ss_v, _cplace_s, + dst_cf, _vi_out.width, dst_ss_h, _vi_out.height, dst_ss_v, _cplace_d + ); + } } diff --git a/src/fmtc/Resample.h b/src/fmtc/Resample.h index 66b44ae..e5a3070 100644 --- a/src/fmtc/Resample.h +++ b/src/fmtc/Resample.h @@ -28,8 +28,11 @@ To Public License, Version 2, as published by Sam Hocevar. 
See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ #include "fmtcl/ChromaPlacement.h" +#include "fmtcl/ColorFamily.h" #include "fmtcl/FilterResize.h" +#include "fmtcl/InterlacingType.h" #include "fmtcl/KernelData.h" +#include "fmtcl/ResamplePlaneData.h" #include "fmtcl/ResampleSpecPlane.h" #include "vsutl/FilterBase.h" #include "vsutl/NodeRefSPtr.h" @@ -86,7 +89,7 @@ class Resample private: - static const int MAX_NBR_PLANES = 3; + static constexpr int _max_nbr_planes = 3; enum InterlacingParam { @@ -106,55 +109,16 @@ class Resample FieldOrder_NBR_ELT }; - enum InterlacingType - { - InterlacingType_FRAME = 0, - InterlacingType_TOP, - InterlacingType_BOT, - - InterlacingType_NBR_ELT - }; - - class Win - { - public: - double _x; // Data is in full coordinates whatever the plane (never subsampled) - double _y; - double _w; - double _h; - }; - class FrameInfo { public: - bool _itl_s_flag; - bool _top_s_flag; - bool _itl_d_flag; - bool _top_d_flag; + bool _itl_s_flag = false; + bool _top_s_flag = false; + bool _itl_d_flag = false; + bool _top_d_flag = false; }; - // Array order: [dest] [src] - typedef std::array SpecSrcArray; - typedef std::array SpecArray; - - class PlaneData - { - public: - typedef std::array < - fmtcl::KernelData, - fmtcl::FilterResize::Dir_NBR_ELT - > KernelArray; - Win _win; - SpecArray _spec_arr; // Contains the spec (used as a key) for each plane/interlacing combination - KernelArray _kernel_arr; - double _kernel_scale_h; // Can be negative (forced scaling) - double _kernel_scale_v; // Can be negative (forced scaling) - double _gain; - double _add_cst; - bool _preserve_center_flag; - }; - - typedef std::array PlaneDataArray; + typedef std::array PlaneDataArray; const ::VSFormat & get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src) const; @@ -163,11 +127,8 @@ class Resample int process_plane_proc (::VSFrameRef &dst, int n, int plane_index, void *frame_data_ptr, 
::VSFrameContext &frame_ctx, ::VSCore &core, const vsutl::NodeRefSPtr &src_node1_sptr); int process_plane_copy (::VSFrameRef &dst, int n, int plane_index, void *frame_data_ptr, ::VSFrameContext &frame_ctx, ::VSCore &core, const vsutl::NodeRefSPtr &src_node1_sptr); fmtcl::FilterResize * - create_or_access_plane_filter (int plane_index, InterlacingType itl_d, InterlacingType itl_s); - void create_plane_specs (); - - static InterlacingType - get_itl_type (bool itl_flag, bool top_flag); + create_or_access_plane_filter (int plane_index, fmtcl::InterlacingType itl_d, fmtcl::InterlacingType itl_s); + void create_all_plane_specs (); vsutl::NodeRefSPtr _clip_src_sptr; @@ -222,7 +183,9 @@ class Resample Resample () = delete; Resample (const Resample &other) = delete; + Resample (Resample &&other) = delete; Resample & operator = (const Resample &other) = delete; + Resample & operator = (Resample &&other) = delete; bool operator == (const Resample &other) const = delete; bool operator != (const Resample &other) const = delete; diff --git a/src/fmtc/Transfer.cpp b/src/fmtc/Transfer.cpp index 8dc5b8d..688f61f 100644 --- a/src/fmtc/Transfer.cpp +++ b/src/fmtc/Transfer.cpp @@ -28,22 +28,7 @@ To Public License, Version 2, as published by Sam Hocevar. 
See #include "fmtc/Transfer.h" #include "fmtc/fnc.h" -#include "fmtcl/TransOp2084.h" -#include "fmtcl/TransOpAcesCc.h" -#include "fmtcl/TransOpAffine.h" -#include "fmtcl/TransOpBypass.h" -#include "fmtcl/TransOpCanonLog.h" -#include "fmtcl/TransOpCompose.h" -#include "fmtcl/TransOpContrast.h" -#include "fmtcl/TransOpErimm.h" -#include "fmtcl/TransOpFilmStream.h" -#include "fmtcl/TransOpHlg.h" -#include "fmtcl/TransOpLinPow.h" -#include "fmtcl/TransOpLogC.h" -#include "fmtcl/TransOpLogTrunc.h" -#include "fmtcl/TransOpPow.h" -#include "fmtcl/TransOpSLog.h" -#include "fmtcl/TransOpSLog3.h" +#include "fmtcl/TransUtil.h" #include "fstb/fnc.h" #include "vsutl/CpuOpt.h" #include "vsutl/fnc.h" @@ -80,6 +65,8 @@ Transfer::Transfer (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, : , _full_range_dst_flag (get_arg_int (in, out, "fulld", 1) != 0) , _curve_s (fmtcl::TransCurve_UNDEF) , _curve_d (fmtcl::TransCurve_UNDEF) +, _logc_ei_s (fmtcl::TransOpLogC::ExpIdx_800) +, _logc_ei_d (fmtcl::TransOpLogC::ExpIdx_800) , _loglut_flag (false) #if defined (_MSC_VER) #pragma warning (push) @@ -135,7 +122,56 @@ Transfer::Transfer (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, : // Output format is validated. 
_vi_out.format = &fmt_dst; - init_table (); + // Other parameters + _curve_s = fmtcl::TransUtil::conv_string_to_curve (_transs); + if (_curve_s == fmtcl::TransCurve_UNDEF) + { + throw_inval_arg ("invalid transs value."); + } + _curve_d = fmtcl::TransUtil::conv_string_to_curve (_transd); + if (_curve_d == fmtcl::TransCurve_UNDEF) + { + throw_inval_arg ("invalid transd value."); + } + + const int logc_ei_raw_s = get_arg_int (in, out, "logceis", 800); + _logc_ei_s = fmtcl::TransOpLogC::conv_logc_ei (logc_ei_raw_s); + if (_logc_ei_s == fmtcl::TransOpLogC::ExpIdx_INVALID) + { + throw_inval_arg ("invalid logceis value."); + } + + const int logc_ei_raw_d = get_arg_int (in, out, "logceid", 800); + _logc_ei_d = fmtcl::TransOpLogC::conv_logc_ei (logc_ei_raw_d); + if (_logc_ei_d == fmtcl::TransOpLogC::ExpIdx_INVALID) + { + throw_inval_arg ("invalid logceid value."); + } + + if (_contrast <= 0) + { + throw_inval_arg ("invalid cont value."); + } + if (_gcor <= 0) + { + throw_inval_arg ("invalid gcor value."); + } + if (_lvl_black < 0) + { + throw_inval_arg ("invalid blacklvl value."); + } + + // Finally... 
+ const fmtcl::PicFmt src_fmt = + conv_vsfmt_to_picfmt (*_vi_in.format , _full_range_src_flag); + const fmtcl::PicFmt dst_fmt = + conv_vsfmt_to_picfmt (*_vi_out.format, _full_range_dst_flag); + _lut_uptr = fmtcl::TransUtil::build_lut ( + dst_fmt, _curve_d, _logc_ei_d, + src_fmt, _curve_s, _logc_ei_s, + _contrast, _gcor, _lvl_black, + _sse2_flag, _avx2_flag + ); } @@ -201,7 +237,7 @@ const ::VSFrameRef * Transfer::get_frame (int n, int activation_reason, void * & } } - return (dst_ptr); + return dst_ptr; } @@ -318,388 +354,7 @@ const ::VSFormat & Transfer::get_output_colorspace (const ::VSMap &in, ::VSMap & ); } - return (*fmt_dst_ptr); -} - - - -void Transfer::init_table () -{ - _curve_s = conv_string_to_curve (*this, _transs); - _curve_d = conv_string_to_curve (*this, _transd); - OpSPtr op_s = conv_curve_to_op (_curve_s, true ); - OpSPtr op_d = conv_curve_to_op (_curve_d, false); - - // Linear or log LUT? - _loglut_flag = false; - if ( _vi_in.format->sampleType == ::stFloat - && _curve_s == fmtcl::TransCurve_LINEAR) - { - // Curves with extended range or with fast-evolving slope at 0. - // Actually we could just use the log LUT for all the curves...? - // 10 bits per stop + interpolation should be enough for all of them. - // What about the speed? 
- if ( _curve_d == fmtcl::TransCurve_470BG - || _curve_d == fmtcl::TransCurve_LINEAR - || _curve_d == fmtcl::TransCurve_61966_2_4 - || _curve_d == fmtcl::TransCurve_2084 - || _curve_d == fmtcl::TransCurve_428 - || _curve_d == fmtcl::TransCurve_HLG - || _curve_d == fmtcl::TransCurve_1886 - || _curve_d == fmtcl::TransCurve_1886A - || _curve_d == fmtcl::TransCurve_SLOG - || _curve_d == fmtcl::TransCurve_SLOG2 - || _curve_d == fmtcl::TransCurve_SLOG3 - || _curve_d == fmtcl::TransCurve_LOGC2 - || _curve_d == fmtcl::TransCurve_LOGC3 - || _curve_d == fmtcl::TransCurve_CANONLOG - || _curve_d == fmtcl::TransCurve_ACESCC - || _curve_d == fmtcl::TransCurve_ERIMM) - { - _loglut_flag = true; - } - if (_gcor < 0.5) - { - _loglut_flag = true; - } - if (fabs (_contrast) >= 3.0/2 || fabs (_contrast) <= 2.0/3) - { - _loglut_flag = true; - } - } - - // Black level - const double lw = op_s->get_max (); - if (_lvl_black > 0 && _lvl_black < lw) - { - /* - Black level (brightness) and contrast settings as defined - in ITU-R BT.1886: - L = a' * fi (V + b') - - With: - fi = EOTF (gamma to linear) - L = Lb for V = 0 - L = Lw for V = Vmax - - For power functions, could be rewritten as: - L = fi (a * V + b) - - Substitution: - Lb = fi ( b) - Lw = fi (a * Vmax + b) - - Then, given: - f = OETF (linear to gamma) - - We get: - f (Lb) = b - f (Lw) = a * Vmax + b - - b = f (Lb) - a = (f (Lw) - f (Lb)) / Vmax - */ - OpSPtr oetf = conv_curve_to_op (_curve_s, false); - const double lwg = (*oetf) (lw ); - const double lbg = (*oetf) (_lvl_black); - const double vmax = lwg; - const double a = (lwg - lbg) / vmax; - const double b = lbg; - OpSPtr op_a (new fmtcl::TransOpAffine (a, b)); - op_s = OpSPtr (new fmtcl::TransOpCompose (op_a, op_s)); - } - - // Gamma correction - if (! fstb::is_eq (_gcor, 1.0)) - { - OpSPtr op_g (new fmtcl::TransOpPow (true, _gcor, 1, 1e6)); - op_d = OpSPtr (new fmtcl::TransOpCompose (op_g, op_d)); - } - - // Contrast - if (! 
fstb::is_eq (_contrast, 1.0)) - { - OpSPtr op_c (new fmtcl::TransOpContrast (_contrast)); - op_d = OpSPtr (new fmtcl::TransOpCompose (op_c, op_d)); - } - - // LUTify - OpSPtr op_f (new fmtcl::TransOpCompose (op_s, op_d)); - - const fmtcl::SplFmt src_fmt = conv_vsfmt_to_splfmt (*_vi_in.format); - const fmtcl::SplFmt dst_fmt = conv_vsfmt_to_splfmt (*_vi_out.format); - _lut_uptr = std::unique_ptr (new fmtcl::TransLut ( - *op_f, _loglut_flag, - src_fmt, _vi_in.format->bitsPerSample, _full_range_src_flag, - dst_fmt, _vi_out.format->bitsPerSample, _full_range_dst_flag, - _sse2_flag, _avx2_flag - )); -} - - - -// str should be already converted to lower case -fmtcl::TransCurve Transfer::conv_string_to_curve (const vsutl::FilterBase &flt, const std::string &str) -{ - fmtcl::TransCurve c = fmtcl::TransCurve_UNDEF; - if (str == "709") - { - c = fmtcl::TransCurve_709; - } - else if (str == "470m") - { - c = fmtcl::TransCurve_470M; - } - else if (str == "470bg") - { - c = fmtcl::TransCurve_470BG; - } - else if (str == "601") - { - c = fmtcl::TransCurve_601; - } - else if (str == "240") - { - c = fmtcl::TransCurve_240; - } - else if (str.empty () || str == "linear") - { - c = fmtcl::TransCurve_LINEAR; - } - else if (str == "log100") - { - c = fmtcl::TransCurve_LOG100; - } - else if (str == "log316") - { - c = fmtcl::TransCurve_LOG316; - } - else if (str == "61966-2-4") - { - c = fmtcl::TransCurve_61966_2_4; - } - else if (str == "1361") - { - c = fmtcl::TransCurve_1361; - } - else if (str == "61966-2-1" || str == "srgb" || str == "sycc") - { - c = fmtcl::TransCurve_SRGB; - } - else if (str == "2020_10") - { - c = fmtcl::TransCurve_2020_10; - } - else if (str == "2020_12" || str == "2020") - { - c = fmtcl::TransCurve_2020_12; - } - else if (str == "2084") - { - c = fmtcl::TransCurve_2084; - } - else if (str == "428-1" || str == "428") - { - c = fmtcl::TransCurve_428; - } - else if (str == "hlg") - { - c = fmtcl::TransCurve_HLG; - } - else if (str == "1886") - { - c = 
fmtcl::TransCurve_1886; - } - else if (str == "1886a") - { - c = fmtcl::TransCurve_1886A; - } - else if (str == "filmstream") - { - c = fmtcl::TransCurve_FILMSTREAM; - } - else if (str == "slog") - { - c = fmtcl::TransCurve_SLOG; - } - else if (str == "logc2") - { - c = fmtcl::TransCurve_LOGC2; - } - else if (str == "logc3") - { - c = fmtcl::TransCurve_LOGC3; - } - else if (str == "canonlog") - { - c = fmtcl::TransCurve_CANONLOG; - } - else if (str == "adobergb") - { - c = fmtcl::TransCurve_ADOBE_RGB; - } - else if (str == "romm") - { - c = fmtcl::TransCurve_ROMM_RGB; - } - else if (str == "acescc") - { - c = fmtcl::TransCurve_ACESCC; - } - else if (str == "erimm") - { - c = fmtcl::TransCurve_ERIMM; - } - else if (str == "slog2") - { - c = fmtcl::TransCurve_SLOG2; - } - else if (str == "slog3") - { - c = fmtcl::TransCurve_SLOG3; - } - else if (str == "vlog") - { - c = fmtcl::TransCurve_VLOG; - } - else - { - flt.throw_inval_arg ("unknown matrix identifier."); - } - - - return (c); -} - - - -Transfer::OpSPtr Transfer::conv_curve_to_op (fmtcl::TransCurve c, bool inv_flag) -{ - assert (c >= 0); - - OpSPtr ptr; - - switch (c) - { - case fmtcl::TransCurve_709: - case fmtcl::TransCurve_601: - case fmtcl::TransCurve_2020_10: - ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5)); - break; - case fmtcl::TransCurve_470BG: - ptr = OpSPtr (new fmtcl::TransOpPow (inv_flag, 2.8)); - break; - case fmtcl::TransCurve_240: - ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.1115, 0.0228, 0.45, 4.0)); - break; - case fmtcl::TransCurve_LINEAR: - ptr = OpSPtr (new fmtcl::TransOpBypass); - break; - case fmtcl::TransCurve_LOG100: - ptr = OpSPtr (new fmtcl::TransOpLogTrunc (inv_flag, 0.5, 0.01)); - break; - case fmtcl::TransCurve_LOG316: - ptr = OpSPtr (new fmtcl::TransOpLogTrunc (inv_flag, 0.4, sqrt (10) / 1000)); - break; - case fmtcl::TransCurve_61966_2_4: - ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5, -1e9, 1e9)); - break; - case 
fmtcl::TransCurve_1361: - ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5, -0.25, 1.33, 4)); - break; - case fmtcl::TransCurve_470M: // Assumed display gamma 2.2, almost like sRGB. - case fmtcl::TransCurve_SRGB: -#if 1 - { - // More exact formula giving C1 continuity - // https://en.wikipedia.org/wiki/SRGB#Theory_of_the_transformation - const double gamma = 2.4; - const double alpha = 1.055; - const double k0 = (alpha - 1) / (gamma - 1); - const double phi = - (pow (alpha, gamma) * pow (gamma - 1, gamma - 1)) - / (pow (alpha - 1, gamma - 1) * pow (gamma, gamma)); - ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, alpha, k0 / phi, 1.0 / gamma, phi)); - } -#else - // Rounded constants used in IEC 61966-2-1 - ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.055, 0.04045 / 12.92, 1.0 / 2.4, 12.92)); -#endif - break; - case fmtcl::TransCurve_2020_12: - ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.09929682680944, 0.018053968510807, 0.45, 4.5)); - break; - case fmtcl::TransCurve_2084: - ptr = OpSPtr (new fmtcl::TransOp2084 (inv_flag)); - break; - case fmtcl::TransCurve_428: - ptr = OpSPtr (new fmtcl::TransOpPow (inv_flag, 2.6, 48.0 / 52.37)); - break; - case fmtcl::TransCurve_HLG: - ptr = OpSPtr (new fmtcl::TransOpHlg (inv_flag)); - break; - case fmtcl::TransCurve_1886: - ptr = OpSPtr (new fmtcl::TransOpPow (inv_flag, 2.4)); - break; - case fmtcl::TransCurve_1886A: - { - const double a1 = 2.6; - const double a2 = 3.0; - const double k0 = 0.35; - const double slope = pow (k0, a2 - a1); - const double beta = pow (k0, a1); - ptr = OpSPtr (new fmtcl::TransOpLinPow ( - inv_flag, 1, beta, 1.0 / a1, slope, 0, 1, 1, 1.0 / a2 - )); - } - break; - case fmtcl::TransCurve_FILMSTREAM: - ptr = OpSPtr (new fmtcl::TransOpFilmStream (inv_flag)); - break; - case fmtcl::TransCurve_SLOG: - ptr = OpSPtr (new fmtcl::TransOpSLog (inv_flag, false)); - break; - case fmtcl::TransCurve_LOGC2: - ptr = OpSPtr (new fmtcl::TransOpLogC (inv_flag, 
fmtcl::TransOpLogC::Type_LOGC_V2)); - break; - case fmtcl::TransCurve_LOGC3: - ptr = OpSPtr (new fmtcl::TransOpLogC (inv_flag, fmtcl::TransOpLogC::Type_LOGC_V3)); - break; - case fmtcl::TransCurve_CANONLOG: - ptr = OpSPtr (new fmtcl::TransOpCanonLog (inv_flag)); - break; - case fmtcl::TransCurve_ADOBE_RGB: - ptr = OpSPtr (new fmtcl::TransOpPow (inv_flag, 563.0 / 256)); - break; - case fmtcl::TransCurve_ROMM_RGB: - ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1, 0.001953, 1.0 / 1.8, 16)); - break; - case fmtcl::TransCurve_ACESCC: - ptr = OpSPtr (new fmtcl::TransOpAcesCc (inv_flag)); - break; - case fmtcl::TransCurve_ERIMM: - ptr = OpSPtr (new fmtcl::TransOpErimm (inv_flag)); - break; - case fmtcl::TransCurve_SLOG2: - ptr = OpSPtr (new fmtcl::TransOpSLog (inv_flag, true)); - break; - case fmtcl::TransCurve_SLOG3: - ptr = OpSPtr (new fmtcl::TransOpSLog3 (inv_flag)); - break; - case fmtcl::TransCurve_VLOG: - ptr = OpSPtr (new fmtcl::TransOpLogC (inv_flag, fmtcl::TransOpLogC::Type_VLOG)); - break; - default: - assert (false); - break; - } - - if (ptr.get () == 0) - { - ptr = OpSPtr (new fmtcl::TransOpBypass); - } - - return (ptr); + return *fmt_dst_ptr; } diff --git a/src/fmtc/Transfer.h b/src/fmtc/Transfer.h index 0597854..be3b009 100644 --- a/src/fmtc/Transfer.h +++ b/src/fmtc/Transfer.h @@ -32,6 +32,7 @@ To Public License, Version 2, as published by Sam Hocevar. 
See #include "fmtcl/TransCurve.h" #include "fmtcl/TransLut.h" #include "fmtcl/TransOpInterface.h" +#include "fmtcl/TransOpLogC.h" #include "vsutl/FilterBase.h" #include "vsutl/NodeRefSPtr.h" #include "vsutl/PlaneProcCbInterface.h" @@ -84,17 +85,11 @@ class Transfer const ::VSFormat & get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src) const; - void init_table (); - - static fmtcl::TransCurve - conv_string_to_curve (const vsutl::FilterBase &flt, const std::string &str); - static OpSPtr conv_curve_to_op (fmtcl::TransCurve c, bool inv_flag); - vsutl::NodeRefSPtr _clip_src_sptr; const ::VSVideoInfo - _vi_in; // Input. Must be declared after _clip_src_sptr because of initialisation order. - ::VSVideoInfo _vi_out; // Output. Must be declared after _vi_in. + _vi_in; // Input. Must be declared after _clip_src_sptr because of initialisation order. + ::VSVideoInfo _vi_out; // Output. Must be declared after _vi_in. bool _sse2_flag; bool _avx2_flag; @@ -109,6 +104,10 @@ class Transfer _curve_s; fmtcl::TransCurve _curve_d; + fmtcl::TransOpLogC::ExpIdx // Exposure Index for the Arri Log C curves + _logc_ei_s; + fmtcl::TransOpLogC::ExpIdx + _logc_ei_d; bool _loglut_flag; vsutl::PlaneProcessor @@ -125,7 +124,9 @@ class Transfer Transfer () = delete; Transfer (const Transfer &other) = delete; + Transfer (Transfer &&other) = delete; Transfer & operator = (const Transfer &other) = delete; + Transfer & operator = (Transfer &&other) = delete; bool operator == (const Transfer &other) const = delete; bool operator != (const Transfer &other) const = delete; diff --git a/src/fmtc/fnc.cpp b/src/fmtc/fnc.cpp index 96d2343..f590fb3 100644 --- a/src/fmtc/fnc.cpp +++ b/src/fmtc/fnc.cpp @@ -25,7 +25,7 @@ To Public License, Version 2, as published by Sam Hocevar. 
See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ #include "fmtc/fnc.h" -#include "fmtcl/Mat4.h" +#include "fmtcl/fnc.h" #include "fmtcl/MatrixProc.h" #include "vsutl/FilterBase.h" #include "vsutl/fnc.h" @@ -46,65 +46,23 @@ namespace fmtc -static void override_fmt_with_csp (::VSFormat &fmt, fmtcl::ColorSpaceH265 csp_out, int plane_out) -{ - if (plane_out >= 0) - { - fmt.numPlanes = 3; - if (csp_out == fmtcl::ColorSpaceH265_RGB) - { - fmt.colorFamily = ::cmRGB; - } - else if (csp_out == fmtcl::ColorSpaceH265_YCGCO) - { - fmt.colorFamily = ::cmYCoCg; - } - else - { - fmt.colorFamily = ::cmYUV; - } - } -} +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ -// Int: depends on the input format (may be float too) -// R, G, B, Y: [0 ; 1] -// U, V, Cg, Co : [-0.5 ; 0.5] -static void make_mat_flt_int (fmtcl::Mat4 &m, bool to_flt_flag, const ::VSFormat &fmt, bool full_flag) +fmtcl::PicFmt conv_vsfmt_to_picfmt (const ::VSFormat &fmt, bool full_flag) { - ::VSFormat fmt2 (fmt); - fmt2.sampleType = ::stFloat; - - const ::VSFormat* fmt_src_ptr = &fmt2; - const ::VSFormat* fmt_dst_ptr = &fmt; - if (to_flt_flag) - { - std::swap (fmt_src_ptr, fmt_dst_ptr); - } - - double ay, by; - double ac, bc; - const int ch_plane = (fmt_dst_ptr->numPlanes > 1) ? 
1 : 0; - vsutl::compute_fmt_mac_cst ( - ay, by, *fmt_dst_ptr, full_flag, *fmt_src_ptr, full_flag, 0 - ); - vsutl::compute_fmt_mac_cst ( - ac, bc, *fmt_dst_ptr, full_flag, *fmt_src_ptr, full_flag, ch_plane - ); + fmtcl::PicFmt pic_fmt; + pic_fmt._sf = conv_vsfmt_to_splfmt (fmt); + pic_fmt._res = fmt.bitsPerSample; + pic_fmt._col_fam = conv_colfam_to_fmtcl (fmt); + pic_fmt._full_flag = full_flag; - m[0][0] = ay; m[0][1] = 0; m[0][2] = 0; m[0][3] = by; - m[1][0] = 0; m[1][1] = ac; m[1][2] = 0; m[1][3] = bc; - m[2][0] = 0; m[2][1] = 0; m[2][2] = ac; m[2][3] = bc; - m[3][0] = 0; m[3][1] = 0; m[3][2] = 0; m[3][3] = 1; + return pic_fmt; } -/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ - - - fmtcl::SplFmt conv_vsfmt_to_splfmt (const ::VSFormat &fmt) { fmtcl::SplFmt splfmt = fmtcl::SplFmt_ILLEGAL; @@ -128,56 +86,40 @@ fmtcl::SplFmt conv_vsfmt_to_splfmt (const ::VSFormat &fmt) } } - return (splfmt); + return splfmt; } -void prepare_matrix_coef (const vsutl::FilterBase &filter, fmtcl::MatrixProc &mat_proc, const fmtcl::Mat4 &mat_main, const ::VSFormat &fmt_dst, bool full_range_dst_flag, const ::VSFormat &fmt_src, bool full_range_src_flag, fmtcl::ColorSpaceH265 csp_out, int plane_out) +fmtcl::ColorFamily conv_colfam_to_fmtcl (const ::VSFormat &fmt) { - const bool int_proc_flag = - ( fmt_src.sampleType == ::stInteger - && fmt_dst.sampleType == ::stInteger); + auto col_fam = fmtcl::ColorFamily_INVALID; - fmtcl::Mat4 m (1, fmtcl::Mat4::Preset_DIAGONAL); - - ::VSFormat fmt_dst2 = fmt_dst; - if (int_proc_flag) + switch (fmt.colorFamily) { - // For the coefficient calculation, use the same output bitdepth - // as the input. The bitdepth change will be done separately with - // a simple bitshift. 
- fmt_dst2.bitsPerSample = fmt_src.bitsPerSample; + case cmGray: col_fam = fmtcl::ColorFamily_GRAY; break; + case cmRGB: col_fam = fmtcl::ColorFamily_RGB; break; + case cmYUV: col_fam = fmtcl::ColorFamily_YUV; break; + case cmYCoCg: col_fam = fmtcl::ColorFamily_YCGCO; break; + default: assert (false); break; } - override_fmt_with_csp (fmt_dst2, csp_out, plane_out); + return col_fam; +} - fmtcl::Mat4 m1s; - fmtcl::Mat4 m1d; - make_mat_flt_int (m1s, true , fmt_src , full_range_src_flag); - make_mat_flt_int (m1d, false, fmt_dst2, full_range_dst_flag); - m *= m1d; - if (! int_proc_flag) - { - if (plane_out > 0 && vsutl::is_chroma_plane (fmt_dst2, plane_out)) - { - // When we extract a single plane, it's a conversion to R or - // to Y, so the outout range is always [0; 1]. Therefore we - // need to offset the chroma planes. - m [plane_out] [fmtcl::MatrixProc::NBR_PLANES] += 0.5; - } - } - m *= mat_main; - m *= m1s; - - const fmtcl::SplFmt splfmt_src = conv_vsfmt_to_splfmt (fmt_src); - const fmtcl::SplFmt splfmt_dst = conv_vsfmt_to_splfmt (fmt_dst); - const fmtcl::MatrixProc::Err ret_val = mat_proc.configure ( - m, int_proc_flag, - splfmt_src, fmt_src.bitsPerSample, - splfmt_dst, fmt_dst.bitsPerSample, - plane_out + + +void prepare_matrix_coef (const vsutl::FilterBase &filter, fmtcl::MatrixProc &mat_proc, const fmtcl::Mat4 &mat_main, const ::VSFormat &fmt_dst, bool full_range_dst_flag, const ::VSFormat &fmt_src, bool full_range_src_flag, fmtcl::ColorSpaceH265 csp_out, int plane_out) +{ + const fmtcl::PicFmt fmt_src_fmtcl = + conv_vsfmt_to_picfmt (fmt_src, full_range_src_flag); + const fmtcl::PicFmt fmt_dst_fmtcl = + conv_vsfmt_to_picfmt (fmt_dst, full_range_dst_flag); + + const int ret_val = fmtcl::prepare_matrix_coef ( + mat_proc, mat_main, fmt_dst_fmtcl, fmt_src_fmtcl, csp_out, plane_out ); + if (ret_val != fmtcl::MatrixProc::Err_OK) { if (ret_val == fmtcl::MatrixProc::Err_POSSIBLE_OVERFLOW) diff --git a/src/fmtc/fnc.h b/src/fmtc/fnc.h index 5403664..fc86889 100644 --- 
a/src/fmtc/fnc.h +++ b/src/fmtc/fnc.h @@ -27,7 +27,9 @@ To Public License, Version 2, as published by Sam Hocevar. See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ +#include "fmtcl/ColorFamily.h" #include "fmtcl/ColorSpaceH265.h" +#include "fmtcl/PicFmt.h" #include "fmtcl/SplFmt.h" @@ -49,7 +51,10 @@ namespace fmtc +fmtcl::PicFmt conv_vsfmt_to_picfmt (const ::VSFormat &fmt, bool full_flag); fmtcl::SplFmt conv_vsfmt_to_splfmt (const ::VSFormat &fmt); +fmtcl::ColorFamily + conv_colfam_to_fmtcl (const ::VSFormat &fmt); void prepare_matrix_coef (const vsutl::FilterBase &filter, fmtcl::MatrixProc &mat_proc, const fmtcl::Mat4 &mat_main, const ::VSFormat &fmt_dst, bool full_range_dst_flag, const ::VSFormat &fmt_src, bool full_range_src_flag, fmtcl::ColorSpaceH265 csp_out = fmtcl::ColorSpaceH265_UNSPECIFIED, int plane_out = -1); diff --git a/src/fmtc/version.h b/src/fmtc/version.h index 12e40c1..3bc8b7a 100644 --- a/src/fmtc/version.h +++ b/src/fmtc/version.h @@ -1,5 +1,5 @@ #pragma once -#define fmtc_VERSION "r22" +#define fmtc_VERSION "r23" #define fmtc_PLUGIN_NAME "fmtconv" #define fmtc_NAMESPACE "fmtc" diff --git a/src/fmtcl/Dither.cpp b/src/fmtcl/Dither.cpp new file mode 100644 index 0000000..77a30f5 --- /dev/null +++ b/src/fmtcl/Dither.cpp @@ -0,0 +1,2605 @@ +/***************************************************************************** + + Dither.cpp + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. 
+ +*Tab=3***********************************************************************/ + + + +#if defined (_MSC_VER) + #pragma warning (1 : 4130 4223 4705 4706) + #pragma warning (4 : 4355 4786 4800) +#endif + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fstb/def.h" + +#include "fmtcl/Dither.h" +#include "fmtcl/fnc.h" +#include "fmtcl/PicFmt.h" +#if (fstb_ARCHI == fstb_ARCHI_X86) + #include "fmtcl/ProxyRwSse2.h" +#endif +#include "fmtcl/VoidAndCluster.h" +#include "fstb/fnc.h" + +#include +#include + +#include +#include + + + +namespace fmtcl +{ + + + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +constexpr int Dither::_max_nbr_planes; +constexpr int Dither::_max_pat_width; + + + +Dither::Dither ( + SplFmt src_fmt, int src_res, bool src_full_flag, + SplFmt dst_fmt, int dst_res, bool dst_full_flag, + ColorFamily color_fam, int nbr_planes, int w, + DMode dmode, int pat_size, double ampo, double ampn, + bool dyn_flag, bool static_noise_flag, bool correlated_planes_flag, + bool tpdfo_flag, bool tpdfn_flag, + bool sse2_flag, bool avx2_flag +) +: _splfmt_src (src_fmt) +, _splfmt_dst (dst_fmt) +, _src_res (src_res) +, _dst_res (dst_res) +, _full_range_in_flag (src_full_flag) +, _full_range_out_flag (dst_full_flag) +, _color_fam (color_fam) +, _nbr_planes (nbr_planes) +, _sse2_flag (sse2_flag) +, _avx2_flag (avx2_flag) +, _dmode (dmode) +, _pat_size (pat_size) +, _ampo (ampo) +, _ampn (ampn) +, _dyn_flag (dyn_flag) +, _static_noise_flag (static_noise_flag) +, _correlated_planes_flag (correlated_planes_flag) +, _tpdfo_flag (tpdfo_flag) +, _tpdfn_flag (tpdfn_flag) +{ + assert (src_fmt >= 0); + assert (src_fmt < SplFmt::SplFmt_NBR_ELT); + assert (dst_fmt >= 0); + assert (dst_fmt < SplFmt::SplFmt_NBR_ELT); + assert ( + (SplFmt_is_int (src_fmt) && ( ( src_res >= 8 + && src_res <= 12) + || src_res == 14 + || src_res == 16)) + || (SplFmt_is_float (src_fmt) && src_res == 32 ) + ); + assert ( + 
(SplFmt_is_int (dst_fmt) && ( ( dst_res >= 8 + && dst_res <= 10) + || dst_res == 12 + || dst_res == 16)) + || (SplFmt_is_float (dst_fmt) && dst_res == 32 ) + ); + assert (color_fam >= 0); + assert (color_fam < ColorFamily_NBR_ELT); + assert (nbr_planes > 0); + assert (nbr_planes <= _max_nbr_planes); + assert (dmode >= 0); + assert (dmode < DMode_NBR_ELT); + assert (pat_size >= 4); + assert (_max_pat_width % pat_size == 0); + assert (ampo >= 0); + assert (ampn >= 0); + + // No dithering required + if ( ( SplFmt_is_int (src_fmt) + && ( SplFmt_is_float (dst_fmt) + || ( _src_res <= _dst_res + && ! _full_range_in_flag + && ! _full_range_out_flag))) + || ( SplFmt_is_float (src_fmt) + && SplFmt_is_float (dst_fmt))) + { + _upconv_flag = true; + } + + // Data scaling parameters + for (int plane_index = 0; plane_index < nbr_planes; ++plane_index) + { + SclInf & scl_inf = _scale_info_arr [plane_index]; + fmtcl::compute_fmt_mac_cst ( + scl_inf._info._gain, + scl_inf._info._add_cst, + PicFmt { dst_fmt, dst_res, color_fam, _full_range_out_flag }, + PicFmt { src_fmt, src_res, color_fam, _full_range_in_flag }, + plane_index + ); + + scl_inf._ptr = nullptr; + if ( _upconv_flag + && SplFmt_is_int (src_fmt) + && SplFmt_is_float (dst_fmt)) + { + scl_inf._ptr = &scl_inf._info; + } + } + + if (w <= 0) + { + w = _max_unk_width; + } + _buf_factory_uptr = std::make_unique (w); + _buf_pool.set_factory (*_buf_factory_uptr); + + build_dither_pat (); + + // Amplitude precalculations + + // In case of TPDF, rescales the amplitude so the power is kept constant. 
+ // Sum of two noises (uncorrelated signals) -> +3 dB + if (_tpdfo_flag) + { + ampo *= fstb::SQRT2 * 0.5; + } + if (_tpdfn_flag) + { + ampn *= fstb::SQRT2 * 0.5; + } + + const int amp_mul = 1 << _amp_bits; + const int ampo_i_raw = fstb::round_int (ampo * amp_mul); + const int ampn_i_raw = fstb::round_int (ampn * amp_mul); + _amp._o_i = std::min (ampo_i_raw, 127); + _amp._n_i = std::min (ampn_i_raw, 127); + _amp._n_f = float (ampn * (1.f / 256.f)); + + if (_errdif_flag) + { + _amp._e_i = fstb::limit ( + fstb::round_int ((ampo - 1) * (128 << _amp_bits)), + 0, + (2048 << _amp_bits) - 1 + ); + _amp._e_f = fstb::limit (float (ampo - 1), 0.f, 8.f); + } + + _simple_flag = (ampo_i_raw == amp_mul && ampn_i_raw == 0); + + // Processing function initialisation + if (_errdif_flag) + { + init_fnc_errdiff (); + } + else if (_dmode == DMode_QUASIRND) + { + init_fnc_quasirandom (); + } + else if (_dmode == DMode_FAST) + { + init_fnc_fast (); + } + else + { + init_fnc_ordered (); + } +} + + + +void Dither::process_plane (uint8_t *dst_ptr, int dst_stride, const uint8_t *src_ptr, int src_stride, int w, int h, int frame_index, int plane_index) +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + assert (h > 0); + assert (frame_index >= 0); + assert (plane_index >= 0); + assert (plane_index < _max_nbr_planes); + + if (_upconv_flag) + { + BitBltConv blitter (_sse2_flag, _avx2_flag); + blitter.bitblt ( + _splfmt_dst, _dst_res, dst_ptr, nullptr, dst_stride, + _splfmt_src, _src_res, src_ptr, nullptr, src_stride, + w, h, + _scale_info_arr [plane_index]._ptr + ); + } + else + { + dither_plane ( + dst_ptr, dst_stride, + src_ptr, src_stride, + w, h, + _scale_info_arr [plane_index]._info, + frame_index, plane_index + ); + } +} + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +constexpr int Dither::_pat_period; +constexpr int 
Dither::_amp_bits; +constexpr int Dither::_err_res; +constexpr int Dither::_max_unk_width; + + + +void Dither::build_dither_pat () +{ + _errdif_flag = false; + + switch (_dmode) + { + case DMode_BAYER: + build_dither_pat_bayer (); + break; + + case DMode_FILTERLITE: + case DMode_STUCKI: + case DMode_ATKINSON: + case DMode_FLOYD: + case DMode_OSTRO: + _errdif_flag = true; + _tpdfo_flag = false; + break; + + case DMode_ROUND: + case DMode_FAST: + default: + build_dither_pat_round (); + break; + + case DMode_VOIDCLUST: + build_dither_pat_void_and_cluster (_pat_size); + break; + + case DMode_QUASIRND: + // Nothing + break; + } +} + + + +void Dither::build_dither_pat_round () +{ + PatData & pat_data = _dither_pat_arr [0]; + for (int y = 0; y < _max_pat_width; ++y) + { + for (int x = 0; x < _max_pat_width; ++x) + { + pat_data [y] [x] = 0; + } + } + + build_next_dither_pat (); +} + + + +void Dither::build_dither_pat_bayer () +{ + assert (fstb::is_pow_2 (int (_max_pat_width))); + + PatData & pat_data = _dither_pat_arr [0]; + for (int y = 0; y < _max_pat_width; ++y) + { + for (int x = 0; x < _max_pat_width; ++x) + { + pat_data [y] [x] = -128; + } + } + + for (int dith_size = 2; dith_size <= _max_pat_width; dith_size <<= 1) + { + for (int y = 0; y < _max_pat_width; y += 2) + { + for (int x = 0; x < _max_pat_width; x += 2) + { + const int xx = (x >> 1) + (_max_pat_width >> 1); + const int yy = (y >> 1) + (_max_pat_width >> 1); + const int val = (pat_data [yy] [xx] + 128) >> 2; + pat_data [y ] [x ] = int16_t (val + 0-128); + pat_data [y ] [x + 1] = int16_t (val + 128-128); + pat_data [y + 1] [x ] = int16_t (val + 192-128); + pat_data [y + 1] [x + 1] = int16_t (val + 64-128); + } + } + } + + build_next_dither_pat (); +} + + + +void Dither::build_dither_pat_void_and_cluster (int w) +{ + assert (_max_pat_width % w == 0); + VoidAndCluster vc_gen; + MatrixWrap pat_raw (w, w); + vc_gen.create_matrix (pat_raw); + + PatData & pat_data = _dither_pat_arr [0]; + const int area = w * w; + 
for (int y = 0; y < _max_pat_width; ++y) + { + for (int x = 0; x < _max_pat_width; ++x) + { + pat_data [y] [x] = int16_t (pat_raw (x, y) * 256 / area - 128); + } + } + + build_next_dither_pat (); +} + + + +void Dither::build_next_dither_pat () +{ + if (_tpdfo_flag) + { + for (int y = 0; y < _max_pat_width; ++y) + { + for (int x = 0; x < _max_pat_width; ++x) + { + const int r = _dither_pat_arr [0] [y] [x]; + const int t = remap_tpdf_scalar (r); + _dither_pat_arr [0] [y] [x] = int16_t (t); + } + } + } + + for (int seq = 1; seq < _pat_period; ++seq) + { + const int angle = (_dyn_flag) ? seq & 3 : 0; + copy_dither_pat_rotate ( + _dither_pat_arr [seq], + _dither_pat_arr [0], + angle + ); + } +} + + + +void Dither::copy_dither_pat_rotate (PatData &dst, const PatData &src, int angle) noexcept +{ + assert (angle >= 0); + assert (angle < 4); + + static const int sin_arr [4] = { 0, 1, 0, -1 }; + const int s = sin_arr [ angle ]; + const int c = sin_arr [(angle + 1) & 3]; + + assert (fstb::is_pow_2 (int (_max_pat_width))); + const int mask = _max_pat_width - 1; + + for (int y = 0; y < _max_pat_width; ++y) + { + for (int x = 0; x < _max_pat_width; ++x) + { + const int xs = (x * c - y * s) & mask; + const int ys = (x * s + y * c) & mask; + + dst [y] [x] = src [ys] [xs]; + } + } +} + + + +// All possible combinations +#define fmtcl_Dither_SPAN_INT(SETP, NAMP, NAMF, simple_flag, tpdfo_flag, tpdfn_flag, dst_res, dst_fmt, src_res, src_fmt) \ + switch ( ((simple_flag) << 7) \ + + ((tpdfo_flag) << 23) + ((tpdfn_flag) << 22) \ + + ((dst_res) << 24) + ((dst_fmt) << 16) \ + + ((src_res) << 8) + (src_fmt)) \ + { \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 9) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 10) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 11) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 12) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 14) 
\ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 16) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 10) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 11) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 12) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 14) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 16) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 11) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 12) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 14) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 16) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 14) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 16) \ + } + +// All possible combinations using float as intermediary data +#define fmtcl_Dither_SPAN_FLT(SETP, NAMP, NAMF, simple_flag, tpdfo_flag, tpdfn_flag, dst_res, dst_fmt, src_res, src_fmt) \ + switch ( ((simple_flag) << 7) \ + + ((tpdfo_flag) << 23) + ((tpdfn_flag) << 22) \ + + ((dst_res) << 24) + ((dst_fmt) << 16) \ + + ((src_res) << 8) + (src_fmt)) \ + { \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT8 , uint8_t , 8) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 9) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 10) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 11) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 12) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 14) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_INT16, uint16_t, 16) \ + SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t , 8, SplFmt_FLOAT, float , 32) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT8 , uint8_t , 8) \ + SETP (NAMP, 
NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 9) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 10) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 11) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 12) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 14) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_INT16, uint16_t, 16) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 9, SplFmt_FLOAT, float , 32) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT8 , uint8_t , 8) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 9) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 10) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 11) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 12) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 14) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 16) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_FLOAT, float , 32) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT8 , uint8_t , 8) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 9) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 10) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 11) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 12) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 14) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 16) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_FLOAT, float , 32) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT8 , uint8_t , 8) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 9) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 10) \ + 
SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 11) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 12) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 14) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 16) \ + SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_FLOAT, float , 32) \ + } + + + +#define fmtcl_Dither_SET_FNC_MULTI(FCASE, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + FCASE (false, false, false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + FCASE (false, false, true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + FCASE (false, true , false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + FCASE (false, true , true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + FCASE (true , false, false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + FCASE (true , false, true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + FCASE (true , true , false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + FCASE (true , true , true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) + +#define fmtcl_Dither_SET_FNC_INT_CASE(simple_flag, tpdfo_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + case (simple_flag << 7) + (tpdfn_flag << 22) + (tpdfo_flag << 23) \ + + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ + _process_seg_int_int_ptr = &process_seg_##NAMF##_int_int_cpp < \ + simple_flag, tpdfo_flag, tpdfn_flag, DT, DP, ST, SP \ + >; \ + break; + +#define fmtcl_Dither_SET_FNC_INT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_MULTI (fmtcl_Dither_SET_FNC_INT_CASE, \ + NAMP, NAMF, DF, DT, DP, SF, ST, SP) + +#define fmtcl_Dither_SET_FNC_FLT_CASE(simple_flag, tpdfo_flag, tpdfn_flag,NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + case (simple_flag << 7) + (tpdfn_flag << 22) + (tpdfo_flag << 23) \ + + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ + _process_seg_flt_int_ptr = &process_seg_##NAMF##_flt_int_cpp < \ + simple_flag, tpdfo_flag, tpdfn_flag, DT, DP, ST \ + >; \ + break; + +#define fmtcl_Dither_SET_FNC_FLT(NAMP, NAMF, DF, DT, DP, SF, 
ST, SP) \ + fmtcl_Dither_SET_FNC_MULTI (fmtcl_Dither_SET_FNC_FLT_CASE, \ + NAMP, NAMF, DF, DT, DP, SF, ST, SP) + +#define fmtcl_Dither_SET_FNC_INT_SSE2_CASE(simple_flag, tpdfo_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + case (simple_flag << 7) + (tpdfn_flag << 22) + (tpdfo_flag << 23) \ + + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ + _process_seg_int_int_ptr = &process_seg_##NAMF##_int_int_sse2 < \ + simple_flag, tpdfo_flag, tpdfn_flag, DF, DP, SF, SP \ + >; \ + break; + +#define fmtcl_Dither_SET_FNC_INT_SSE2(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_MULTI (fmtcl_Dither_SET_FNC_INT_SSE2_CASE, \ + NAMP, NAMF, DF, DT, DP, SF, ST, SP) + +#define fmtcl_Dither_SET_FNC_FLT_SSE2_CASE(simple_flag, tpdfo_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + case (simple_flag << 7) + (tpdfn_flag << 22) + (tpdfo_flag << 23) \ + + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ + _process_seg_flt_int_ptr = &process_seg_##NAMF##_flt_int_sse2 < \ + simple_flag, tpdfo_flag, tpdfn_flag, DF, DP, SF \ + >; \ + break; + +#define fmtcl_Dither_SET_FNC_FLT_SSE2(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_MULTI (fmtcl_Dither_SET_FNC_FLT_SSE2_CASE, \ + NAMP, NAMF, DF, DT, DP, SF, ST, SP) + + + +void Dither::init_fnc_fast () noexcept +{ + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_INT, fast, fast, false, false, false, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_FLT, fast, fast, false, false, false, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + +#if (fstb_ARCHI == fstb_ARCHI_X86) + if (_sse2_flag) + { + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_INT_SSE2, fast, fast, false, false, false, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_FLT_SSE2, fast, fast, false, false, false, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + } +#endif +} + + + +void Dither::init_fnc_ordered () noexcept +{ + assert (! 
_errdif_flag); + + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_INT, + ord, ord, _simple_flag, _tpdfo_flag, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_FLT, + ord, ord, _simple_flag, _tpdfo_flag, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + +#if (fstb_ARCHI == fstb_ARCHI_X86) + if (_sse2_flag) + { + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_INT_SSE2, + ord, ord, _simple_flag, _tpdfo_flag, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_FLT_SSE2, + ord, ord, _simple_flag, _tpdfo_flag, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + } +#endif +} + + + +void Dither::init_fnc_quasirandom () noexcept +{ + assert (! _errdif_flag); + + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_INT, + qrs, qrs, _simple_flag, _tpdfo_flag, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_FLT, + qrs, qrs, _simple_flag, _tpdfo_flag, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + +#if (fstb_ARCHI == fstb_ARCHI_X86) + if (_sse2_flag) + { + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_INT_SSE2, + qrs, qrs, _simple_flag, _tpdfo_flag, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_FLT_SSE2, + qrs, qrs, _simple_flag, _tpdfo_flag, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + } +#endif +} + + + +#undef fmtcl_Dither_SET_FNC_MULTI +#undef fmtcl_Dither_SET_FNC_INT_CASE +#undef fmtcl_Dither_SET_FNC_INT +#undef fmtcl_Dither_SET_FNC_FLT_CASE +#undef fmtcl_Dither_SET_FNC_FLT +#undef fmtcl_Dither_SET_FNC_INT_SSE2_CASE +#undef fmtcl_Dither_SET_FNC_INT_SSE2 +#undef fmtcl_Dither_SET_FNC_FLT_SSE2_CASE +#undef fmtcl_Dither_SET_FNC_FLT_SSE2 + + + +#define fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE(simple_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + case 
(simple_flag << 7) + (tpdfn_flag << 22) \ + + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ + _process_seg_int_int_ptr = &process_seg_errdif_int_int_cpp < \ + simple_flag, tpdfn_flag, Diffuse##NAMF \ + >; \ + break; + +#define fmtcl_Dither_SET_FNC_ERRDIF_INT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE (false, false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE (false, true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE (true , false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE (true , true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) + +#define fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE(simple_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + case (simple_flag << 7) + (tpdfn_flag << 22) \ + + (DP << 24) + (DF << 16) + (SP << 8) + SF: \ + _process_seg_flt_int_ptr = &process_seg_errdif_flt_int_cpp < \ + simple_flag, tpdfn_flag, Diffuse##NAMF \ + >; \ + break; + +#define fmtcl_Dither_SET_FNC_ERRDIF_FLT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE (false, false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE (false, true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE (true , false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \ + fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE (true , true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) + + + +void Dither::init_fnc_errdiff () noexcept +{ + assert (_errdif_flag); + + switch (_dmode) + { + case DMode_FILTERLITE: + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_ERRDIF_INT, + errdif, FilterLite, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_ERRDIF_FLT, + errdif, FilterLite, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + break; + + case DMode_STUCKI: + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_ERRDIF_INT, + 
errdif, Stucki, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_ERRDIF_FLT, + errdif, Stucki, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + break; + + case DMode_ATKINSON: + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_ERRDIF_INT, + errdif, Atkinson, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_ERRDIF_FLT, + errdif, Atkinson, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + break; + + case DMode_FLOYD: + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_ERRDIF_INT, + errdif, FloydSteinberg, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_ERRDIF_FLT, + errdif, FloydSteinberg, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + break; + + case DMode_OSTRO: + fmtcl_Dither_SPAN_INT ( + fmtcl_Dither_SET_FNC_ERRDIF_INT, + errdif, Ostromoukhov, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + fmtcl_Dither_SPAN_FLT ( + fmtcl_Dither_SET_FNC_ERRDIF_FLT, + errdif, Ostromoukhov, _simple_flag, false, _tpdfn_flag, + _dst_res, _splfmt_dst, _src_res, _splfmt_src + ) + break; + + default: + break; + } +} + + + +#undef fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE +#undef fmtcl_Dither_SET_FNC_ERRDIF_INT +#undef fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE +#undef fmtcl_Dither_SET_FNC_ERRDIF_FLT + + + +#undef fmtcl_Dither_SPAN_INT +#undef fmtcl_Dither_SPAN_FLT + + + +void Dither::dither_plane (uint8_t *dst_ptr, int dst_stride, const uint8_t *src_ptr, int src_stride, int w, int h, const BitBltConv::ScaleInfo &scale_info, int frame_index, int plane_index) +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + assert (h > 0); + + SegContext ctx; + ctx._scale_info_ptr = 
&scale_info; + ctx._amp = _amp; + + uint32_t rnd_state = 0; + if (! _correlated_planes_flag) + { + rnd_state += plane_index << 16; + } + if (_static_noise_flag) + { + rnd_state += 55555; + } + else + { + rnd_state += frame_index; + } + ctx._rnd_state = rnd_state; + + const bool sc_flag = + ( _splfmt_src == SplFmt_FLOAT + || ! fstb::is_eq (scale_info._gain * double ((uint64_t (1)) << (_src_res - _dst_res)), 1.0, 1e-6) + || ! fstb::is_null (scale_info._add_cst, 1e-6)); + + void (* process_ptr) (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) = + (sc_flag) + ? _process_seg_flt_int_ptr + : _process_seg_int_int_ptr; + assert (process_ptr != nullptr); + + ErrDifBuf * ed_buf_ptr = nullptr; + if (_errdif_flag) + { + ed_buf_ptr = _buf_pool.take_obj (); + if (ed_buf_ptr == nullptr) + { + throw std::runtime_error ( + "cannot allocate memory for temporary buffer." + ); + } + ed_buf_ptr->clear ((sc_flag) ? sizeof (float) : sizeof (int16_t)); + } + + switch (_dmode) + { + case DMode_BAYER: + case DMode_ROUND: + case DMode_VOIDCLUST: + { + int pat_index = 0; + if (! _correlated_planes_flag) + { + pat_index += plane_index; + } + if (_dyn_flag) + { + pat_index += frame_index; + } + pat_index &= _pat_period - 1; + const PatData& pattern = _dither_pat_arr [pat_index]; + ctx._pattern_ptr = &pattern; + } + break; + + case DMode_FAST: + // Nothing + break; + + case DMode_QUASIRND: + ctx._qrs_seed = 0; + if (_dyn_flag) + { + ctx._qrs_seed += uint32_t (frame_index * 73); + } + if (! 
_correlated_planes_flag) + { + ctx._qrs_seed += uint32_t (plane_index * 263); + } + break; + + case DMode_FILTERLITE: + case DMode_STUCKI: + case DMode_ATKINSON: + case DMode_FLOYD: + case DMode_OSTRO: + ctx._ed_buf_ptr = ed_buf_ptr; + break; + + default: + assert (false); + throw std::logic_error ("unexpected dithering algorithm"); + break; + } + + for (int y = 0; y < h; ++y) + { + ctx._y = y; + + (*process_ptr) (dst_ptr, src_ptr, w, ctx); + + src_ptr += src_stride; + dst_ptr += dst_stride; + } + + if (ed_buf_ptr != nullptr) + { + _buf_pool.return_obj (*ed_buf_ptr); + ed_buf_ptr = nullptr; + } +} + + + +template +void Dither::process_seg_fast_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + fstb::unused (ctx); + + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + + constexpr int dif_bits = SRC_BITS - DST_BITS; + static_assert (dif_bits >= 0, "This function cannot increase bidepth."); + + const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast (src_ptr); + DST_TYPE * fstb_RESTRICT dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); + + for (int pos = 0; pos < w; ++pos) + { + const int s = src_n_ptr [pos]; + const int pix = s >> dif_bits; + dst_n_ptr [pos] = static_cast (pix); + } +} + + + +template +void Dither::process_seg_fast_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + assert (ctx._scale_info_ptr != nullptr); + + const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast (src_ptr); + DST_TYPE * fstb_RESTRICT dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); + + const float mul = float (ctx._scale_info_ptr->_gain); + const float add = float (ctx._scale_info_ptr->_add_cst); + const int vmax = (1 << DST_BITS) - 1; + + for (int pos = 0; pos < w; ++pos) + { + float s = float (src_n_ptr [pos]); + s 
= s * mul + add; + const int quant = fstb::conv_int_fast (s); + const int pix = fstb::limit (quant, 0, vmax); + dst_n_ptr [pos] = static_cast (pix); + } +} + + + +#if (fstb_ARCHI == fstb_ARCHI_X86) + + + +template +void Dither::process_seg_fast_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + fstb::unused (ctx); + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + + constexpr int dif_bits = SRC_BITS - DST_BITS; + static_assert (dif_bits >= 0, "This function cannot increase bidepth."); + + typedef typename ProxyRwSse2 ::PtrConst::Type SrcPtr; + typedef typename ProxyRwSse2 ::Ptr::Type DstPtr; + SrcPtr src_n_ptr = reinterpret_cast (src_ptr); + DstPtr dst_n_ptr = reinterpret_cast (dst_ptr); + const __m128i zero = _mm_setzero_si128 (); + const __m128i mask_lsb = _mm_set1_epi16 (0x00FF); + + for (int pos = 0; pos < w; pos += 8) + { + const __m128i s = + ProxyRwSse2 ::read_i16 (src_n_ptr + pos, zero); + const __m128i pix = _mm_srli_epi16 (s, dif_bits); + ProxyRwSse2 ::write_i16 (dst_n_ptr + pos, pix, mask_lsb); + } +} + + + +template +void Dither::process_seg_fast_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + assert (ctx._scale_info_ptr != nullptr); + + typedef typename ProxyRwSse2 ::PtrConst::Type SrcPtr; + typedef typename ProxyRwSse2 ::Ptr::Type DstPtr; + SrcPtr src_n_ptr = reinterpret_cast (src_ptr); + DstPtr dst_n_ptr = reinterpret_cast (dst_ptr); + + const __m128 mul = _mm_set1_ps (float (ctx._scale_info_ptr->_gain)); + const __m128 add = _mm_set1_ps (float (ctx._scale_info_ptr->_add_cst)); + const __m128 vmax = _mm_set1_ps (float ((1 << DST_BITS) - 1)); + const __m128 zero_f = _mm_setzero_ps (); + const __m128i zero_i = _mm_setzero_si128 (); + const __m128i mask_lsb = _mm_set1_epi16 (0x00FF); + const 
__m128i sign_bit = _mm_set1_epi16 (-0x8000); + const __m128 offset = _mm_set1_ps (-32768); + + for (int pos = 0; pos < w; pos += 8) + { + __m128 s0; + __m128 s1; + ProxyRwSse2 ::read_flt ( + src_n_ptr + pos, s0, s1, zero_i + ); + s0 = _mm_add_ps (_mm_mul_ps (s0, mul), add); + s1 = _mm_add_ps (_mm_mul_ps (s1, mul), add); + s0 = _mm_max_ps (_mm_min_ps (s0, vmax), zero_f); + s1 = _mm_max_ps (_mm_min_ps (s1, vmax), zero_f); + ProxyRwSse2 ::write_flt ( + dst_n_ptr + pos, s0, s1, mask_lsb, sign_bit, offset + ); + } +} + + + +#endif + + + +template +void Dither::process_seg_ord_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + const PatRow & fstb_RESTRICT pattern = ctx.extract_pattern_row (); + + process_seg_common_int_int_cpp < + S_FLAG, TN_FLAG, DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS + > (dst_ptr, src_ptr, w, ctx, + [&] (int pos) + { + return pattern [pos & (_max_pat_width - 1)]; + } + ); +} + + + +template +void Dither::process_seg_ord_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + const PatRow & fstb_RESTRICT pattern = ctx.extract_pattern_row (); + + process_seg_common_flt_int_cpp < + S_FLAG, TN_FLAG, DST_TYPE, DST_BITS, SRC_TYPE + > (dst_ptr, src_ptr, w, ctx, + [&] (int pos) + { + return pattern [pos & (_max_pat_width - 1)]; + } + ); +} + + + +#if (fstb_ARCHI == fstb_ARCHI_X86) + + + +template +void Dither::process_seg_ord_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + const PatRow & fstb_RESTRICT pattern = ctx.extract_pattern_row (); + + process_seg_common_int_int_sse2 < + S_FLAG, TN_FLAG, DST_FMT, DST_BITS, SRC_FMT, SRC_BITS + > (dst_ptr, src_ptr, w, ctx, + [&] (int pos) + { + return _mm_load_si128 (reinterpret_cast ( + &pattern [pos & (_max_pat_width - 1)] + )); // 8 s16 [-128 ; +127] + } + ); +} + + + +template +void 
Dither::process_seg_ord_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + const PatRow & fstb_RESTRICT pattern = ctx.extract_pattern_row (); + + process_seg_common_flt_int_sse2 < + S_FLAG, TN_FLAG, DST_FMT, DST_BITS, SRC_FMT + > (dst_ptr, src_ptr, w, ctx, + [&] (int pos) + { + return _mm_load_si128 (reinterpret_cast ( + &pattern [pos & (_max_pat_width - 1)] + )); // 8 s16 [-128 ; +127] + } + ); +} + + + +#endif // fstb_ARCHI_X86 + + + +template +void Dither::process_seg_qrs_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + // alpha1 = 1 / x, with x real solution of: x^3 - x - 1 = 0 + // Also: + // alpha1 = (curt (2) * sq (curt (3))) + // / (curt (9 - sqrt (69)) + curt (9 + sqrt (69))) + constexpr double alpha1 = 1.0 / 1.3247179572447460259609088544781; + constexpr double alpha2 = alpha1 * alpha1; + constexpr int sc_l2 = 16; // 16 bits of fractional values + constexpr float sc_mul = float (1 << sc_l2); + constexpr int qrs_shf = sc_l2 - 9; + constexpr int qrs_inc = int (alpha1 * sc_mul + 0.5f); + uint32_t qrs_cnt = uint32_t (std::llrint ( + (alpha2 * double (ctx._y + ctx._qrs_seed)) * sc_mul + )); + + process_seg_common_int_int_cpp < + S_FLAG, TN_FLAG, DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS + > (dst_ptr, src_ptr, w, ctx, + [&] (int /*pos*/) + { + const int p = (qrs_cnt >> qrs_shf) & 0x1FF; + int dith_o = (p > 255) ? 
512 - 128 - p : p - 128; // s8 + qrs_cnt += qrs_inc; + + if (TO_FLAG) + { + dith_o = remap_tpdf_scalar (dith_o); + } + + return dith_o; + } + ); +} + + + +template +void Dither::process_seg_qrs_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + // alpha1 = 1 / x, with x real solution of: x^3 - x - 1 = 0 + // Also: + // alpha1 = (curt (2) * sq (curt (3))) + // / (curt (9 - sqrt (69)) + curt (9 + sqrt (69))) + constexpr double alpha1 = 1.0 / 1.3247179572447460259609088544781; + constexpr double alpha2 = alpha1 * alpha1; + constexpr int sc_l2 = 16; // 16 bits of fractional values + constexpr float sc_mul = float (1 << sc_l2); + constexpr int qrs_shf = sc_l2 - 9; + constexpr int qrs_inc = int (alpha1 * sc_mul + 0.5f); + uint32_t qrs_cnt = uint32_t (std::llrint ( + (alpha2 * double (ctx._y + ctx._qrs_seed)) * sc_mul + )); + + process_seg_common_flt_int_cpp < + S_FLAG, TN_FLAG, DST_TYPE, DST_BITS, SRC_TYPE + > (dst_ptr, src_ptr, w, ctx, + [&] (int /*pos*/) + { + const int p = (qrs_cnt >> qrs_shf) & 0x1FF; + int dith_o = (p > 255) ? 
512 - 128 - p : p - 128; // s8 + qrs_cnt += qrs_inc; + + if (TO_FLAG) + { + dith_o = remap_tpdf_scalar (dith_o); + } + + return dith_o; + } + ); +} + + + +#if (fstb_ARCHI == fstb_ARCHI_X86) + + + +// SSE2 integer-to-integer segment processing with the "qrs" ordered dither +// (presumably a quasirandom sequence; the step alpha1 is documented below). +// The lambda passed to process_seg_common_int_int_sse2() yields 8 values per +// call: a 16.16 fixed-point counter is advanced per pixel, its top 9 fractional +// bits are folded into a triangle wave, then optionally remapped by +// remap_tpdf_vec() when TO_FLAG is set. +template +void Dither::process_seg_qrs_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + // alpha1 = 1 / x, with x real solution of: x^3 - x - 1 = 0 + // Also: + // alpha1 = (curt (2) * sq (curt (3))) + // / (curt (9 - sqrt (69)) + curt (9 + sqrt (69))) + constexpr double alpha1 = 1.0 / 1.3247179572447460259609088544781; + constexpr double alpha2 = alpha1 * alpha1; + constexpr int sc_l2 = 16; // 16 bits of fractional values + constexpr float sc_mul = float (1 << sc_l2); + constexpr int qrs_shf = sc_l2 - 9; + constexpr int qrs_inc = int (alpha1 * sc_mul + 0.5f); + uint32_t qrs_cnt = uint32_t (std::llrint ( + (alpha2 * double (ctx._y + ctx._qrs_seed)) * sc_mul + )); + + const __m128i qrs_inc_4 = _mm_set1_epi32 (4 * qrs_inc); + __m128i qrs_cnt_4 = _mm_set1_epi32 (qrs_cnt); + const __m128i qrs_ofs = _mm_set_epi32 (qrs_inc * 3, qrs_inc * 2, qrs_inc, 0); + qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_ofs); + const __m128i qrs_msk = _mm_set1_epi32 (0x1FF); + const __m128i c128 = _mm_set1_epi16 (128); + const __m128i c256 = _mm_set1_epi16 (256); + const __m128i c384 = _mm_set1_epi16 (384); + + process_seg_common_int_int_sse2 < + S_FLAG, TN_FLAG, DST_FMT, DST_BITS, SRC_FMT, SRC_BITS + > (dst_ptr, src_ptr, w, ctx, + [&] (int /*pos*/) + { + auto p03 = _mm_srli_epi32 (qrs_cnt_4, qrs_shf); + p03 = _mm_and_si128 (p03, qrs_msk); + qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_inc_4); + auto p47 = _mm_srli_epi32 (qrs_cnt_4, qrs_shf); + p47 = _mm_and_si128 (p47, qrs_msk); + qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_inc_4); + const auto p = _mm_packs_epi32 (p03, p47); + const auto tri_a = _mm_sub_epi16 (p, c128); + const auto tri_d = _mm_sub_epi16 (c384, p); + const auto cond = _mm_cmplt_epi16 (p, c256); + auto dith_o = 
_mm_or_si128 ( + _mm_and_si128 (cond, tri_a), + _mm_andnot_si128 (cond, tri_d) + ); + + if (TO_FLAG) + { + dith_o = remap_tpdf_vec (dith_o); + } + + return dith_o; // 8 s16 [-128 ; +127] or [-256 ; +255] + } + ); +} + + + +// Float-to-integer counterpart of process_seg_qrs_int_int_sse2(): the same +// dither generator, handed to process_seg_common_flt_int_sse2(). +template +void Dither::process_seg_qrs_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + // alpha1 = 1 / x, with x real solution of: x^3 - x - 1 = 0 + // Also: + // alpha1 = (curt (2) * sq (curt (3))) + // / (curt (9 - sqrt (69)) + curt (9 + sqrt (69))) + constexpr double alpha1 = 1.0 / 1.3247179572447460259609088544781; + constexpr double alpha2 = alpha1 * alpha1; + constexpr int sc_l2 = 16; // 16 bits of fractional values + constexpr float sc_mul = float (1 << sc_l2); + constexpr int qrs_shf = sc_l2 - 9; + constexpr int qrs_inc = int (alpha1 * sc_mul + 0.5f); + uint32_t qrs_cnt = uint32_t (std::llrint ( + (alpha2 * double (ctx._y + ctx._qrs_seed)) * sc_mul + )); + + const __m128i qrs_inc_4 = _mm_set1_epi32 (4 * qrs_inc); + __m128i qrs_cnt_4 = _mm_set1_epi32 (qrs_cnt); + const __m128i qrs_ofs = _mm_set_epi32 (qrs_inc * 3, qrs_inc * 2, qrs_inc, 0); + qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_ofs); + const __m128i qrs_msk = _mm_set1_epi32 (0x1FF); + const __m128i c128 = _mm_set1_epi16 (128); + const __m128i c256 = _mm_set1_epi16 (256); + const __m128i c384 = _mm_set1_epi16 (384); + + process_seg_common_flt_int_sse2 < + S_FLAG, TN_FLAG, DST_FMT, DST_BITS, SRC_FMT + > (dst_ptr, src_ptr, w, ctx, + [&] (int /*pos*/) + { + auto p03 = _mm_srli_epi32 (qrs_cnt_4, qrs_shf); + p03 = _mm_and_si128 (p03, qrs_msk); + qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_inc_4); + auto p47 = _mm_srli_epi32 (qrs_cnt_4, qrs_shf); + p47 = _mm_and_si128 (p47, qrs_msk); + qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_inc_4); + const auto p = _mm_packs_epi32 (p03, p47); + const auto tri_a = _mm_sub_epi16 (p, c128); + const auto tri_d = _mm_sub_epi16 (c384, p); + const auto cond = _mm_cmplt_epi16 (p, c256); + 
auto dith_o = _mm_or_si128 ( + _mm_and_si128 (cond, tri_a), + _mm_andnot_si128 (cond, tri_d) + ); + + if (TO_FLAG) + { + dith_o = remap_tpdf_vec (dith_o); + } + + return dith_o; // 8 s16 [-128 ; +127] + } + ); +} + + + +#endif // fstb_ARCHI_X86 + + + +// Scalar integer-to-integer bitdepth reduction of a segment of w pixels. +// dither_fnc (pos) supplies the ordered dither value of each pixel. When S_FLAG +// is false, noise from generate_dith_n_scalar() is mixed in, both parts being +// weighted by ctx._amp._o_i and ctx._amp._n_i, and the random state is scrambled +// once more at the end of the segment. Output is clamped to DST_BITS bits. +template +void Dither::process_seg_common_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + + constexpr int dif_bits = SRC_BITS - DST_BITS; + static_assert (dif_bits >= 1, "This function must reduce bidepth."); + + uint32_t & rnd_state = ctx._rnd_state; + + const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast (src_ptr); + DST_TYPE * fstb_RESTRICT dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); + + const int rcst = 1 << (dif_bits - 1); + const int vmax = (1 << DST_BITS) - 1; + + const int ao = ctx._amp._o_i; // s8 + const int an = ctx._amp._n_i; // s8 + + for (int pos = 0; pos < w; ++pos) + { + const int s = src_n_ptr [pos]; + + const int dith_o = dither_fnc (pos); // s8 + int dither; + if (S_FLAG) + { + constexpr int dit_shft = 8 - dif_bits; + dither = fstb::sshift_r (dith_o); + } + else + { + const int dith_n = generate_dith_n_scalar (rnd_state); // s8 + + constexpr int dit_shft = _amp_bits + 8 - dif_bits; + dither = fstb::sshift_r (dith_o * ao + dith_n * an); // s16 = s8 * s8 // s16 = s16 >> cst + } + const int sum = s + dither; // s16+ + const int quant = (sum + rcst) >> dif_bits; // s16 + + const int pix = fstb::limit (quant, 0, vmax); + dst_n_ptr [pos] = static_cast (pix); + } + + if (! 
S_FLAG) + { + generate_rnd_eol (rnd_state); + } +} + + + +// int dither_fnc (int pos) noexcept; +// Must provide the ordered dither value, in [-128 ; +127] nominal range +// (doubled for TPDF) +// Scalar float-to-integer variant: each source value is multiplied by +// ctx._scale_info_ptr->_gain and offset by _add_cst before the dither is added +// and the sum rounded with fstb::round_int(). +template +void Dither::process_seg_common_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + + const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast (src_ptr); + DST_TYPE * fstb_RESTRICT dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); + + uint32_t & rnd_state = ctx._rnd_state; + + const int ao = ctx._amp._o_i; // s8 + const int an = ctx._amp._n_i; // s8 + + const float mul = float (ctx._scale_info_ptr->_gain); + const float add = float (ctx._scale_info_ptr->_add_cst); + const float qt = 1.0f / (1 << ((S_FLAG ? 0 : _amp_bits) + 8)); + const int vmax = (1 << DST_BITS) - 1; + + for (int pos = 0; pos < w; ++pos) + { + float s = float (src_n_ptr [pos]); + s = s * mul + add; + + const int dith_o = dither_fnc (pos); // s8 + + float dither; + if (S_FLAG) + { + dither = float (dith_o) * qt; + } + else + { + const int dith_n = generate_dith_n_scalar (rnd_state); // s8 + dither = float (dith_o * ao + dith_n * an) * qt; + } + const float sum = s + dither; + const int quant = fstb::round_int (sum); + + const int pix = fstb::limit (quant, 0, vmax); + dst_n_ptr [pos] = static_cast (pix); + } + + if (! 
S_FLAG) + { + generate_rnd_eol (rnd_state); + } +} + + + +template +int Dither::generate_dith_n_scalar (uint32_t &rnd_state) noexcept +{ + generate_rnd (rnd_state); + int dith_n = int8_t (rnd_state >> 24); + if (T_FLAG) + { + generate_rnd (rnd_state); + dith_n += int8_t (rnd_state >> 24); + } + + return dith_n; +} + + + +int Dither::remap_tpdf_scalar (int d) noexcept +{ + // [-128 ; 127] to [-32767 ; +32767], representing [-1 ; 1] (15-bit scale) + auto x2 = d * d; + x2 += x2; + x2 = std::min (x2, 0x7FFFF); // Saturated here because of the -min * -min overflow + // NOTE(review): with d in [-128 ; 127], x2 never exceeds 0x8000, so this + // limit of 0x7FFFF has no effect. remap_tpdf_vec() saturates at 0x7FFF + // instead (_mm_adds_epi16); confirm which bound is intended. + auto x4 = (x2 * x2 ) >> 15; + auto x8 = (x4 * x4 ) >> 15; + auto x16 = (x8 * x8 ) >> 15; + auto x32 = (x16 * x16) >> 15; + + // 15-bit scale + constexpr int c3 = 0x8000 * 5 / 8; + constexpr int c33 = 0x8000 * 3 / 8; + + // 15-bit scale + auto sum_s15 = (x2 * c3 + x32 * c33) >> 15; + const auto x_s15 = d << 8; + const auto sum_s7 = (sum_s15 * x_s15) >> (30 - 7); + + d += sum_s7; + + return d; +} + + + +#if (fstb_ARCHI == fstb_ARCHI_X86) + + + +// __m128i dither_fnc (int pos) noexcept; +// Must provide the ordered dither values as a vector of 8 x int16_t, +// in [-128 ; +127] nominal range (doubled for TPDF) +template +void Dither::process_seg_common_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + + constexpr int dif_bits = SRC_BITS - DST_BITS; + static_assert (dif_bits >= 0, "This function cannot increase bidepth."); + // NOTE(review): dif_bits == 0 satisfies this assertion, but rcst below is + // built from 1 << (dif_bits - 1); confirm equal bitdepths never reach here. + + uint32_t & rnd_state = ctx._rnd_state; + + typedef typename ProxyRwSse2 ::PtrConst::Type SrcPtr; + typedef typename ProxyRwSse2 ::Ptr::Type DstPtr; + SrcPtr src_n_ptr = reinterpret_cast (src_ptr); + DstPtr dst_n_ptr = reinterpret_cast (dst_ptr); + const __m128i zero = _mm_setzero_si128 (); + const __m128i mask_lsb = _mm_set1_epi16 (0x00FF); + const __m128i sign_bit = _mm_set1_epi16 (-0x8000); + const __m128i rcst = 
_mm_set1_epi16 (1 << (dif_bits - 1)); + const __m128i vmax = _mm_set1_epi16 ((1 << DST_BITS) - 1); + + const __m128i ampo_i = _mm_set1_epi16 (int16_t (ctx._amp._o_i)); // 8 ?16 [0 ; 255] + const __m128i ampn_i = _mm_set1_epi16 (int16_t (ctx._amp._n_i)); // 8 ?16 [0 ; 255] + + for (int pos = 0; pos < w; pos += 8) + { + const __m128i s = // 8 u16 + ProxyRwSse2 ::read_i16 (src_n_ptr + pos, zero); + + // 8 s16 [-128 ; +127] or [-256 ; 255] + __m128i dith_o = dither_fnc (pos); + + __m128i dither; + if (S_FLAG) + { + constexpr int dit_shft = 8 - dif_bits; + dither = _mm_srai_epi16 (dith_o, dit_shft); + } + else + { + // Random generation. 8 s16 [-128 ; 127] or [-256 ; 255] + __m128i dith_n = generate_dith_n_vec (rnd_state); + + dith_o = _mm_mullo_epi16 (dith_o, ampo_i); // 8 s16 (full range) + dith_n = _mm_mullo_epi16 (dith_n, ampn_i); // 8 s16 (full range) + dither = _mm_adds_epi16 (dith_o, dith_n); // 8 s16 = s8 * s8 + + constexpr int dit_shft = _amp_bits + 8 - dif_bits; + dither = _mm_srai_epi16 (dither, dit_shft); // 8 s16 = s16 >> cst + } + + const __m128i dith_rcst = _mm_adds_epi16 (dither, rcst); + + __m128i quant; + if (S_FLAG && SRC_BITS < 16) + { + __m128i sum = _mm_adds_epi16 (s, dith_rcst); + quant = _mm_srai_epi16 (sum, dif_bits); + } + else + { + __m128i sum = _mm_xor_si128 (s, sign_bit); // 8 s16 + sum = _mm_adds_epi16 (sum, dith_rcst); + sum = _mm_xor_si128 (sum, sign_bit); // 8 u16 + quant = _mm_srli_epi16 (sum, dif_bits); + } + + __m128i pix = quant; + if (SRC_BITS < 16) + { + pix = _mm_max_epi16 (pix, zero); + pix = _mm_min_epi16 (pix, vmax); + } + + ProxyRwSse2 ::write_i16 (dst_n_ptr + pos, pix, mask_lsb); + } + + if (! 
S_FLAG) + { + generate_rnd_eol (rnd_state); + } +} + + + +template +void Dither::process_seg_common_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + assert (((_mm_getcsr () >> 13) & 3) == 0); // 00 = Round to nearest (even) + + uint32_t & rnd_state = ctx._rnd_state; + + const float qt_cst = 1.0f / ( + 65536.0f * float (1 << ((S_FLAG ? 0 : _amp_bits) + 8)) + ); + + typedef typename ProxyRwSse2 ::PtrConst::Type SrcPtr; + typedef typename ProxyRwSse2 ::Ptr::Type DstPtr; + SrcPtr src_n_ptr = reinterpret_cast (src_ptr); + DstPtr dst_n_ptr = reinterpret_cast (dst_ptr); + const __m128 zero_f = _mm_setzero_ps (); + const __m128i zero_i = _mm_setzero_si128 (); + const __m128 mul = _mm_set1_ps (float (ctx._scale_info_ptr->_gain)); + const __m128 add = _mm_set1_ps (float (ctx._scale_info_ptr->_add_cst)); + const __m128 qt = _mm_set1_ps (qt_cst); + const __m128 vmax = _mm_set1_ps ((1 << DST_BITS) - 1); + const __m128 offset = _mm_set1_ps (-32768); + const __m128i mask_lsb = _mm_set1_epi16 (0x00FF); + const __m128i sign_bit = _mm_set1_epi16 (-0x8000); + + const __m128i ampo_i = _mm_set1_epi16 (int16_t (ctx._amp._o_i)); // 8 ?16 [0 ; 255] + const __m128i ampn_i = _mm_set1_epi16 (int16_t (ctx._amp._n_i)); // 8 ?16 [0 ; 255] + + for (int pos = 0; pos < w; pos += 8) + { + __m128 s0; + __m128 s1; + ProxyRwSse2 ::read_flt ( + src_n_ptr + pos, s0, s1, zero_i + ); + s0 = _mm_add_ps (_mm_mul_ps (s0, mul), add); + s1 = _mm_add_ps (_mm_mul_ps (s1, mul), add); + + // 8 s16 [-128 ; +127] or [-256 ; 255] + __m128i dith_o = dither_fnc (pos); + + __m128i dither; + if (S_FLAG) + { + dither = dith_o; + } + else + { + // Random generation. 
8 s16 [-128 ; 127] or [-256 ; 255] + __m128i dith_n = generate_dith_n_vec (rnd_state); + + dith_o = _mm_mullo_epi16 (dith_o, ampo_i); // 8 s16 (full range) + dith_n = _mm_mullo_epi16 (dith_n, ampn_i); // 8 s16 (full range) + dither = _mm_adds_epi16 (dith_o, dith_n); // 8 s16 = s8 * s8 + } + + __m128i dither_03i = _mm_unpacklo_epi16 (zero_i, dither); // 4 s32 << 16 + __m128i dither_47i = _mm_unpackhi_epi16 (zero_i, dither); // 4 s32 << 16 + __m128 dither_03 = _mm_cvtepi32_ps (dither_03i); + __m128 dither_47 = _mm_cvtepi32_ps (dither_47i); + dither_03 = _mm_mul_ps (dither_03, qt); + dither_47 = _mm_mul_ps (dither_47, qt); + + s0 = _mm_add_ps (s0, dither_03); + s1 = _mm_add_ps (s1, dither_47); + + s0 = _mm_max_ps (_mm_min_ps (s0, vmax), zero_f); + s1 = _mm_max_ps (_mm_min_ps (s1, vmax), zero_f); + + ProxyRwSse2 ::write_flt ( + dst_n_ptr + pos, s0, s1, mask_lsb, sign_bit, offset + ); + } + + if (! S_FLAG) + { + generate_rnd_eol (rnd_state); + } +} + + + +// Produces 8 random noise values as s16 from two steps of generate_rnd(), or +// four steps when T_FLAG is set. Each output lane is built from one byte of the +// generated 32-bit words; with T_FLAG two such bytes are summed per lane. +template +__m128i Dither::generate_dith_n_vec (uint32_t &rnd_state) noexcept +{ + generate_rnd (rnd_state); + const uint32_t rnd_03 = rnd_state; + generate_rnd (rnd_state); + const uint32_t rnd_47 = rnd_state; + const auto zero = _mm_setzero_si128 (); + + if (T_FLAG) + { + generate_rnd (rnd_state); + const uint32_t rnd_03x = rnd_state; + generate_rnd (rnd_state); + const uint32_t rnd_47x = rnd_state; + const auto rnd_val = _mm_set_epi32 (rnd_47x, rnd_03x, rnd_47, rnd_03); + const auto c256_16 = _mm_set1_epi16 (0x100); + const auto x0 = _mm_unpacklo_epi8 (rnd_val, zero); + const auto x1 = _mm_unpackhi_epi8 (rnd_val, zero); + const auto dith_n = _mm_sub_epi16 (_mm_add_epi16 (x0, x1), c256_16); + return dith_n; // 8 s16 [-256 ; 255] + } + + else + { + const auto rnd_val = _mm_set_epi32 (0, 0, rnd_47, rnd_03); + const auto c128_16 = _mm_set1_epi16 (0x80); + const auto x0 = _mm_unpacklo_epi8 (rnd_val, zero); // 8 ?16 [0 ; 255] + const auto dith_n = _mm_sub_epi16 (x0, c128_16); + + return dith_n; // 8 s16 [-128 ; 127] + } +} 
+ + + +// d: 8 s16 [-128 ; 127] +// Returns: 8 s16 [-256 ; 255] +// Formula: +// f: [-1 ; +1] -> [-2 ; +2] +// x -> x + 5/8 * x^3 + 3/8 * x^33 +// as an approximation of: +// x -> 2 * sign (x) * (1 - sqrt (1 - abs (x))) +__m128i Dither::remap_tpdf_vec (__m128i d) noexcept +{ + // [-128 ; 127] to [-32767 ; +32767], representing [-1 ; 1] (15-bit scale) + auto x2 = _mm_mullo_epi16 (d , d ); + x2 = _mm_adds_epi16 (x2 , x2 ); // Saturated here because of the -min * -min overflow + auto x4 = _mm_mulhi_epi16 (x2 , x2 ); + x4 = _mm_add_epi16 (x4 , x4 ); + auto x8 = _mm_mulhi_epi16 (x4 , x4 ); + x8 = _mm_add_epi16 (x8 , x8 ); + auto x16 = _mm_mulhi_epi16 (x8 , x8 ); + x16 = _mm_add_epi16 (x16, x16); + auto x32 = _mm_mulhi_epi16 (x16, x16); + x32 = _mm_add_epi16 (x32, x32); + + // 15-bit scale + const auto c3 = _mm_set1_epi16 (0x8000 * 5 / 8); + const auto c33 = _mm_set1_epi16 (0x8000 * 3 / 8); + + // 14-bit scale, losing a bit of precision at each mul + auto sum_s14 = _mm_mulhi_epi16 (x2, c3); + sum_s14 = _mm_add_epi16 (sum_s14, _mm_mulhi_epi16 (x32, c33)); + + const auto x_s15 = _mm_slli_epi16 (d, 8); + const auto sum_s13 = _mm_mulhi_epi16 (sum_s14, x_s15); + + const auto sum_s7 = _mm_srai_epi16 (sum_s13, 13 - 7); + + d = _mm_add_epi16 (d, sum_s7); + + return d; +} + + + +#endif + + + +// Out-of-class definitions of the static constexpr members of ErrDifAddParam. +template +constexpr int Dither::ErrDifAddParam ::_dst_bits; +template +constexpr int Dither::ErrDifAddParam ::_src_bits; +template +constexpr int Dither::ErrDifAddParam ::_nbr_err_lines; + + + +// Scalar integer-to-integer error-diffusion dithering of one line of w pixels. +// Even lines (ctx._y & 1 == 0) are scanned left to right, odd lines right to +// left. ERRDIF supplies the diffusion kernel through diffuse() and +// prepare_next_line(); the errors carried over to the following pixels are +// loaded from and stored back to *ctx._ed_buf_ptr. +template +void Dither::process_seg_errdif_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + assert (ctx._y >= 0); + + typedef typename ERRDIF::SrcType SRC_TYPE; + typedef typename ERRDIF::DstType DST_TYPE; + constexpr int src_bits = ERRDIF::_src_bits; + constexpr int dst_bits = ERRDIF::_dst_bits; + + uint32_t & rnd_state = ctx._rnd_state; + ErrDifBuf 
& fstb_RESTRICT ed_buf = *ctx._ed_buf_ptr; + + const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast (src_ptr); + DST_TYPE * fstb_RESTRICT dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); + + const int ae = ctx._amp._e_i; + + // Makes e1 point on the default buffer line for single-line + // error diffusor because we use it in prepare_next_line() + int e0 = 0; + int e1 = 0; + if (ERRDIF::_nbr_err_lines == 2) + { + e0 = ctx._y & 1 ; + e1 = 1 - (ctx._y & 1); + } + int16_t * err0_ptr = ed_buf.get_buf (e0); + int16_t * err1_ptr = ed_buf.get_buf (e1); + + int err_nxt0 = ed_buf.use_mem (0); + int err_nxt1 = ed_buf.use_mem (1); + + // Forward + if ((ctx._y & 1) == 0) + { + for (int x = 0; x < w; ++x) + { + int err = err_nxt0; + SRC_TYPE src_raw; + + quantize_pix_int < + S_FLAG, T_FLAG, DST_TYPE, dst_bits, SRC_TYPE, src_bits + > ( + dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, ctx._amp._n_i + ); + ERRDIF::template diffuse <1> ( + err, err_nxt0, err_nxt1, + err0_ptr + x, err1_ptr + x, src_raw + ); + } + ERRDIF::prepare_next_line (err1_ptr + w); + } + + // Backward + else + { + for (int x = w - 1; x >= 0; --x) + { + int err = err_nxt0; + SRC_TYPE src_raw; + + quantize_pix_int < + S_FLAG, T_FLAG, DST_TYPE, dst_bits, SRC_TYPE, src_bits + > ( + dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, ctx._amp._n_i + ); + ERRDIF::template diffuse <-1> ( + err, err_nxt0, err_nxt1, + err0_ptr + x, err1_ptr + x, src_raw + ); + } + ERRDIF::prepare_next_line (err1_ptr - 1); + } + + ed_buf.use_mem (0) = int16_t (err_nxt0); + ed_buf.use_mem (1) = int16_t (err_nxt1); + + if (! 
S_FLAG) + { + generate_rnd_eol (rnd_state); + } +} + + + +template +void Dither::process_seg_errdif_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (w > 0); + assert (ctx._y >= 0); + + typedef typename ERRDIF::SrcType SRC_TYPE; + typedef typename ERRDIF::DstType DST_TYPE; + constexpr int dst_bits = ERRDIF::_dst_bits; + + uint32_t & rnd_state = ctx._rnd_state; + ErrDifBuf & fstb_RESTRICT ed_buf = *ctx._ed_buf_ptr; + + const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast (src_ptr); + DST_TYPE * fstb_RESTRICT dst_n_ptr = reinterpret_cast < DST_TYPE *> (dst_ptr); + + const float mul = float (ctx._scale_info_ptr->_gain); + const float add = float (ctx._scale_info_ptr->_add_cst); + const float ae = float (ctx._amp._e_f); + const float an = float (ctx._amp._n_f); + + // Makes e1 point on the default buffer line for single-line + // error diffusor because we use it in prepare_next_line() + int e0 = 0; + int e1 = 0; + if (ERRDIF::_nbr_err_lines == 2) + { + e0 = ctx._y & 1 ; + e1 = 1 - (ctx._y & 1); + } + float * err0_ptr = ed_buf.get_buf (e0); + float * err1_ptr = ed_buf.get_buf (e1); + + float err_nxt0 = ed_buf.use_mem (0); + float err_nxt1 = ed_buf.use_mem (1); + + // Forward + if ((ctx._y & 1) == 0) + { + for (int x = 0; x < w; ++x) + { + float err = err_nxt0; + SRC_TYPE src_raw; + + quantize_pix_flt ( + dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, an, mul, add + ); + ERRDIF::template diffuse <1> ( + err, err_nxt0, err_nxt1, + err0_ptr + x, err1_ptr + x, src_raw + ); + } + ERRDIF::prepare_next_line (err1_ptr + w); + } + + // Backward + else + { + for (int x = w - 1; x >= 0; --x) + { + float err = err_nxt0; + SRC_TYPE src_raw; + + quantize_pix_flt ( + dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, an, mul, add + ); + ERRDIF::template diffuse <-1> ( + err, err_nxt0, err_nxt1, + err0_ptr + x, err1_ptr + x, 
src_raw + ); + } + ERRDIF::prepare_next_line (err1_ptr - 1); + } + + ed_buf.use_mem (0) = err_nxt0; + ed_buf.use_mem (1) = err_nxt1; + + if (! S_FLAG) + { + generate_rnd_eol (rnd_state); + } +} + + + +void Dither::generate_rnd (uint32_t &state) noexcept +{ + state = state * uint32_t (1664525) + 1013904223; +} + + + +void Dither::generate_rnd_eol (uint32_t &state) noexcept +{ + state = state * uint32_t (1103515245) + 12345; + if ((state & 0x2000000) != 0) + { + state = state * uint32_t (134775813) + 1; + } +} + + + +const Dither::PatRow & Dither::SegContext::extract_pattern_row () const noexcept +{ + assert (_pattern_ptr != nullptr); + assert (_y >= 0); + + return ((*_pattern_ptr) [_y & (_max_pat_width - 1)]); +} + + + +template +void Dither::quantize_pix_int (DST_TYPE * fstb_RESTRICT dst_ptr, const SRC_TYPE * fstb_RESTRICT src_ptr, SRC_TYPE &src_raw, int x, int & fstb_RESTRICT err, uint32_t &rnd_state, int ampe_i, int ampn_i) noexcept +{ + constexpr int dif_bits = SRC_BITS - DST_BITS; + constexpr int tmp_bits = + (dif_bits < 6 && SRC_BITS < _err_res && DST_BITS < _err_res) + ? _err_res + : SRC_BITS; + constexpr int tmp_shft = tmp_bits - SRC_BITS; + constexpr int tmp_invs = tmp_bits - DST_BITS; + + const int rcst = 1 << (tmp_invs - 1); + const int vmax = (1 << DST_BITS) - 1; + + src_raw = src_ptr [x]; + const int src = src_raw << tmp_shft; + const int preq = src + err; + + int sum = preq; + if (! S_FLAG) + { + constexpr int dit_shft = _amp_bits + 8 - tmp_invs; // May be negative + + const int dith_n = generate_dith_n_scalar (rnd_state); // s8 + const int err_add = (err < 0) ? 
-ampe_i : ampe_i; + const int noise = + fstb::sshift_r (dith_n * ampn_i + err_add); // s16 = s8 * s8 // s16 = s16 >> cst + + sum += noise; + } + + const int quant = (sum + rcst) >> tmp_invs; + + err = preq - (quant << tmp_invs); + const int pix = fstb::limit (quant, 0, vmax); + + dst_ptr [x] = static_cast (pix); +} + + + +// Selects the value stored in src_raw by quantize_pix_flt(): the pixel as read +// for non-float sources, the scaled value for float sources. +template +static inline SRC_TYPE Dither_extract_src (SRC_TYPE src_read, float src) noexcept +{ + fstb::unused (src); + + return (src_read); +} + +static inline float Dither_extract_src (float src_read, float src) noexcept +{ + fstb::unused (src_read); + + return (src); +} + +// Quantizes pixel x after scaling it by mul and add: writes the clamped result +// to dst_ptr [x] and replaces err with the new quantization error, measured +// before noise is added and before clamping. +template +void Dither::quantize_pix_flt (DST_TYPE * fstb_RESTRICT dst_ptr, const SRC_TYPE * fstb_RESTRICT src_ptr, SRC_TYPE &src_raw, int x, float & fstb_RESTRICT err, uint32_t &rnd_state, float ampe_f, float ampn_f, float mul, float add) noexcept +{ + const int vmax = (1 << DST_BITS) - 1; + + const SRC_TYPE src_read = src_ptr [x]; + const float src = float (src_read) * mul + add; + src_raw = Dither_extract_src (src_read, src); + const float preq = src + err; + + float sum = preq; + if (! S_FLAG) + { + const int dith_n = generate_dith_n_scalar (rnd_state); // s8 + const float err_add = (err < 0) ? -ampe_f : (err > 0) ? ampe_f : 0; + // NOTE(review): unlike quantize_pix_int(), a zero error adds no bias here. + const float noise = float (dith_n) * ampn_f + err_add; + + sum += noise; + } + + const int quant = fstb::round_int (sum); + + err = preq - float (quant); + const int pix = fstb::limit (quant, 0, vmax); + + dst_ptr [x] = static_cast (pix); +} + + + +// Original coefficients : 7, 3, 5, 1 +// Optimised coefficients for serpentine scan: 7, 4, 5, 0 +// Source: +// Sam Hocevar and Gary Niger, +// Reinstating Floyd-Steinberg: Improved Metrics for Quality Assessment +// of Error Diffusion Algorithms, +// Lecture Notes in Computer Science LNCS 5099, pp. 
3845, 2008 +// (Proceedings of the International Conference on Image and Signal Processing +// ICISP 2008) ISSN 0302-9743 +// NOTE(review): the page reference "3845" above is presumably "38-45" with the +// dash lost to an encoding problem; confirm against the publication. + +#define fmtcl_Dither_FS_OPTIMIZED_SERPENTINE_COEF + +// Integer Floyd-Steinberg kernel. The error is split in sixteenths with +// rounding; e7 takes whatever remains so that the parts always sum to err. +template +template +void Dither::DiffuseFloydSteinberg ::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (err_nxt1, err1_ptr, src_raw); + +#if defined (fmtcl_Dither_FS_OPTIMIZED_SERPENTINE_COEF) + const int e1 = 0; + const int e3 = (err * 4 + 8) >> 4; +#else + const int e1 = (err + 8) >> 4; + const int e3 = (err * 3 + 8) >> 4; +#endif + const int e5 = (err * 5 + 8) >> 4; + const int e7 = err - e1 - e3 - e5; + spread_error (e1, e3, e5, e7, err_nxt0, err0_ptr); +} + +template +template +void Dither::DiffuseFloydSteinberg ::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (err_nxt1, err1_ptr, src_raw); + +#if defined (fmtcl_Dither_FS_OPTIMIZED_SERPENTINE_COEF) + const float e1 = 0; + const float e3 = err * (4.0f / 16); +#else + const float e1 = err * (1.0f / 16); + const float e3 = err * (3.0f / 16); +#endif + const float e5 = err * (5.0f / 16); + const float e7 = err * (7.0f / 16); + spread_error (e1, e3, e5, e7, err_nxt0, err0_ptr); +} + +template +template +void Dither::DiffuseFloydSteinberg ::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept +{ + // Nothing + fstb::unused (err_ptr); +} + +template +template +void Dither::DiffuseFloydSteinberg ::spread_error (ET e1, ET e3, ET e5, ET e7, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept +{ + err_nxt0 = err0_ptr [DIR]; + err0_ptr [-DIR] += EB (e3); + err0_ptr [ 0] += EB (e5); + err0_ptr [ DIR] = EB (e1); + err_nxt0 += e7; +} + + + +template +template +void Dither::DiffuseFilterLite ::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & 
fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (err_nxt1, err1_ptr, src_raw); + + const int e1 = (err + 2) >> 2; + const int e2 = err - 2 * e1; + spread_error (e1, e2, err_nxt0, err0_ptr); +} + +template +template +void Dither::DiffuseFilterLite ::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (err_nxt1, err1_ptr, src_raw); + + const float e1 = err * (1.0f / 4); + const float e2 = err * (2.0f / 4); + spread_error (e1, e2, err_nxt0, err0_ptr); +} + +template +template +void Dither::DiffuseFilterLite ::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept +{ + err_ptr [0] = EB (0); +} + +template +template +void Dither::DiffuseFilterLite ::spread_error (ET e1, ET e2, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept +{ + err_nxt0 = err0_ptr [DIR]; + err0_ptr [-DIR] += EB (e1); + err0_ptr [ 0] = EB (e1); + err_nxt0 += e2; +} + + + +template +template +void Dither::DiffuseStucki ::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (src_raw); + + const int m = (err << 4) / 42; + const int e1 = (m + 8) >> 4; + const int e2 = (m + 4) >> 3; + const int e4 = (m + 2) >> 2; +// const int e8 = (m + 1) >> 1; + const int sum = (e1 << 1) + ((e2 + e4) << 2); + const int e8 = (err - sum + 1) >> 1; + spread_error (e1, e2, e4, e8, err_nxt0, err_nxt1, err0_ptr, err1_ptr); +} + +template +template +void Dither::DiffuseStucki ::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (src_raw); + + const float e1 = err * (1.0f / 42); + const float e2 = err * (2.0f / 
42); + const float e4 = err * (4.0f / 42); + const float e8 = err * (8.0f / 42); + spread_error (e1, e2, e4, e8, err_nxt0, err_nxt1, err0_ptr, err1_ptr); +} + +template +template +void Dither::DiffuseStucki ::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept +{ + // Nothing + fstb::unused (err_ptr); +} + +template +template +void Dither::DiffuseStucki ::spread_error (ET e1, ET e2, ET e4, ET e8, ET & fstb_RESTRICT err_nxt0, ET & fstb_RESTRICT err_nxt1, EB * fstb_RESTRICT err0_ptr, EB * fstb_RESTRICT err1_ptr) noexcept +{ + err_nxt0 = err_nxt1 + e8; + err_nxt1 = err1_ptr [DIR * 2] + e4; + err0_ptr [-DIR * 2] += EB (e2); + err0_ptr [-DIR ] += EB (e4); + err0_ptr [ 0 ] += EB (e8); + err0_ptr [ DIR ] += EB (e4); + err0_ptr [ DIR * 2] += EB (e2); + err1_ptr [-DIR * 2] += EB (e1); + err1_ptr [-DIR ] += EB (e2); + err1_ptr [ 0 ] += EB (e4); + err1_ptr [ DIR ] += EB (e2); + err1_ptr [ DIR * 2] = EB (e1); +} + + + +// Atkinson kernel: spread_error() hands one eighth of the error (e1) to each of +// six destinations, so only three quarters of the error are propagated. +template +template +void Dither::DiffuseAtkinson ::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (src_raw); + + const int e1 = (err + 4) >> 3; + spread_error (e1, err_nxt0, err_nxt1, err0_ptr, err1_ptr); +} + +template +template +void Dither::DiffuseAtkinson ::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (src_raw); + + const float e1 = err * (1.0f / 8); + spread_error (e1, err_nxt0, err_nxt1, err0_ptr, err1_ptr); +} + +template +template +void Dither::DiffuseAtkinson ::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept +{ + err_ptr [0] = EB (0); +} + +template +template +void Dither::DiffuseAtkinson ::spread_error (ET e1, ET & fstb_RESTRICT err_nxt0, ET & fstb_RESTRICT err_nxt1, EB * fstb_RESTRICT err0_ptr, EB * fstb_RESTRICT err1_ptr) noexcept +{ + err_nxt0 = 
err_nxt1 + e1; + err_nxt1 = err1_ptr [2 * DIR] + e1; + err0_ptr [-DIR] += EB (e1); + err0_ptr [ 0] += EB (e1); + err0_ptr [+DIR] += EB (e1); + err1_ptr [ 0] = EB (e1); +} + + + +constexpr int Dither::DiffuseOstromoukhovBase::_t_bits; +constexpr int Dither::DiffuseOstromoukhovBase::_t_len; +constexpr int Dither::DiffuseOstromoukhovBase::_t_mask; + + + +template +template +int Dither::DiffuseOstromoukhovBase2 ::get_index (SRC_TYPE src_raw) noexcept +{ + constexpr int dif_bits = SRC_BITS - DST_BITS; + + return (fstb::sshift_l < + int, + DiffuseOstromoukhovBase::_t_bits - dif_bits + > (src_raw) & DiffuseOstromoukhovBase::_t_mask); +} + +template +int Dither::DiffuseOstromoukhovBase2 ::get_index (float src_raw) noexcept +{ + return + fstb::round_int (src_raw * DiffuseOstromoukhovBase::_t_len) + & DiffuseOstromoukhovBase::_t_mask; +} + +// Victor Ostromoukhov, +// A Simple and Efficient Error-Diffusion Algorithm +// Proceedings of SIGGRAPH 2001, in ACM Computer Graphics, +// Annual Conference Series, pp. 567-572, 2001. 
+// Not optimised at all +// The diffusion coefficients depend on the source pixel: src_raw selects an +// entry of _table, whose _c0 and _c1 weights are divided by _sum; the third +// part e3 takes the remainder so that the parts always sum to err. +template +template +void Dither::DiffuseOstromoukhov ::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (err_nxt1, err1_ptr); + + constexpr int dif_bits = SRC_BITS - DST_BITS; + + const int index = fstb::sshift_l < + int, + DiffuseOstromoukhov::_t_bits - dif_bits + > (src_raw) & DiffuseOstromoukhov::_t_mask; + const typename ThisType::TableEntry & fstb_RESTRICT te = ThisType::_table [index]; + const int d = te._sum; + + const int e1 = err * te._c0 / d; + const int e2 = err * te._c1 / d; + const int e3 = err - e1 - e2; + + spread_error (e1, e2, e3, err_nxt0, err0_ptr); +} + +// Float variant: same table lookup through get_index(), using the precomputed +// reciprocal _inv_sum instead of a division. +template +template +void Dither::DiffuseOstromoukhov ::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept +{ + fstb::unused (err_nxt1, err1_ptr); + + const int index = DiffuseOstromoukhov::get_index (src_raw); + const typename ThisType::TableEntry & fstb_RESTRICT te = ThisType::_table [index]; + const float invd = te._inv_sum; + + const float e1 = err * float (te._c0) * invd; + const float e2 = err * float (te._c1) * invd; + const float e3 = err - e1 - e2; + + spread_error (e1, e2, e3, err_nxt0, err0_ptr); +} + +template +template +void Dither::DiffuseOstromoukhov ::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept +{ + err_ptr [0] = EB (0); +} + +// e1 goes to the next pixel of the current scan (through err_nxt0), e2 is +// accumulated on the pixel behind in the error line and e3 overwrites the entry +// of the current position. +template +template +void Dither::DiffuseOstromoukhov ::spread_error (ET e1, ET e2, ET e3, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept +{ + err_nxt0 = err0_ptr [DIR]; + err0_ptr [-DIR] += EB (e2); + err0_ptr [ 0] = EB (e3); + err_nxt0 += e1; +} + + + +// Coefficient table looked up by the diffuse() functions above. Each entry +// lists three weights followed by their sum and the reciprocal of that sum +// (presumably _c0, _c1, a third weight, _sum, _inv_sum; confirm the field order +// against the TableEntry declaration). +const std::array < + Dither::DiffuseOstromoukhovBase::TableEntry, + Dither::DiffuseOstromoukhovBase::_t_len +> Dither::DiffuseOstromoukhovBase::_table = +{{ + { 13, 0, 5, 18, 1.0f / 18 }, + 
{ 13, 0, 5, 18, 1.0f / 18 }, + { 21, 0, 10, 31, 1.0f / 31 }, + { 7, 0, 4, 11, 1.0f / 11 }, + { 8, 0, 5, 13, 1.0f / 13 }, + { 47, 3, 28, 78, 1.0f / 78 }, + { 23, 3, 13, 39, 1.0f / 39 }, + { 15, 3, 8, 26, 1.0f / 26 }, + { 22, 6, 11, 39, 1.0f / 39 }, + { 43, 15, 20, 78, 1.0f / 78 }, + { 7, 3, 3, 13, 1.0f / 13 }, + { 501, 224, 211, 936, 1.0f / 936 }, + { 249, 116, 103, 468, 1.0f / 468 }, + { 165, 80, 67, 312, 1.0f / 312 }, + { 123, 62, 49, 234, 1.0f / 234 }, + { 489, 256, 191, 936, 1.0f / 936 }, + { 81, 44, 31, 156, 1.0f / 156 }, + { 483, 272, 181, 936, 1.0f / 936 }, + { 60, 35, 22, 117, 1.0f / 117 }, + { 53, 32, 19, 104, 1.0f / 104 }, + { 237, 148, 83, 468, 1.0f / 468 }, + { 471, 304, 161, 936, 1.0f / 936 }, + { 3, 2, 1, 6, 1.0f / 6 }, + { 481, 314, 185, 980, 1.0f / 980 }, + { 354, 226, 155, 735, 1.0f / 735 }, + { 1389, 866, 685, 2940, 1.0f / 2940 }, + { 227, 138, 125, 490, 1.0f / 490 }, + { 267, 158, 163, 588, 1.0f / 588 }, + { 327, 188, 220, 735, 1.0f / 735 }, + { 61, 34, 45, 140, 1.0f / 140 }, + { 627, 338, 505, 1470, 1.0f / 1470 }, + { 1227, 638, 1075, 2940, 1.0f / 2940 }, + + { 20, 10, 19, 49, 1.0f / 49 }, + { 1937, 1000, 1767, 4704, 1.0f / 4704 }, + { 977, 520, 855, 2352, 1.0f / 2352 }, + { 657, 360, 551, 1568, 1.0f / 1568 }, + { 71, 40, 57, 168, 1.0f / 168 }, + { 2005, 1160, 1539, 4704, 1.0f / 4704 }, + { 337, 200, 247, 784, 1.0f / 784 }, + { 2039, 1240, 1425, 4704, 1.0f / 4704 }, + { 257, 160, 171, 588, 1.0f / 588 }, + { 691, 440, 437, 1568, 1.0f / 1568 }, + { 1045, 680, 627, 2352, 1.0f / 2352 }, + { 301, 200, 171, 672, 1.0f / 672 }, + { 177, 120, 95, 392, 1.0f / 392 }, + { 2141, 1480, 1083, 4704, 1.0f / 4704 }, + { 1079, 760, 513, 2352, 1.0f / 2352 }, + { 725, 520, 323, 1568, 1.0f / 1568 }, + { 137, 100, 57, 294, 1.0f / 294 }, + { 2209, 1640, 855, 4704, 1.0f / 4704 }, + { 53, 40, 19, 112, 1.0f / 112 }, + { 2243, 1720, 741, 4704, 1.0f / 4704 }, + { 565, 440, 171, 1176, 1.0f / 1176 }, + { 759, 600, 209, 1568, 1.0f / 1568 }, + { 1147, 920, 285, 2352, 1.0f / 2352 
}, + { 2311, 1880, 513, 4704, 1.0f / 4704 }, + { 97, 80, 19, 196, 1.0f / 196 }, + { 335, 280, 57, 672, 1.0f / 672 }, + { 1181, 1000, 171, 2352, 1.0f / 2352 }, + { 793, 680, 95, 1568, 1.0f / 1568 }, + { 599, 520, 57, 1176, 1.0f / 1176 }, + { 2413, 2120, 171, 4704, 1.0f / 4704 }, + { 405, 360, 19, 784, 1.0f / 784 }, + { 2447, 2200, 57, 4704, 1.0f / 4704 }, + + { 11, 10, 0, 21, 1.0f / 21 }, + { 158, 151, 3, 312, 1.0f / 312 }, + { 178, 179, 7, 364, 1.0f / 364 }, + { 1030, 1091, 63, 2184, 1.0f / 2184 }, + { 248, 277, 21, 546, 1.0f / 546 }, + { 318, 375, 35, 728, 1.0f / 728 }, + { 458, 571, 63, 1092, 1.0f / 1092 }, + { 878, 1159, 147, 2184, 1.0f / 2184 }, + { 5, 7, 1, 13, 1.0f / 13 }, + { 172, 181, 37, 390, 1.0f / 390 }, + { 97, 76, 22, 195, 1.0f / 195 }, + { 72, 41, 17, 130, 1.0f / 130 }, + { 119, 47, 29, 195, 1.0f / 195 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 65, 18, 17, 100, 1.0f / 100 }, + { 95, 29, 26, 150, 1.0f / 150 }, + { 185, 62, 53, 300, 1.0f / 300 }, + { 30, 11, 9, 50, 1.0f / 50 }, + { 35, 14, 11, 60, 1.0f / 60 }, + { 85, 37, 28, 150, 1.0f / 150 }, + { 55, 26, 19, 100, 1.0f / 100 }, + { 80, 41, 29, 150, 1.0f / 150 }, + { 155, 86, 59, 300, 1.0f / 300 }, + { 5, 3, 2, 10, 1.0f / 10 }, + + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 305, 176, 119, 600, 1.0f / 600 }, + { 155, 86, 59, 300, 1.0f / 300 }, + { 105, 56, 39, 200, 1.0f / 200 }, + { 80, 41, 29, 150, 1.0f / 150 }, + { 65, 32, 23, 120, 1.0f / 120 }, + { 55, 26, 19, 100, 1.0f / 100 
}, + { 335, 152, 113, 600, 1.0f / 600 }, + { 85, 37, 28, 150, 1.0f / 150 }, + { 115, 48, 37, 200, 1.0f / 200 }, + { 35, 14, 11, 60, 1.0f / 60 }, + { 355, 136, 109, 600, 1.0f / 600 }, + { 30, 11, 9, 50, 1.0f / 50 }, + { 365, 128, 107, 600, 1.0f / 600 }, + { 185, 62, 53, 300, 1.0f / 300 }, + { 25, 8, 7, 40, 1.0f / 40 }, + { 95, 29, 26, 150, 1.0f / 150 }, + { 385, 112, 103, 600, 1.0f / 600 }, + { 65, 18, 17, 100, 1.0f / 100 }, + { 395, 104, 101, 600, 1.0f / 600 }, + { 4, 1, 1, 6, 1.0f / 6 }, + + // Symetric + { 4, 1, 1, 6, 1.0f / 6 }, + { 395, 104, 101, 600, 1.0f / 600 }, + { 65, 18, 17, 100, 1.0f / 100 }, + { 385, 112, 103, 600, 1.0f / 600 }, + { 95, 29, 26, 150, 1.0f / 150 }, + { 25, 8, 7, 40, 1.0f / 40 }, + { 185, 62, 53, 300, 1.0f / 300 }, + { 365, 128, 107, 600, 1.0f / 600 }, + { 30, 11, 9, 50, 1.0f / 50 }, + { 355, 136, 109, 600, 1.0f / 600 }, + { 35, 14, 11, 60, 1.0f / 60 }, + { 115, 48, 37, 200, 1.0f / 200 }, + { 85, 37, 28, 150, 1.0f / 150 }, + { 335, 152, 113, 600, 1.0f / 600 }, + { 55, 26, 19, 100, 1.0f / 100 }, + { 65, 32, 23, 120, 1.0f / 120 }, + { 80, 41, 29, 150, 1.0f / 150 }, + { 105, 56, 39, 200, 1.0f / 200 }, + { 155, 86, 59, 300, 1.0f / 300 }, + { 305, 176, 119, 600, 1.0f / 600 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + { 5, 3, 2, 10, 1.0f / 10 }, + + { 5, 3, 2, 10, 1.0f / 10 }, + { 155, 86, 59, 300, 1.0f / 300 }, + { 80, 41, 29, 150, 1.0f / 150 }, + { 55, 26, 19, 100, 1.0f / 100 }, + { 85, 37, 28, 150, 1.0f / 150 }, + { 35, 14, 11, 60, 1.0f / 60 }, + { 30, 11, 9, 50, 1.0f / 50 }, + { 185, 62, 53, 300, 1.0f / 300 }, + { 95, 29, 26, 150, 1.0f / 150 }, + { 65, 18, 17, 100, 1.0f / 100 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 
6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 4, 1, 1, 6, 1.0f / 6 }, + { 119, 47, 29, 195, 1.0f / 195 }, + { 72, 41, 17, 130, 1.0f / 130 }, + { 97, 76, 22, 195, 1.0f / 195 }, + { 172, 181, 37, 390, 1.0f / 390 }, + { 5, 7, 1, 13, 1.0f / 13 }, + { 878, 1159, 147, 2184, 1.0f / 2184 }, + { 458, 571, 63, 1092, 1.0f / 1092 }, + { 318, 375, 35, 728, 1.0f / 728 }, + { 248, 277, 21, 546, 1.0f / 546 }, + { 1030, 1091, 63, 2184, 1.0f / 2184 }, + { 178, 179, 7, 364, 1.0f / 364 }, + { 158, 151, 3, 312, 1.0f / 312 }, + { 11, 10, 0, 21, 1.0f / 21 }, + + { 2447, 2200, 57, 4704, 1.0f / 4704 }, + { 405, 360, 19, 784, 1.0f / 784 }, + { 2413, 2120, 171, 4704, 1.0f / 4704 }, + { 599, 520, 57, 1176, 1.0f / 1176 }, + { 793, 680, 95, 1568, 1.0f / 1568 }, + { 1181, 1000, 171, 2352, 1.0f / 2352 }, + { 335, 280, 57, 672, 1.0f / 672 }, + { 97, 80, 19, 196, 1.0f / 196 }, + { 2311, 1880, 513, 4704, 1.0f / 4704 }, + { 1147, 920, 285, 2352, 1.0f / 2352 }, + { 759, 600, 209, 1568, 1.0f / 1568 }, + { 565, 440, 171, 1176, 1.0f / 1176 }, + { 2243, 1720, 741, 4704, 1.0f / 4704 }, + { 53, 40, 19, 112, 1.0f / 112 }, + { 2209, 1640, 855, 4704, 1.0f / 4704 }, + { 137, 100, 57, 294, 1.0f / 294 }, + { 725, 520, 323, 1568, 1.0f / 1568 }, + { 1079, 760, 513, 2352, 1.0f / 2352 }, + { 2141, 1480, 1083, 4704, 1.0f / 4704 }, + { 177, 120, 95, 392, 1.0f / 392 }, + { 301, 200, 171, 672, 1.0f / 672 }, + { 1045, 680, 627, 2352, 1.0f / 2352 }, + { 691, 440, 437, 1568, 1.0f / 1568 }, + { 257, 160, 171, 588, 1.0f / 588 }, + { 2039, 1240, 1425, 4704, 1.0f / 4704 }, + { 337, 200, 247, 784, 1.0f / 784 }, + { 2005, 1160, 1539, 4704, 1.0f / 4704 }, + { 71, 40, 57, 168, 1.0f / 168 }, + { 657, 360, 551, 1568, 1.0f / 1568 }, + { 977, 520, 855, 2352, 1.0f / 2352 }, + { 1937, 1000, 1767, 4704, 1.0f / 4704 }, + { 20, 10, 19, 49, 1.0f / 49 }, + + { 1227, 638, 1075, 2940, 1.0f / 2940 }, + { 627, 338, 505, 1470, 1.0f / 1470 }, + { 
61, 34, 45, 140, 1.0f / 140 }, + { 327, 188, 220, 735, 1.0f / 735 }, + { 267, 158, 163, 588, 1.0f / 588 }, + { 227, 138, 125, 490, 1.0f / 490 }, + { 1389, 866, 685, 2940, 1.0f / 2940 }, + { 354, 226, 155, 735, 1.0f / 735 }, + { 481, 314, 185, 980, 1.0f / 980 }, + { 3, 2, 1, 6, 1.0f / 6 }, + { 471, 304, 161, 936, 1.0f / 936 }, + { 237, 148, 83, 468, 1.0f / 468 }, + { 53, 32, 19, 104, 1.0f / 104 }, + { 60, 35, 22, 117, 1.0f / 117 }, + { 483, 272, 181, 936, 1.0f / 936 }, + { 81, 44, 31, 156, 1.0f / 156 }, + { 489, 256, 191, 936, 1.0f / 936 }, + { 123, 62, 49, 234, 1.0f / 234 }, + { 165, 80, 67, 312, 1.0f / 312 }, + { 249, 116, 103, 468, 1.0f / 468 }, + { 501, 224, 211, 936, 1.0f / 936 }, + { 7, 3, 3, 13, 1.0f / 13 }, + { 43, 15, 20, 78, 1.0f / 78 }, + { 22, 6, 11, 39, 1.0f / 39 }, + { 15, 3, 8, 26, 1.0f / 26 }, + { 23, 3, 13, 39, 1.0f / 39 }, + { 47, 3, 28, 78, 1.0f / 78 }, + { 8, 0, 5, 13, 1.0f / 13 }, + { 7, 0, 4, 11, 1.0f / 11 }, + { 21, 0, 10, 31, 1.0f / 31 }, + { 13, 0, 5, 18, 1.0f / 18 }, + { 13, 0, 5, 18, 1.0f / 18 } +}}; + + + +} // namespace fmtcl + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/Dither.h b/src/fmtcl/Dither.h new file mode 100644 index 0000000..edcc32a --- /dev/null +++ b/src/fmtcl/Dither.h @@ -0,0 +1,448 @@ +/***************************************************************************** + + Dither.h + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#pragma once +#if ! 
defined (fmtcl_Dither_HEADER_INCLUDED) +#define fmtcl_Dither_HEADER_INCLUDED + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "conc/ObjPool.h" +#include "fmtcl/ColorFamily.h" +#include "fmtcl/BitBltConv.h" +#include "fmtcl/ErrDifBuf.h" +#include "fmtcl/ErrDifBufFactory.h" +#include "fmtcl/SplFmt.h" +#include "fstb/def.h" +#include "fstb/ArrayAlign.h" + +#include +#include +#include + + + +namespace fmtcl +{ + + + +class Dither +{ + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +public: + + static constexpr int _max_nbr_planes = 3; + static constexpr int _max_pat_width = 32; // Number of pixels for halftone dithering + + enum DMode + { + DMode_ROUND_ALIAS = -1, + DMode_BAYER = 0, + DMode_ROUND, // 1 + DMode_FAST, // 2 + DMode_FILTERLITE, // 3 + DMode_STUCKI, // 4 + DMode_ATKINSON, // 5 + DMode_FLOYD, // 6 + DMode_OSTRO, // 7 + DMode_VOIDCLUST, // 8 + DMode_QUASIRND, // 9 + + DMode_NBR_ELT + }; + + explicit Dither ( + SplFmt src_fmt, int src_res, bool src_full_flag, + SplFmt dst_fmt, int dst_res, bool dst_full_flag, + ColorFamily color_fam, int nbr_planes, int w, + DMode dmode, int pat_size, double ampo, double ampn, + bool dyn_flag, bool static_noise_flag, bool correlated_planes_flag, + bool tpdfo_flag, bool tpdfn_flag, + bool sse2_flag, bool avx2_flag + ); + + void process_plane (uint8_t *dst_ptr, int dst_stride, const uint8_t *src_ptr, int src_stride, int w, int h, int frame_index, int plane_index); + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +protected: + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + static constexpr int _pat_period = 4; // Must be a power of 2 (because cycled with & as modulo) + static constexpr int _amp_bits = 5; // Bit depth of the amplitude fractionnal part. The whole thing is 7 bits, and we need a few bits for the integer part. 
+ static constexpr int _err_res = 24; // Resolution (bits) of the temporary data for error diffusion when source bitdepth is not high enough (relative to the destination bitdepth) to guarantee an accurate error diffusion. + static constexpr int _max_unk_width = 65536; // Maximum width (pixels) for variable formats + + class SclInf + { + public: + BitBltConv::ScaleInfo + _info; + BitBltConv::ScaleInfo * // 0 if _info is not used. + _ptr = 0; + }; + + typedef int16_t PatRow [_max_pat_width]; // Contains data in [-128; +127] + typedef PatRow PatData [_max_pat_width]; // [y] [x] + typedef fstb::ArrayAlign PatDataArray; + + class AmpInfo + { + public: + int _o_i = 0; // [0 ; 127], 1.0 = 1 << _amp_bits + int _n_i = 0; // [0 ; 127], 1.0 = 1 << _amp_bits + int _e_i = 0; // [0 ; 2047], 1.0 = 256 + float _e_f = 0; + float _n_f = 0; + }; + + class SegContext + { + public: + inline const PatRow & + extract_pattern_row () const noexcept; + const PatData* _pattern_ptr = nullptr; // Ordered dithering + uint32_t _rnd_state = 0; // Anything excepted fast mode + const BitBltConv::ScaleInfo * // Float processing + _scale_info_ptr = nullptr; + ErrDifBuf * // Error diffusion + _ed_buf_ptr = nullptr; + int _y = -1; // Ordered dithering and error diffusion + uint32_t _qrs_seed = 0; // For the quasirandom sequences + AmpInfo _amp; + }; + + void build_dither_pat (); + void build_dither_pat_round (); + void build_dither_pat_bayer (); + void build_dither_pat_void_and_cluster (int w); + void build_next_dither_pat (); + void copy_dither_pat_rotate (PatData &dst, const PatData &src, int angle) noexcept; + void init_fnc_fast () noexcept; + void init_fnc_ordered () noexcept; + void init_fnc_quasirandom () noexcept; + void init_fnc_errdiff () noexcept; + + void dither_plane (uint8_t *dst_ptr, int dst_stride, const uint8_t *src_ptr, int src_stride, int w, int h, const BitBltConv::ScaleInfo &scale_info, int frame_index, int plane_index); + + template + static void process_seg_fast_int_int_cpp 
(uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &/*ctx*/) noexcept; + template + static void process_seg_fast_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + +#if (fstb_ARCHI == fstb_ARCHI_X86) + template + static void process_seg_fast_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &/*ctx*/) noexcept; + template + static void process_seg_fast_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; +#endif + + template + static void process_seg_ord_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + template + static void process_seg_ord_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + +#if (fstb_ARCHI == fstb_ARCHI_X86) + template + static void process_seg_ord_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + template + static void process_seg_ord_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; +#endif + + template + static void process_seg_qrs_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + template + static void process_seg_qrs_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + +#if (fstb_ARCHI == fstb_ARCHI_X86) + template + static void process_seg_qrs_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + template + static void process_seg_qrs_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, 
SegContext &ctx) noexcept; +#endif + + template + static fstb_FORCEINLINE void + process_seg_common_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept; + template + static fstb_FORCEINLINE void + process_seg_common_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept; + template + static fstb_FORCEINLINE int + generate_dith_n_scalar (uint32_t &rnd_state) noexcept; + static fstb_FORCEINLINE int + remap_tpdf_scalar (int d) noexcept; + +#if (fstb_ARCHI == fstb_ARCHI_X86) + template + static fstb_FORCEINLINE void + process_seg_common_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept; + template + static fstb_FORCEINLINE void + process_seg_common_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept; + template + static fstb_FORCEINLINE __m128i + generate_dith_n_vec (uint32_t &rnd_state) noexcept; + static fstb_FORCEINLINE __m128i + remap_tpdf_vec (__m128i d) noexcept; +#endif + + template + static void process_seg_errdif_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + template + static void process_seg_errdif_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept; + + static inline void + generate_rnd (uint32_t &state) noexcept; + static inline void + generate_rnd_eol (uint32_t &state) noexcept; + + template + static inline void + quantize_pix_int (DST_TYPE * fstb_RESTRICT dst_ptr, const SRC_TYPE * fstb_RESTRICT src_ptr, SRC_TYPE &src_raw, int x, int & fstb_RESTRICT err, uint32_t &rnd_state, int ampe_i, int ampn_i) noexcept; + template + static inline void + quantize_pix_flt (DST_TYPE * fstb_RESTRICT 
dst_ptr, const SRC_TYPE * fstb_RESTRICT src_ptr, SRC_TYPE &src_raw, int x, float & fstb_RESTRICT err, uint32_t &rnd_state, float ampe_f, float ampn_f, float mul, float add) noexcept; + + template + class ErrDifAddParam + { + public: + typedef DT DstType; + typedef ST SrcType; + static constexpr int _dst_bits = DB; + static constexpr int _src_bits = SB; + static constexpr int _nbr_err_lines = EL; + }; + + template + class DiffuseFloydSteinberg + : public ErrDifAddParam + { + public: + template + static fstb_FORCEINLINE void + diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept; + private: + template + static fstb_FORCEINLINE void + spread_error (ET e1, ET e3, ET e5, ET e7, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept; + }; + + template + class DiffuseFilterLite + : public ErrDifAddParam + { + public: + template + static fstb_FORCEINLINE void + diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept; + private: + template + static fstb_FORCEINLINE void + spread_error (ET e1, ET e2, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept; + }; + + template + class 
DiffuseStucki + : public ErrDifAddParam + { + public: + template + static fstb_FORCEINLINE void + diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept; + private: + template + static fstb_FORCEINLINE void + spread_error (ET e1, ET e2, ET e4, ET e8, ET & fstb_RESTRICT err_nxt0, ET & fstb_RESTRICT err_nxt1, EB * fstb_RESTRICT err0_ptr, EB * fstb_RESTRICT err1_ptr) noexcept; + }; + + template + class DiffuseAtkinson + : public ErrDifAddParam + { + public: + template + static fstb_FORCEINLINE void + diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept; + private: + template + static fstb_FORCEINLINE void + spread_error (ET e1, ET & fstb_RESTRICT err_nxt0, ET & fstb_RESTRICT err_nxt1, EB * fstb_RESTRICT err0_ptr, EB * fstb_RESTRICT err1_ptr) noexcept; + }; + + class DiffuseOstromoukhovBase + { + public: + struct TableEntry + { + int _c0; + int _c1; + int _c2; // Actually not used + int _sum; + float _inv_sum; // Possible optimization: store 1/_c0 and 1/_c1 instead of this field. 
+ }; + + static constexpr int _t_bits = 8; + static constexpr int _t_len = 1 << _t_bits; + static constexpr int _t_mask = _t_len - 1; + + static const std::array + _table; + }; + + template + class DiffuseOstromoukhovBase2 + : public DiffuseOstromoukhovBase + { + public: + template + static inline int + get_index (SRC_TYPE src_raw) noexcept; + static inline int + get_index (float src_raw) noexcept; + }; + + template + class DiffuseOstromoukhov + : public ErrDifAddParam + , public DiffuseOstromoukhovBase2 + { + public: + typedef DiffuseOstromoukhov ThisType; + template + static fstb_FORCEINLINE void + diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept; + template + static fstb_FORCEINLINE void + prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept; + private: + template + static fstb_FORCEINLINE void + spread_error (ET e1, ET e2, ET e3, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept; + }; + + SplFmt _splfmt_src = SplFmt_ILLEGAL; + SplFmt _splfmt_dst = SplFmt_ILLEGAL; + int _src_res = 0; + int _dst_res = 0; + bool _full_range_in_flag = false; + bool _full_range_out_flag = false; + ColorFamily _color_fam = ColorFamily_INVALID; + int _nbr_planes = 0; + + std::array + _scale_info_arr; + bool _upconv_flag = false; + bool _sse2_flag = false; + bool _avx2_flag = false; + bool _range_def_flag = false; + + int _dmode = DMode_FAST; + int _pat_size = _max_pat_width; // Must be a divisor of _max_pat_width + double _ampo = 1; + double _ampn = 0; + bool _dyn_flag = false; + bool _static_noise_flag = false; + bool _correlated_planes_flag = false; + bool _tpdfo_flag = false; + bool _tpdfn_flag = false; + + bool _errdif_flag 
= false; // Indicates a dithering method using error diffusion. + bool _simple_flag = false; // Simplified implementation for ampo == 1 and ampn == 0 + PatDataArray _dither_pat_arr; // Contains levels for ordered dithering + + AmpInfo _amp; + + conc::ObjPool + _buf_pool; + std::unique_ptr + _buf_factory_uptr; + + void (* _process_seg_int_int_ptr) (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) = nullptr; + void (* _process_seg_flt_int_ptr) (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) = nullptr; + + + +/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + Dither () = delete; + Dither (const Dither &other) = delete; + Dither (Dither &&other) = delete; + Dither & operator = (const Dither &other) = delete; + Dither & operator = (Dither &&other) = delete; + bool operator == (const Dither &other) const = delete; + bool operator != (const Dither &other) const = delete; + +}; // class Dither + + + +} // namespace fmtcl + + + +//#include "fmtcl/Dither.hpp" + + + +#endif // fmtcl_Dither_HEADER_INCLUDED + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/InterlacingType.h b/src/fmtcl/InterlacingType.h new file mode 100644 index 0000000..4403a9e --- /dev/null +++ b/src/fmtcl/InterlacingType.h @@ -0,0 +1,70 @@ +/***************************************************************************** + + InterlacingType.h + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#pragma once +#if ! 
defined (fmtcl_InterlacingType_HEADER_INCLUDED) +#define fmtcl_InterlacingType_HEADER_INCLUDED + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +namespace fmtcl +{ + + + +enum InterlacingType +{ + InterlacingType_INVALID = -1, + + InterlacingType_FRAME = 0, + InterlacingType_TOP, + InterlacingType_BOT, + + InterlacingType_NBR_ELT + +}; // enum InterlacingType + + + +inline InterlacingType InterlacingType_get (bool itl_flag, bool top_flag) +{ + return + (itl_flag) ? ((top_flag) ? InterlacingType_TOP + : InterlacingType_BOT) + : InterlacingType_FRAME; +} + + + +} // namespace fmtcl + + + +//#include "fmtcl/InterlacingType.hpp" + + + +#endif // fmtcl_InterlacingType_HEADER_INCLUDED + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + diff --git a/src/fmtcl/KernelData.h b/src/fmtcl/KernelData.h index 4628d68..31c6524 100644 --- a/src/fmtcl/KernelData.h +++ b/src/fmtcl/KernelData.h @@ -50,8 +50,10 @@ class KernelData public: - KernelData () = default; - virtual ~KernelData () {} + KernelData () = default; + ~KernelData () = default; + KernelData (KernelData &&other) = default; + KernelData & operator = (KernelData &&other) = default; uint32_t get_hash () const; diff --git a/src/fmtcl/Matrix2020CLProc.cpp b/src/fmtcl/Matrix2020CLProc.cpp index aa70589..d8b9bfd 100644 --- a/src/fmtcl/Matrix2020CLProc.cpp +++ b/src/fmtcl/Matrix2020CLProc.cpp @@ -30,6 +30,7 @@ To Public License, Version 2, as published by Sam Hocevar. 
See #include "fmtcl/fnc.h" #include "fmtcl/Matrix2020CLProc.h" #include "fmtcl/Matrix2020CLProc_macro.h" +#include "fmtcl/PicFmt.h" #include "fmtcl/ProxyRwCpp.h" #include "fmtcl/TransOpLinPow.h" #include "fstb/fnc.h" @@ -242,14 +243,14 @@ Matrix2020CLProc::Err Matrix2020CLProc::setup_rgb_2_ycbcr () double b_c; compute_fmt_mac_cst ( a_y, b_y, - _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, _full_range_flag, - _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, true, + PicFmt { _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, _full_range_flag }, + PicFmt { _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, true }, 0 ); compute_fmt_mac_cst ( a_c, b_c, - _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, _full_range_flag, - _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, true, + PicFmt { _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, _full_range_flag }, + PicFmt { _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, true }, 1 ); const int dif_bits = RGB_INT_BITS - _dst_bits; @@ -358,14 +359,14 @@ Matrix2020CLProc::Err Matrix2020CLProc::setup_ycbcr_2_rgb () double b_c; compute_fmt_mac_cst ( a_y, b_y, - _src_fmt, _src_bits, ColorFamily_YUV, true, - _src_fmt, _src_bits, ColorFamily_YUV, _full_range_flag, + PicFmt { _src_fmt, _src_bits, ColorFamily_YUV, true }, + PicFmt { _src_fmt, _src_bits, ColorFamily_YUV, _full_range_flag }, 0 ); compute_fmt_mac_cst ( a_c, b_c, - _src_fmt, _src_bits, ColorFamily_YUV, true, - _src_fmt, _src_bits, ColorFamily_YUV, _full_range_flag, + PicFmt { _src_fmt, _src_bits, ColorFamily_YUV, true }, + PicFmt { _src_fmt, _src_bits, ColorFamily_YUV, _full_range_flag }, 1 ); const int dif_bits = RGB_INT_BITS - _src_bits; diff --git a/src/fmtcl/MatrixProc.cpp b/src/fmtcl/MatrixProc.cpp index fe616b1..79cafe2 100644 --- a/src/fmtcl/MatrixProc.cpp +++ b/src/fmtcl/MatrixProc.cpp @@ -83,7 +83,7 @@ MatrixProc::Err MatrixProc::configure (const Mat4 &m, bool int_proc_flag, SplFmt assert (dst_fmt < SplFmt_NBR_ELT); assert (dst_bits >= 8); assert (dst_bits <= 32); - assert (plane_out <= NBR_PLANES); + assert (plane_out <= 
_nbr_planes); assert ( (dst_fmt == SplFmt_FLOAT && src_fmt == SplFmt_FLOAT) || (dst_fmt != SplFmt_FLOAT && src_fmt != SplFmt_FLOAT)); @@ -196,7 +196,7 @@ MatrixProc::Err MatrixProc::configure (const Mat4 &m, bool int_proc_flag, SplFmt -void MatrixProc::process (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (_proc_ptr != 0); @@ -215,19 +215,19 @@ void MatrixProc::process (uint8_t * const dst_ptr_arr [NBR_PLANES], const int ds void MatrixProc::set_matrix_flt (const Mat4 &m, int plane_out) { - assert (plane_out <= NBR_PLANES); + assert (plane_out <= _nbr_planes); const int plane_beg = (plane_out >= 0) ? plane_out : 0; - const int plane_end = (plane_out >= 0) ? plane_out + 1 : NBR_PLANES; + const int plane_end = (plane_out >= 0) ? plane_out + 1 : _nbr_planes; - _coef_flt_arr.resize (NBR_PLANES * MAT_SIZE, 0); + _coef_flt_arr.resize (_nbr_planes * _mat_size, 0); for (int y = plane_beg; y < plane_end; ++y) { const int y_dest = (plane_out >= 0) ? 
0 : y; - for (int x = 0; x < MAT_SIZE; ++x) + for (int x = 0; x < _mat_size; ++x) { const float c = float (m [y] [x]); - _coef_flt_arr [y_dest * MAT_SIZE + x] = c; + _coef_flt_arr [y_dest * _mat_size + x] = c; } } } @@ -236,7 +236,7 @@ void MatrixProc::set_matrix_flt (const Mat4 &m, int plane_out) MatrixProc::Err MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int src_bits, int dst_bits) { - assert (plane_out <= NBR_PLANES); + assert (plane_out <= _nbr_planes); assert (src_bits >= 8); assert (src_bits <= 16); assert (dst_bits >= 8); @@ -245,9 +245,9 @@ MatrixProc::Err MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int sr Err ret_val = Err_OK; const int plane_beg = (plane_out >= 0) ? plane_out : 0; - const int plane_end = (plane_out >= 0) ? plane_out + 1 : NBR_PLANES; + const int plane_end = (plane_out >= 0) ? plane_out + 1 : _nbr_planes; - _coef_int_arr.resize (NBR_PLANES * MAT_SIZE, 0); + _coef_int_arr.resize (_nbr_planes * _mat_size, 0); #if (fstb_ARCHI == fstb_ARCHI_X86) if (_sse2_flag || _avx2_flag) @@ -256,15 +256,15 @@ MatrixProc::Err MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int sr { _coef_simd_arr.set_avx2_mode (true); } - _coef_simd_arr.resize (NBR_PLANES * MAT_SIZE); + _coef_simd_arr.resize (_nbr_planes * _mat_size); } #endif // Coefficient scale - const double cintsc = double ((uint64_t (1)) << SHIFT_INT); + const double cintsc = double ((uint64_t (1)) << _shift_int); // Rounding constant - const int div_shift = SHIFT_INT + src_bits - dst_bits; + const int div_shift = _shift_int + src_bits - dst_bits; const int rnd = 1 << (div_shift - 1); for (int y = plane_beg; y < plane_end; ++y) @@ -282,11 +282,11 @@ MatrixProc::Err MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int sr double bias_flt = (dst_bits == 16) ? 
-1 : 0; #endif // fstb_ARCHI_X86 - for (int x = 0; x < MAT_SIZE; ++x) + for (int x = 0; x < _mat_size; ++x) { - const bool add_flag = (x == NBR_PLANES); + const bool add_flag = (x == _nbr_planes); const int y_dest = (plane_out >= 0) ? 0 : y; - const int index = y_dest * MAT_SIZE + x; + const int index = y_dest * _mat_size + x; const double c = m [y] [x]; double scaled_c = c * cintsc; @@ -338,7 +338,7 @@ MatrixProc::Err MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int sr if (dst_bits == 16 || src_bits == 16) { const double scale = double ( - (uint64_t (1)) << (src_bits + SHIFT_INT - 1) + (uint64_t (1)) << (src_bits + _shift_int - 1) ); const int bias = fstb::round_int (bias_flt * scale); @@ -428,7 +428,7 @@ void MatrixProc::setup_fnc_sse2 (bool int_proc_flag, SplFmt src_fmt, int src_bit template -void MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -437,7 +437,7 @@ void MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); typedef typename SRC::PtrConst::Type SrcPtr; typedef typename DST::Ptr::Type DstPtr; @@ -476,15 +476,15 @@ void MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co const int d0 = ( s0 * _coef_int_arr [ 0] + s1 * _coef_int_arr [ 1] + s2 * _coef_int_arr [ 2] - + _coef_int_arr [ 3]) >> (SHIFT_INT + SB - DB); + + _coef_int_arr [ 3]) >> (_shift_int + SB - DB); const int d1 = ( s0 * 
_coef_int_arr [ 4] + s1 * _coef_int_arr [ 5] + s2 * _coef_int_arr [ 6] - + _coef_int_arr [ 7]) >> (SHIFT_INT + SB - DB); + + _coef_int_arr [ 7]) >> (_shift_int + SB - DB); const int d2 = ( s0 * _coef_int_arr [ 8] + s1 * _coef_int_arr [ 9] + s2 * _coef_int_arr [10] - + _coef_int_arr [11]) >> (SHIFT_INT + SB - DB); + + _coef_int_arr [11]) >> (_shift_int + SB - DB); DST::template write_clip (dst_0_ptr, d0); DST::template write_clip (dst_1_ptr, d1); @@ -512,7 +512,7 @@ void MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co template -void MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -521,7 +521,7 @@ void MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); typedef typename SRC::PtrConst::Type SrcPtr; typedef typename DST::Ptr::Type DstPtr; @@ -554,7 +554,7 @@ void MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co const int d0 = ( s0 * _coef_int_arr [ 0] + s1 * _coef_int_arr [ 1] + s2 * _coef_int_arr [ 2] - + _coef_int_arr [ 3]) >> (SHIFT_INT + SB - DB); + + _coef_int_arr [ 3]) >> (_shift_int + SB - DB); DST::template write_clip (dst_0_ptr, d0); @@ -575,7 +575,7 @@ void MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co -void MatrixProc::process_3_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const 
src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_3_flt_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -584,7 +584,7 @@ void MatrixProc::process_3_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); const int sizeof_xt = int (sizeof (float)); assert (src_str_arr [0] % sizeof_xt == 0); assert (src_str_arr [1] % sizeof_xt == 0); @@ -645,7 +645,7 @@ void MatrixProc::process_3_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co -void MatrixProc::process_1_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_1_flt_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -654,7 +654,7 @@ void MatrixProc::process_1_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); const int sizeof_xt = int (sizeof (float)); assert (src_str_arr [0] % sizeof_xt == 0); assert (src_str_arr [1] % sizeof_xt == 0); @@ -703,7 +703,7 @@ void MatrixProc::process_1_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co // DST and SRC are ProxyRwSse2 classes template -void MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], const 
int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -712,7 +712,7 @@ void MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); enum { BPS_SRC = (SB + 7) >> 3 }; enum { BPS_DST = (DB + 7) >> 3 }; @@ -754,7 +754,7 @@ void MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c dst_str_arr [plane_index], h )); - const int cind = plane_index * MAT_SIZE; + const int cind = plane_index * _mat_size; for (int x = 0; x < w; x += packsize) { @@ -765,7 +765,7 @@ void MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c const __m128i s1 = SrcS16R::read (src_1_ptr, zero, sign_bit); const __m128i s2 = SrcS16R::read (src_2_ptr, zero, sign_bit); - __m128i d0 = _mm_load_si128 (coef_ptr + cind + NBR_PLANES); + __m128i d0 = _mm_load_si128 (coef_ptr + cind + _nbr_planes); __m128i d1 = d0; // src is variable, up to 16-bit signed (full range, +1 = 32767+1) @@ -779,8 +779,8 @@ void MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c fstb::ToolsSse2::mac_s16_s16_s32 ( d0, d1, s2, _mm_load_si128 (coef_ptr + cind + 2)); - d0 = _mm_srai_epi32 (d0, SHIFT_INT + SB - DB); - d1 = _mm_srai_epi32 (d1, SHIFT_INT + SB - DB); + d0 = _mm_srai_epi32 (d0, _shift_int + SB - DB); + d1 = _mm_srai_epi32 (d1, _shift_int + SB - DB); __m128i val = _mm_packs_epi32 (d0, d1); @@ -806,7 +806,7 @@ void MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c -void 
MatrixProc::process_3_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_3_flt_sse (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -815,7 +815,7 @@ void MatrixProc::process_3_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], co assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); const int sizeof_xt = int (sizeof (float)); assert (src_str_arr [0] % sizeof_xt == 0); assert (src_str_arr [1] % sizeof_xt == 0); @@ -892,7 +892,7 @@ void MatrixProc::process_3_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], co -void MatrixProc::process_1_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_1_flt_sse (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -901,7 +901,7 @@ void MatrixProc::process_1_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], co assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); const int sizeof_xt = int (sizeof (float)); assert (src_str_arr [0] % sizeof_xt == 0); assert (src_str_arr [1] % sizeof_xt == 0); diff --git a/src/fmtcl/MatrixProc.h b/src/fmtcl/MatrixProc.h index 6e634ba..09289c8 100644 --- 
a/src/fmtcl/MatrixProc.h +++ b/src/fmtcl/MatrixProc.h @@ -60,8 +60,8 @@ class MatrixProc Err_INVALID_FORMAT_COMBINATION }; - static const int NBR_PLANES = 3; - static const int MAT_SIZE = NBR_PLANES + 1; + static constexpr int _nbr_planes = 3; + static constexpr int _mat_size = _nbr_planes + 1; explicit MatrixProc (bool sse_flag, bool sse2_flag, bool avx_flag, bool avx2_flag); virtual ~MatrixProc () {} @@ -71,7 +71,7 @@ class MatrixProc // All stride values are in bytes // h must be the frame height too, not only the processed stripe height // (required for Stack16 formats to compute the lsb offset) - void process (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; + void process (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; @@ -85,7 +85,7 @@ class MatrixProc private: - static const int SHIFT_INT = 12; // Number of bits for the fractional part + static constexpr int _shift_int = 12; // Number of bits for the fractional part void set_matrix_flt (const Mat4 &m, int plane_out); Err set_matrix_int (const Mat4 &m, int plane_out, int src_bits, int dst_bits); @@ -98,23 +98,23 @@ class MatrixProc #endif // fstb_ARCHI_X86 template - void process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; + void process_3_int_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; template - void process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], 
const int src_str_arr [NBR_PLANES], int w, int h) const; + void process_1_int_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; - void process_3_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; - void process_1_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; + void process_3_flt_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; + void process_1_flt_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; #if (fstb_ARCHI == fstb_ARCHI_X86) template - void process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; - void process_3_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; - void process_1_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; + void process_n_int_sse2 (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; 
+ void process_3_flt_sse (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; + void process_1_flt_sse (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; template - void process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; - void process_3_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; - void process_1_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const; + void process_n_int_avx2 (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; + void process_3_flt_avx (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; + void process_1_flt_avx (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; #endif // fstb_ARCHI_X86 bool _sse_flag; @@ -123,12 +123,12 @@ class MatrixProc bool _avx2_flag; void (ThisType::* // 0 = not set - _proc_ptr) (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr 
[NBR_PLANES], int w, int h) const; + _proc_ptr) (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const; std::vector _coef_flt_arr; - // Integer coefficients are all scaled with SHIFT_INT. + // Integer coefficients are all scaled with _shift_int. // The additive coefficient contains the rounding constant too. std::vector _coef_int_arr; diff --git a/src/fmtcl/MatrixProc_avx.cpp b/src/fmtcl/MatrixProc_avx.cpp index fb81745..9c90059 100644 --- a/src/fmtcl/MatrixProc_avx.cpp +++ b/src/fmtcl/MatrixProc_avx.cpp @@ -70,7 +70,7 @@ void MatrixProc::setup_fnc_avx (bool int_proc_flag, SplFmt src_fmt, int src_bits -void MatrixProc::process_3_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_3_flt_avx (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -79,7 +79,7 @@ void MatrixProc::process_3_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], co assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); const int sizeof_xt = int (sizeof (float)); assert (src_str_arr [0] % sizeof_xt == 0); assert (src_str_arr [1] % sizeof_xt == 0); @@ -158,7 +158,7 @@ void MatrixProc::process_3_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], co -void MatrixProc::process_1_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_1_flt_avx 
(uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -167,7 +167,7 @@ void MatrixProc::process_1_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], co assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); const int sizeof_xt = int (sizeof (float)); assert (src_str_arr [0] % sizeof_xt == 0); assert (src_str_arr [1] % sizeof_xt == 0); diff --git a/src/fmtcl/MatrixProc_avx2.cpp b/src/fmtcl/MatrixProc_avx2.cpp index 40a604a..0f508b7 100644 --- a/src/fmtcl/MatrixProc_avx2.cpp +++ b/src/fmtcl/MatrixProc_avx2.cpp @@ -94,7 +94,7 @@ void MatrixProc::setup_fnc_avx2 (bool int_proc_flag, SplFmt src_fmt, int src_bit // DST and SRC are ProxyRwAvx2 classes template -void MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const +void MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const { assert (dst_ptr_arr != 0); assert (dst_str_arr != 0); @@ -103,7 +103,7 @@ void MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c assert (w > 0); assert (h > 0); - static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes"); + static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes"); enum { BPS_SRC = (SB + 7) >> 3 }; enum { BPS_DST = (DB + 7) >> 3 }; @@ -145,7 +145,7 @@ void MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c dst_str_arr [plane_index], h )); - const int cind = plane_index * MAT_SIZE; + const int 
cind = plane_index * _mat_size; for (int x = 0; x < w; x += packsize) { @@ -156,7 +156,7 @@ void MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c const __m256i s1 = SrcS16R::read (src_1_ptr, zero, sign_bit); const __m256i s2 = SrcS16R::read (src_2_ptr, zero, sign_bit); - __m256i d0 = _mm256_load_si256 (coef_ptr + cind + NBR_PLANES); + __m256i d0 = _mm256_load_si256 (coef_ptr + cind + _nbr_planes); __m256i d1 = d0; // src is variable, up to 16-bit signed (full range, +1 = 32767+1) @@ -170,8 +170,8 @@ void MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c fstb::ToolsAvx2::mac_s16_s16_s32 ( d0, d1, s2, _mm256_load_si256 (coef_ptr + cind + 2)); - d0 = _mm256_srai_epi32 (d0, SHIFT_INT + SB - DB); - d1 = _mm256_srai_epi32 (d1, SHIFT_INT + SB - DB); + d0 = _mm256_srai_epi32 (d0, _shift_int + SB - DB); + d1 = _mm256_srai_epi32 (d1, _shift_int + SB - DB); __m256i val = _mm256_packs_epi32 (d0, d1); diff --git a/src/fmtcl/MatrixUtil.cpp b/src/fmtcl/MatrixUtil.cpp new file mode 100644 index 0000000..e76adac --- /dev/null +++ b/src/fmtcl/MatrixUtil.cpp @@ -0,0 +1,361 @@ +/***************************************************************************** + + MatrixUtil.cpp + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. 
+ +*Tab=3***********************************************************************/ + + + +#if defined (_MSC_VER) + #pragma warning (1 : 4130 4223 4705 4706) + #pragma warning (4 : 4355 4786 4800) +#endif + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fstb/fnc.h" +#include "fmtcl/Mat3.h" +#include "fmtcl/Mat4.h" +#include "fmtcl/MatrixUtil.h" + +#include + + + +namespace fmtcl +{ + + + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +// mat should be already converted to lower case +// Returns ColorSpaceH265_UNDEF if mat is unknown +ColorSpaceH265 MatrixUtil::find_cs_from_mat_str (const std::string &mat, bool allow_2020cl_flag) +{ + ColorSpaceH265 cs = ColorSpaceH265_UNSPECIFIED; + + if (mat.empty () || mat == "rgb") + { + cs = ColorSpaceH265_RGB; + } + else if (mat == "601") + { + cs = ColorSpaceH265_SMPTE170M; + } + else if (mat == "709") + { + cs = ColorSpaceH265_BT709; + } + else if (mat == "240") + { + cs = ColorSpaceH265_SMPTE240M; + } + else if (mat == "fcc") + { + cs = ColorSpaceH265_FCC; + } + else if (mat == "ycgco" || mat == "ycocg") + { + cs = ColorSpaceH265_YCGCO; + } + else if (mat == "2020") + { + cs = ColorSpaceH265_BT2020NCL; + } + else if (mat == "2020cl" && allow_2020cl_flag) + { + cs = ColorSpaceH265_BT2020CL; + } + else if (mat == "ydzdx") + { + cs = ColorSpaceH265_YDZDX; + } + else if (mat == "lms") + { + cs = ColorSpaceH265_LMS; + } + else if (mat == "ictcp_pq") + { + cs = ColorSpaceH265_ICTCP_PQ; + } + else if (mat == "ictcp_hlg") + { + cs = ColorSpaceH265_ICTCP_HLG; + } + + // Unknown matrix identifier + else + { + assert (false); + cs = ColorSpaceH265_UNDEF; + } + + return cs; +} + + + +// Returns -1 if mat is unknown +int MatrixUtil::make_mat_from_str (Mat4 &m, const std::string &mat, bool to_rgb_flag) +{ + int ret_val = 0; + + if (mat.empty () || mat == "rgb") + { + m[0][0] = 1; m[0][1] = 0; m[0][2] = 0; + m[1][0] = 0; m[1][1] = 1; m[1][2] = 0; + 
m[2][0] = 0; m[2][1] = 0; m[2][2] = 1; + m.clean3 (1); + } + else if (mat == "601") + { + make_mat_yuv (m, 0.299, 0.587, 0.114, to_rgb_flag); + } + else if (mat == "709") + { + make_mat_yuv (m, 0.2126, 0.7152, 0.0722, to_rgb_flag); + } + else if (mat == "240") + { + make_mat_yuv (m, 0.212, 0.701, 0.087, to_rgb_flag); + } + else if (mat == "fcc") + { + make_mat_yuv (m, 0.30, 0.59, 0.11, to_rgb_flag); + } + else if (mat == "ycgco" || mat == "ycocg") + { + make_mat_ycgco (m, to_rgb_flag); + } + else if (mat == "2020") + { + make_mat_yuv (m, 0.2627, 0.678, 0.0593, to_rgb_flag); + } + else if (mat == "ydzdx") + { + make_mat_ydzdx (m, to_rgb_flag); + } + else if (mat == "lms") + { + make_mat_lms (m, to_rgb_flag); + } + else if (mat == "ictcp_pq") + { + make_mat_ictcp (m, false, to_rgb_flag); + } + else if (mat == "ictcp_hlg") + { + make_mat_ictcp (m, true, to_rgb_flag); + } + else + { + assert (false); + ret_val = -1; + } + + return ret_val; +} + + + +/* +kr/kg/kb matrix (Rec. ITU-T H.265 2019-06, p. 413): + +R = Y + V*(1-Kr) +G = Y - U*(1-Kb)*Kb/Kg - V*(1-Kr)*Kr/Kg +B = Y + U*(1-Kb) + +Y = R * Kr + G * Kg + B * Kb +U = (B-Y)/(1-Kb) = - R * Kr/(1-Kb) - G * Kg/(1-Kb) + B +V = (R-Y)/(1-Kr) = R - G * Kg/(1-Kr) - B * Kb/(1-Kr) + +The given equations work for R, G, B in range [0 ; 1] and U and V in range +[-1 ; 1]. Scaling must be applied to match the required range for U and V. + +R, G, B, Y range : [0 ; 1] +U, V range : [-0.5 ; 0.5] +*/ + +void MatrixUtil::make_mat_yuv (Mat4 &m, double kr, double kg, double kb, bool to_rgb_flag) +{ + assert (! fstb::is_null (kg)); + assert (! fstb::is_eq (kb, 1.0)); + assert (! 
fstb::is_eq (kr, 1.0)); + + constexpr double r = 0.5; + constexpr double x = 1.0 / r; + if (to_rgb_flag) + { + m[0][0] = 1; m[0][1] = 0; m[0][2] = x*(1-kr) ; + m[1][0] = 1; m[1][1] = x*(kb-1)*kb/kg; m[1][2] = x*(kr-1)*kr/kg; + m[2][0] = 1; m[2][1] = x*(1-kb) ; m[2][2] = 0; + } + + else + { + m[0][0] = kr ; m[0][1] = kg ; m[0][2] = kb ; + m[1][0] = r*kr/(kb-1); m[1][1] = r*kg/(kb-1); m[1][2] = r ; + m[2][0] = r ; m[2][1] = r*kg/(kr-1); m[2][2] = r*kb/(kr-1); + } + + m.clean3 (1); +} + + + +/* +YCgCo matrix (Rec. ITU-T H.265 2019-06, p. 413): + +R = Y - Cg + Co +G = Y + Cg +B = Y - Cg - Co + +Y = 0.25 * R + 0.5 * G + 0.25 * B +Cg = -0.25 * R + 0.5 * G - 0.25 * B +Co = 0.5 * R - 0.5 * B + +R, G, B, Y range : [0 ; 1] +Cg, Co range : [-0.5 ; 0.5] + +Note: this implementation is not exactly the same as specified because the +standard specifies specific steps to apply the RGB-to-YCgCo matrix, leading +to different roundings. +*/ + +void MatrixUtil::make_mat_ycgco (Mat4 &m, bool to_rgb_flag) +{ + if (to_rgb_flag) + { + m[0][0] = 1; m[0][1] = -1; m[0][2] = 1; + m[1][0] = 1; m[1][1] = 1; m[1][2] = 0; + m[2][0] = 1; m[2][1] = -1; m[2][2] = -1; + } + else + { + m[0][0] = 0.25; m[0][1] = 0.5; m[0][2] = 0.25; + m[1][0] = -0.25; m[1][1] = 0.5; m[1][2] = -0.25; + m[2][0] = 0.5 ; m[2][1] = 0 ; m[2][2] = -0.5 ; + } + + m.clean3 (1); +} + + + +/* +YDzDx transform (Rec. ITU-T H.265 2019-06, p. 414) + +Y = G +Dz = 0.5 * (0.986566 * B - Y) +Dx = 0.5 * (R - 0.991902 * Y) + +Y = G +Dz = - 0.5 * G + 0.493283 * B +Dx = 0.5 * R - 0.495951 * G +*/ + +void MatrixUtil::make_mat_ydzdx (Mat4 &m, bool to_rgb_flag) +{ + Mat3 m3; + m3[0][0] = 0 ; m3[0][1] = 1 ; m3[0][2] = 0; + m3[1][0] = 0 ; m3[1][1] = -0.5 ; m3[1][2] = 0.493283; + m3[2][0] = 0.5; m3[2][1] = -0.495951; m3[2][2] = 0; + + if (to_rgb_flag) + { + m3.invert (); + } + + m.insert3 (m3); + m.clean3 (1); +} + + + +/* +LMS transform (Rec. ITU-T H.265 2019-06, p. 411) + +LMS is an intermediate colorspace for ICtCp transforms. 
+LMS data are conveyed on RGB planes. +Here, to_rgb_flag indicates real RGB target. +*/ + +void MatrixUtil::make_mat_lms (Mat4 &m, bool to_rgb_flag) +{ + Mat3 m3; + m3[0][0] = 1688; m3[0][1] = 2146; m3[0][2] = 262; + m3[1][0] = 683; m3[1][1] = 2951; m3[1][2] = 462; + m3[2][0] = 99; m3[2][1] = 309; m3[2][2] = 3688; + m3 *= 1.0 / 4096; + + if (to_rgb_flag) + { + m3.invert (); + } + + m.insert3 (m3); + m.clean3 (1); +} + + + +/* +ICtCp transform from and to LMS (Rec. ITU-T H.265 2019-06, p. 414) + +LMS data are conveyed on RGB planes. +*/ + +void MatrixUtil::make_mat_ictcp (Mat4 &m, bool hlg_flag, bool to_lms_flag) +{ + Mat3 m3; + m3[0][0] = 2048; m3[0][1] = 2048; m3[0][2] = 0; + if (hlg_flag) + { + m3[1][0] = 3625; m3[1][1] = -7465; m3[1][2] = 3840; + m3[2][0] = 9500; m3[2][1] = -9212; m3[2][2] = -288; + } + else + { + m3[1][0] = 6610; m3[1][1] = -13613; m3[1][2] = 7003; + m3[2][0] = 17933; m3[2][1] = -17390; m3[2][2] = -543; + } + m3 *= 1.0 / 4096; + + if (to_lms_flag) + { + m3.invert (); + } + + m.insert3 (m3); + m.clean3 (1); +} + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +} // namespace fmtcl + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/MatrixUtil.h b/src/fmtcl/MatrixUtil.h new file mode 100644 index 0000000..f94a256 --- /dev/null +++ b/src/fmtcl/MatrixUtil.h @@ -0,0 +1,98 @@ +/***************************************************************************** + + MatrixUtil.h + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details.
+ +*Tab=3***********************************************************************/ + + + +#pragma once +#if ! defined (fmtcl_MatrixUtil_HEADER_INCLUDED) +#define fmtcl_MatrixUtil_HEADER_INCLUDED + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/ColorSpaceH265.h" + +#include + + + +namespace fmtcl +{ + + + +class Mat4; + +class MatrixUtil +{ + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +public: + + static ColorSpaceH265 + find_cs_from_mat_str (const std::string &mat, bool allow_2020cl_flag); + + static int make_mat_from_str (Mat4 &m, const std::string &mat, bool to_rgb_flag); + static void make_mat_yuv (Mat4 &m, double kr, double kg, double kb, bool to_rgb_flag); + static void make_mat_ycgco (Mat4 &m, bool to_rgb_flag); + static void make_mat_ydzdx (Mat4 &m, bool to_rgb_flag); + static void make_mat_lms (Mat4 &m, bool to_rgb_flag); + static void make_mat_ictcp (Mat4 &m, bool hlg_flag, bool to_lms_flag); + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +protected: + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + + +/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + MatrixUtil () = delete; + MatrixUtil (const MatrixUtil &other) = delete; + MatrixUtil (MatrixUtil &&other) = delete; + MatrixUtil & operator = (const MatrixUtil &other) = delete; + MatrixUtil & operator = (MatrixUtil &&other) = delete; + bool operator == (const MatrixUtil &other) const = delete; + bool operator != (const MatrixUtil &other) const = delete; + +}; // class MatrixUtil + + + +} // namespace fmtcl + + + +//#include "fmtcl/MatrixUtil.hpp" + + + +#endif // fmtcl_MatrixUtil_HEADER_INCLUDED + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/PicFmt.h b/src/fmtcl/PicFmt.h new file mode 100644 index 0000000..9ab63cd --- 
/dev/null +++ b/src/fmtcl/PicFmt.h @@ -0,0 +1,91 @@ +/***************************************************************************** + + PicFmt.h + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#pragma once +#if ! defined (fmtcl_PicFmt_HEADER_INCLUDED) +#define fmtcl_PicFmt_HEADER_INCLUDED + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/ColorFamily.h" +#include "fmtcl/SplFmt.h" + + + +namespace fmtcl +{ + + + +class PicFmt +{ + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +public: + + bool is_valid () const noexcept + { + return ( + _sf >= 0 && _sf < SplFmt_NBR_ELT + && _res >= 8 + && _col_fam >= 0 && _col_fam < ColorFamily_NBR_ELT + ); + } + + SplFmt _sf = SplFmt_ILLEGAL; + int _res = 0; // Number of bits per sample + ColorFamily _col_fam = ColorFamily_INVALID; + bool _full_flag = false; // Full range + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +protected: + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + + +/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + +}; // class PicFmt + + + +} // namespace fmtcl + + + +//#include "fmtcl/PicFmt.hpp" + + + +#endif // fmtcl_PicFmt_HEADER_INCLUDED + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/PrimUtil.cpp b/src/fmtcl/PrimUtil.cpp new file mode 100644 index 0000000..8ea8ff7 --- /dev/null +++ b/src/fmtcl/PrimUtil.cpp @@ -0,0 +1,301 @@ 
+/***************************************************************************** + + PrimUtil.cpp + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#if defined (_MSC_VER) + #pragma warning (1 : 4130 4223 4705 4706) + #pragma warning (4 : 4355 4786 4800) +#endif + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/PrimUtil.h" +#include "fstb/fnc.h" + +#include + + + +namespace fmtcl +{ + + + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +constexpr int PrimUtil::_nbr_planes; + + + +Mat3 PrimUtil::compute_conversion_matrix (const RgbSystem &prim_s, const RgbSystem &prim_d) +{ + assert (prim_s.is_ready ()); + assert (prim_d.is_ready ()); + + const Mat3 rgb2xyz = compute_rgb2xyz (prim_s); + const Mat3 xyz2rgb = compute_rgb2xyz (prim_d).invert (); + const Mat3 adapt = compute_chroma_adapt (prim_s, prim_d); + + return xyz2rgb * adapt * rgb2xyz; +} + + + +// http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html +Mat3 PrimUtil::compute_rgb2xyz (const RgbSystem &prim) +{ + assert (prim.is_ready ()); + + Mat3 m; + + if (prim._preset == PrimariesPreset_CIEXYZ) + { + m = Mat3 (1, Mat3::Preset_DIAGONAL); + } + + else + { + const Vec3 white = conv_xy_to_xyz (prim._white); + + Mat3 xyzrgb; + for (int k = 0; k < _nbr_planes; ++k) + { + Vec3 comp_xyz = conv_xy_to_xyz (prim._rgb [k]); + xyzrgb.set_col (k, comp_xyz); + } + + const Vec3 s = xyzrgb.compute_inverse () * white; + + for (int u = 0; u < _nbr_planes; ++u) + { + m.set_col (u, xyzrgb.get_col (u) * s [u]); + } + } + + return m; +} + + 
+ +// http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html +Mat3 PrimUtil::compute_chroma_adapt (const RgbSystem &prim_s, const RgbSystem &prim_d) +{ + assert (prim_s.is_ready ()); + assert (prim_d.is_ready ()); + + const Vec3 white_s = conv_xy_to_xyz (prim_s._white); + const Vec3 white_d = conv_xy_to_xyz (prim_d._white); + + // Bradford adaptation + const Mat3 ma ({ + Vec3 { 0.8951, 0.2664, -0.1614 }, + Vec3 { -0.7502, 1.7135, 0.0367 }, + Vec3 { 0.0389, -0.0685, 1.0296 } + }); + + Vec3 crd_s = ma * white_s; + Vec3 crd_d = ma * white_d; + Mat3 scale (0.0); + for (int k = 0; k < _nbr_planes; ++k) + { + assert (crd_s [k] != 0); + scale [k] [k] = crd_d [k] / crd_s [k]; + } + + return ma.compute_inverse () * scale * ma; +} + + + +// Obtains X, Y, Z from (x, y) +// Y is assumed to be 1.0 +// X = x / y +// Z = (1 - x - y) / y +// http://www.brucelindbloom.com/index.html?Eqn_xyY_to_XYZ.html +Vec3 PrimUtil::conv_xy_to_xyz (const RgbSystem::Vec2 &xy) +{ + Vec3 xyz; + + // When y is null, X = Y = Z = 0. + if (fstb::is_null (xy [1])) + { + xyz [0] = 0; + xyz [1] = 0; + xyz [2] = 0; + } + else + { + xyz [0] = xy [0] / xy [1]; + xyz [1] = 1; + xyz [2] = (1 - xy [0] - xy [1]) / xy [1]; + } + + return xyz; +} + + + +// str should be already converted to lower case +PrimariesPreset PrimUtil::conv_string_to_primaries (const std::string &str) +{ + assert (! 
str.empty ()); + + PrimariesPreset preset = PrimariesPreset_UNDEF; + + if ( str == "709" + || str == "1361" + || str == "61966-2-1" + || str == "61966-2-4" + || str == "hdtv" + || str == "srgb") + { + preset = PrimariesPreset_BT709; + } + else if ( str == "470m" + || str == "ntsc") + { + preset = PrimariesPreset_FCC; + } + else if ( str == "470m93" + || str == "ntscj") + { + preset = PrimariesPreset_NTSCJ; + } + else if ( str == "470bg" + || str == "601-625" + || str == "1358-625" + || str == "1700-625" + || str == "pal" + || str == "secam") + { + preset = PrimariesPreset_BT470BG; + } + else if ( str == "170m" + || str == "601-525" + || str == "1358-525" + || str == "1700-525") + { + preset = PrimariesPreset_SMPTE170M; + } + else if ( str == "240m") + { + preset = PrimariesPreset_SMPTE240M; + } + else if ( str == "filmc") + { + preset = PrimariesPreset_GENERIC_FILM; + } + else if ( str == "2020" + || str == "2100" + || str == "uhdtv") + { + preset = PrimariesPreset_BT2020; + } + else if ( str == "61966-2-2" + || str == "scrgb") + { + preset = PrimariesPreset_SCRGB; + } + else if ( str == "adobe98") + { + preset = PrimariesPreset_ADOBE_RGB_98; + } + else if ( str == "adobewide") + { + preset = PrimariesPreset_ADOBE_RGB_WIDE; + } + else if ( str == "apple") + { + preset = PrimariesPreset_APPLE_RGB; + } + else if ( str == "photopro" + || str == "romm") + { + preset = PrimariesPreset_ROMM; + } + else if ( str == "ciergb") + { + preset = PrimariesPreset_CIERGB; + } + else if ( str == "ciexyz") + { + preset = PrimariesPreset_CIEXYZ; + } + else if ( str == "p3d65" + || str == "dcip3") + { + preset = PrimariesPreset_P3D65; + } + else if ( str == "aces") + { + preset = PrimariesPreset_ACES; + } + else if ( str == "ap1") + { + preset = PrimariesPreset_ACESAP1; + } + else if ( str == "sgamut" + || str == "sgamut3") + { + preset = PrimariesPreset_SGAMUT; + } + else if ( str == "sgamut3cine") + { + preset = PrimariesPreset_SGAMUT3CINE; + } + else if ( str == "alexa") + { + 
preset = PrimariesPreset_ALEXA; + } + else if ( str == "vgamut") + { + preset = PrimariesPreset_VGAMUT; + } + else if ( str == "p3dci") + { + preset = PrimariesPreset_P3DCI; + } + else if ( str == "p3d60") + { + preset = PrimariesPreset_P3D60; + } + else if ( str == "3213") + { + preset = PrimariesPreset_EBU3213E; + } + + return preset; +} + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +} // namespace fmtcl + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/PrimUtil.h b/src/fmtcl/PrimUtil.h new file mode 100644 index 0000000..72cf11d --- /dev/null +++ b/src/fmtcl/PrimUtil.h @@ -0,0 +1,96 @@ +/***************************************************************************** + + PrimUtil.h + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#pragma once +#if ! 
defined (fmtcl_PrimUtil_HEADER_INCLUDED) +#define fmtcl_PrimUtil_HEADER_INCLUDED + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/Mat3.h" +#include "fmtcl/RgbSystem.h" + +#include + + + +namespace fmtcl +{ + + + +class PrimUtil +{ + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +public: + + static constexpr int _nbr_planes = RgbSystem::_nbr_planes; + + static Mat3 compute_conversion_matrix (const RgbSystem &prim_s, const RgbSystem &prim_d); + static Mat3 compute_rgb2xyz (const RgbSystem &prim); + static Mat3 compute_chroma_adapt (const RgbSystem &prim_s, const RgbSystem &prim_d); + static Vec3 conv_xy_to_xyz (const RgbSystem::Vec2 &xy); + static PrimariesPreset + conv_string_to_primaries (const std::string &str); + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +protected: + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + + +/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + PrimUtil () = delete; + PrimUtil (const PrimUtil &other) = delete; + PrimUtil (PrimUtil &&other) = delete; + PrimUtil & operator = (const PrimUtil &other) = delete; + PrimUtil & operator = (PrimUtil &&other) = delete; + bool operator == (const PrimUtil &other) const = delete; + bool operator != (const PrimUtil &other) const = delete; + +}; // class PrimUtil + + + +} // namespace fmtcl + + + +//#include "fmtcl/PrimUtil.hpp" + + + +#endif // fmtcl_PrimUtil_HEADER_INCLUDED + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/ResamplePlaneData.h b/src/fmtcl/ResamplePlaneData.h new file mode 100644 index 0000000..e3c7503 --- /dev/null +++ b/src/fmtcl/ResamplePlaneData.h @@ -0,0 +1,121 @@ +/***************************************************************************** + + ResamplePlaneData.h + Author: Laurent de Soras, 2021 
+ +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#pragma once +#if ! defined (fmtcl_ResamplePlaneData_HEADER_INCLUDED) +#define fmtcl_ResamplePlaneData_HEADER_INCLUDED + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/FilterResize.h" +#include "fmtcl/InterlacingType.h" +#include "fmtcl/KernelData.h" +#include "fmtcl/ResampleSpecPlane.h" + +#include + + + +namespace fmtcl +{ + + + +class ResamplePlaneData +{ + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +public: + + ResamplePlaneData () = default; + ~ResamplePlaneData () = default; + ResamplePlaneData (ResamplePlaneData &&other) = default; + ResamplePlaneData & + operator = (ResamplePlaneData &&other) = default; + + // Array order: [dest] [src] + typedef std::array SpecSrcArray; + typedef std::array SpecArray; + + class Win + { + public: + // Data is in full coordinates whatever the plane (never subsampled) + double _x = 0; + double _y = 0; + double _w = 0; + double _h = 0; + }; + + typedef std::array < + KernelData, + FilterResize::Dir_NBR_ELT + > KernelArray; + + Win _win; + SpecArray _spec_arr; // Contains the spec (used as a key) for each plane/interlacing combination + KernelArray _kernel_arr; + double _kernel_scale_h = 1; // Can be negative (forced scaling) + double _kernel_scale_v = 1; // Can be negative (forced scaling) + double _gain = 1; + double _add_cst = 0; + bool _preserve_center_flag = true; + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +protected: + + + +/*\\\ PRIVATE 
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + + +/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + ResamplePlaneData (const ResamplePlaneData &other) = delete; + ResamplePlaneData & + operator = (const ResamplePlaneData &other) = delete; + bool operator == (const ResamplePlaneData &other) const = delete; + bool operator != (const ResamplePlaneData &other) const = delete; + +}; // class ResamplePlaneData + + + +} // namespace fmtcl + + + +//#include "fmtcl/ResamplePlaneData.hpp" + + + +#endif // fmtcl_ResamplePlaneData_HEADER_INCLUDED + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/ResampleSpecPlane.cpp b/src/fmtcl/ResampleSpecPlane.cpp index ed7555c..17b7738 100644 --- a/src/fmtcl/ResampleSpecPlane.cpp +++ b/src/fmtcl/ResampleSpecPlane.cpp @@ -26,6 +26,8 @@ To Public License, Version 2, as published by Sam Hocevar. See #include "fmtcl/ResampleSpecPlane.h" +#include + #include @@ -41,57 +43,43 @@ namespace fmtcl bool ResampleSpecPlane::operator < (const ResampleSpecPlane &other) const { - if (_src_width < other._src_width ) { return (true ); } - if (_src_width > other._src_width ) { return (false); } - - if (_src_height < other._src_height ) { return (true ); } - if (_src_height > other._src_height ) { return (false); } - - if (_dst_width < other._dst_width ) { return (true ); } - if (_dst_width > other._dst_width ) { return (false); } - - if (_dst_height < other._dst_height ) { return (true ); } - if (_dst_height > other._dst_height ) { return (false); } - - if (_win_x < other._win_x ) { return (true ); } - if (_win_x > other._win_x ) { return (false); } - - if (_win_y < other._win_y ) { return (true ); } - if (_win_y > other._win_y ) { return (false); } - - if (_win_w < other._win_w ) { return (true ); } - if (_win_w > other._win_w ) { return (false); } - - if (_win_h < other._win_h ) { return (true ); } - if (_win_h > 
other._win_h ) { return (false); } - - if (_center_pos_src_h < other._center_pos_src_h) { return (true ); } - if (_center_pos_src_h > other._center_pos_src_h) { return (false); } - - if (_center_pos_src_v < other._center_pos_src_v) { return (true ); } - if (_center_pos_src_v > other._center_pos_src_v) { return (false); } - - if (_center_pos_dst_h < other._center_pos_dst_h) { return (true ); } - if (_center_pos_dst_h > other._center_pos_dst_h) { return (false); } - - if (_center_pos_dst_v < other._center_pos_dst_v) { return (true ); } - if (_center_pos_dst_v > other._center_pos_dst_v) { return (false); } - - if (_kernel_scale_h < other._kernel_scale_h ) { return (true ); } - if (_kernel_scale_h > other._kernel_scale_h ) { return (false); } - - if (_kernel_scale_v < other._kernel_scale_v ) { return (true ); } - if (_kernel_scale_v > other._kernel_scale_v ) { return (false); } - - if (_add_cst < other._add_cst ) { return (true ); } - - if (_kernel_hash_h < other._kernel_hash_h ) { return (true ); } - if (_kernel_hash_h > other._kernel_hash_h ) { return (false); } - - if (_kernel_hash_v < other._kernel_hash_v ) { return (true ); } - if (_kernel_hash_v > other._kernel_hash_v ) { return (false); } - - return (false); + return std::tie ( + _src_width, + _src_height, + _dst_width, + _dst_height, + _win_x, + _win_y, + _win_w, + _win_h, + _center_pos_src_h, + _center_pos_src_v, + _center_pos_dst_h, + _center_pos_dst_v, + _kernel_scale_h, + _kernel_scale_v, + _add_cst, + _kernel_hash_h, + _kernel_hash_v + ) < std::tie ( + other._src_width, + other._src_height, + other._dst_width, + other._dst_height, + other._win_x, + other._win_y, + other._win_w, + other._win_h, + other._center_pos_src_h, + other._center_pos_src_v, + other._center_pos_dst_h, + other._center_pos_dst_v, + other._kernel_scale_h, + other._kernel_scale_v, + other._add_cst, + other._kernel_hash_h, + other._kernel_hash_v + ); } diff --git a/src/fmtcl/ResampleUtil.cpp b/src/fmtcl/ResampleUtil.cpp new file mode 
100644 index 0000000..2f36a5a --- /dev/null +++ b/src/fmtcl/ResampleUtil.cpp @@ -0,0 +1,126 @@ +/***************************************************************************** + + ResampleUtil.cpp + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#if defined (_MSC_VER) + #pragma warning (1 : 4130 4223 4705 4706) + #pragma warning (4 : 4355 4786 4800) +#endif + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/ChromaPlacement.h" +#include "fmtcl/FilterResize.h" +#include "fmtcl/fnc.h" +#include "fmtcl/InterlacingType.h" +#include "fmtcl/ResamplePlaneData.h" +#include "fmtcl/ResampleSpecPlane.h" +#include "fmtcl/ResampleUtil.h" + +#include + + + +namespace fmtcl +{ + + + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +void ResampleUtil::create_plane_specs (ResamplePlaneData &plane_data, int plane_index, ColorFamily src_cf, int src_w, int src_ss_h, int src_h, int src_ss_v, ChromaPlacement cplace_s, ColorFamily dst_cf, int dst_w, int dst_ss_h, int dst_h, int dst_ss_v, ChromaPlacement cplace_d) +{ + assert (plane_index >= 0); + + ResampleSpecPlane spec; + + spec._src_width = + compute_plane_width (src_cf, src_ss_h, src_w, plane_index); + spec._src_height = + compute_plane_height (src_cf, src_ss_v, src_h, plane_index); + spec._dst_width = + compute_plane_width (dst_cf, dst_ss_h, dst_w, plane_index); + spec._dst_height = + compute_plane_height (dst_cf, dst_ss_v, dst_h, plane_index); + + const double subspl_h = double (1 << src_ss_h); + const double subspl_v = double (1 << src_ss_v); + + 
const ResamplePlaneData::Win & s = plane_data._win; + spec._win_x = s._x / subspl_h; + spec._win_y = s._y / subspl_v; + spec._win_w = s._w / subspl_h; + spec._win_h = s._h / subspl_v; + + spec._add_cst = plane_data._add_cst; + spec._kernel_scale_h = plane_data._kernel_scale_h; + spec._kernel_scale_v = plane_data._kernel_scale_v; + spec._kernel_hash_h = plane_data._kernel_arr [FilterResize::Dir_H].get_hash (); + spec._kernel_hash_v = plane_data._kernel_arr [FilterResize::Dir_V].get_hash (); + + for (int itl_d = 0; itl_d < InterlacingType_NBR_ELT; ++itl_d) + { + for (int itl_s = 0; itl_s < InterlacingType_NBR_ELT; ++itl_s) + { + double cp_s_h = 0; + double cp_s_v = 0; + double cp_d_h = 0; + double cp_d_v = 0; + if (plane_data._preserve_center_flag) + { + ChromaPlacement_compute_cplace ( + cp_s_h, cp_s_v, cplace_s, plane_index, src_ss_h, src_ss_v, + (src_cf == ColorFamily_RGB), + (itl_s != InterlacingType_FRAME), + (itl_s == InterlacingType_TOP) + ); + ChromaPlacement_compute_cplace ( + cp_d_h, cp_d_v, cplace_d, plane_index, dst_ss_h, dst_ss_v, + (dst_cf == ColorFamily_RGB), + (itl_d != InterlacingType_FRAME), + (itl_d == InterlacingType_TOP) + ); + } + + spec._center_pos_src_h = cp_s_h; + spec._center_pos_src_v = cp_s_v; + spec._center_pos_dst_h = cp_d_h; + spec._center_pos_dst_v = cp_d_v; + + plane_data._spec_arr [itl_d] [itl_s] = spec; + } // for itl_s + } // for itl_d +} + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +} // namespace fmtcl + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/ResampleUtil.h b/src/fmtcl/ResampleUtil.h new file mode 100644 index 0000000..f09dfbb --- /dev/null +++ b/src/fmtcl/ResampleUtil.h @@ -0,0 +1,89 @@ +/***************************************************************************** + + ResampleUtil.h + Author: Laurent de Soras, 2021 + +--- Legal 
stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#pragma once +#if ! defined (fmtcl_ResampleUtil_HEADER_INCLUDED) +#define fmtcl_ResampleUtil_HEADER_INCLUDED + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/ChromaPlacement.h" +#include "fmtcl/ColorFamily.h" + + + +namespace fmtcl +{ + + + +class ResamplePlaneData; + +class ResampleUtil +{ + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +public: + + static void create_plane_specs (ResamplePlaneData &plane_data, int plane_index, ColorFamily src_cf, int src_w, int src_ss_h, int src_h, int src_ss_v, ChromaPlacement cplace_s, ColorFamily dst_cf, int dst_w, int dst_ss_h, int dst_h, int dst_ss_v, ChromaPlacement cplace_d); + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +protected: + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + + +/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + ResampleUtil () = delete; + ResampleUtil (const ResampleUtil &other) = delete; + ResampleUtil (ResampleUtil &&other) = delete; + ResampleUtil & operator = (const ResampleUtil &other) = delete; + ResampleUtil & operator = (ResampleUtil &&other) = delete; + bool operator == (const ResampleUtil &other) const = delete; + bool operator != (const ResampleUtil &other) const = delete; + +}; // class ResampleUtil + + + +} // namespace fmtcl + + + +//#include "fmtcl/ResampleUtil.hpp" + + + +#endif // fmtcl_ResampleUtil_HEADER_INCLUDED + + + +/*\\\ EOF 
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/RgbSystem.cpp b/src/fmtcl/RgbSystem.cpp index 12a7a93..25956fe 100644 --- a/src/fmtcl/RgbSystem.cpp +++ b/src/fmtcl/RgbSystem.cpp @@ -39,6 +39,10 @@ namespace fmtcl +constexpr int RgbSystem::_nbr_planes; + + + RgbSystem::Vec2::Vec2 (double c0, double c1) : Inherited ({ { c0, c1 } }) { diff --git a/src/fmtcl/RgbSystem.h b/src/fmtcl/RgbSystem.h index 1422e1d..1a1041f 100644 --- a/src/fmtcl/RgbSystem.h +++ b/src/fmtcl/RgbSystem.h @@ -45,29 +45,31 @@ class RgbSystem public: - static const int NBR_PLANES = 3; + static constexpr int _nbr_planes = 3; class Vec2 - : public std::array + : public std::array { - typedef std::array Inherited; + typedef std::array Inherited; public: Vec2 () = default; Vec2 (double c0, double c1); }; RgbSystem (); - RgbSystem (const RgbSystem &other) = default; - virtual ~RgbSystem () = default; - RgbSystem & operator = (const RgbSystem &other) = default; + RgbSystem (const RgbSystem &other) = default; + RgbSystem (RgbSystem &&other) = default; + virtual ~RgbSystem () = default; + RgbSystem & operator = (const RgbSystem &other) = default; + RgbSystem & operator = (RgbSystem &&other) = default; void set (PrimariesPreset preset); bool is_ready () const; - std::array // x,y coordinates for R, G and B + std::array // x,y coordinates for R, G and B _rgb; Vec2 _white; // XYZ coordinates for the ref. 
white - std::array // R, G, B, W + std::array // R, G, B, W _init_flag_arr; fmtcl::PrimariesPreset // If known _preset; diff --git a/src/fmtcl/Scaler.cpp b/src/fmtcl/Scaler.cpp index aa85390..9685313 100644 --- a/src/fmtcl/Scaler.cpp +++ b/src/fmtcl/Scaler.cpp @@ -932,7 +932,7 @@ void Scaler::build_scale_data () const float thr_1_flt = 1e-5f; if (info._kernel_size == 1) { - const float d_flt = fabs (_coef_flt_arr [info._coef_index] - 1.0f); + const float d_flt = fabsf (_coef_flt_arr [info._coef_index] - 1.0f); info._copy_flt_flag = (d_flt <= thr_1_flt); if (_can_int_flag) diff --git a/src/fmtcl/SplFmt.h b/src/fmtcl/SplFmt.h index 3b7421b..a02ac07 100644 --- a/src/fmtcl/SplFmt.h +++ b/src/fmtcl/SplFmt.h @@ -49,6 +49,8 @@ enum SplFmt +inline bool SplFmt_is_float (SplFmt fmt); +inline bool SplFmt_is_int (SplFmt fmt); inline int SplFmt_get_unit_size (SplFmt fmt); inline int SplFmt_get_data_size (SplFmt fmt); diff --git a/src/fmtcl/SplFmt.hpp b/src/fmtcl/SplFmt.hpp index d29be5b..cecea9e 100644 --- a/src/fmtcl/SplFmt.hpp +++ b/src/fmtcl/SplFmt.hpp @@ -35,6 +35,26 @@ namespace fmtcl +bool SplFmt_is_float (SplFmt fmt) +{ + assert (fmt >= 0); + assert (fmt < SplFmt_NBR_ELT); + + return (fmt == SplFmt_FLOAT); +} + + + +bool SplFmt_is_int (SplFmt fmt) +{ + assert (fmt >= 0); + assert (fmt < SplFmt_NBR_ELT); + + return (fmt != SplFmt_FLOAT); +} + + + int SplFmt_get_unit_size (SplFmt fmt) { assert (fmt >= 0); diff --git a/src/fmtcl/TransCurve.h b/src/fmtcl/TransCurve.h index 2ef520e..2783397 100644 --- a/src/fmtcl/TransCurve.h +++ b/src/fmtcl/TransCurve.h @@ -84,6 +84,18 @@ enum TransCurve +inline bool TransCurve_is_valid (TransCurve curve) +{ + return ( + curve >= 0 + && ( curve < TransCurve_NBR_ELT + || curve > TransCurve_ISO_RANGE_LAST) + && curve < TransCurve_NBR_ELT_CUSTOM + ); +} + + + } // namespace fmtcl diff --git a/src/fmtcl/TransLut.cpp b/src/fmtcl/TransLut.cpp index 56a9f60..718facb 100644 --- a/src/fmtcl/TransLut.cpp +++ b/src/fmtcl/TransLut.cpp @@ -284,7 +284,7 
@@ void TransLut::MapperLog::find_index (const FloatIntMix &val, int &index, float static const uint32_t frac_mask = (1 << frac_size) - 1; const uint32_t val_u = val._i & 0x7FFFFFFF; - const float val_a = fabs (val._f); + const float val_a = fabsf (val._f); // index is set relatively to the x=0 index... if (val_a < val_min) @@ -300,7 +300,7 @@ void TransLut::MapperLog::find_index (const FloatIntMix &val, int &index, float else { index = ((val_u - base) >> frac_size) + 1; - frac = (val_u & frac_mask) * (1.0f / (1 << frac_size)); + frac = float (val_u & frac_mask) * (1.0f / (1 << frac_size)); } // ...and shifted or mirrored depending on the sign @@ -328,8 +328,8 @@ double TransLut::MapperLog::find_val (int index) const assert (index >= 0); assert (index < LOGLUT_SIZE); - static const float val_min = 1.0f / (int64_t (1) << -LOGLUT_MIN_L2); - static const int seg_size = 1 << LOGLUT_RES_L2; + static constexpr float val_min = 1.0f / (int64_t (1) << -LOGLUT_MIN_L2); + static constexpr int seg_size = 1 << LOGLUT_RES_L2; // float is OK because the values are exactly represented in float. 
float val = 0; @@ -339,8 +339,8 @@ double TransLut::MapperLog::find_val (int index) const const int ind_3 = std::abs (ind_2) - 1; const int log2_part = ind_3 >> LOGLUT_RES_L2; const int seg_part = ind_3 & (seg_size - 1); - const float lerp = seg_part * (1.0f / seg_size); - const float v0 = (int64_t (1) << log2_part) * val_min; + const float lerp = float (seg_part) * (1.0f / seg_size); + const float v0 = float (int64_t (1) << log2_part) * val_min; val = v0 * (1 + lerp); if (ind_2 < 0) { diff --git a/src/fmtcl/TransOpLogC.cpp b/src/fmtcl/TransOpLogC.cpp index fc778db..bf71302 100644 --- a/src/fmtcl/TransOpLogC.cpp +++ b/src/fmtcl/TransOpLogC.cpp @@ -38,27 +38,21 @@ namespace fmtcl -const double TransOpLogC::_noise_margin = -8.0 / 65536; - - - /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ -TransOpLogC::TransOpLogC (bool inv_flag, Type type) +TransOpLogC::TransOpLogC (bool inv_flag, Type type, ExpIdx ei) : _inv_flag (inv_flag) -, _cut ((type == Type_VLOG) ? 0.01 : (type == Type_LOGC_V2) ? 0.000000 : 0.010591) -, _a ( (type == Type_VLOG) ? 1.0 : (type == Type_LOGC_V2) ? 5.061087 : 5.555556) -, _b ( (type == Type_VLOG) ? 0.00873 : (type == Type_LOGC_V2) ? 0.089004 : 0.052272) -, _c ( (type == Type_VLOG) ? 0.241514 : (type == Type_LOGC_V2) ? 0.247189 : 0.247190) -, _d ( (type == Type_VLOG) ? 0.598206 : (type == Type_LOGC_V2) ? 0.391007 : 0.385537) -, _e ( (type == Type_VLOG) ? 5.6 : (type == Type_LOGC_V2) ? 4.950469 : 5.367655) -, _f ( (type == Type_VLOG) ? 0.125 : (type == Type_LOGC_V2) ? 0.131313 : 0.092809) -, _n ( (type == Type_VLOG) ? 0 : _noise_margin) -, _cut_i (_e * _cut + _f) +, _n ((type == Type_VLOG) ? 0 : _noise_margin) +, _curve ( + (type == Type_VLOG ) ? _vlog + : (type == Type_LOGC_V2) ? _v2_table [ei] + : _v3_table [ei] + ) { - // Nothing + assert (ei >= 0); + assert (ei < ExpIdx_NBR_ELT); } @@ -66,12 +60,39 @@ TransOpLogC::TransOpLogC (bool inv_flag, Type type) // 1 is log peak white. 
double TransOpLogC::operator () (double x) const { - return ((_inv_flag) ? compute_inverse (x) : compute_direct (x)); + return (_inv_flag) ? compute_inverse (x) : compute_direct (x); } double TransOpLogC::get_max () const { - return (compute_inverse (1.0)); + return compute_inverse (1.0); +} + + + +TransOpLogC::ExpIdx TransOpLogC::conv_logc_ei (int val_raw) +{ + ExpIdx ei = ExpIdx_INVALID; + + switch (val_raw) + { + case 160: ei = ExpIdx_160; break; + case 200: ei = ExpIdx_200; break; + case 250: ei = ExpIdx_250; break; + case 320: ei = ExpIdx_320; break; + case 400: ei = ExpIdx_400; break; + case 500: ei = ExpIdx_500; break; + case 640: ei = ExpIdx_640; break; + case 800: ei = ExpIdx_800; break; + case 1000: ei = ExpIdx_1000; break; + case 1280: ei = ExpIdx_1280; break; + case 1600: ei = ExpIdx_1600; break; + default: + assert (false); + break; + } + + return ei; } @@ -88,28 +109,69 @@ double TransOpLogC::compute_direct (double x) const { x = std::max (x, _n); double y = - (x > _cut ) - ? _c * log10 (_a * x + _b) + _d - : _e * x + _f; + (x > _curve._cut ) + ? _curve._c * log10 (_curve._a * x + _curve._b) + _curve._d + : _curve._e * x + _curve._f; y = std::min (y, 1.0); - return (y); + return y; } double TransOpLogC::compute_inverse (double x) const { x = std::min (x, 1.0); double y = - (x > _cut_i) - ? (pow (10, (x - _d) / _c) - _b) / _a - : (x - _f) / _e; + (x > _curve._cut_i) + ? 
(pow (10, (x - _curve._d) / _curve._c) - _curve._b) / _curve._a + : (x - _curve._f) / _curve._e; y = std::max (y, _n); - return (y); + return y; } +const double TransOpLogC::_noise_margin = -8.0 / 65536; + +const TransOpLogC::CurveData TransOpLogC::_vlog = +{ + 0.01, 1.0, 0.00873, 0.241514, 0.598206, 5.6, 0.125, 5.6 * 0.01 + 0.125 +}; + +const std::array TransOpLogC::_v2_table = +{{ + // cut, a, b, c, d, e, f, e*cut+f + { 0.0, 5.061087, 0.089004, 0.269035, 0.391007, 6.332427, 0.108361, 0.108361 }, + { 0.0, 5.061087, 0.089004, 0.266007, 0.391007, 6.189953, 0.111543, 0.111543 }, + { 0.0, 5.061087, 0.089004, 0.262978, 0.391007, 6.034414, 0.114725, 0.114725 }, + { 0.0, 5.061087, 0.089004, 0.259627, 0.391007, 5.844973, 0.118246, 0.118246 }, + { 0.0, 5.061087, 0.089004, 0.256598, 0.391007, 5.656190, 0.121428, 0.121428 }, + { 0.0, 5.061087, 0.089004, 0.253569, 0.391007, 5.449261, 0.124610, 0.124610 }, + { 0.0, 5.061087, 0.089004, 0.250218, 0.391007, 5.198031, 0.128130, 0.128130 }, + { 0.0, 5.061087, 0.089004, 0.247189, 0.391007, 4.950469, 0.131313, 0.131313 }, + { 0.0, 5.061087, 0.089004, 0.244161, 0.391007, 4.684112, 0.134495, 0.134495 }, + { 0.0, 5.061087, 0.089004, 0.240810, 0.391007, 4.369609, 0.138015, 0.138015 }, + { 0.0, 5.061087, 0.089004, 0.237781, 0.391007, 4.070466, 0.141197, 0.141197 } +}}; + +const std::array TransOpLogC::_v3_table = +{{ + // cut, a, b, c, d, e, f, e*cut+f + { 0.005561, 5.555556, 0.080216, 0.269036, 0.381991, 5.842037, 0.092778, 0.125266 }, + { 0.006208, 5.555556, 0.076621, 0.266007, 0.382478, 5.776265, 0.092782, 0.128643 }, + { 0.006871, 5.555556, 0.072941, 0.262978, 0.382966, 5.710494, 0.092786, 0.132021 }, + { 0.007622, 5.555556, 0.068768, 0.259627, 0.383508, 5.637732, 0.092791, 0.135761 }, + { 0.008318, 5.555556, 0.064901, 0.256598, 0.383999, 5.571960, 0.092795, 0.139142 }, + { 0.009031, 5.555556, 0.060939, 0.253569, 0.384493, 5.506188, 0.092800, 0.142526 }, + { 0.009840, 5.555556, 0.056443, 0.250219, 0.385040, 5.433426, 0.092805, 
0.146271 }, + { 0.010591, 5.555556, 0.052272, 0.247190, 0.385537, 5.367655, 0.092809, 0.149658 }, + { 0.011361, 5.555556, 0.047996, 0.244161, 0.386036, 5.301883, 0.092814, 0.153047 }, + { 0.012235, 5.555556, 0.043137, 0.240810, 0.386590, 5.229121, 0.092819, 0.156799 }, + { 0.013047, 5.555556, 0.038625, 0.237781, 0.387093, 5.163350, 0.092824, 0.160192 } +}}; + + + } // namespace fmtcl diff --git a/src/fmtcl/TransOpLogC.h b/src/fmtcl/TransOpLogC.h index 9a5af2f..37294c9 100644 --- a/src/fmtcl/TransOpLogC.h +++ b/src/fmtcl/TransOpLogC.h @@ -3,6 +3,11 @@ TransOpLogC.h Author: Laurent de Soras, 2015 +Source: +Harald Brendel, +ALEXA Log C Curve Usage in VFX, +ARRI, 2011-10-05 + --- Legal stuff --- This program is free software. It comes without any warranty, to @@ -29,6 +34,8 @@ To Public License, Version 2, as published by Sam Hocevar. See #include "fmtcl/TransOpInterface.h" +#include + namespace fmtcl @@ -53,13 +60,35 @@ class TransOpLogC Type_NBR_ELT }; - explicit TransOpLogC (bool inv_flag, Type type); + // Exposure Index (EI) + enum ExpIdx + { + ExpIdx_INVALID = -1, + + ExpIdx_160 = 0, + ExpIdx_200, + ExpIdx_250, + ExpIdx_320, + ExpIdx_400, + ExpIdx_500, + ExpIdx_640, + ExpIdx_800, + ExpIdx_1000, + ExpIdx_1280, + ExpIdx_1600, + + ExpIdx_NBR_ELT + }; + + explicit TransOpLogC (bool inv_flag, Type type, ExpIdx ei = ExpIdx_800); virtual ~TransOpLogC () {} // TransOpInterface virtual double operator () (double x) const; virtual double get_max () const; + static ExpIdx conv_logc_ei (int val_raw); + /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ @@ -72,22 +101,35 @@ class TransOpLogC private: + class CurveData + { + public: + double _cut; + double _a; + double _b; + double _c; + double _d; + double _e; + double _f; + double _cut_i; // _e * _cut + _f + }; + double compute_direct (double x) const; double compute_inverse (double x) const; const bool _inv_flag; - const double _cut; - const double _a; - const double _b; - const double _c; - const 
double _d; - const double _e; - const double _f; const double _n; - const double _cut_i; + const CurveData + _curve; static const double _noise_margin; + static const CurveData + _vlog; + static const std::array + _v2_table; + static const std::array + _v3_table; diff --git a/src/fmtcl/TransUtil.cpp b/src/fmtcl/TransUtil.cpp new file mode 100644 index 0000000..8da0469 --- /dev/null +++ b/src/fmtcl/TransUtil.cpp @@ -0,0 +1,471 @@ +/***************************************************************************** + + TransUtil.cpp + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#if defined (_MSC_VER) + #pragma warning (1 : 4130 4223 4705 4706) + #pragma warning (4 : 4355 4786 4800) +#endif + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/TransOp2084.h" +#include "fmtcl/TransOpAcesCc.h" +#include "fmtcl/TransOpAffine.h" +#include "fmtcl/TransOpBypass.h" +#include "fmtcl/TransOpCanonLog.h" +#include "fmtcl/TransOpCompose.h" +#include "fmtcl/TransOpContrast.h" +#include "fmtcl/TransOpErimm.h" +#include "fmtcl/TransOpFilmStream.h" +#include "fmtcl/TransOpHlg.h" +#include "fmtcl/TransOpLinPow.h" +#include "fmtcl/TransOpLogC.h" +#include "fmtcl/TransOpLogTrunc.h" +#include "fmtcl/TransOpPow.h" +#include "fmtcl/TransOpSLog.h" +#include "fmtcl/TransOpSLog3.h" +#include "fmtcl/TransUtil.h" +#include "fstb/fnc.h" + +#include + + + +namespace fmtcl +{ + + + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +std::unique_ptr TransUtil::build_lut (PicFmt dst_fmt, TransCurve curve_d, 
TransOpLogC::ExpIdx logc_ei_d, PicFmt src_fmt, TransCurve curve_s, TransOpLogC::ExpIdx logc_ei_s, double contrast, double gcor, double lvl_black, bool sse2_flag, bool avx2_flag) +{ + assert (dst_fmt.is_valid ()); + assert (TransCurve_is_valid (curve_d)); + assert (logc_ei_d >= 0); + assert (logc_ei_d < TransOpLogC::ExpIdx_NBR_ELT); + assert (src_fmt.is_valid ()); + assert (TransCurve_is_valid (curve_s)); + assert (logc_ei_s >= 0); + assert (logc_ei_s < TransOpLogC::ExpIdx_NBR_ELT); + assert (contrast > 0); + assert (gcor > 0); + assert (lvl_black >= 0); + + OpSPtr op_s = conv_curve_to_op (curve_s, true , logc_ei_s); + OpSPtr op_d = conv_curve_to_op (curve_d, false, logc_ei_d); + + // Linear or log LUT? + bool loglut_flag = false; + if ( SplFmt_is_float (src_fmt._sf) + && curve_s == TransCurve_LINEAR) + { + // Curves with extended range or with fast-evolving slope at 0. + // Actually we could just use the log LUT for all the curves...? + // 10 bits per stop + interpolation should be enough for all of them. + // What about the speed? 
+ if ( curve_d == TransCurve_470BG + || curve_d == TransCurve_LINEAR + || curve_d == TransCurve_61966_2_4 + || curve_d == TransCurve_2084 + || curve_d == TransCurve_428 + || curve_d == TransCurve_HLG + || curve_d == TransCurve_1886 + || curve_d == TransCurve_1886A + || curve_d == TransCurve_SLOG + || curve_d == TransCurve_SLOG2 + || curve_d == TransCurve_SLOG3 + || curve_d == TransCurve_LOGC2 + || curve_d == TransCurve_LOGC3 + || curve_d == TransCurve_CANONLOG + || curve_d == TransCurve_ACESCC + || curve_d == TransCurve_ERIMM) + { + loglut_flag = true; + } + if (gcor < 0.5) + { + loglut_flag = true; + } + if (fabs (contrast) >= 3.0/2 || fabs (contrast) <= 2.0/3) + { + loglut_flag = true; + } + } + + // Black level + const double lw = op_s->get_max (); + if (lvl_black > 0 && lvl_black < lw) + { + /* + Black level (brightness) and contrast settings as defined + in ITU-R BT.1886: + L = a' * fi (V + b') + + With: + fi = EOTF (gamma to linear) + L = Lb for V = 0 + L = Lw for V = Vmax + + For power functions, could be rewritten as: + L = fi (a * V + b) + + Substitution: + Lb = fi ( b) + Lw = fi (a * Vmax + b) + + Then, given: + f = OETF (linear to gamma) + + We get: + f (Lb) = b + f (Lw) = a * Vmax + b + + b = f (Lb) + a = (f (Lw) - f (Lb)) / Vmax + */ + auto oetf = conv_curve_to_op (curve_s, false, logc_ei_s); + const double lwg = (*oetf) (lw ); + const double lbg = (*oetf) (lvl_black); + const double vmax = lwg; + const double a = (lwg - lbg) / vmax; + const double b = lbg; + auto op_a = std::make_shared (a, b); + op_s = std::make_shared (op_a, op_s); + } + + // Gamma correction + if (! fstb::is_eq (gcor, 1.0)) + { + auto op_g = + std::make_shared (true, gcor, 1, 1e6); + op_d = std::make_shared (op_g, op_d); + } + + // Contrast + if (! 
fstb::is_eq (contrast, 1.0)) + { + auto op_c = + std::make_shared (contrast); + op_d = std::make_shared (op_c, op_d); + } + + // LUTify + auto op_f = std::make_shared (op_s, op_d); + + auto lut_uptr = std::make_unique ( + *op_f, loglut_flag, + src_fmt._sf, src_fmt._res, src_fmt._full_flag, + dst_fmt._sf, dst_fmt._res, dst_fmt._full_flag, + sse2_flag, avx2_flag + ); + + return lut_uptr; +} + + + +// str should be already converted to lower case +TransCurve TransUtil::conv_string_to_curve (const std::string &str) +{ + assert (! str.empty ()); + + TransCurve c = TransCurve_UNDEF; + if (str == "709") + { + c = TransCurve_709; + } + else if (str == "470m") + { + c = TransCurve_470M; + } + else if (str == "470bg") + { + c = TransCurve_470BG; + } + else if (str == "601") + { + c = TransCurve_601; + } + else if (str == "240") + { + c = TransCurve_240; + } + else if (str.empty () || str == "linear") + { + c = TransCurve_LINEAR; + } + else if (str == "log100") + { + c = TransCurve_LOG100; + } + else if (str == "log316") + { + c = TransCurve_LOG316; + } + else if (str == "61966-2-4") + { + c = TransCurve_61966_2_4; + } + else if (str == "1361") + { + c = TransCurve_1361; + } + else if (str == "61966-2-1" || str == "srgb" || str == "sycc") + { + c = TransCurve_SRGB; + } + else if (str == "2020_10") + { + c = TransCurve_2020_10; + } + else if (str == "2020_12" || str == "2020") + { + c = TransCurve_2020_12; + } + else if (str == "2084") + { + c = TransCurve_2084; + } + else if (str == "428-1" || str == "428") + { + c = TransCurve_428; + } + else if (str == "hlg") + { + c = TransCurve_HLG; + } + else if (str == "1886") + { + c = TransCurve_1886; + } + else if (str == "1886a") + { + c = TransCurve_1886A; + } + else if (str == "filmstream") + { + c = TransCurve_FILMSTREAM; + } + else if (str == "slog") + { + c = TransCurve_SLOG; + } + else if (str == "logc2") + { + c = TransCurve_LOGC2; + } + else if (str == "logc3") + { + c = TransCurve_LOGC3; + } + else if (str == "canonlog") + 
{ + c = TransCurve_CANONLOG; + } + else if (str == "adobergb") + { + c = TransCurve_ADOBE_RGB; + } + else if (str == "romm") + { + c = TransCurve_ROMM_RGB; + } + else if (str == "acescc") + { + c = TransCurve_ACESCC; + } + else if (str == "erimm") + { + c = TransCurve_ERIMM; + } + else if (str == "slog2") + { + c = TransCurve_SLOG2; + } + else if (str == "slog3") + { + c = TransCurve_SLOG3; + } + else if (str == "vlog") + { + c = TransCurve_VLOG; + } + else + { + assert (false); + } + + return c; +} + + + +TransUtil::OpSPtr TransUtil::conv_curve_to_op (TransCurve c, bool inv_flag, TransOpLogC::ExpIdx logc_ei) +{ + assert (c >= 0); + assert (logc_ei >= 0); + assert (logc_ei < TransOpLogC::ExpIdx_NBR_ELT); + + OpSPtr ptr; + + switch (c) + { + case TransCurve_709: + case TransCurve_601: + case TransCurve_2020_10: + ptr = OpSPtr (new TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5)); + break; + case TransCurve_470BG: + ptr = OpSPtr (new TransOpPow (inv_flag, 2.8)); + break; + case TransCurve_240: + ptr = OpSPtr (new TransOpLinPow (inv_flag, 1.1115, 0.0228, 0.45, 4.0)); + break; + case TransCurve_LINEAR: + ptr = OpSPtr (new TransOpBypass); + break; + case TransCurve_LOG100: + ptr = OpSPtr (new TransOpLogTrunc (inv_flag, 0.5, 0.01)); + break; + case TransCurve_LOG316: + ptr = OpSPtr (new TransOpLogTrunc (inv_flag, 0.4, sqrt (10) / 1000)); + break; + case TransCurve_61966_2_4: + ptr = OpSPtr (new TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5, -1e9, 1e9)); + break; + case TransCurve_1361: + ptr = OpSPtr (new TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5, -0.25, 1.33, 4)); + break; + case TransCurve_470M: // Assumed display gamma 2.2, almost like sRGB. 
+ case TransCurve_SRGB: +#if 1 + { + // More exact formula giving C1 continuity + // https://en.wikipedia.org/wiki/SRGB#Theory_of_the_transformation + const double gamma = 2.4; + const double alpha = 1.055; + const double k0 = (alpha - 1) / (gamma - 1); + const double phi = + (pow (alpha, gamma) * pow (gamma - 1, gamma - 1)) + / (pow (alpha - 1, gamma - 1) * pow (gamma, gamma)); + ptr = OpSPtr (new TransOpLinPow (inv_flag, alpha, k0 / phi, 1.0 / gamma, phi)); + } +#else + // Rounded constants used in IEC 61966-2-1 + ptr = OpSPtr (new TransOpLinPow (inv_flag, 1.055, 0.04045 / 12.92, 1.0 / 2.4, 12.92)); +#endif + break; + case TransCurve_2020_12: + ptr = OpSPtr (new TransOpLinPow (inv_flag, 1.09929682680944, 0.018053968510807, 0.45, 4.5)); + break; + case TransCurve_2084: + ptr = OpSPtr (new TransOp2084 (inv_flag)); + break; + case TransCurve_428: + ptr = OpSPtr (new TransOpPow (inv_flag, 2.6, 48.0 / 52.37)); + break; + case TransCurve_HLG: + ptr = OpSPtr (new TransOpHlg (inv_flag)); + break; + case TransCurve_1886: + ptr = OpSPtr (new TransOpPow (inv_flag, 2.4)); + break; + case TransCurve_1886A: + { + const double a1 = 2.6; + const double a2 = 3.0; + const double k0 = 0.35; + const double slope = pow (k0, a2 - a1); + const double beta = pow (k0, a1); + ptr = OpSPtr (new TransOpLinPow ( + inv_flag, 1, beta, 1.0 / a1, slope, 0, 1, 1, 1.0 / a2 + )); + } + break; + case TransCurve_FILMSTREAM: + ptr = OpSPtr (new TransOpFilmStream (inv_flag)); + break; + case TransCurve_SLOG: + ptr = OpSPtr (new TransOpSLog (inv_flag, false)); + break; + case TransCurve_LOGC2: + ptr = OpSPtr (new TransOpLogC ( + inv_flag, TransOpLogC::Type_LOGC_V2, logc_ei + )); + break; + case TransCurve_LOGC3: + ptr = OpSPtr (new TransOpLogC ( + inv_flag, TransOpLogC::Type_LOGC_V3, logc_ei + )); + break; + case TransCurve_CANONLOG: + ptr = OpSPtr (new TransOpCanonLog (inv_flag)); + break; + case TransCurve_ADOBE_RGB: + ptr = OpSPtr (new TransOpPow (inv_flag, 563.0 / 256)); + break; + case 
TransCurve_ROMM_RGB: + ptr = OpSPtr (new TransOpLinPow (inv_flag, 1, 0.001953, 1.0 / 1.8, 16)); + break; + case TransCurve_ACESCC: + ptr = OpSPtr (new TransOpAcesCc (inv_flag)); + break; + case TransCurve_ERIMM: + ptr = OpSPtr (new TransOpErimm (inv_flag)); + break; + case TransCurve_SLOG2: + ptr = OpSPtr (new TransOpSLog (inv_flag, true)); + break; + case TransCurve_SLOG3: + ptr = OpSPtr (new TransOpSLog3 (inv_flag)); + break; + case TransCurve_VLOG: + ptr = OpSPtr (new TransOpLogC (inv_flag, TransOpLogC::Type_VLOG)); + break; + default: + assert (false); + break; + } + + if (ptr.get () == 0) + { + ptr = OpSPtr (new TransOpBypass); + } + + return ptr; +} + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +} // namespace fmtcl + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/TransUtil.h b/src/fmtcl/TransUtil.h new file mode 100644 index 0000000..add44f1 --- /dev/null +++ b/src/fmtcl/TransUtil.h @@ -0,0 +1,100 @@ +/***************************************************************************** + + TransUtil.h + Author: Laurent de Soras, 2021 + +--- Legal stuff --- + +This program is free software. It comes without any warranty, to +the extent permitted by applicable law. You can redistribute it +and/or modify it under the terms of the Do What The Fuck You Want +To Public License, Version 2, as published by Sam Hocevar. See +http://www.wtfpl.net/ for more details. + +*Tab=3***********************************************************************/ + + + +#pragma once +#if ! 
defined (fmtcl_TransUtil_HEADER_INCLUDED) +#define fmtcl_TransUtil_HEADER_INCLUDED + + + +/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +#include "fmtcl/PicFmt.h" +#include "fmtcl/TransCurve.h" +#include "fmtcl/TransLut.h" +#include "fmtcl/TransOpInterface.h" +#include "fmtcl/TransOpLogC.h" + +#include +#include + + + +namespace fmtcl +{ + + + +class TransUtil +{ + +/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +public: + + typedef std::shared_ptr OpSPtr; + + static std::unique_ptr + build_lut (PicFmt dst_fmt, TransCurve curve_d, TransOpLogC::ExpIdx logc_ei_d, PicFmt src_fmt, TransCurve curve_s, TransOpLogC::ExpIdx logc_ei_s, double contrast, double gcor, double lvl_black, bool sse2_flag, bool avx2_flag); + + static TransCurve + conv_string_to_curve (const std::string &str); + static OpSPtr conv_curve_to_op (TransCurve c, bool inv_flag, TransOpLogC::ExpIdx logc_ei); + + + +/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +protected: + + + +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + + +/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + +private: + + TransUtil () = delete; + TransUtil (const TransUtil &other) = delete; + TransUtil (TransUtil &&other) = delete; + TransUtil & operator = (const TransUtil &other) = delete; + TransUtil & operator = (TransUtil &&other) = delete; + bool operator == (const TransUtil &other) const = delete; + bool operator != (const TransUtil &other) const = delete; + +}; // class TransUtil + + + +} // namespace fmtcl + + + +//#include "fmtcl/TransUtil.hpp" + + + +#endif // fmtcl_TransUtil_HEADER_INCLUDED + + + +/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/VoidAndCluster.h b/src/fmtcl/VoidAndCluster.h index 2f0305e..74be32d 100644 --- a/src/fmtcl/VoidAndCluster.h +++ b/src/fmtcl/VoidAndCluster.h @@ -3,6 
+3,18 @@ VoidAndCluster.h Author: Laurent de Soras, 2015 +Reference: +Robert Ulichney, +The Void-And-Cluster Method for Dither Array Generation +Proc. SPIE, Human Vision, Visual Processing, and Digital Display IV, +vol. 1913, pp. 332-343, Feb. 1-4, 1993 + +*** TO DO: implement: +Hakan Ancin, Anoop K. Bhattacharjya, Joseph Shou-Pyng Shu, +New void-and-cluster method for improved halftone uniformity, +Journal of Electronic Imaging 8(1), January 1999, +https://doi.org/10.1117/1.482701 *** + --- Legal stuff --- This program is free software. It comes without any warranty, to diff --git a/src/fmtcl/fnc.cpp b/src/fmtcl/fnc.cpp index 49f1a0c..5174035 100644 --- a/src/fmtcl/fnc.cpp +++ b/src/fmtcl/fnc.cpp @@ -25,6 +25,11 @@ To Public License, Version 2, as published by Sam Hocevar. See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ #include "fmtcl/fnc.h" +#include "fmtcl/Mat4.h" +#include "fmtcl/MatrixProc.h" +#include "fmtcl/PicFmt.h" + +#include #include #include @@ -36,10 +41,104 @@ namespace fmtcl +/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ + + + +static void override_fmt_with_csp (PicFmt &fmt, ColorSpaceH265 csp_out, int plane_out) +{ + if (plane_out >= 0) + { + if (csp_out == ColorSpaceH265_RGB) + { + fmt._col_fam = ColorFamily_RGB; + } + else if (csp_out == ColorSpaceH265_YCGCO) + { + fmt._col_fam = ColorFamily_YCGCO; + } + else + { + fmt._col_fam = ColorFamily_YUV; + } + } +} + + + +// Int: depends on the input format (may be float too) +// R, G, B, Y: [0 ; 1] +// U, V, Cg, Co : [-0.5 ; 0.5] +static void make_mat_flt_int (Mat4 &m, bool to_flt_flag, const PicFmt &fmt) +{ + PicFmt fmt2 (fmt); + fmt2._sf = SplFmt_FLOAT; + + const PicFmt * fmt_src_ptr = &fmt2; + const PicFmt * fmt_dst_ptr = &fmt; + if (to_flt_flag) + { + std::swap (fmt_src_ptr, fmt_dst_ptr); + } + + double ay, by; + double ac, bc; + const int ch_plane = (fmt_dst_ptr->_col_fam != ColorFamily_GRAY) ? 
1 : 0; + compute_fmt_mac_cst (ay, by, *fmt_dst_ptr, *fmt_src_ptr, 0 ); + compute_fmt_mac_cst (ac, bc, *fmt_dst_ptr, *fmt_src_ptr, ch_plane); + + m[0][0] = ay; m[0][1] = 0; m[0][2] = 0; m[0][3] = by; + m[1][0] = 0; m[1][1] = ac; m[1][2] = 0; m[1][3] = bc; + m[2][0] = 0; m[2][1] = 0; m[2][2] = ac; m[2][3] = bc; + m[3][0] = 0; m[3][1] = 0; m[3][2] = 0; m[3][3] = 1; +} + + + /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ +int compute_plane_width (ColorFamily col_fam, int ss_h, int base_w, int plane_index) +{ + assert (col_fam >= 0); + assert (col_fam < ColorFamily_NBR_ELT); + assert (plane_index >= 0); + assert (ss_h >= 0); + assert (base_w >= 0); + + int plane_w = base_w; + if (is_chroma_plane (col_fam, plane_index)) + { + assert ((base_w & ((1 << ss_h) - 1)) == 0); + plane_w >>= ss_h; + } + + return plane_w; +} + + + +int compute_plane_height (ColorFamily col_fam, int ss_v, int base_h, int plane_index) +{ + assert (col_fam >= 0); + assert (col_fam < ColorFamily_NBR_ELT); + assert (plane_index >= 0); + assert (ss_v >= 0); + assert (base_h >= 0); + + int plane_h = base_h; + if (is_chroma_plane (col_fam, plane_index)) + { + assert ((base_h & ((1 << ss_v) - 1)) == 0); + plane_h >>= ss_v; + } + + return plane_h; +} + + + bool has_chroma (ColorFamily col_fam) { assert (col_fam >= 0); @@ -73,25 +172,21 @@ bool is_full_range_default (ColorFamily col_fam) -double compute_pix_scale (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane_index, bool full_flag) +double compute_pix_scale (const PicFmt &fmt, int plane_index) { - assert (spl_fmt >= 0); - assert (spl_fmt < SplFmt_NBR_ELT); - assert (nbr_bits > 0); - assert (col_fam >= 0); - assert (col_fam < ColorFamily_NBR_ELT); + assert (fmt.is_valid ()); assert (plane_index >= 0); double scale = 1.0; - if (spl_fmt != SplFmt_FLOAT) + if (fmt._sf != SplFmt_FLOAT) { - const int bps_m8 = nbr_bits - 8; - if (full_flag) + const int bps_m8 = fmt._res - 8; + if (fmt._full_flag) { - scale = double 
((uint64_t (1) << nbr_bits) - 1); + scale = double ((uint64_t (1) << fmt._res) - 1); } - else if (is_chroma_plane (col_fam, plane_index)) + else if (is_chroma_plane (fmt._col_fam, plane_index)) { scale = double ((uint64_t (224)) << bps_m8); } @@ -101,32 +196,28 @@ double compute_pix_scale (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int } } - return (scale); + return scale; } -double get_pix_min (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane_index, bool full_flag) +double get_pix_min (const PicFmt &fmt, int plane_index) { - assert (spl_fmt >= 0); - assert (spl_fmt < SplFmt_NBR_ELT); - assert (nbr_bits > 0); - assert (col_fam >= 0); - assert (col_fam < ColorFamily_NBR_ELT); + assert (fmt.is_valid ()); assert (plane_index >= 0); double add_val = 0; - if (spl_fmt == SplFmt_FLOAT) + if (fmt._sf == SplFmt_FLOAT) { - if (is_chroma_plane (col_fam, plane_index)) + if (is_chroma_plane (fmt._col_fam, plane_index)) { add_val = -0.5; } } - else if (full_flag) + else if (fmt._full_flag) { - if (is_chroma_plane (col_fam, plane_index)) + if (is_chroma_plane (fmt._col_fam, plane_index)) { // So the neutral value (0) is exactly: 1 << (nbr_bits - 1) add_val = 0.5; @@ -134,38 +225,78 @@ double get_pix_min (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane } else { - add_val = double ((uint64_t (16)) << (nbr_bits - 8)); + add_val = double ((uint64_t (16)) << (fmt._res - 8)); } - return (add_val); + return add_val; } -void compute_fmt_mac_cst (double &gain, double &add_cst, SplFmt dst_spl_fmt, int dst_nbr_bits, ColorFamily dst_col_fam, bool dst_full_flag, SplFmt src_spl_fmt, int src_nbr_bits, ColorFamily src_col_fam, bool src_full_flag, int plane_index) +void compute_fmt_mac_cst (double &gain, double &add_cst, const PicFmt &dst_fmt, const PicFmt &src_fmt, int plane_index) { // (X_d - m_d) / S_d = (X_s - m_s) / S_s // X_d = X_s * (S_d / S_s) + (m_d - m_s * S_d / S_s) // gain add_cst - const double scale_src = compute_pix_scale ( - src_spl_fmt, 
src_nbr_bits, src_col_fam, plane_index, src_full_flag - ); - const double scale_dst = compute_pix_scale ( - dst_spl_fmt, dst_nbr_bits, dst_col_fam, plane_index, dst_full_flag - ); + const double scale_src = compute_pix_scale (src_fmt, plane_index); + const double scale_dst = compute_pix_scale (dst_fmt, plane_index); gain = scale_dst / scale_src; - const double cst_src = get_pix_min ( - src_spl_fmt, src_nbr_bits, src_col_fam, plane_index, src_full_flag - ); - const double cst_dst = get_pix_min ( - dst_spl_fmt, dst_nbr_bits, dst_col_fam, plane_index, dst_full_flag - ); + const double cst_src = get_pix_min (src_fmt, plane_index); + const double cst_dst = get_pix_min (dst_fmt, plane_index); add_cst = cst_dst - cst_src * gain; } +int prepare_matrix_coef (MatrixProc &mat_proc, const Mat4 &mat_main, const PicFmt &dst_fmt, const PicFmt &src_fmt, ColorSpaceH265 csp_out, int plane_out) +{ + const bool int_proc_flag = + (SplFmt_is_int (src_fmt._sf) && SplFmt_is_int (dst_fmt._sf)); + + Mat4 m (1, Mat4::Preset_DIAGONAL); + + PicFmt dst_fmt2 = dst_fmt; + if (int_proc_flag) + { + // For the coefficient calculation, use the same output bitdepth + // as the input. The bitdepth change will be done separately with + // a simple bitshift. + dst_fmt2._res = src_fmt._res; + } + + override_fmt_with_csp (dst_fmt2, csp_out, plane_out); + + Mat4 m1s; + Mat4 m1d; + make_mat_flt_int (m1s, true , src_fmt ); + make_mat_flt_int (m1d, false, dst_fmt2); + m *= m1d; + if (! int_proc_flag) + { + if (plane_out > 0 && is_chroma_plane (dst_fmt2._col_fam, plane_out)) + { + // When we extract a single plane, it's a conversion to R or + // to Y, so the outout range is always [0; 1]. Therefore we + // need to offset the chroma planes. 
+ m [plane_out] [MatrixProc::_nbr_planes] += 0.5; + } + } + m *= mat_main; + m *= m1s; + + const MatrixProc::Err ret_val = mat_proc.configure ( + m, int_proc_flag, + src_fmt._sf, src_fmt._res, + dst_fmt._sf, dst_fmt._res, + plane_out + ); + + return ret_val; +} + + + /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ diff --git a/src/fmtcl/fnc.h b/src/fmtcl/fnc.h index d0c9659..6c908aa 100644 --- a/src/fmtcl/fnc.h +++ b/src/fmtcl/fnc.h @@ -28,6 +28,7 @@ To Public License, Version 2, as published by Sam Hocevar. See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ #include "fmtcl/ColorFamily.h" +#include "fmtcl/ColorSpaceH265.h" #include "fmtcl/SplFmt.h" @@ -35,14 +36,21 @@ To Public License, Version 2, as published by Sam Hocevar. See namespace fmtcl { +class Mat4; +class MatrixProc; +class PicFmt; + +int compute_plane_width (ColorFamily col_fam, int ss_h, int base_w, int plane_index); +int compute_plane_height (ColorFamily col_fam, int ss_v, int base_h, int plane_index); bool has_chroma (ColorFamily col_fam); bool is_chroma_plane (ColorFamily col_fam, int plane_index); bool is_full_range_default (ColorFamily col_fam); -double compute_pix_scale (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane_index, bool full_flag); -double get_pix_min (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane_index, bool full_flag); -void compute_fmt_mac_cst (double &gain, double &add_cst, SplFmt dst_spl_fmt, int dst_nbr_bits, ColorFamily dst_col_fam, bool dst_full_flag, SplFmt src_spl_fmt, int src_nbr_bits, ColorFamily src_col_fam, bool src_full_flag, int plane_index); +double compute_pix_scale (const PicFmt &fmt, int plane_index); +double get_pix_min (const PicFmt &fmt, int plane_index); +void compute_fmt_mac_cst (double &gain, double &add_cst, const PicFmt &dst_fmt, const PicFmt &src_fmt, int plane_index); +int prepare_matrix_coef (MatrixProc &mat_proc, const Mat4 &mat_main, const PicFmt &dst_fmt, const 
PicFmt &src_fmt, ColorSpaceH265 csp_out, int plane_out); diff --git a/src/fstb/AllocAlign.h b/src/fstb/AllocAlign.h index ce54cfe..0f7cb75 100644 --- a/src/fstb/AllocAlign.h +++ b/src/fstb/AllocAlign.h @@ -30,6 +30,7 @@ To Public License, Version 2, as published by Sam Hocevar. See #include #include +#include @@ -46,7 +47,7 @@ class AllocAlign public: - static const long ALIGNMENT = ALIG; + static constexpr long ALIGNMENT = ALIG; typedef T value_type; typedef value_type * pointer; @@ -56,31 +57,36 @@ class AllocAlign typedef size_t size_type; typedef ptrdiff_t difference_type; - AllocAlign () = default; - AllocAlign (AllocAlign const &other) = default; + AllocAlign () = default; + AllocAlign (AllocAlign const &other) = default; template AllocAlign (AllocAlign const &/*other*/) {} - ~AllocAlign () = default; + ~AllocAlign () = default; // Address - inline pointer address (reference r); - inline const_pointer - address (const_reference r); + [[deprecated]] inline pointer + address (reference r) noexcept; + [[deprecated]] inline const_pointer + address (const_reference r) noexcept; // Memory allocation - inline pointer allocate (size_type n, typename std::allocator ::const_pointer ptr = 0); - inline void deallocate (pointer p, size_type n); + [[deprecated]] inline pointer + allocate (size_type n, const void *ptr); + inline pointer allocate (size_type n); + inline void deallocate (pointer p, size_type n) noexcept; // Size - inline size_type - max_size() const; + [[deprecated]] inline size_type + max_size () const noexcept; // Construction/destruction - inline void construct (pointer ptr, const T & t); - inline void destroy (pointer ptr); + [[deprecated]] inline void + construct (pointer ptr, const T & t); + [[deprecated]] inline void + destroy (pointer ptr); - inline bool operator == (AllocAlign const &other); - inline bool operator != (AllocAlign const &other); + inline bool operator == (AllocAlign const &other) noexcept; + inline bool operator != (AllocAlign const 
&other) noexcept; template struct rebind diff --git a/src/fstb/AllocAlign.hpp b/src/fstb/AllocAlign.hpp index 67538e9..9aeedc3 100644 --- a/src/fstb/AllocAlign.hpp +++ b/src/fstb/AllocAlign.hpp @@ -41,39 +41,46 @@ namespace fstb template -typename AllocAlign ::pointer AllocAlign ::address (reference r) +typename AllocAlign ::pointer AllocAlign ::address (reference r) noexcept { - return (&r); + return &r; } template -typename AllocAlign ::const_pointer AllocAlign ::address (const_reference r) +typename AllocAlign ::const_pointer AllocAlign ::address (const_reference r) noexcept { - return (&r); + return &r; } template -typename AllocAlign ::pointer AllocAlign ::allocate (size_type n, typename std::allocator ::const_pointer /*ptr*/) +typename AllocAlign ::pointer AllocAlign ::allocate (size_type n, const void *ptr) { - static_assert ((sizeof (ptrdiff_t) >= sizeof (void *)), ""); + fstb::unused (ptr); + return allocate (n); +} + - assert (n >= 0); +template +typename AllocAlign ::pointer AllocAlign ::allocate (size_type n) +{ const size_t nbr_bytes = sizeof (T) * n; #if defined (_MSC_VER) - pointer zone_ptr = reinterpret_cast ( + pointer zone_ptr = static_cast ( _aligned_malloc (nbr_bytes, ALIG) ); -#elif ! defined (__MINGW32__) && ! defined (__MINGW64__) && ! defined (__CYGWIN__) +//#elif ! defined (__MINGW32__) && ! defined (__MINGW64__) && ! defined (__CYGWIN__) +#elif (defined (_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) \ + && ! 
defined (STM32H750xx) - pointer zone_ptr = 0; + pointer zone_ptr = nullptr; void * tmp_ptr; if (posix_memalign (&tmp_ptr, ALIG, nbr_bytes) == 0) { @@ -82,16 +89,16 @@ typename AllocAlign ::pointer AllocAlign ::allocate (size_type #else // Platform-independent implementation - const size_t ptr_size = sizeof (void *); - const size_t offset = ptr_size + ALIG - 1; + const size_t ptr_size = sizeof (void *); + const size_t offset = ptr_size + ALIG - 1; const size_t alloc_bytes = offset + nbr_bytes; - void * alloc_ptr = new char [alloc_bytes]; - pointer zone_ptr = 0; - if (alloc_ptr != 0) + void * alloc_ptr = new char [alloc_bytes]; + pointer zone_ptr = nullptr; + if (alloc_ptr != nullptr) { - const ptrdiff_t alloc_l = reinterpret_cast (alloc_ptr); - const ptrdiff_t zone_l = (alloc_l + offset) & (-ALIG); - assert (zone_l >= ptrdiff_t (alloc_l + ptr_size)); + const intptr_t alloc_l = reinterpret_cast (alloc_ptr); + const intptr_t zone_l = (alloc_l + offset) & (-ALIG); + assert (zone_l >= intptr_t (alloc_l + ptr_size)); void ** ptr_ptr = reinterpret_cast (zone_l - ptr_size); *ptr_ptr = alloc_ptr; zone_ptr = reinterpret_cast (zone_l); @@ -99,9 +106,11 @@ typename AllocAlign ::pointer AllocAlign ::allocate (size_type #endif - if (zone_ptr == 0) + if (zone_ptr == nullptr) { +#if defined (__cpp_exceptions) || ! defined (__GNUC__) throw std::bad_alloc (); +#endif } return (zone_ptr); @@ -110,27 +119,38 @@ typename AllocAlign ::pointer AllocAlign ::allocate (size_type template -void AllocAlign ::deallocate (pointer ptr, size_type /*n*/) +void AllocAlign ::deallocate (pointer ptr, size_type n) noexcept { - if (ptr != 0) + fstb::unused (n); + + if (ptr != nullptr) { #if defined (_MSC_VER) - _aligned_free (ptr); + try + { + _aligned_free (ptr); + } + catch (...) + { + assert (false); + } -#elif ! defined (__MINGW32__) && ! defined (__MINGW64__) && ! defined (__CYGWIN__) +//#elif ! defined (__MINGW32__) && ! defined (__MINGW64__) && ! 
defined (__CYGWIN__) +#elif (defined (_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) \ + && ! defined (STM32H750xx) free (ptr); #else // Platform-independent implementation - const size_t ptr_size = sizeof (void *); - const ptrdiff_t zone_l = reinterpret_cast (ptr); - void ** ptr_ptr = reinterpret_cast (zone_l - ptr_size); + const size_t ptr_size = sizeof (void *); + const intptr_t zone_l = reinterpret_cast (ptr); + void ** ptr_ptr = reinterpret_cast (zone_l - ptr_size); void * alloc_ptr = *ptr_ptr; - assert (alloc_ptr != 0); - assert (reinterpret_cast (alloc_ptr) < zone_l); + assert (alloc_ptr != nullptr); + assert (reinterpret_cast (alloc_ptr) < zone_l); delete [] reinterpret_cast (alloc_ptr); @@ -141,7 +161,7 @@ void AllocAlign ::deallocate (pointer ptr, size_type /*n*/) template -typename AllocAlign ::size_type AllocAlign ::max_size () const +typename AllocAlign ::size_type AllocAlign ::max_size () const noexcept { static_assert ((static_cast (-1) > 0), ""); @@ -153,7 +173,7 @@ typename AllocAlign ::size_type AllocAlign ::max_size () const template void AllocAlign ::construct (pointer ptr, const T &t) { - assert (ptr != 0); + assert (ptr != nullptr); new (ptr) T (t); } @@ -163,7 +183,7 @@ void AllocAlign ::construct (pointer ptr, const T &t) template void AllocAlign ::destroy (pointer ptr) { - assert (ptr != 0); + assert (ptr != nullptr); ptr->~T (); } @@ -171,17 +191,17 @@ void AllocAlign ::destroy (pointer ptr) template -bool AllocAlign ::operator == (AllocAlign const &other) +bool AllocAlign ::operator == (AllocAlign const &other) noexcept { fstb::unused (other); - return (true); + return true; } template -bool AllocAlign ::operator != (AllocAlign const &other) +bool AllocAlign ::operator != (AllocAlign const &other) noexcept { return (! 
operator == (other)); } diff --git a/src/fstb/ArrayAlign.h b/src/fstb/ArrayAlign.h index 2039155..c892d39 100644 --- a/src/fstb/ArrayAlign.h +++ b/src/fstb/ArrayAlign.h @@ -63,16 +63,21 @@ class ArrayAlign ArrayAlign & operator = (const ArrayAlign &other); inline const Element & - operator [] (long pos) const; + operator [] (long pos) const noexcept; inline Element & - operator [] (long pos); + operator [] (long pos) noexcept; + + inline const Element * + data () const noexcept; + inline Element * + data () noexcept; static inline long - size (); + size () noexcept; static inline long - length (); + length () noexcept; static inline long - get_alignment (); + get_alignment () noexcept; diff --git a/src/fstb/ArrayAlign.hpp b/src/fstb/ArrayAlign.hpp index 350f455..f32d90a 100644 --- a/src/fstb/ArrayAlign.hpp +++ b/src/fstb/ArrayAlign.hpp @@ -22,8 +22,6 @@ To Public License, Version 2, as published by Sam Hocevar. See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ -#include "fstb/def.h" - #include #include @@ -54,10 +52,9 @@ class DestroyAux public: template static inline void - destroy_elt (T *ptr) + destroy_elt (T * /*ptr*/) { // Nothing - fstb::unused (ptr); } }; @@ -89,8 +86,6 @@ ArrayAlign ::ArrayAlign (const ArrayAlign &other) :/* _data () ,*/ _data_ptr (0) { - assert (&other != 0); - _data_ptr = reinterpret_cast ( reinterpret_cast (&_data [ALIGNMENT - 1]) & -ALIGNMENT ); @@ -122,8 +117,6 @@ ArrayAlign ::~ArrayAlign () template ArrayAlign & ArrayAlign ::operator = (const ArrayAlign &other) { - assert (&other != 0); - if (this != &other) { for (long pos = 0; pos < NBR_ELT; ++pos) @@ -132,55 +125,75 @@ ArrayAlign & ArrayAlign ::operator = (const ArrayAlign } } - return (*this); + return *this; } template -const typename ArrayAlign ::Element & ArrayAlign ::operator [] (long pos) const +const typename ArrayAlign ::Element & ArrayAlign ::operator [] (long pos) const noexcept { assert (_data_ptr != 0); assert (pos >= 0); assert (pos < 
NBR_ELT); - return (_data_ptr [pos]); + return _data_ptr [pos]; } template -typename ArrayAlign ::Element & ArrayAlign ::operator [] (long pos) +typename ArrayAlign ::Element & ArrayAlign ::operator [] (long pos) noexcept { assert (_data_ptr != 0); assert (pos >= 0); assert (pos < NBR_ELT); - return (_data_ptr [pos]); + return _data_ptr [pos]; +} + + + +template +const typename ArrayAlign ::Element * ArrayAlign ::data () const noexcept +{ + assert (_data_ptr != 0); + + return _data_ptr; +} + + + +template +typename ArrayAlign ::Element * ArrayAlign ::data () noexcept +{ + assert (_data_ptr != 0); + + return _data_ptr; } template -long ArrayAlign ::size () +long ArrayAlign ::size () noexcept { - return (NBR_ELT); + return NBR_ELT; } template -long ArrayAlign ::length () +long ArrayAlign ::length () noexcept { - return (NBR_ELT); + return NBR_ELT; } template -long ArrayAlign ::get_alignment () +long ArrayAlign ::get_alignment () noexcept { - return (ALIGNMENT); + return ALIGNMENT; } diff --git a/src/fstb/CpuId.cpp b/src/fstb/CpuId.cpp index 8e0de55..77d4634 100644 --- a/src/fstb/CpuId.cpp +++ b/src/fstb/CpuId.cpp @@ -25,9 +25,8 @@ To Public License, Version 2, as published by Sam Hocevar. 
See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ #include "fstb/CpuId.h" -#include "fstb/def.h" -#if (fstb_ARCHI == fstb_ARCHI_X86) +#if fstb_ARCHI == fstb_ARCHI_X86 #if defined (__GNUC__) #include #elif defined (_MSC_VER) @@ -48,10 +47,9 @@ namespace fstb -// https://en.wikipedia.org/wiki/CPUID CpuId::CpuId () { -#if (fstb_ARCHI == fstb_ARCHI_X86) +#if fstb_ARCHI == fstb_ARCHI_X86 unsigned int eax; unsigned int ebx; @@ -66,6 +64,7 @@ CpuId::CpuId () call_cpuid (0x00000001, 0, eax, ebx, ecx, edx); _mmx_flag = ((edx & (1L << 23)) != 0); + _fxsr_flag = ((edx & (1L << 24)) != 0); _sse_flag = ((edx & (1L << 25)) != 0); _sse2_flag = ((edx & (1L << 26)) != 0); _sse3_flag = ((ecx & (1L << 0)) != 0); @@ -74,6 +73,7 @@ CpuId::CpuId () _fma3_flag = ((ecx & (1L << 16)) != 0); _sse41_flag = ((ecx & (1L << 19)) != 0); _sse42_flag = ((ecx & (1L << 20)) != 0); + _abm_flag = ((ecx & (1L << 23)) != 0); _avx_flag = ((ecx & (1L << 28)) != 0); _f16c_flag = ((ecx & (1L << 29)) != 0); @@ -81,7 +81,9 @@ CpuId::CpuId () { // Extended Features call_cpuid (0x00000007, 0, eax, ebx, ecx, edx); + _bmi1_flag = ((ebx & (1L << 3)) != 0); _avx2_flag = ((ebx & (1L << 5)) != 0); + _bmi2_flag = ((ebx & (1L << 8)) != 0); _avx512f_flag = ((ebx & (1L << 16)) != 0); } @@ -94,6 +96,7 @@ CpuId::CpuId () _isse_flag = ((edx & (1L << 22)) != 0) || _sse_flag; _sse4a_flag = ((ecx & (1L << 6)) != 0); _fma4_flag = ((ecx & (1L << 16)) != 0); + _3dnow_flag = ((ecx & (1L << 31)) != 0); } #endif @@ -101,7 +104,7 @@ CpuId::CpuId () -#if (fstb_ARCHI == fstb_ARCHI_X86) +#if fstb_ARCHI == fstb_ARCHI_X86 void CpuId::call_cpuid (unsigned int fnc_nbr, unsigned int subfnc_nbr, unsigned int &v_eax, unsigned int &v_ebx, unsigned int &v_ecx, unsigned int &v_edx) { @@ -110,11 +113,10 @@ void CpuId::call_cpuid (unsigned int fnc_nbr, unsigned int subfnc_nbr, unsigned #if defined (__x86_64__) __cpuid_count (fnc_nbr, subfnc_nbr, v_eax, v_ebx, v_ecx, v_edx); #else - fstb::unused (subfnc_nbr); __cpuid 
(fnc_nbr, v_eax, v_ebx, v_ecx, v_edx); #endif -#elif (_MSC_VER) +#elif defined (_MSC_VER) int cpu_info [4]; __cpuidex (cpu_info, fnc_nbr, subfnc_nbr); diff --git a/src/fstb/CpuId.h b/src/fstb/CpuId.h index 1150591..764115a 100644 --- a/src/fstb/CpuId.h +++ b/src/fstb/CpuId.h @@ -45,15 +45,16 @@ class CpuId CpuId (); CpuId (const CpuId &other) = default; - virtual ~CpuId () = default; CpuId & operator = (const CpuId &other) = default; -#if (fstb_ARCHI == fstb_ARCHI_X86) +#if fstb_ARCHI == fstb_ARCHI_X86 static void call_cpuid (unsigned int fnc_nbr, unsigned int subfnc_nbr, unsigned int &v_eax, unsigned int &v_ebx, unsigned int &v_ecx, unsigned int &v_edx); #endif bool _mmx_flag = false; + bool _fxsr_flag = false; // FXSAVE, FXRESTOR, CR4 bit 9 + bool _3dnow_flag = false; bool _isse_flag = false; bool _sse_flag = false; bool _sse2_flag = false; @@ -69,6 +70,9 @@ class CpuId bool _avx512f_flag = false; bool _f16c_flag = false; // Half-precision FP bool _cx16_flag = false; // CMPXCHG16B + bool _abm_flag = false; // POPCNT + LZCNT + bool _bmi1_flag = false; // Bit Manipulation Instruction Set + bool _bmi2_flag = false; diff --git a/src/fstb/SingleObj.h b/src/fstb/SingleObj.h index 893a1a4..b4bca90 100644 --- a/src/fstb/SingleObj.h +++ b/src/fstb/SingleObj.h @@ -47,8 +47,8 @@ class SingleObj SingleObj (); virtual ~SingleObj (); - T * operator -> () const; - T & operator * () const; + T * operator -> () const noexcept; + T & operator * () const noexcept; @@ -72,8 +72,11 @@ class SingleObj private: SingleObj (const SingleObj &other) = delete; + SingleObj (const SingleObj &&other) = delete; SingleObj & operator = (const SingleObj &other) = delete; + SingleObj & + operator = (const SingleObj &&other) = delete; bool operator == (const SingleObj &other) const = delete; bool operator != (const SingleObj &other) const = delete; diff --git a/src/fstb/SingleObj.hpp b/src/fstb/SingleObj.hpp index 5bdc086..97798ab 100644 --- a/src/fstb/SingleObj.hpp +++ b/src/fstb/SingleObj.hpp @@ 
-38,23 +38,28 @@ namespace fstb template SingleObj ::SingleObj () : _allo () -, _obj_ptr (0) +, _obj_ptr (_allo.allocate (1)) { - _obj_ptr = _allo.allocate (1); - if (_obj_ptr == 0) + if (_obj_ptr == nullptr) { +#if defined (__cpp_exceptions) || ! defined (__GNUC__) throw std::bad_alloc (); +#endif } +#if defined (__cpp_exceptions) || ! defined (__GNUC__) try +#endif { new (_obj_ptr) T (); } +#if defined (__cpp_exceptions) || ! defined (__GNUC__) catch (...) { _allo.deallocate (_obj_ptr, 1); throw; } +#endif } @@ -64,27 +69,27 @@ SingleObj ::~SingleObj () { _obj_ptr->~T (); _allo.deallocate (_obj_ptr, 1); - _obj_ptr = 0; + _obj_ptr = nullptr; } template -T * SingleObj ::operator -> () const +T * SingleObj ::operator -> () const noexcept { - assert (_obj_ptr != 0); + assert (_obj_ptr != nullptr); - return (_obj_ptr); + return _obj_ptr; } template -T & SingleObj ::operator * () const +T & SingleObj ::operator * () const noexcept { assert (_obj_ptr != 0); - return (*_obj_ptr); + return *_obj_ptr; } diff --git a/src/fstb/ToolsAvx2.hpp b/src/fstb/ToolsAvx2.hpp index 26d459d..2ccda50 100644 --- a/src/fstb/ToolsAvx2.hpp +++ b/src/fstb/ToolsAvx2.hpp @@ -42,63 +42,63 @@ namespace fstb template inline bool ToolsAvx2::check_ptr_align (T *ptr) { - return (ptr != 0 && (reinterpret_cast (ptr) & 31) == 0); + return (ptr != nullptr && (reinterpret_cast (ptr) & 31) == 0); } __m256i ToolsAvx2::load_16_16ml (const void *msb_ptr, const void *lsb_ptr) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); const __m256i val_msb = _mm256_cvtepu8_epi16 (_mm_loadu_si128 ( - reinterpret_cast (msb_ptr) + static_cast (msb_ptr) )); const __m256i val_lsb = _mm256_cvtepu8_epi16 (_mm_loadu_si128 ( - reinterpret_cast (lsb_ptr) + static_cast (lsb_ptr) )); const __m256i val = _mm256_or_si256 ( _mm256_slli_epi16 (val_msb, 8), val_lsb ); - return (val); + return val; } __m256i ToolsAvx2::load_16_16m (const void *msb_ptr) { - assert (msb_ptr != 
0); + assert (msb_ptr != nullptr); const __m256i val_msb = _mm256_cvtepu8_epi16 (_mm_loadu_si128 ( - reinterpret_cast (msb_ptr) + static_cast (msb_ptr) )); const __m256i val = _mm256_slli_epi16 (val_msb, 8); - return (val); + return val; } __m256i ToolsAvx2::load_16_16l (const void *lsb_ptr) { - assert (lsb_ptr != 0); + assert (lsb_ptr != nullptr); const __m256i val_lsb = _mm256_cvtepu8_epi16 (_mm_loadu_si128 ( - reinterpret_cast (lsb_ptr) + static_cast (lsb_ptr) )); - return (val_lsb); + return val_lsb; } __m256i ToolsAvx2::load_16_16ml_partial (const void *msb_ptr, const void *lsb_ptr, int len) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); assert (len >= 0); assert (len < 16); @@ -111,14 +111,14 @@ __m256i ToolsAvx2::load_16_16ml_partial (const void *msb_ptr, const void *lsb_pt val_lsb ); - return (val); + return val; } __m256i ToolsAvx2::load_16_16m_partial (const void *msb_ptr, int len) { - assert (msb_ptr != 0); + assert (msb_ptr != nullptr); assert (len >= 0); assert (len < 16); @@ -126,21 +126,21 @@ __m256i ToolsAvx2::load_16_16m_partial (const void *msb_ptr, int len) _mm256_cvtepu8_epi16 (ToolsSse2::load_si128_partial (msb_ptr, len)); const __m256i val = _mm256_slli_epi16 (val_msb, 8); - return (val); + return val; } __m256i ToolsAvx2::load_16_16l_partial (const void *lsb_ptr, int len) { - assert (lsb_ptr != 0); + assert (lsb_ptr != nullptr); assert (len >= 0); assert (len < 16); const __m256i val = _mm256_cvtepu8_epi16 (ToolsSse2::load_si128_partial (lsb_ptr, len)); - return (val); + return val; } @@ -148,8 +148,8 @@ __m256i ToolsAvx2::load_16_16l_partial (const void *lsb_ptr, int len) // mask_lsb = 0x00FF00FF00FF00... 
void ToolsAvx2::store_16_16ml (void *msb_ptr, void *lsb_ptr, __m256i val, __m256i mask_lsb) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); assert (lsb_ptr != msb_ptr); const __m256i lsb = _mm256_and_si256 (mask_lsb, val); @@ -160,12 +160,10 @@ void ToolsAvx2::store_16_16ml (void *msb_ptr, void *lsb_ptr, __m256i val, __m256 lsbmsb = _mm256_permute4x64_epi64 (lsbmsb, (0<<0) + (2<<2) + (1<<4) + (3<<6)); _mm_storeu_si128 ( - reinterpret_cast <__m128i *> (lsb_ptr), - _mm256_castsi256_si128 (lsbmsb) + static_cast <__m128i *> (lsb_ptr), _mm256_castsi256_si128 (lsbmsb) ); _mm_storeu_si128 ( - reinterpret_cast <__m128i *> (msb_ptr), - _mm256_extractf128_si256 (lsbmsb, 0x1) + static_cast <__m128i *> (msb_ptr), _mm256_extractf128_si256 (lsbmsb, 0x1) ); } @@ -174,8 +172,8 @@ void ToolsAvx2::store_16_16ml (void *msb_ptr, void *lsb_ptr, __m256i val, __m256 // mask_lsb = 0x00FF00FF00FF00... void ToolsAvx2::store_16_16ml_perm (void *msb_ptr, void *lsb_ptr, __m256i val, __m256i mask_lsb, __m256i permute) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); assert (lsb_ptr != msb_ptr); const __m256i lsb = _mm256_and_si256 (mask_lsb, val); @@ -186,12 +184,10 @@ void ToolsAvx2::store_16_16ml_perm (void *msb_ptr, void *lsb_ptr, __m256i val, _ lsbmsb = _mm256_permutevar8x32_epi32 (lsbmsb, permute); _mm_storeu_si128 ( - reinterpret_cast <__m128i *> (lsb_ptr), - _mm256_castsi256_si128 (lsbmsb) + static_cast <__m128i *> (lsb_ptr), _mm256_castsi256_si128 (lsbmsb) ); _mm_storeu_si128 ( - reinterpret_cast <__m128i *> (msb_ptr), - _mm256_extractf128_si256 (lsbmsb, 0x1) + static_cast <__m128i *> (msb_ptr), _mm256_extractf128_si256 (lsbmsb, 0x1) ); } @@ -200,15 +196,14 @@ void ToolsAvx2::store_16_16ml_perm (void *msb_ptr, void *lsb_ptr, __m256i val, _ // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF void ToolsAvx2::store_16_16m (void *msb_ptr, __m256i val, __m256i 
mask_lsb) { - assert (msb_ptr != 0); + assert (msb_ptr != nullptr); __m256i msb = _mm256_andnot_si256 (mask_lsb, val); msb = _mm256_srli_si256 (msb, 1); msb = _mm256_packus_epi16 (msb, msb); msb = _mm256_permute4x64_epi64 (msb, (0<<0) + (2<<2)); _mm_storeu_si128 ( - reinterpret_cast <__m128i *> (msb_ptr), - _mm256_castsi256_si128 (msb) + static_cast <__m128i *> (msb_ptr), _mm256_castsi256_si128 (msb) ); } @@ -217,14 +212,13 @@ void ToolsAvx2::store_16_16m (void *msb_ptr, __m256i val, __m256i mask_lsb) // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF void ToolsAvx2::store_16_16l (void *lsb_ptr, __m256i val, __m256i mask_lsb) { - assert (lsb_ptr != 0); + assert (lsb_ptr != nullptr); __m256i lsb = _mm256_and_si256 (mask_lsb, val); lsb = _mm256_packus_epi16 (lsb, lsb); lsb = _mm256_permute4x64_epi64 (lsb, (0<<0) + (2<<2)); _mm_storeu_si128 ( - reinterpret_cast <__m128i *> (lsb_ptr), - _mm256_castsi256_si128 (lsb) + static_cast <__m128i *> (lsb_ptr), _mm256_castsi256_si128 (lsb) ); } @@ -232,8 +226,8 @@ void ToolsAvx2::store_16_16l (void *lsb_ptr, __m256i val, __m256i mask_lsb) void ToolsAvx2::store_16_16ml_partial (void *msb_ptr, void *lsb_ptr, __m256i val, __m256i mask_lsb, int len) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); assert (lsb_ptr != msb_ptr); assert (len >= 0); assert (len < 16); @@ -256,8 +250,8 @@ void ToolsAvx2::store_16_16ml_partial (void *msb_ptr, void *lsb_ptr, __m256i val void ToolsAvx2::store_16_16ml_perm_partial (void *msb_ptr, void *lsb_ptr, __m256i val, __m256i mask_lsb, __m256i permute, int len) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); assert (lsb_ptr != msb_ptr); assert (len >= 0); assert (len < 16); @@ -280,7 +274,7 @@ void ToolsAvx2::store_16_16ml_perm_partial (void *msb_ptr, void *lsb_ptr, __m256 void ToolsAvx2::store_16_16m_partial (void *msb_ptr, __m256i val, __m256i mask_lsb, int len) { - assert 
(msb_ptr != 0); + assert (msb_ptr != nullptr); assert (len >= 0); assert (len < 16); @@ -296,7 +290,7 @@ void ToolsAvx2::store_16_16m_partial (void *msb_ptr, __m256i val, __m256i mask_l void ToolsAvx2::store_16_16l_partial (void *lsb_ptr, __m256i val, __m256i mask_lsb, int len) { - assert (lsb_ptr != 0); + assert (lsb_ptr != nullptr); assert (len >= 0); assert (len < 16); @@ -311,7 +305,7 @@ void ToolsAvx2::store_16_16l_partial (void *lsb_ptr, __m256i val, __m256i mask_l __m256 ToolsAvx2::load_ps_partial (const void *ptr, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 8); @@ -319,9 +313,9 @@ __m256 ToolsAvx2::load_ps_partial (const void *ptr, int len) if (len >= 4) { const __m128 src_0 = - _mm_loadu_ps (reinterpret_cast (ptr)); + _mm_loadu_ps (static_cast (ptr)); const __m128 src_1 = ToolsSse2::load_ps_partial ( - reinterpret_cast (ptr) + sizeof (src_0), + static_cast (ptr) + sizeof (src_0), len - 4 ); val = _mm256_insertf128_ps ( @@ -343,7 +337,7 @@ __m256 ToolsAvx2::load_ps_partial (const void *ptr, int len) __m256i ToolsAvx2::load_si256_partial (const void *ptr, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 32); @@ -351,9 +345,9 @@ __m256i ToolsAvx2::load_si256_partial (const void *ptr, int len) if (len >= 16) { const __m128i src_0 = - _mm_loadu_si128 (reinterpret_cast (ptr)); + _mm_loadu_si128 (static_cast (ptr)); const __m128i src_1 = ToolsSse2::load_si128_partial ( - reinterpret_cast (ptr) + sizeof (src_0), + static_cast (ptr) + sizeof (src_0), len - 16 ); val = _mm256_insertf128_si256 ( @@ -375,24 +369,22 @@ __m256i ToolsAvx2::load_si256_partial (const void *ptr, int len) void ToolsAvx2::store_ps_partial (void *ptr, __m256 val, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 8); const __m128 val_0 = _mm256_castps256_ps128 (val); if (len >= 4) { - _mm_storeu_ps (reinterpret_cast (ptr), val_0); + _mm_storeu_ps (static_cast 
(ptr), val_0); const __m128 val_1 = _mm256_extractf128_ps (val, 1); ToolsSse2::store_ps_partial ( - reinterpret_cast (ptr) + sizeof (val_0), val_1, len - 4 + static_cast (ptr) + sizeof (val_0), val_1, len - 4 ); } else { - ToolsSse2::store_ps_partial ( - reinterpret_cast (ptr) , val_0, len - ); + ToolsSse2::store_ps_partial (static_cast (ptr), val_0, len); } } @@ -400,22 +392,22 @@ void ToolsAvx2::store_ps_partial (void *ptr, __m256 val, int len) void ToolsAvx2::store_si256_partial (void *ptr, __m256i val, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 32); const __m128i val_0 = _mm256_castsi256_si128 (val); if (len >= 16) { - _mm_storeu_si128 (reinterpret_cast <__m128i *> (ptr), val_0); + _mm_storeu_si128 (static_cast <__m128i *> (ptr), val_0); const __m128i val_1 = _mm256_extractf128_si256 (val, 1); ToolsSse2::store_si128_partial ( - reinterpret_cast (ptr) + sizeof (val_0), val_1, len - 16 + static_cast (ptr) + sizeof (val_0), val_1, len - 16 ); } else { - ToolsSse2::store_si128_partial (reinterpret_cast (ptr) , val_0, len ); + ToolsSse2::store_si128_partial (static_cast (ptr), val_0, len); } } diff --git a/src/fstb/ToolsSse2.hpp b/src/fstb/ToolsSse2.hpp index b28392c..2fa868c 100644 --- a/src/fstb/ToolsSse2.hpp +++ b/src/fstb/ToolsSse2.hpp @@ -42,15 +42,15 @@ namespace fstb template inline bool ToolsSse2::check_ptr_align (T *ptr) { - return (ptr != 0 && (reinterpret_cast (ptr) & 15) == 0); + return (ptr != nullptr && (reinterpret_cast (ptr) & 15) == 0); } __m128i ToolsSse2::load_8_16ml (const void *msb_ptr, const void *lsb_ptr) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); const __m128i val_msb = _mm_loadl_epi64 ( reinterpret_cast (msb_ptr) @@ -60,43 +60,43 @@ __m128i ToolsSse2::load_8_16ml (const void *msb_ptr, const void *lsb_ptr) ); const __m128i val = _mm_unpacklo_epi8 (val_lsb, val_msb); - return (val); + return val; } __m128i 
ToolsSse2::load_8_16m (const void *msb_ptr, __m128i zero) { - assert (msb_ptr != 0); + assert (msb_ptr != nullptr); const __m128i val_msb = _mm_loadl_epi64 ( reinterpret_cast (msb_ptr) ); const __m128i val = _mm_unpacklo_epi8 (zero, val_msb); - return (val); + return val; } __m128i ToolsSse2::load_8_16l (const void *lsb_ptr, __m128i zero) { - assert (lsb_ptr != 0); + assert (lsb_ptr != nullptr); const __m128i val_lsb = _mm_loadl_epi64 ( reinterpret_cast (lsb_ptr) ); const __m128i val = _mm_unpacklo_epi8 (val_lsb, zero); - return (val); + return val; } __m128i ToolsSse2::load_8_16ml_partial (const void *msb_ptr, const void *lsb_ptr, int len) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); assert (len >= 0); assert (len < 8); @@ -104,35 +104,35 @@ __m128i ToolsSse2::load_8_16ml_partial (const void *msb_ptr, const void *lsb_ptr const __m128i val_lsb = load_epi64_partial (lsb_ptr, len); const __m128i val = _mm_unpacklo_epi8 (val_lsb, val_msb); - return (val); + return val; } __m128i ToolsSse2::load_8_16m_partial (const void *msb_ptr, __m128i zero, int len) { - assert (msb_ptr != 0); + assert (msb_ptr != nullptr); assert (len >= 0); assert (len < 8); const __m128i val_msb = load_epi64_partial (msb_ptr, len); const __m128i val = _mm_unpacklo_epi8 (zero, val_msb); - return (val); + return val; } __m128i ToolsSse2::load_8_16l_partial (const void *lsb_ptr, __m128i zero, int len) { - assert (lsb_ptr != 0); + assert (lsb_ptr != nullptr); assert (len >= 0); assert (len < 8); const __m128i val_lsb = load_epi64_partial (lsb_ptr, len); const __m128i val = _mm_unpacklo_epi8 (val_lsb, zero); - return (val); + return val; } @@ -140,8 +140,8 @@ __m128i ToolsSse2::load_8_16l_partial (const void *lsb_ptr, __m128i zero, int le // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF void ToolsSse2::store_8_16ml (void *msb_ptr, void *lsb_ptr, __m128i val, __m128i mask_lsb) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + 
assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); assert (lsb_ptr != msb_ptr); const __m128i lsb = _mm_and_si128 (mask_lsb, val); @@ -149,10 +149,10 @@ void ToolsSse2::store_8_16ml (void *msb_ptr, void *lsb_ptr, __m128i val, __m128i msb = _mm_srli_si128 (msb, 1); __m128i tmp = _mm_packus_epi16 (lsb, msb); - _mm_storel_epi64 (reinterpret_cast <__m128i *> (lsb_ptr), tmp); + _mm_storel_epi64 (static_cast <__m128i *> (lsb_ptr), tmp); tmp = _mm_unpackhi_epi64 (tmp, tmp); - _mm_storel_epi64 (reinterpret_cast <__m128i *> (msb_ptr), tmp); + _mm_storel_epi64 (static_cast <__m128i *> (msb_ptr), tmp); } @@ -160,12 +160,12 @@ void ToolsSse2::store_8_16ml (void *msb_ptr, void *lsb_ptr, __m128i val, __m128i // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF void ToolsSse2::store_8_16m (void *msb_ptr, __m128i val, __m128i mask_lsb) { - assert (msb_ptr != 0); + assert (msb_ptr != nullptr); __m128i msb = _mm_andnot_si128 (mask_lsb, val); msb = _mm_srli_si128 (msb, 1); msb = _mm_packus_epi16 (msb, msb); - _mm_storel_epi64 (reinterpret_cast <__m128i *> (msb_ptr), msb); + _mm_storel_epi64 (static_cast <__m128i *> (msb_ptr), msb); } @@ -173,19 +173,19 @@ void ToolsSse2::store_8_16m (void *msb_ptr, __m128i val, __m128i mask_lsb) // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF void ToolsSse2::store_8_16l (void *lsb_ptr, __m128i val, __m128i mask_lsb) { - assert (lsb_ptr != 0); + assert (lsb_ptr != nullptr); __m128i lsb = _mm_and_si128 (mask_lsb, val); lsb = _mm_packus_epi16 (lsb, lsb); - _mm_storel_epi64 (reinterpret_cast <__m128i *> (lsb_ptr), lsb); + _mm_storel_epi64 (static_cast <__m128i *> (lsb_ptr), lsb); } void ToolsSse2::store_8_16ml_partial (void *msb_ptr, void *lsb_ptr, __m128i val, __m128i mask_lsb, int len) { - assert (msb_ptr != 0); - assert (lsb_ptr != 0); + assert (msb_ptr != nullptr); + assert (lsb_ptr != nullptr); assert (lsb_ptr != msb_ptr); const __m128i lsb = _mm_and_si128 (mask_lsb, val); @@ -203,7 +203,7 @@ void ToolsSse2::store_8_16ml_partial (void 
*msb_ptr, void *lsb_ptr, __m128i val, void ToolsSse2::store_8_16m_partial (void *msb_ptr, __m128i val, __m128i mask_lsb, int len) { - assert (msb_ptr != 0); + assert (msb_ptr != nullptr); __m128i msb = _mm_andnot_si128 (mask_lsb, val); msb = _mm_srli_si128 (msb, 1); @@ -215,7 +215,7 @@ void ToolsSse2::store_8_16m_partial (void *msb_ptr, __m128i val, __m128i mask_ls void ToolsSse2::store_8_16l_partial (void *lsb_ptr, __m128i val, __m128i mask_lsb, int len) { - assert (lsb_ptr != 0); + assert (lsb_ptr != nullptr); __m128i lsb = _mm_and_si128 (mask_lsb, val); lsb = _mm_packus_epi16 (lsb, lsb); @@ -226,7 +226,7 @@ void ToolsSse2::store_8_16l_partial (void *lsb_ptr, __m128i val, __m128i mask_ls __m128 ToolsSse2::load_ps_partial (const void *ptr, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 4); @@ -234,18 +234,18 @@ __m128 ToolsSse2::load_ps_partial (const void *ptr, int len) while (len > 0) { -- len; - tmp_arr [len] = reinterpret_cast (ptr) [len]; + tmp_arr [len] = static_cast (ptr) [len]; } const __m128 val = _mm_load_ps (tmp_arr); - return (val); + return val; } __m128i ToolsSse2::load_si128_partial (const void *ptr, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 16); @@ -253,27 +253,27 @@ __m128i ToolsSse2::load_si128_partial (const void *ptr, int len) if ((len & 1) != 0) { -- len; - tmp = *(reinterpret_cast (ptr) + len); + tmp = *(static_cast (ptr) + len); } if ((len & 2) != 0) { len -= 2; tmp <<= 16; const int ofs = len >> 1; - tmp += *(reinterpret_cast (ptr) + ofs); + tmp += *(static_cast (ptr) + ofs); } __m128i val; if (len >= 8) { - const int tmp0 = *(reinterpret_cast (ptr) ); - const int tmp1 = *(reinterpret_cast (ptr) + 1); + const int tmp0 = *(static_cast (ptr) ); + const int tmp1 = *(static_cast (ptr) + 1); if (len == 8) { val = _mm_set_epi32 (0, tmp, tmp1, tmp0); } else { - const int tmp2 = *(reinterpret_cast (ptr) + 2); + const int tmp2 = *(static_cast (ptr) + 2); 
val = _mm_set_epi32 (tmp, tmp2, tmp1, tmp0); } } @@ -285,19 +285,19 @@ __m128i ToolsSse2::load_si128_partial (const void *ptr, int len) } else { - const int tmp0 = *reinterpret_cast (ptr); + const int tmp0 = *static_cast (ptr); val = _mm_set_epi32 (0, 0, tmp, tmp0); } } - return (val); + return val; } __m128i ToolsSse2::load_epi64_partial (const void *ptr, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 8); @@ -305,19 +305,19 @@ __m128i ToolsSse2::load_epi64_partial (const void *ptr, int len) if ((len & 1) != 0) { -- len; - tmp = *(reinterpret_cast (ptr) + len); + tmp = *(static_cast (ptr) + len); } if ((len & 2) != 0) { len -= 2; tmp <<= 16; const int ofs = len >> 1; - tmp += *(reinterpret_cast (ptr) + ofs); + tmp += *(static_cast (ptr) + ofs); } __m128i val; if ((len & 4) != 0) { - const int tmp2 = *reinterpret_cast (ptr); + const int tmp2 = *static_cast (ptr); val = _mm_set_epi32 (0, 0, tmp, tmp2); } else @@ -325,14 +325,14 @@ __m128i ToolsSse2::load_epi64_partial (const void *ptr, int len) val = _mm_set_epi32 (0, 0, 0, tmp); } - return (val); + return val; } void ToolsSse2::store_ps_partial (void *ptr, __m128 val, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 4); @@ -341,7 +341,7 @@ void ToolsSse2::store_ps_partial (void *ptr, __m128 val, int len) while (len > 0) { -- len; - reinterpret_cast (ptr) [len] = tmp_arr [len]; + static_cast (ptr) [len] = tmp_arr [len]; } } @@ -349,7 +349,7 @@ void ToolsSse2::store_ps_partial (void *ptr, __m128 val, int len) void ToolsSse2::store_si128_partial (void *ptr, __m128i val, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 16); @@ -365,22 +365,22 @@ void ToolsSse2::store_si128_partial (void *ptr, __m128i val, int len) if ((len & 1) != 0) { - *(reinterpret_cast (ptr) + len - 1) = tmp.v08 [len - 1]; + *(static_cast (ptr) + len - 1) = tmp.v08 [len - 1]; } len >>= 1; if ((len & 1) != 0) { - 
*(reinterpret_cast (ptr) + len - 1) = tmp.v16 [len - 1]; + *(static_cast (ptr) + len - 1) = tmp.v16 [len - 1]; } len >>= 1; if ((len & 1) != 0) { - *(reinterpret_cast (ptr) + len - 1) = tmp.v32 [len - 1]; + *(static_cast (ptr) + len - 1) = tmp.v32 [len - 1]; } len >>= 1; if (len != 0) { - * reinterpret_cast (ptr) = tmp.v64 [0 ]; + * static_cast (ptr) = tmp.v64 [0 ]; } } @@ -388,7 +388,7 @@ void ToolsSse2::store_si128_partial (void *ptr, __m128i val, int len) void ToolsSse2::store_epi64_partial (void *ptr, __m128i val, int len) { - assert (ptr != 0); + assert (ptr != nullptr); assert (len >= 0); assert (len < 8); @@ -397,19 +397,19 @@ void ToolsSse2::store_epi64_partial (void *ptr, __m128i val, int len) uint64_t tmp = _mm_cvtsi128_si64 (val); if ((len & 4) != 0) { - *reinterpret_cast (ptr) = uint32_t (tmp); - ptr = reinterpret_cast (ptr) + 1; + *static_cast (ptr) = uint32_t (tmp); + ptr = static_cast (ptr) + 1; tmp >>= 32; } if ((len & 2) != 0) { - *reinterpret_cast (ptr) = uint16_t (tmp); - ptr = reinterpret_cast (ptr) + 1; + *static_cast (ptr) = uint16_t (tmp); + ptr = static_cast (ptr) + 1; tmp >>= 16; } if ((len & 1) != 0) { - *reinterpret_cast (ptr) = uint8_t (tmp); + *static_cast (ptr) = uint8_t (tmp); } #else @@ -425,17 +425,17 @@ void ToolsSse2::store_epi64_partial (void *ptr, __m128i val, int len) if ((len & 1) != 0) { - *(reinterpret_cast (ptr) + len - 1) = tmp.v08 [len - 1]; + *(static_cast (ptr) + len - 1) = tmp.v08 [len - 1]; } len >>= 1; if ((len & 1) != 0) { - *(reinterpret_cast (ptr) + len - 1) = tmp.v16 [len - 1]; + *(static_cast (ptr) + len - 1) = tmp.v16 [len - 1]; } len >>= 1; if (len != 0) { - *(reinterpret_cast (ptr) + len - 1) = tmp.v32 [len - 1]; + *(static_cast (ptr) + len - 1) = tmp.v32 [len - 1]; } #endif @@ -490,7 +490,7 @@ __m128i ToolsSse2::mul_s32_s15_s16 (__m128i src0, __m128i src1, __m128i coef) const __m128i res = _mm_packs_epi32 (sum0, sum1); - return (res); + return res; } @@ -522,7 +522,7 @@ __m128i ToolsSse2::mullo_epi32 (const 
__m128i &a, const __m128i &b) const __m128i prod23 = _mm_unpackhi_epi32 (prod02, prod13); // (-,-,a3*b3,a2*b2) const __m128i res = _mm_unpacklo_epi64 (prod01 ,prod23); // (ab3,ab2,ab1,ab0) - return (res); + return res; } @@ -566,7 +566,7 @@ __m128i ToolsSse2::pack_epi16 (__m128i a, __m128i b) #endif - return (p); + return p; } @@ -577,7 +577,7 @@ __m128i ToolsSse2::select (const __m128i &cond, const __m128i &v_t, const __m128 const __m128i cond_0 = _mm_andnot_si128 (cond, v_f); const __m128i res = _mm_or_si128 (cond_0, cond_1); - return (res); + return res; } @@ -588,7 +588,7 @@ __m128 ToolsSse2::select (const __m128 &cond, const __m128 &v_t, const __m128 &v const __m128 cond_0 = _mm_andnot_ps (cond, v_f); const __m128 res = _mm_or_ps (cond_0, cond_1); - return (res); + return res; } @@ -597,14 +597,14 @@ __m128i ToolsSse2::select_16_equ (const __m128i &lhs, const __m128i &rhs, const { const __m128i cond = _mm_cmpeq_epi16 (lhs, rhs); - return (ToolsSse2::select (cond, v_t, v_f)); + return ToolsSse2::select (cond, v_t, v_f); } __m128i ToolsSse2::limit_epi16 (const __m128i &x, const __m128i &mi, const __m128i &ma) { - return (_mm_max_epi16 (_mm_min_epi16 (x, ma), mi)); + return _mm_max_epi16 (_mm_min_epi16 (x, ma), mi); } @@ -614,7 +614,7 @@ __m128i ToolsSse2::abs_dif_epu16 (const __m128i &a, const __m128i &b) const __m128i p = _mm_subs_epu16 (a, b); const __m128i n = _mm_subs_epu16 (b, a); - return (_mm_or_si128 (p, n)); + return _mm_or_si128 (p, n); } @@ -628,7 +628,7 @@ __m128i ToolsSse2::abs_dif_epi16 (const __m128i &a, const __m128i &b) const __m128i au = _mm_xor_si128 (a, mask_s); const __m128i bu = _mm_xor_si128 (b, mask_s); - return (abs_dif_epu16 (au, bu)); + return abs_dif_epu16 (au, bu); } diff --git a/src/fstb/def.h b/src/fstb/def.h index 61250e7..60c66cf 100644 --- a/src/fstb/def.h +++ b/src/fstb/def.h @@ -34,8 +34,6 @@ namespace fstb -#define fstb_IS(prop, val) (defined (fstb_##prop##_##val) && (fstb_##prop) == (fstb_##prop##_##val)) - #define 
fstb_ARCHI_X86 (1) #define fstb_ARCHI_ARM (2) @@ -69,7 +67,7 @@ namespace fstb #if (fstb_ARCHI == fstb_ARCHI_X86) #define fstb_ENDIAN fstb_ENDIAN_LITTLE #elif (fstb_ARCHI == fstb_ARCHI_ARM) - #if defined (__ARMEL__) || defined (__LITTLE_ENDIAN__) + #if ! defined (__ARM_BIG_ENDIAN) || defined (__ARMEL__) || defined (__LITTLE_ENDIAN__) #define fstb_ENDIAN fstb_ENDIAN_LITTLE #else #define fstb_ENDIAN fstb_ENDIAN_BIG @@ -117,10 +115,17 @@ namespace fstb #define fstb_COMPILER_GCC (1) #define fstb_COMPILER_MSVC (2) -#if defined (__GNUC__) +#if defined (__GNUC__) || defined (__clang__) #define fstb_COMPILER fstb_COMPILER_GCC #elif defined (_MSC_VER) #define fstb_COMPILER fstb_COMPILER_MSVC + #if _MSC_VER >= 2000 && __cplusplus < 201402L + // The MS compiler keeps __cplusplus at 199711L, even if C++14 or above + // is enforced and standard compliance is activated. C++11 is not + // officially supported, but almost works with _MSC_VER >= 1900. + // /Zc:__cplusplus sets the macro to the right value for C++ >= 2014. + #error Please compile with /Zc:__cplusplus + #endif #else #error #endif @@ -138,7 +143,7 @@ namespace fstb -// Alignment +// Alignment. 
Or better directly use alignas() #if defined (_MSC_VER) #define fstb_TYPEDEF_ALIGN( alignsize, srctype, dsttype) \ typedef __declspec (align (alignsize)) srctype dsttype @@ -162,39 +167,75 @@ namespace fstb #else #define fstb_CDECL #endif -#if fstb_IS (SYS, WIN) +#if fstb_SYS == fstb_SYS_WIN #if defined (__GNUC__) - #define fstb_EXPORT(f) extern "C" __attribute__((dllexport)) f + #define fstb_EXPORT(f) extern "C" __attribute__((dllexport)) f noexcept #else - #define fstb_EXPORT(f) extern "C" __declspec(dllexport) f + #define fstb_EXPORT(f) extern "C" __declspec(dllexport) f noexcept #endif #else - #define fstb_EXPORT(f) extern "C" __attribute__((visibility("default"))) f + #define fstb_EXPORT(f) extern "C" __attribute__((visibility("default"))) f noexcept #endif -// Convenient helper to declare unused function parameters -template inline void unused (T &&...) {} +// constexpr functions without too much restrictions +#if (__cplusplus >= 201402L) + #define fstb_CONSTEXPR14 constexpr +#else + #define fstb_CONSTEXPR14 +#endif +// SIMD instruction set availability +#undef fstb_HAS_SIMD +#if fstb_ARCHI == fstb_ARCHI_ARM + #if defined (__ARM_NEON_FP) + #define fstb_HAS_SIMD (1) + #endif +#elif fstb_ARCHI == fstb_ARCHI_X86 + #if (fstb_WORD_SIZE == 64) + #define fstb_HAS_SIMD (1) + #elif fstb_COMPILER == fstb_COMPILER_MSVC + #if defined (_M_IX86_FP) && _M_IX86_FP >= 2 + #define fstb_HAS_SIMD (1) + #endif + #else + #if defined (__SSE2__) + #define fstb_HAS_SIMD (1) + #endif + #endif +#endif -const double PI = 3.1415926535897932384626433832795; -const double LN2 = 0.69314718055994530941723212145818; -const double LN10 = 2.3025850929940456840179914546844; -const double LOG10_2 = 0.30102999566398119521373889472449; -const double LOG2_E = 1.0 / LN2; -const double LOG2_10 = LN10 / LN2; -const double EXP1 = 2.7182818284590452353602874713527; -const double SQRT2 = 1.4142135623730950488016887242097; -const double TWOP32 = 256.0 * 256 * 256 * 256; -const double TWOPM32 = 1.0 / 
TWOP32; -const float ANTI_DENORMAL_F32 = 1e-20f; -const double ANTI_DENORMAL_F64 = 1e-290; -const float ANTI_DENORMAL_F32_CUB = 1e-10f; // Anti-denormal for float numbers aimed to be raised to the power of 2 or 3. -const double ANTI_DENORMAL_F64_CUB = 1e-100; + +// Convenient helper to declare unused function parameters +template inline void unused (T &&...) noexcept {} + + + +constexpr double PI = 3.1415926535897932384626433832795; +constexpr double LN2 = 0.69314718055994530941723212145818; +constexpr double LN10 = 2.3025850929940456840179914546844; +constexpr double LOG10_2 = 0.30102999566398119521373889472449; +constexpr double LOG2_E = 1.0 / LN2; +constexpr double LOG2_10 = LN10 / LN2; +constexpr double EXP1 = 2.7182818284590452353602874713527; +constexpr double SQRT2 = 1.4142135623730950488016887242097; + +// Exact representation in 32-bit float +constexpr float TWOP16 = 65536.f; +constexpr float TWOP32 = TWOP16 * TWOP16; +constexpr float TWOP64 = TWOP32 * TWOP32; +constexpr float TWOPM16 = 1.f / TWOP16; +constexpr float TWOPM32 = 1.f / TWOP32; +constexpr float TWOPM64 = 1.f / TWOP64; + +constexpr float ANTI_DENORMAL_F32 = 1e-20f; +constexpr double ANTI_DENORMAL_F64 = 1e-290; +constexpr float ANTI_DENORMAL_F32_CUB = 1e-10f; // Anti-denormal for float numbers aimed to be raised to the power of 2 or 3. +constexpr double ANTI_DENORMAL_F64_CUB = 1e-100; diff --git a/src/fstb/fnc.h b/src/fstb/fnc.h index 3e21694..4b620ed 100644 --- a/src/fstb/fnc.h +++ b/src/fstb/fnc.h @@ -27,9 +27,14 @@ To Public License, Version 2, as published by Sam Hocevar. 
See /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/ +#include "fstb/def.h" + +#include #include +#include #include +#include @@ -39,31 +44,77 @@ namespace fstb template -inline T limit (T x, T mi, T ma); +inline constexpr int sgn (T x) noexcept; +template +inline constexpr T limit (T x, T mi, T ma) noexcept; +template +inline constexpr void sort_2_elt (T &mi,T &ma, T a, T b) noexcept; +template +inline constexpr bool is_pow_2 (T x) noexcept; +inline double round (double x) noexcept; +inline float round (float x) noexcept; +inline int round_int (float x) noexcept; +inline int round_int (double x) noexcept; +inline int round_int_accurate (double x) noexcept; +inline int64_t round_int64 (double x) noexcept; +inline int floor_int (float x) noexcept; +inline int floor_int (double x) noexcept; +inline int floor_int_accurate (double x) noexcept; +inline int64_t floor_int64 (double x) noexcept; +inline int ceil_int (double x) noexcept; template -inline bool is_pow_2 (T x); -inline double round (double x); -inline int round_int (double x); -inline int floor_int (double x); -inline int ceil_int (double x); +inline int trunc_int (T x) noexcept; template -inline int conv_int_fast (T x); +inline int conv_int_fast (T x) noexcept; template -inline bool is_null (T val, T eps = T (1e-9)); +inline constexpr bool is_null (T val, T eps = T (1e-9)) noexcept; template -inline bool is_eq (T v1, T v2, T eps = T (1e-9)); +inline constexpr bool is_eq (T v1, T v2, T eps = T (1e-9)) noexcept; template -inline bool is_eq_rel (T v1, T v2, T tol = T (1e-6)); -inline int get_prev_pow_2 (uint32_t x); -inline double sinc (double x); +inline constexpr bool is_eq_rel (T v1, T v2, T tol = T (1e-6)) noexcept; +inline int get_prev_pow_2 (uint32_t x) noexcept; +inline int get_next_pow_2 (uint32_t x) noexcept; +inline constexpr double sinc (double x) noexcept; +inline double pseudo_exp (double x, double c) noexcept; +inline double pseudo_log (double y, double c) noexcept; 
template -inline T sshift_l (T x); +inline constexpr T sshift_l (T x) noexcept; template -inline T sshift_r (T x); +inline constexpr T sshift_r (T x) noexcept; +template +inline constexpr T sq (T x) noexcept; +template +inline constexpr T cube (T x) noexcept; +template +inline constexpr T ipow (T x, U n) noexcept; +template +inline constexpr T ipowp (T x, U n) noexcept; +template +inline constexpr T ipowpc (T x) noexcept; +template +inline constexpr T rcp_uint (int x) noexcept; +template +inline constexpr T lerp (T v0, T v1, T p) noexcept; +template +inline constexpr T find_extremum_pos_parabolic (T r1, T r2, T r3); + +template +constexpr std::array make_array (const T &init_val); + +template +inline T read_unalign (const void *ptr) noexcept; +template +inline void write_unalign (void *ptr, T val) noexcept; +template +inline void copy_no_overlap (T * fstb_RESTRICT dst_ptr, const T * fstb_RESTRICT src_ptr, int nbr_elt) noexcept; void conv_to_lower_case (std::string &str); int snprintf4all (char *out_0, size_t size, const char *format_0, ...); +//FILE * fopen_utf8 (const char *filename_0, const char *mode_0); + +template +inline bool is_ptr_align_nz (const T *ptr, int a = 16) noexcept; diff --git a/src/fstb/fnc.hpp b/src/fstb/fnc.hpp index 1ed1791..db6d6bd 100644 --- a/src/fstb/fnc.hpp +++ b/src/fstb/fnc.hpp @@ -24,8 +24,15 @@ To Public License, Version 2, as published by Sam Hocevar. See #include "fstb/def.h" +#include #include +#if (fstb_ARCHI == fstb_ARCHI_X86) + #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64 + #include + #endif +#endif + #if defined (_MSC_VER) #if (fstb_ARCHI == fstb_ARCHI_X86) #include @@ -39,6 +46,7 @@ To Public License, Version 2, as published by Sam Hocevar. See #include #include #include +#include @@ -52,51 +60,126 @@ namespace fstb template -T limit (T x, T mi, T ma) +constexpr int sgn (T x) noexcept +{ + return x < T (0) ? -1 : x > T (0) ? 1 : 0; +} + + + +template +constexpr T limit (T x, T mi, T ma) noexcept { - return ((x < mi) ? 
mi : ((x > ma) ? ma : x)); + return (x < mi) ? mi : ((x > ma) ? ma : x); } template -bool is_pow_2 (T x) +constexpr void sort_2_elt (T &mi, T &ma, T a, T b) noexcept +{ + if (a < b) + { + mi = a; + ma = b; + } + else + { + mi = b; + ma = a; + } +} + + + +template +constexpr bool is_pow_2 (T x) noexcept { return ((x & -x) == x); } -double round (double x) +double round (double x) noexcept { - return (floor (x + 0.5)); + return floor (x + 0.5f); } -int round_int (double x) +float round (float x) noexcept +{ + return floorf (x + 0.5f); +} + + + +int round_int (float x) noexcept { assert (x <= double (INT_MAX)); - assert (x >= double (INT_MIN)); + assert (x >= static_cast (INT_MIN)); #if (fstb_ARCHI == fstb_ARCHI_X86) - #if defined (_MSC_VER) + #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64 - assert (x <= double (INT_MAX/2)); - assert (x >= double (INT_MIN/2)); + return _mm_cvtss_si32 (_mm_set_ss (x)); - static const float round_to_nearest = 0.5f; - int i; + #elif defined (_MSC_VER) + + int i; + __asm + { + fld x + fistp i + } + return i; + + #else + + return int (floorf (x + 0.5f)); - #if defined (_WIN64) || defined (__64BIT__) || defined (__amd64__) || defined (__x86_64__) + #endif // Compiler + +#else // fstb_ARCHI_X86 + + // Slow + return int (floorf (x + 0.5f)); - const double xx = x + x + round_to_nearest; +#endif // fstb_ARCHI_X86 +} + + + +int round_int (double x) noexcept +{ + return round_int (float (x)); +} + + + +int round_int_accurate (double x) noexcept +{ + assert (x <= double (INT_MAX)); + assert (x >= static_cast (INT_MIN)); + +#if (fstb_ARCHI == fstb_ARCHI_X86) + + #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64 + + constexpr float round_to_nearest = 0.5f; + const double xx = x + x + round_to_nearest; const __m128d x_128d = _mm_set_sd (xx); - i = _mm_cvtsd_si32 (x_128d); - i >>= 1; + const int i = _mm_cvtsd_si32 (x_128d) >> 1; + return i; - #else + #elif defined (_MSC_VER) + + assert (x <= double (INT_MAX/2)); + assert (x >= double 
(INT_MIN/2)); + + static const float round_to_nearest = 0.5f; + int i; __asm { @@ -107,11 +190,9 @@ int round_int (double x) sar i, 1 } - #endif - assert (i == int (floor (x + 0.5))); - return (i); + return i; #elif defined (__GNUC__) @@ -134,50 +215,124 @@ int round_int (double x) assert (i == int (floor (x + 0.5))); - return (i); + return i; #else // Slow - return (int (floor (x + 0.5))); + return int (floor (x + 0.5)); #endif // Compiler #else // fstb_ARCHI_X86 // Slow - return (int (floor (x + 0.5))); + return int (floor (x + 0.5)); #endif // fstb_ARCHI_X86 } +int64_t round_int64 (double x) noexcept +{ + return int64_t (round (x)); +} + + + // May not give the right result for very small negative values. -int floor_int (double x) +int floor_int (float x) noexcept { assert (x <= double (INT_MAX)); - assert (x >= double (INT_MIN)); + assert (x >= static_cast (INT_MIN)); #if (fstb_ARCHI == fstb_ARCHI_X86) - #if defined (_MSC_VER) + #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64 + + constexpr float round_toward_m_i = -0.5f; + const float xx = x + x + round_toward_m_i; + const __m128 x_128 = _mm_set_ss (xx); + const int i = _mm_cvt_ss2si (x_128) >> 1; + return i; + + #elif defined (_MSC_VER) assert (x <= double (INT_MAX/2)); assert (x >= double (INT_MIN/2)); int i; - static const float round_toward_m_i = -0.5f; + constexpr float round_toward_m_i = -0.5f; + __asm + { + fld x + fadd round_toward_m_i + fistp i + } + return i; + + #elif defined (__GNUC__) + + assert (x <= double (INT_MAX/2)); + assert (x >= double (INT_MIN/2)); + int i; + static const float round_toward_m_i = -0.5f; + asm ( + "fldl %[x] \n" + "fadds (%[rm]) \n" + "fistpl %[i] \n" + : [i] "=m" (i) + : [rm] "r" (&round_toward_m_i) + , [x] "m" (x) + : //"st" + ); + return i; + + #else + + return int (floorf (x)); + + #endif // Compiler + +#else // fstb_ARCHI_X86 + + return int (floorf (x)); + +#endif // fstb_ARCHI_X86 +} - #if defined (_WIN64) || defined (__64BIT__) || defined (__amd64__) || defined 
(__x86_64__) - const double xx = x + x + round_toward_m_i; + +int floor_int (double x) noexcept +{ + return floor_int (float (x)); +} + + + +// May not give the right result for very small negative values. +int floor_int_accurate (double x) noexcept +{ + assert (x <= double (INT_MAX)); + assert (x >= static_cast (INT_MIN)); + +#if (fstb_ARCHI == fstb_ARCHI_X86) + + #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64 + + constexpr float round_toward_m_i = -0.5f; + const double xx = x + x + round_toward_m_i; const __m128d x_128d = _mm_set_sd (xx); - i = _mm_cvtsd_si32 (x_128d); - i >>= 1; + const int i = _mm_cvtsd_si32 (x_128d) >> 1; + return i; - #else + #elif defined (_MSC_VER) + assert (x <= double (INT_MAX/2)); + assert (x >= double (INT_MIN/2)); + int i; + static const float round_toward_m_i = -0.5f; __asm { fld x @@ -186,21 +341,14 @@ int floor_int (double x) fistp i sar i, 1 } - - #endif - - using namespace std; assert (i == int (floor (x)) || fabs (i - x) < 1e-10); - - return (i); + return i; #elif defined (__GNUC__) assert (x <= double (INT_MAX/2)); assert (x >= double (INT_MIN/2)); - - int i; - + int i; static const float round_toward_m_i = -0.5f; asm ( "fldl %[x] \n" @@ -216,50 +364,55 @@ int floor_int (double x) assert (i == int (floor (x)) || fabs (i - x) < 1e-10); - return (i); + return i; #else // Slow - return (int (floor (x))); + return int (floor (x)); #endif // Compiler #else // fstb_ARCHI_X86 // Slow - return (int (floor (x))); + return int (floor (x)); #endif // fstb_ARCHI_X86 } +int64_t floor_int64 (double x) noexcept +{ + return int64_t (floor (x)); +} + + + // May not give the right result for very small positive values. 
-int ceil_int (double x) +int ceil_int (double x) noexcept { assert (x <= double (INT_MAX)); - assert (x >= double (INT_MIN)); + assert (x >= static_cast (INT_MIN)); #if (fstb_ARCHI == fstb_ARCHI_X86) - #if (defined (_MSC_VER)) + #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64 + + constexpr float round_toward_p_i = -0.5f; + const double xx = round_toward_p_i - (x + x); + const __m128d x_128d = _mm_set_sd (xx); + const int i = _mm_cvtsd_si32 (x_128d) >> 1; + return -i; + + #elif (defined (_MSC_VER)) assert (x <= double (INT_MAX/2)); assert (x >= double (INT_MIN/2)); int i; static const float round_toward_p_i = -0.5f; - - #if defined (_WIN64) || defined (__64BIT__) || defined (__amd64__) || defined (__x86_64__) - - const double xx = round_toward_p_i - (x + x); - const __m128d x_128d = _mm_set_sd (xx); - i = _mm_cvtsd_si32 (x_128d); - i >>= 1; - - #else - __asm { fld x @@ -268,20 +421,14 @@ int ceil_int (double x) fistp i sar i, 1 } - - #endif - assert (-i == int (ceil (x)) || fabs (-i - x) < 1e-10); - - return (-i); + return -i; #elif defined (__GNUC__) assert (x <= double (INT_MAX/2)); assert (x >= double (INT_MIN/2)); - int i; - static const float round_toward_p_i = -0.5f; asm ( "fldl %[x] \n" @@ -294,31 +441,40 @@ int ceil_int (double x) , [x] "m" (x) : //"st" ); - - using namespace std; assert (-i == int (ceil (x)) || fabs (-i - x) < 1e-10); - - return (-i); + return -i; #else // Slow - return (int (ceil (x))); + return int (ceil (x)); #endif #else // Slow - return (int (ceil (x))); + return int (ceil (x)); + +#endif +} + + +template +int trunc_int (T x) noexcept +{ +#if (fstb_ARCHI == fstb_ARCHI_X86) && (defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64) + return _mm_cvtt_ss2si (_mm_set1_ps (float (x))); +#else + return int (x); #endif } template -int conv_int_fast (T x) +int conv_int_fast (T x) noexcept { static_assert (std::is_floating_point ::value, "T must be floating point"); @@ -367,13 +523,13 @@ int conv_int_fast (T x) #endif - return (p); + return p; } 
template -bool is_null (T val, T eps) +constexpr bool is_null (T val, T eps) noexcept { static_assert (std::is_floating_point ::value, "T must be floating point"); assert (eps >= 0); @@ -384,18 +540,18 @@ bool is_null (T val, T eps) template -bool is_eq (T v1, T v2, T eps) +constexpr bool is_eq (T v1, T v2, T eps) noexcept { static_assert (std::is_floating_point ::value, "T must be floating point"); assert (eps >= 0); - return (is_null (v2 - v1, eps)); + return is_null (v2 - v1, eps); } template -bool is_eq_rel (T v1, T v2, T tol) +constexpr bool is_eq_rel (T v1, T v2, T tol) noexcept { static_assert (std::is_floating_point ::value, "T must be floating point"); assert (tol >= 0); @@ -404,7 +560,7 @@ bool is_eq_rel (T v1, T v2, T tol) const T v2a = T (fabs (v2)); const T eps = std::max (v1a, v2a) * tol; - return (is_eq (v1, v2, eps)); + return is_eq (v1, v2, eps); } @@ -422,7 +578,7 @@ Throws: Nothing ============================================================================== */ -int get_prev_pow_2 (uint32_t x) +int get_prev_pow_2 (uint32_t x) noexcept { assert (x > 0); @@ -467,21 +623,127 @@ int get_prev_pow_2 (uint32_t x) #endif - return (int (p)); + return int (p); } -double sinc (double x) +/* +============================================================================== +Name: get_next_pow2 +Description: + Computes the exponent of the power of two equal to or immediately greater + than the parameter. It is the base-2 log rounded toward plus infinity. +Input parameters: + - x: Number which we want to compute the base-2 log. 
+Returns: The exponent +Throws: Nothing +============================================================================== +*/ + +int get_next_pow_2 (uint32_t x) noexcept { + assert (x > 0); + +#if (fstb_ARCHI == fstb_ARCHI_X86) && defined (_MSC_VER) + + #if ((_MSC_VER / 100) < 14) + + -- x; + int p; + if (x == 0) { - return (1); + p = 0; + } + else + { + __asm + { + xor eax, eax + bsr eax, x + inc eax + mov p, eax + } + } + + #else + + unsigned long p; + if (_BitScanReverse (&p, x - 1) == 0) + { + p = 0; + } + else + { + ++ p; + } + + #endif + +#else + + --x; + int p = 0; + + while ((x & ~(uint32_t (0xFFFFL))) != 0) + { + p += 16; + x >>= 16; + } + while ((x & ~(uint32_t (0xFL))) != 0) + { + p += 4; + x >>= 4; + } + while (x > 0) + { + ++p; + x >>= 1; + } + +#endif + + return int (p); +} + + + +constexpr double sinc (double x) noexcept +{ + if (x == 0) + { + return 1; } const double xp = x * PI; - return (sin (xp) / xp); + return sin (xp) / xp; +} + + + +double pseudo_exp (double x, double c) noexcept +{ + assert (x >= 0); + assert (c > 0); + + const double num = exp (c * x) - 1; + const double den = exp (c ) - 1; + + return num / den; +} + + + +double pseudo_log (double y, double c) noexcept +{ + assert (y >= 0); + assert (c > 0); + + const double num = log (y * (exp (c) - 1) + 1); + + return num / c; } @@ -491,28 +753,318 @@ class fnc_ShiftGeneric { public: static_assert (S < int (sizeof (T) * CHAR_BIT), "Shift too large"); - static T sh (T x) { return (x << S); } + static constexpr T sh (T x) noexcept { return x << S; } }; template class fnc_ShiftGeneric { public: static_assert (S < int (sizeof (T) * CHAR_BIT), "Shift too large"); - static T sh (T x) { return (x >> S); } + static constexpr T sh (T x) noexcept { return x >> S; } }; template -T sshift_l (T x) +constexpr T sshift_l (T x) noexcept { static_assert (std::is_integral ::value, "T must be integer"); - return (fnc_ShiftGeneric 0)>::sh (x)); + return fnc_ShiftGeneric 0)>::sh (x); } template -T sshift_r (T x) 
+constexpr T sshift_r (T x) noexcept { static_assert (std::is_integral ::value, "T must be integer"); - return (fnc_ShiftGeneric ::sh (x)); + return fnc_ShiftGeneric ::sh (x); +} + + + +template +constexpr T sq (T x) noexcept +{ + return x * x; +} + + + +template +constexpr T cube (T x) noexcept +{ + return x * x * x; +} + + + +// U must be a signed integer type +template +constexpr T ipow (T x, U n) noexcept +{ + const U abs_n = std::abs (n); + const T z (ipowp (x, abs_n)); + + return (n < U (0)) ? T (1) / z : z; +} + + + +// U must be an integer type (signed or not) +template +constexpr T ipowp (T x, U n) noexcept +{ + assert (! (n < U (0))); + +#if 1 + T z (1); + + while (n != U (0)) + { + if ((n & U (1)) != U (0)) + { + z *= x; + } + n >>= 1; + x *= x; + } +#else + T z (((n & U (1)) != U (0)) ? x : 1); + n >>= 1; + if (n > U (0)) + { + x *= x; + z *= ipowp (x, n); + } +#endif + + return z; +} + + + +// Result looks optimal with all optimisations enabled +template +constexpr T ipowpc (T x) noexcept +{ + static_assert (N >= 0, "N must be positive or null."); + +#if (__cplusplus >= 201402L) + if (N == 0) + { + return T (1); + } + else if (N > 1) + { + T y = ipowpc (x); + y *= y; + if ((N & 1) != 0) + { + y *= x; + } + return y; + } + + return x; +#else + return + (N == 0) ? 1 + : (N > 1) ? (sq (ipowpc (x)) * (((N & 1) != 0) ? 
x : 1)) + : x; +#endif +} + + + +template +constexpr T rcp_uint (int x) noexcept +{ + static_assert (std::is_floating_point ::value, "T must be floating point"); + + constexpr int table_len = 256; + constexpr T rcp_arr [table_len] = + { + T (0.00000000000000E+00), T (1.00000000000000E+00), T (5.00000000000000E-01), T (3.33333333333333E-01), + T (2.50000000000000E-01), T (2.00000000000000E-01), T (1.66666666666667E-01), T (1.42857142857143E-01), + T (1.25000000000000E-01), T (1.11111111111111E-01), T (1.00000000000000E-01), T (9.09090909090909E-02), + T (8.33333333333333E-02), T (7.69230769230769E-02), T (7.14285714285714E-02), T (6.66666666666667E-02), + T (6.25000000000000E-02), T (5.88235294117647E-02), T (5.55555555555556E-02), T (5.26315789473684E-02), + T (5.00000000000000E-02), T (4.76190476190476E-02), T (4.54545454545455E-02), T (4.34782608695652E-02), + T (4.16666666666667E-02), T (4.00000000000000E-02), T (3.84615384615385E-02), T (3.70370370370370E-02), + T (3.57142857142857E-02), T (3.44827586206897E-02), T (3.33333333333333E-02), T (3.22580645161290E-02), + T (3.12500000000000E-02), T (3.03030303030303E-02), T (2.94117647058823E-02), T (2.85714285714286E-02), + T (2.77777777777778E-02), T (2.70270270270270E-02), T (2.63157894736842E-02), T (2.56410256410256E-02), + T (2.50000000000000E-02), T (2.43902439024390E-02), T (2.38095238095238E-02), T (2.32558139534884E-02), + T (2.27272727272727E-02), T (2.22222222222222E-02), T (2.17391304347826E-02), T (2.12765957446808E-02), + T (2.08333333333333E-02), T (2.04081632653061E-02), T (2.00000000000000E-02), T (1.96078431372549E-02), + T (1.92307692307692E-02), T (1.88679245283019E-02), T (1.85185185185185E-02), T (1.81818181818182E-02), + T (1.78571428571429E-02), T (1.75438596491228E-02), T (1.72413793103448E-02), T (1.69491525423729E-02), + T (1.66666666666667E-02), T (1.63934426229508E-02), T (1.61290322580645E-02), T (1.58730158730159E-02), + T (1.56250000000000E-02), T (1.53846153846154E-02), T 
(1.51515151515152E-02), T (1.49253731343284E-02), + T (1.47058823529412E-02), T (1.44927536231884E-02), T (1.42857142857143E-02), T (1.40845070422535E-02), + T (1.38888888888889E-02), T (1.36986301369863E-02), T (1.35135135135135E-02), T (1.33333333333333E-02), + T (1.31578947368421E-02), T (1.29870129870130E-02), T (1.28205128205128E-02), T (1.26582278481013E-02), + T (1.25000000000000E-02), T (1.23456790123457E-02), T (1.21951219512195E-02), T (1.20481927710843E-02), + T (1.19047619047619E-02), T (1.17647058823529E-02), T (1.16279069767442E-02), T (1.14942528735632E-02), + T (1.13636363636364E-02), T (1.12359550561798E-02), T (1.11111111111111E-02), T (1.09890109890110E-02), + T (1.08695652173913E-02), T (1.07526881720430E-02), T (1.06382978723404E-02), T (1.05263157894737E-02), + T (1.04166666666667E-02), T (1.03092783505155E-02), T (1.02040816326531E-02), T (1.01010101010101E-02), + T (1.00000000000000E-02), T (9.90099009900990E-03), T (9.80392156862745E-03), T (9.70873786407767E-03), + T (9.61538461538462E-03), T (9.52380952380952E-03), T (9.43396226415094E-03), T (9.34579439252336E-03), + T (9.25925925925926E-03), T (9.17431192660550E-03), T (9.09090909090909E-03), T (9.00900900900901E-03), + T (8.92857142857143E-03), T (8.84955752212389E-03), T (8.77192982456140E-03), T (8.69565217391304E-03), + T (8.62068965517241E-03), T (8.54700854700855E-03), T (8.47457627118644E-03), T (8.40336134453782E-03), + T (8.33333333333333E-03), T (8.26446280991736E-03), T (8.19672131147541E-03), T (8.13008130081301E-03), + T (8.06451612903226E-03), T (8.00000000000000E-03), T (7.93650793650794E-03), T (7.87401574803150E-03), + T (7.81250000000000E-03), T (7.75193798449612E-03), T (7.69230769230769E-03), T (7.63358778625954E-03), + T (7.57575757575758E-03), T (7.51879699248120E-03), T (7.46268656716418E-03), T (7.40740740740741E-03), + T (7.35294117647059E-03), T (7.29927007299270E-03), T (7.24637681159420E-03), T (7.19424460431655E-03), + T (7.14285714285714E-03), T 
(7.09219858156028E-03), T (7.04225352112676E-03), T (6.99300699300699E-03), + T (6.94444444444444E-03), T (6.89655172413793E-03), T (6.84931506849315E-03), T (6.80272108843537E-03), + T (6.75675675675676E-03), T (6.71140939597315E-03), T (6.66666666666667E-03), T (6.62251655629139E-03), + T (6.57894736842105E-03), T (6.53594771241830E-03), T (6.49350649350649E-03), T (6.45161290322581E-03), + T (6.41025641025641E-03), T (6.36942675159236E-03), T (6.32911392405063E-03), T (6.28930817610063E-03), + T (6.25000000000000E-03), T (6.21118012422360E-03), T (6.17283950617284E-03), T (6.13496932515337E-03), + T (6.09756097560976E-03), T (6.06060606060606E-03), T (6.02409638554217E-03), T (5.98802395209581E-03), + T (5.95238095238095E-03), T (5.91715976331361E-03), T (5.88235294117647E-03), T (5.84795321637427E-03), + T (5.81395348837209E-03), T (5.78034682080925E-03), T (5.74712643678161E-03), T (5.71428571428571E-03), + T (5.68181818181818E-03), T (5.64971751412429E-03), T (5.61797752808989E-03), T (5.58659217877095E-03), + T (5.55555555555556E-03), T (5.52486187845304E-03), T (5.49450549450549E-03), T (5.46448087431694E-03), + T (5.43478260869565E-03), T (5.40540540540541E-03), T (5.37634408602151E-03), T (5.34759358288770E-03), + T (5.31914893617021E-03), T (5.29100529100529E-03), T (5.26315789473684E-03), T (5.23560209424084E-03), + T (5.20833333333333E-03), T (5.18134715025907E-03), T (5.15463917525773E-03), T (5.12820512820513E-03), + T (5.10204081632653E-03), T (5.07614213197969E-03), T (5.05050505050505E-03), T (5.02512562814070E-03), + T (5.00000000000000E-03), T (4.97512437810945E-03), T (4.95049504950495E-03), T (4.92610837438424E-03), + T (4.90196078431373E-03), T (4.87804878048781E-03), T (4.85436893203883E-03), T (4.83091787439613E-03), + T (4.80769230769231E-03), T (4.78468899521531E-03), T (4.76190476190476E-03), T (4.73933649289100E-03), + T (4.71698113207547E-03), T (4.69483568075117E-03), T (4.67289719626168E-03), T (4.65116279069768E-03), + T 
(4.62962962962963E-03), T (4.60829493087558E-03), T (4.58715596330275E-03), T (4.56621004566210E-03), + T (4.54545454545455E-03), T (4.52488687782805E-03), T (4.50450450450451E-03), T (4.48430493273543E-03), + T (4.46428571428571E-03), T (4.44444444444444E-03), T (4.42477876106195E-03), T (4.40528634361234E-03), + T (4.38596491228070E-03), T (4.36681222707424E-03), T (4.34782608695652E-03), T (4.32900432900433E-03), + T (4.31034482758621E-03), T (4.29184549356223E-03), T (4.27350427350427E-03), T (4.25531914893617E-03), + T (4.23728813559322E-03), T (4.21940928270042E-03), T (4.20168067226891E-03), T (4.18410041841004E-03), + T (4.16666666666667E-03), T (4.14937759336100E-03), T (4.13223140495868E-03), T (4.11522633744856E-03), + T (4.09836065573771E-03), T (4.08163265306122E-03), T (4.06504065040650E-03), T (4.04858299595142E-03), + T (4.03225806451613E-03), T (4.01606425702811E-03), T (4.00000000000000E-03), T (3.98406374501992E-03), + T (3.96825396825397E-03), T (3.95256916996047E-03), T (3.93700787401575E-03), T (3.92156862745098E-03), + }; + + assert (x > 0); + + if (x < table_len) + { + return rcp_arr [x]; + } + + return T (1) / T (x); +} + + + +template +constexpr T lerp (T v0, T v1, T p) noexcept +{ + return v0 + p * (v1 - v0); +} + + + +// Finds the x position of the extremum (min or max) in the parabolic- +// interpolated curve passes through (-1, r1), (0, r2) and (+1, r3). +// The curve is implicitely defined by: +// f(x) = ((r3 + r1) / 2 - r2) * x^2 + ((r3 - r1) / 2) * x + r2 +// The points must not be aligned so the extremum exists. +// It is not necessariy located between -1 and 1. +template +inline constexpr T find_extremum_pos_parabolic (T r1, T r2, T r3) +{ + const T den = T (2) * r2 - (r3 + r1); + assert (den != T (0)); + + const T pos = (r3 - r1) * T (0.5) / den; + + return pos; +} + + + +namespace detail +{ + template + constexpr std::array + make_array (const T &init_val, std::index_sequence ) + { + return {{ (static_cast (IS), init_val)... 
}}; + } +} + +// Default-initializes an array with a specified value +// Source: https://stackoverflow.com/a/41259045 +template +constexpr std::array make_array (const T &init_val) +{ + return detail::make_array (init_val, std::make_index_sequence ()); +} + + + +template +T read_unalign (const void *ptr) noexcept +{ + static_assert ( + std::is_trivially_copyable ::value, "T must be trivially copiable" + ); + assert (ptr != nullptr); + + T val; + memcpy (&val, ptr, sizeof (val)); + return val; +} + + + +template +void write_unalign (void *ptr, T val) noexcept +{ + static_assert ( + std::is_trivially_copyable ::value, "T must be trivially copiable" + ); + assert (ptr != nullptr); + + memcpy (ptr, &val, sizeof (val)); +} + + + +// std::copy is already optimized like this but uses memmove instead of +// memcpy. +template +void copy_no_overlap (T * fstb_RESTRICT dst_ptr, const T * fstb_RESTRICT src_ptr, int nbr_elt) noexcept +{ + assert (dst_ptr != nullptr); + assert (src_ptr != nullptr); + assert (nbr_elt > 0); + + if (std::is_trivially_copyable ::value) + { + memcpy (dst_ptr, src_ptr, nbr_elt * sizeof (*dst_ptr)); + } + else + { + std::copy (src_ptr, src_ptr + nbr_elt, dst_ptr); + } +} + + + +template +bool is_ptr_align_nz (const T *ptr, int a) noexcept +{ + assert (a > 0); + assert (is_pow_2 (a)); + + return ( + ptr != nullptr + && (reinterpret_cast (ptr) & (a - 1)) == 0 + ); } diff --git a/src/fstb/fnc.cpp b/src/fstb/fnc_fstb.cpp similarity index 100% rename from src/fstb/fnc.cpp rename to src/fstb/fnc_fstb.cpp diff --git a/src/main.cpp b/src/main.cpp index 09179fc..cd9d8ab 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -164,7 +164,7 @@ class TmpHistLuma const int h = _vsapi.getFrameHeight (dst_ptr, plane); if (_vi_out.format->bytesPerSample == 2) { - const uint16_t fill_cst = 1 << (bits - 1); + const uint16_t fill_cst = uint16_t (1 << (bits - 1)); for (int y = 0; y < h; ++y) { for (int x = 0; x < w; ++x) @@ -555,6 +555,9 @@ VS_EXTERNAL_API (void) 
VapourSynthPluginInit (::VSConfigPlugin config_fnc, ::VSR "staticnoise:int:opt;" "cpuopt:int:opt;" "patsize:int:opt;" + "tpdfo:int:opt;" + "tpdfn:int:opt;" + "corplane:int:opt;" , &vsutl::Redirect ::create, 0, plugin_ptr ); diff --git a/src/vsutl/FrameRefSPtr.h b/src/vsutl/FrameRefSPtr.h index f63dc04..6c13dcf 100644 --- a/src/vsutl/FrameRefSPtr.h +++ b/src/vsutl/FrameRefSPtr.h @@ -37,10 +37,22 @@ namespace vsutl +class FrameRefSPtr_FncWrapper +{ +public: + static inline const ::VSFrameRef * clone (const ::VSAPI &vsapi, const ::VSFrameRef *f) VS_NOEXCEPT + { + return (*vsapi.cloneFrameRef) (f); + } + static inline void free (const ::VSAPI &vsapi, const ::VSFrameRef *f) VS_NOEXCEPT + { + (*vsapi.freeFrame) (f); + } +}; + typedef ObjRefSPtr < const ::VSFrameRef, - &VSAPI::cloneFrameRef, - &VSAPI::freeFrame + FrameRefSPtr_FncWrapper > FrameRefSPtr; diff --git a/src/vsutl/FuncRefSPtr.h b/src/vsutl/FuncRefSPtr.h index d6d435f..3e581ae 100644 --- a/src/vsutl/FuncRefSPtr.h +++ b/src/vsutl/FuncRefSPtr.h @@ -37,10 +37,22 @@ namespace vsutl +class FuncRefSPtr_FncWrapper +{ +public: + static inline ::VSFuncRef * clone (const ::VSAPI &vsapi, ::VSFuncRef *func) VS_NOEXCEPT + { + return (*vsapi.cloneFuncRef) (func); + } + static inline void free (const ::VSAPI &vsapi, ::VSFuncRef *func) VS_NOEXCEPT + { + (*vsapi.freeFunc) (func); + } +}; + typedef ObjRefSPtr < ::VSFuncRef, - &VSAPI::cloneFuncRef, - &VSAPI::freeFunc + FuncRefSPtr_FncWrapper > FuncRefSPtr; diff --git a/src/vsutl/NodeRefSPtr.h b/src/vsutl/NodeRefSPtr.h index d0ecb04..5536f8b 100644 --- a/src/vsutl/NodeRefSPtr.h +++ b/src/vsutl/NodeRefSPtr.h @@ -37,10 +37,22 @@ namespace vsutl +class NodeRefSPtr_FncWrapper +{ +public: + static inline ::VSNodeRef * clone (const ::VSAPI &vsapi, ::VSNodeRef *node) VS_NOEXCEPT + { + return (*vsapi.cloneNodeRef) (node); + } + static inline void free (const ::VSAPI &vsapi, ::VSNodeRef *node) VS_NOEXCEPT + { + (*vsapi.freeNode) (node); + } +}; + typedef ObjRefSPtr < ::VSNodeRef, - 
&VSAPI::cloneNodeRef, - &VSAPI::freeNode + NodeRefSPtr_FncWrapper > NodeRefSPtr; diff --git a/src/vsutl/ObjRefSPtr.h b/src/vsutl/ObjRefSPtr.h index 01b1f45..bf9c18a 100644 --- a/src/vsutl/ObjRefSPtr.h +++ b/src/vsutl/ObjRefSPtr.h @@ -9,8 +9,10 @@ Template parameters: - T: The type of the object possibly with const, but without pointer (currently ::VSNodeRef, const ::VSFrameRef or const ::VSFuncRef). -- FC: VSAPI member pointer to the function for cloning const T *. -- FF: VSAPI member pointer to the function for freeing const T *. + +- FW: Wrapper class for clone and free functions. Requires: + static inline T * FW::clone (const ::VSAPI &, T *) VS_NOEXCEPT; + static inline void FW::free (const ::VSAPI &, T *) VS_NOEXCEPT; --- Legal stuff --- @@ -40,20 +42,12 @@ To Public License, Version 2, as published by Sam Hocevar. See -#if (__cplusplus >= 201703L) - #define vsutl_ObjRefSPtr_VS_NOEXCEPT VS_NOEXCEPT -#else - #define vsutl_ObjRefSPtr_VS_NOEXCEPT -#endif - - - namespace vsutl { -template +template class ObjRefSPtr { @@ -63,11 +57,14 @@ class ObjRefSPtr ObjRefSPtr () = default; ObjRefSPtr (T *ptr, const ::VSAPI &vsapi); - ObjRefSPtr (const ObjRefSPtr &other); + ObjRefSPtr (const ObjRefSPtr &other); + ObjRefSPtr (ObjRefSPtr &&other); virtual ~ObjRefSPtr (); - ObjRefSPtr & - operator = (const ObjRefSPtr &other); + ObjRefSPtr & + operator = (const ObjRefSPtr &other); + ObjRefSPtr & + operator = (ObjRefSPtr &&other); T * operator -> () const; T & operator * () const; @@ -89,8 +86,8 @@ class ObjRefSPtr void release_resource (); - T * _obj_ptr = 0; - const ::VSAPI* _vsapi_ptr = 0; // Can be 0 only if _obj_ptr is 0 too. + T * _obj_ptr = nullptr; + const ::VSAPI* _vsapi_ptr = nullptr; // Can be 0 only if _obj_ptr is 0 too. 
@@ -98,8 +95,8 @@ class ObjRefSPtr private: - bool operator == (const ObjRefSPtr &other) const; - bool operator != (const ObjRefSPtr &other) const; + bool operator == (const ObjRefSPtr &other) const; + bool operator != (const ObjRefSPtr &other) const; }; // class ObjRefSPtr diff --git a/src/vsutl/ObjRefSPtr.hpp b/src/vsutl/ObjRefSPtr.hpp index cb81623..a6e46cf 100644 --- a/src/vsutl/ObjRefSPtr.hpp +++ b/src/vsutl/ObjRefSPtr.hpp @@ -40,25 +40,25 @@ namespace vsutl // Does not increase the reference count. -template -ObjRefSPtr ::ObjRefSPtr (T *ptr, const ::VSAPI &vsapi) +template +ObjRefSPtr ::ObjRefSPtr (T *ptr, const ::VSAPI &vsapi) : _obj_ptr (ptr) , _vsapi_ptr (&vsapi) { - assert (_obj_ptr == 0 || _vsapi_ptr != 0); + assert (_obj_ptr == nullptr || _vsapi_ptr != nullptr); } -template -ObjRefSPtr ::ObjRefSPtr (const ObjRefSPtr &other) -: _obj_ptr (0) +template +ObjRefSPtr ::ObjRefSPtr (const ObjRefSPtr &other) +: _obj_ptr (nullptr) , _vsapi_ptr (other._vsapi_ptr) { - if (other._obj_ptr != 0) + if (other._obj_ptr != nullptr) { - _obj_ptr = (_vsapi_ptr->*FC) (other._obj_ptr); - if (_obj_ptr == 0) + _obj_ptr = FW::clone (*_vsapi_ptr, other._obj_ptr); + if (_obj_ptr == nullptr) { throw std::runtime_error ("Cannot clone VS object reference."); } @@ -67,31 +67,41 @@ ObjRefSPtr ::ObjRefSPtr (const ObjRefSPtr &other) -template -ObjRefSPtr ::~ObjRefSPtr () +template +ObjRefSPtr ::ObjRefSPtr (ObjRefSPtr &&other) +: _obj_ptr (other._obj_ptr) +, _vsapi_ptr (other._vsapi_ptr) +{ + other._obj_ptr = nullptr; +} + + + +template +ObjRefSPtr ::~ObjRefSPtr () { release_resource (); } -template -ObjRefSPtr & ObjRefSPtr ::operator = (const ObjRefSPtr &other) +template +ObjRefSPtr & ObjRefSPtr ::operator = (const ObjRefSPtr &other) { if (other._obj_ptr != _obj_ptr) { - T * tmp_ptr = 0; + T * tmp_ptr = nullptr; - if (other._obj_ptr != 0) + if (other._obj_ptr != nullptr) { - if (_vsapi_ptr == 0) + if (_vsapi_ptr == nullptr) { - assert (other._vsapi_ptr != 0); + assert (other._vsapi_ptr 
!= nullptr); _vsapi_ptr = other._vsapi_ptr; } - tmp_ptr = (_vsapi_ptr->*FC) (other._obj_ptr); - if (tmp_ptr == 0) + tmp_ptr = FW::clone (*_vsapi_ptr, other._obj_ptr); + if (tmp_ptr == nullptr) { throw std::runtime_error ("Cannot clone VS object reference."); } @@ -102,50 +112,65 @@ ObjRefSPtr & ObjRefSPtr ::operator = (const ObjRefSPtr +ObjRefSPtr & ObjRefSPtr ::operator = (ObjRefSPtr &&other) +{ + if (other._obj_ptr != _obj_ptr) + { + _obj_ptr = other._obj_ptr; + _vsapi_ptr = other._vsapi_ptr; + other._obj_ptr = nullptr; + } + + return *this; } -template -T * ObjRefSPtr ::operator -> () const +template +T * ObjRefSPtr ::operator -> () const { - return (_obj_ptr); + return _obj_ptr; } -template -T & ObjRefSPtr ::operator * () const +template +T & ObjRefSPtr ::operator * () const { - return (*_obj_ptr); + return *_obj_ptr; } -template -T * ObjRefSPtr ::get () const +template +T * ObjRefSPtr ::get () const { - return (_obj_ptr); + return _obj_ptr; } -template -T * ObjRefSPtr ::dup () const +template +T * ObjRefSPtr ::dup () const { - assert (_obj_ptr != 0); - assert (_vsapi_ptr != 0); + assert (_obj_ptr != nullptr); + assert (_vsapi_ptr != nullptr); - T * tmp_ptr = (_vsapi_ptr->*FC) (_obj_ptr); + T * tmp_ptr = FW::clone (*_vsapi_ptr, _obj_ptr); - return (tmp_ptr); + return tmp_ptr; } -template -void ObjRefSPtr ::clear () +template +void ObjRefSPtr ::clear () { release_resource (); } @@ -160,14 +185,14 @@ void ObjRefSPtr ::clear () -template -void ObjRefSPtr ::release_resource () +template +void ObjRefSPtr ::release_resource () { - if (_obj_ptr != 0) + if (_obj_ptr != nullptr) { - assert (_vsapi_ptr != 0); - (_vsapi_ptr->*FF) (_obj_ptr); - _obj_ptr = 0; + assert (_vsapi_ptr != nullptr); + FW::free (*_vsapi_ptr, _obj_ptr); + _obj_ptr = nullptr; } } diff --git a/src/vsutl/PlaneProcessor.cpp b/src/vsutl/PlaneProcessor.cpp index cb027be..d8e3f68 100644 --- a/src/vsutl/PlaneProcessor.cpp +++ b/src/vsutl/PlaneProcessor.cpp @@ -174,7 +174,7 @@ void PlaneProcessor::set_filter 
(const ::VSMap &in, ::VSMap &out, const ::VSVide _vsapi.setError (&out, err_msg.c_str ()); ok_flag = false; } - else if ( plane_content >= PlaneProcMode_NBR_ELT + else if ( plane_content >= double (PlaneProcMode_NBR_ELT) || (int_flag && -plane_content >= max_val)) { const std::string err_msg = @@ -229,7 +229,7 @@ const ::VSFrameRef * PlaneProcessor::try_initial (::VSCore &core) for (int plane_index = 0; plane_index < _nbr_planes; ++plane_index) { const double val = _proc_mode_arr [plane_index]; - if (val < PlaneProcMode_COPY1) + if (val < double (PlaneProcMode_COPY1)) { fill_plane ( const_cast < ::VSFrameRef &> (*_blank_frame_sptr), @@ -294,7 +294,7 @@ int PlaneProcessor::process_frame (::VSFrameRef &dst, int n, void *frame_data_pt copy_plane (dst, *src_sptr, plane_index); } } - else if (mode < PlaneProcMode_COPY1) + else if (mode_i < PlaneProcMode_COPY1) { fill_plane (dst, -mode, plane_index); } diff --git a/src/vsutl/fnc.cpp b/src/vsutl/fnc.cpp index 5a5010b..30d6fa3 100644 --- a/src/vsutl/fnc.cpp +++ b/src/vsutl/fnc.cpp @@ -164,7 +164,7 @@ int compute_plane_width (const ::VSFormat &fmt, int plane_index, int base_w) base_w >>= fmt.subSamplingW; } - return (base_w); + return base_w; } @@ -181,7 +181,7 @@ int compute_plane_height (const ::VSFormat &fmt, int plane_index, int base_h) base_h >>= fmt.subSamplingH; } - return (base_h); + return base_h; } @@ -238,7 +238,7 @@ int conv_str_to_chroma_subspl (int &ssh, int &ssv, std::string css) ret_val = -1; } - return (ret_val); + return ret_val; } diff --git a/zip-release.bat b/zip-release.bat index a826c35..991df48 100644 --- a/zip-release.bat +++ b/zip-release.bat @@ -13,6 +13,7 @@ @mkdir "reltmp\win64" @xcopy /I "doc" "reltmp\doc" @xcopy /I "build\unix" "reltmp\build\unix" +@xcopy /I "build\unix\m4" "reltmp\build\unix\m4" @xcopy /I "src\conc" "reltmp\src\conc" @xcopy /I "src\ffft" "reltmp\src\ffft" @xcopy /I "src\fmtc" "reltmp\src\fmtc"