diff --git a/build/unix/Makefile.am b/build/unix/Makefile.am
index cc23cab..e311ca1 100644
--- a/build/unix/Makefile.am
+++ b/build/unix/Makefile.am
@@ -1,10 +1,54 @@
 ACLOCAL_AMFLAGS = -I m4
 
-warningflags = -Wall -Wextra -Wshadow -Wno-expansion-to-defined -Wno-missing-field-initializers
+warningflags = \
+        -Wall \
+        -Wextra \
+        -Wshadow \
+        -Wunused \
+        -Wnull-dereference \
+        -Wvla \
+        -Wstrict-aliasing \
+        -Wuninitialized \
+        -Wunused-parameter \
+        -Wreorder \
+        -Wsign-compare \
+        -Wunreachable-code \
+        -Wconversion \
+        -Wno-sign-conversion \
+         $(COMPWARNFLAGS)
+
+if CLG
+
+# Clang
+warningflags += \
+        -Wshadow-all \
+        -Wshorten-64-to-32 \
+        -Wint-conversion \
+        -Wconditional-uninitialized \
+        -Wconstant-conversion \
+        -Wunused-private-field \
+        -Wbool-conversion \
+        -Wextra-semi \
+        -Wnullable-to-nonnull-conversion \
+        -Wno-unused-private-field \
+        -Wno-unused-command-line-argument
+# -Wzero-as-null-pointer-constant
+
+else
+
+# GCC
+warningflags += \
+        -Wredundant-decls \
+        -Wno-ignored-attributes \
+        -Wno-expansion-to-defined
+
+endif
+
+warnflagscpp =
 includeflags = -I$(srcdir)/../../src
-commonflags = $(DEBUGCFLAGS) $(MFLAGS) $(warningflags) $(includeflags)
-AM_CXXFLAGS = -std=$(CXXSTD) $(commonflags)
-AM_LDFLAGS  = $(PLUGINLDFLAGS)
+commoncflags = $(DEBUGCFLAGS) $(MFLAGS) $(warningflags) $(includeflags) $(STACKREALIGN)
+AM_CXXFLAGS  = -std=$(CXXSTD) $(commoncflags) $(warnflagscpp) $(EXTRA_CXXFLAGS)
+AM_LDFLAGS   = $(PLUGINLDFLAGS)
 
 lib_LTLIBRARIES = libfmtconv.la
 
@@ -52,8 +96,6 @@ libfmtconv_la_SOURCES =  \
         ../../src/conc/ObjPool.h \
         ../../src/conc/ObjPool.hpp \
         ../../src/ffft/def.h \
-        ../../src/ffft/DynArray.h \
-        ../../src/ffft/DynArray.hpp \
         ../../src/ffft/FFTReal.h \
         ../../src/ffft/FFTReal.hpp \
         ../../src/ffft/OscSinCos.h \
@@ -129,6 +171,8 @@ libfmtconv_la_SOURCES =  \
         ../../src/fmtcl/DiscreteFirCustom.h \
         ../../src/fmtcl/DiscreteFirInterface.cpp \
         ../../src/fmtcl/DiscreteFirInterface.h \
+        ../../src/fmtcl/Dither.cpp \
+        ../../src/fmtcl/Dither.h \
         ../../src/fmtcl/ErrDifBuf.cpp \
         ../../src/fmtcl/ErrDifBuf.h \
         ../../src/fmtcl/ErrDifBuf.hpp \
@@ -138,6 +182,7 @@ libfmtconv_la_SOURCES =  \
         ../../src/fmtcl/FilterResize.h \
         ../../src/fmtcl/fnc.cpp \
         ../../src/fmtcl/fnc.h \
+        ../../src/fmtcl/InterlacingType.h \
         ../../src/fmtcl/KernelData.cpp \
         ../../src/fmtcl/KernelData.h \
         ../../src/fmtcl/Mat3.h \
@@ -150,9 +195,14 @@ libfmtconv_la_SOURCES =  \
         ../../src/fmtcl/MatrixProc.cpp \
         ../../src/fmtcl/MatrixProc.h \
         ../../src/fmtcl/MatrixProc_macro.h \
+        ../../src/fmtcl/MatrixUtil.cpp \
+        ../../src/fmtcl/MatrixUtil.h \
         ../../src/fmtcl/MatrixWrap.h \
         ../../src/fmtcl/MatrixWrap.hpp \
+        ../../src/fmtcl/PicFmt.h \
         ../../src/fmtcl/PrimariesPreset.h \
+        ../../src/fmtcl/PrimUtil.cpp \
+        ../../src/fmtcl/PrimUtil.h \
         ../../src/fmtcl/Proxy.h \
         ../../src/fmtcl/Proxy.hpp \
         ../../src/fmtcl/ProxyRwCpp.h \
@@ -163,8 +213,11 @@ libfmtconv_la_SOURCES =  \
         ../../src/fmtcl/ReadWrapperFlt.hpp \
         ../../src/fmtcl/ReadWrapperInt.h \
         ../../src/fmtcl/ReadWrapperInt.hpp \
+        ../../src/fmtcl/ResamplePlaneData.h \
         ../../src/fmtcl/ResampleSpecPlane.cpp \
         ../../src/fmtcl/ResampleSpecPlane.h \
+        ../../src/fmtcl/ResampleUtil.cpp \
+        ../../src/fmtcl/ResampleUtil.h \
         ../../src/fmtcl/ResizeData.cpp \
         ../../src/fmtcl/ResizeData.h \
         ../../src/fmtcl/ResizeData.hpp \
@@ -212,6 +265,8 @@ libfmtconv_la_SOURCES =  \
         ../../src/fmtcl/TransOpSLog.h \
         ../../src/fmtcl/TransOpSLog3.cpp \
         ../../src/fmtcl/TransOpSLog3.h \
+        ../../src/fmtcl/TransUtil.cpp \
+        ../../src/fmtcl/TransUtil.h \
         ../../src/fmtcl/Vec3.h \
         ../../src/fmtcl/Vec3.hpp \
         ../../src/fmtcl/VoidAndCluster.cpp \
@@ -223,7 +278,7 @@ libfmtconv_la_SOURCES =  \
         ../../src/fstb/CpuId.cpp \
         ../../src/fstb/CpuId.h \
         ../../src/fstb/def.h \
-        ../../src/fstb/fnc.cpp \
+        ../../src/fstb/fnc_fstb.cpp \
         ../../src/fstb/fnc.h \
         ../../src/fstb/fnc.hpp \
         ../../src/fstb/SingleObj.h \
diff --git a/build/unix/configure.ac b/build/unix/configure.ac
index 0a52132..65676fb 100644
--- a/build/unix/configure.ac
+++ b/build/unix/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([fmtconv], [r22], [http://forum.doom9.org/showthread.php?t=166504], [fmtconv], [http://forum.doom9.org/showthread.php?t=166504])
+AC_INIT([fmtconv], [r23], [http://forum.doom9.org/showthread.php?t=166504], [fmtconv], [http://forum.doom9.org/showthread.php?t=166504])
 AC_CONFIG_MACRO_DIR([m4])
 
 AM_INIT_AUTOMAKE([foreign no-dist-gzip dist-xz subdir-objects no-define])
@@ -6,32 +6,26 @@ AM_SILENT_RULES([yes])
 
 LT_INIT([win32-dll disable-static])
 
+: ${CXXFLAGS=""}
+: ${CFLAGS=""}
+
 AC_PROG_CXX
+AC_PROG_CC
 
 AC_CANONICAL_HOST
 
 AC_ARG_ENABLE([debug], AS_HELP_STRING([--enable-debug], [Compilation options required for debugging. [default=no]]))
+AC_ARG_ENABLE([clang], AS_HELP_STRING([--enable-clang], [Use Clang as compiler along with libc++. [default=no]]))
 
-AC_LANG_PUSH([C++])
-AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++17], [CXXSTD="c++17"]))
-AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++11], [CXXSTD="c++11"]))
-AS_IF([test "x$CXXSTD" = "x"], AC_MSG_ERROR([Minimum requirement: C++11]))
-AX_CHECK_COMPILE_FLAG([-Wunused-private-field]        , [CXXFLAGS="$CXXFLAGS -Wno-unused-private-field"]        , , [-Werror])
-AX_CHECK_COMPILE_FLAG([-Wunused-command-line-argument], [CXXFLAGS="$CXXFLAGS -Wno-unused-command-line-argument"], , [-Werror])
-AC_LANG_POP([C++])
 
-# It seems that -latomic is needed only for some versions of GCC < 5.3
-AX_CHECK_LINK_FLAG([-latomic], [LIBS="$LIBS -latomic"])
-
-AS_IF(
-    [test "x$enable_debug" = "xyes"],
-    [DEBUGCFLAGS="-O0 -g3 -ggdb"],
-    [DEBUGCFLAGS="-O3 -g3 -DNDEBUG"]
-)
 
 X86="false"
 PPC="false"
 ARM="false"
+WIN="false"
+UNX="false"
+MAC="false"
+CLG="false"
 
 AS_CASE(
     [$host_cpu],
@@ -44,21 +38,62 @@ AS_CASE(
 
 AS_CASE(
     [$host_os],
-    [cygwin*|mingw*],
-    [AS_IF(
-        [test "x$BITS" = "x32"],
-        [
-            PLUGINLDFLAGS="-Wl,--kill-at"
-            STACKREALIGN="-mstackrealign"
-        ]
-    )]
+    [cygwin*|mingw*],                [WIN="true"],
+    [darwin*],                       [MAC="true"],
+    [*linux*|gnu*|dragonfly*|*bsd*], [UNX="true"]
 )
 
 AS_IF(
-    [test "x$X86" = "xtrue"],
+    [test "x$enable_debug" = "xyes"],
+    [
+        DEBUGCFLAGS="-O0 -g3 -ggdb"
+        AC_MSG_NOTICE([Debug mode enabled.])
+    ],
+    [DEBUGCFLAGS="-O3 -g3 -DNDEBUG"]
+)
+
+AS_IF(
+    [test "x$enable_clang" = "xyes"],
+    [
+        CLG="true"
+        CXX="clang++"
+        CC="clang"
+        LD="clang++"
+        MFLAGS="$MFLAGS -fexperimental-new-pass-manager -mllvm -inline-threshold=1000"
+        COMPWARNFLAGS=""
+        AC_MSG_NOTICE([Using clang as compiler.])
+    ],
+    [COMPWARNFLAGS="-Wduplicated-cond -Wduplicated-branches -Wlogical-op"]
+)
+
+AC_LANG_PUSH([C++])
+#AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++20], [CXXSTD="c++20"]))
+#AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++17], [CXXSTD="c++17"]))
+AS_IF([test "x$CXXSTD" = "x"], AX_CHECK_COMPILE_FLAG([-std=c++14], [CXXSTD="c++14"]))
+AS_IF([test "x$CXXSTD" = "x"], AC_MSG_ERROR([Minimum requirement: C++14]))
+AC_LANG_POP([C++])
+
+# It seems that -latomic is needed only for some versions of GCC < 5.3
+AX_CHECK_LINK_FLAG([-latomic], [LIBS="$LIBS -latomic"])
+
+AS_IF(
+    [test "x$WIN" = "xtrue"],
     [
-        MFLAGS="-mfpmath=sse -msse2 -Wno-ignored-attributes"
+        AS_IF(
+            [test "x$BITS" = "x32"],
+            [
+                PLUGINLDFLAGS="-Wl,--kill-at"
+                STACKREALIGN="-mstackrealign"
+            ]
+        )
+    ]
+)
 
+AS_IF(
+    [test "x$X86" = "xtrue"],
+    [
+        MFLAGS="$MFLAGS -mfpmath=sse -msse2"
+        COMPWARNFLAGS="$COMPWARNFLAGS -Wno-ignored-attributes"
         # We need this to use CMPXCHG16B for 2x64-bit CAS (compare and swap)
         AS_IF([test "x$BITS" = "x64"], [MFLAGS="$MFLAGS -mcx16"])
     ]
@@ -72,19 +107,27 @@ AS_IF(
         AX_CHECK_COMPILE_FLAG([-mfpu=neon], [MFLAGS="$MFLAGS -mfpu=neon"])
 
         # GCC 7 emits some warnings about ABI changes when using std::vector
-        AX_CHECK_COMPILE_FLAG([-Wpsabi -Werror], [MFLAGS="$MFLAGS -Wno-psabi"])
+        AX_CHECK_COMPILE_FLAG([-Wpsabi], [COMPWARNFLAGS="$COMPWARNFLAGS -Wno-psabi"], , [-Werror])
     ]
 )
 
+AX_CHECK_COMPILE_FLAG([-Wmisleading-indentation]          , [COMPWARNFLAGS="$COMPWARNFLAGS -Wmisleading-indentation"]          , , [-Werror])
+# Clang only
+AX_CHECK_COMPILE_FLAG([-Wno-implicit-int-float-conversion], [COMPWARNFLAGS="$COMPWARNFLAGS -Wno-implicit-int-float-conversion"], , [-Werror])
+
+AC_SUBST([CXXSTD])
+AC_SUBST([EXTRA_CXXFLAGS])
+AC_SUBST([LDFLAGS])
 AC_SUBST([MFLAGS])
 AC_SUBST([DEBUGCFLAGS])
-AC_SUBST([CXXSTD])
 AC_SUBST([PLUGINLDFLAGS])
 AC_SUBST([STACKREALIGN])
 
 AM_CONDITIONAL([X86], [test "x$X86" = "xtrue"])
 AM_CONDITIONAL([ARM], [test "x$ARM" = "xtrue"])
-
+AM_CONDITIONAL([UNX], [test "x$UNX" = "xtrue"])
+AM_CONDITIONAL([WIN], [test "x$WIN" = "xtrue"])
+AM_CONDITIONAL([CLG], [test "x$CLG" = "xtrue"])
 
 AC_CONFIG_FILES([Makefile])
 AC_OUTPUT
diff --git a/build/win/fmtconv.vcxproj b/build/win/fmtconv.vcxproj
index 6190cf5..c8f1bed 100644
--- a/build/win/fmtconv.vcxproj
+++ b/build/win/fmtconv.vcxproj
@@ -32,10 +32,11 @@
     <CharacterSet>Unicode</CharacterSet>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)'=='Debug'" Label="Configuration">
-    <LinkIncremental>true</LinkIncremental>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <WholeProgramOptimization>false</WholeProgramOptimization>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Configuration)'=='Release'" Label="Configuration">
-    <LinkIncremental>false</LinkIncremental>
+    <UseDebugLibraries>false</UseDebugLibraries>
     <WholeProgramOptimization>true</WholeProgramOptimization>
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
@@ -45,8 +46,13 @@
     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
   </ImportGroup>
   <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)'=='Debug'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)'=='Release'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
   <PropertyGroup>
-    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
     <OutDir>$(ProjectDir)$(Configuration)$(Platform)\</OutDir>
     <IntDir>$(ProjectDir)$(Configuration)$(Platform)\</IntDir>
     <CodeAnalysisRuleSet>AllRules.ruleset</CodeAnalysisRuleSet>
@@ -57,11 +63,13 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
       <BrowseInformation>true</BrowseInformation>
     </ClCompile>
+    <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
   </ItemDefinitionGroup>
   <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
     <ClCompile>
@@ -74,6 +82,7 @@
       <FunctionLevelLinking>true</FunctionLevelLinking>
     </ClCompile>
     <Link>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
       <OptimizeReferences>true</OptimizeReferences>
       <EnableCOMDATFolding>true</EnableCOMDATFolding>
     </Link>
@@ -102,8 +111,11 @@
       <PrecompiledHeader>
       </PrecompiledHeader>
       <WarningLevel>Level4</WarningLevel>
-      <DisableSpecificWarnings>4127;4505</DisableSpecificWarnings>
+      <DisableSpecificWarnings>4127</DisableSpecificWarnings>
       <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <ConformanceMode>true</ConformanceMode>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
     </ClCompile>
     <Link>
       <GenerateDebugInformation>true</GenerateDebugInformation>
@@ -136,11 +148,13 @@
     <ClInclude Include="..\..\src\fmtcl\ContFirSpline64.h" />
     <ClInclude Include="..\..\src\fmtcl\DiscreteFirCustom.h" />
     <ClInclude Include="..\..\src\fmtcl\DiscreteFirInterface.h" />
+    <ClInclude Include="..\..\src\fmtcl\Dither.h" />
     <ClInclude Include="..\..\src\fmtcl\ErrDifBuf.h" />
     <ClInclude Include="..\..\src\fmtcl\ErrDifBuf.hpp" />
     <ClInclude Include="..\..\src\fmtcl\ErrDifBufFactory.h" />
     <ClInclude Include="..\..\src\fmtcl\FilterResize.h" />
     <ClInclude Include="..\..\src\fmtcl\fnc.h" />
+    <ClInclude Include="..\..\src\fmtcl\InterlacingType.h" />
     <ClInclude Include="..\..\src\fmtcl\KernelData.h" />
     <ClInclude Include="..\..\src\fmtcl\Mat3.h" />
     <ClInclude Include="..\..\src\fmtcl\Mat3.hpp" />
@@ -150,9 +164,12 @@
     <ClInclude Include="..\..\src\fmtcl\Matrix2020CLProc_macro.h" />
     <ClInclude Include="..\..\src\fmtcl\MatrixProc.h" />
     <ClInclude Include="..\..\src\fmtcl\MatrixProc_macro.h" />
+    <ClInclude Include="..\..\src\fmtcl\MatrixUtil.h" />
     <ClInclude Include="..\..\src\fmtcl\MatrixWrap.h" />
     <ClInclude Include="..\..\src\fmtcl\MatrixWrap.hpp" />
+    <ClInclude Include="..\..\src\fmtcl\PicFmt.h" />
     <ClInclude Include="..\..\src\fmtcl\PrimariesPreset.h" />
+    <ClInclude Include="..\..\src\fmtcl\PrimUtil.h" />
     <ClInclude Include="..\..\src\fmtcl\Proxy.h" />
     <ClInclude Include="..\..\src\fmtcl\Proxy.hpp" />
     <ClInclude Include="..\..\src\fmtcl\ProxyRwAvx2.h" />
@@ -165,7 +182,9 @@
     <ClInclude Include="..\..\src\fmtcl\ReadWrapperFlt.hpp" />
     <ClInclude Include="..\..\src\fmtcl\ReadWrapperInt.h" />
     <ClInclude Include="..\..\src\fmtcl\ReadWrapperInt.hpp" />
+    <ClInclude Include="..\..\src\fmtcl\ResamplePlaneData.h" />
     <ClInclude Include="..\..\src\fmtcl\ResampleSpecPlane.h" />
+    <ClInclude Include="..\..\src\fmtcl\ResampleUtil.h" />
     <ClInclude Include="..\..\src\fmtcl\ResizeData.h" />
     <ClInclude Include="..\..\src\fmtcl\ResizeData.hpp" />
     <ClInclude Include="..\..\src\fmtcl\ResizeDataFactory.h" />
@@ -197,6 +216,7 @@
     <ClInclude Include="..\..\src\fmtcl\TransOpPow.h" />
     <ClInclude Include="..\..\src\fmtcl\TransOpSLog.h" />
     <ClInclude Include="..\..\src\fmtcl\TransOpSLog3.h" />
+    <ClInclude Include="..\..\src\fmtcl\TransUtil.h" />
     <ClInclude Include="..\..\src\fmtcl\Vec3.h" />
     <ClInclude Include="..\..\src\fmtcl\Vec3.hpp" />
     <ClInclude Include="..\..\src\fmtcl\VoidAndCluster.h" />
@@ -269,8 +289,6 @@
     <ClInclude Include="..\..\src\conc\ObjPool.h" />
     <ClInclude Include="..\..\src\conc\ObjPool.hpp" />
     <ClInclude Include="..\..\src\ffft\def.h" />
-    <ClInclude Include="..\..\src\ffft\DynArray.h" />
-    <ClInclude Include="..\..\src\ffft\DynArray.hpp" />
     <ClInclude Include="..\..\src\ffft\FFTReal.h" />
     <ClInclude Include="..\..\src\ffft\FFTReal.hpp" />
     <ClInclude Include="..\..\src\ffft\OscSinCos.h" />
@@ -311,6 +329,7 @@
     <ClCompile Include="..\..\src\fmtcl\ContFirSpline64.cpp" />
     <ClCompile Include="..\..\src\fmtcl\DiscreteFirCustom.cpp" />
     <ClCompile Include="..\..\src\fmtcl\DiscreteFirInterface.cpp" />
+    <ClCompile Include="..\..\src\fmtcl\Dither.cpp" />
     <ClCompile Include="..\..\src\fmtcl\ErrDifBuf.cpp" />
     <ClCompile Include="..\..\src\fmtcl\ErrDifBufFactory.cpp" />
     <ClCompile Include="..\..\src\fmtcl\FilterResize.cpp" />
@@ -327,7 +346,10 @@
     <ClCompile Include="..\..\src\fmtcl\MatrixProc_avx2.cpp">
       <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
     </ClCompile>
+    <ClCompile Include="..\..\src\fmtcl\MatrixUtil.cpp" />
+    <ClCompile Include="..\..\src\fmtcl\PrimUtil.cpp" />
     <ClCompile Include="..\..\src\fmtcl\ResampleSpecPlane.cpp" />
+    <ClCompile Include="..\..\src\fmtcl\ResampleUtil.cpp" />
     <ClCompile Include="..\..\src\fmtcl\ResizeData.cpp" />
     <ClCompile Include="..\..\src\fmtcl\ResizeDataFactory.cpp" />
     <ClCompile Include="..\..\src\fmtcl\RgbSystem.cpp" />
@@ -353,6 +375,7 @@
     <ClCompile Include="..\..\src\fmtcl\TransOpPow.cpp" />
     <ClCompile Include="..\..\src\fmtcl\TransOpSLog.cpp" />
     <ClCompile Include="..\..\src\fmtcl\TransOpSLog3.cpp" />
+    <ClCompile Include="..\..\src\fmtcl\TransUtil.cpp" />
     <ClCompile Include="..\..\src\fmtcl\VoidAndCluster.cpp" />
     <ClCompile Include="..\..\src\fmtc\Bitdepth.cpp" />
     <ClCompile Include="..\..\src\fmtc\Convert.cpp" />
@@ -378,14 +401,15 @@
     <ClCompile Include="..\..\src\vsutl\fnc.cpp" />
     <ClCompile Include="..\..\src\vsutl\PlaneProcCbInterface.cpp" />
     <ClCompile Include="..\..\src\vsutl\PlaneProcessor.cpp" />
-    <ClCompile Include="..\..\src\fstb\fnc.cpp">
-      <ObjectFileName>$(IntDir)%(Filename)1.obj</ObjectFileName>
-      <XMLDocumentationFileName>$(IntDir)%(Filename)1.xdc</XMLDocumentationFileName>
-    </ClCompile>
+    <ClCompile Include="..\..\src\fstb\fnc_fstb.cpp" />
     <ClCompile Include="..\..\src\AvstpFinder.cpp" />
     <ClCompile Include="..\..\src\AvstpWrapper.cpp" />
     <ClCompile Include="..\..\src\main.cpp" />
   </ItemGroup>
+  <ItemGroup>
+    <None Include="..\unix\configure.ac" />
+    <None Include="..\unix\Makefile.am" />
+  </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
diff --git a/build/win/fmtconv.vcxproj.filters b/build/win/fmtconv.vcxproj.filters
index 93e2181..1bf331d 100644
--- a/build/win/fmtconv.vcxproj.filters
+++ b/build/win/fmtconv.vcxproj.filters
@@ -177,12 +177,6 @@
     <ClInclude Include="..\..\src\ffft\def.h">
       <Filter>ffft</Filter>
     </ClInclude>
-    <ClInclude Include="..\..\src\ffft\DynArray.h">
-      <Filter>ffft</Filter>
-    </ClInclude>
-    <ClInclude Include="..\..\src\ffft\DynArray.hpp">
-      <Filter>ffft</Filter>
-    </ClInclude>
     <ClInclude Include="..\..\src\ffft\FFTReal.h">
       <Filter>ffft</Filter>
     </ClInclude>
@@ -532,6 +526,30 @@
     <ClInclude Include="..\..\src\fmtcl\TransOpHlg.h">
       <Filter>fmtcl</Filter>
     </ClInclude>
+    <ClInclude Include="..\..\src\fmtcl\Dither.h">
+      <Filter>fmtcl</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\fmtcl\MatrixUtil.h">
+      <Filter>fmtcl</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\fmtcl\PicFmt.h">
+      <Filter>fmtcl</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\fmtcl\PrimUtil.h">
+      <Filter>fmtcl</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\fmtcl\TransUtil.h">
+      <Filter>fmtcl</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\fmtcl\ResampleUtil.h">
+      <Filter>fmtcl</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\fmtcl\ResamplePlaneData.h">
+      <Filter>fmtcl</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\fmtcl\InterlacingType.h">
+      <Filter>fmtcl</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\..\src\fmtc\Bitdepth.cpp">
@@ -561,7 +579,7 @@
     <ClCompile Include="..\..\src\vsutl\PlaneProcessor.cpp">
       <Filter>vsutl</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\src\fstb\fnc.cpp">
+    <ClCompile Include="..\..\src\fstb\fnc_fstb.cpp">
       <Filter>fstb</Filter>
     </ClCompile>
     <ClCompile Include="..\..\src\AvstpFinder.cpp" />
@@ -756,5 +774,24 @@
     <ClCompile Include="..\..\src\fmtcl\TransOpHlg.cpp">
       <Filter>fmtcl</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\fmtcl\Dither.cpp">
+      <Filter>fmtcl</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fmtcl\MatrixUtil.cpp">
+      <Filter>fmtcl</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fmtcl\PrimUtil.cpp">
+      <Filter>fmtcl</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fmtcl\TransUtil.cpp">
+      <Filter>fmtcl</Filter>
+    </ClCompile>
+    <ClCompile Include="..\..\src\fmtcl\ResampleUtil.cpp">
+      <Filter>fmtcl</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <None Include="..\unix\configure.ac" />
+    <None Include="..\unix\Makefile.am" />
   </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/build/win/toolset.props b/build/win/toolset.props
index 4227e63..8e24a0c 100644
--- a/build/win/toolset.props
+++ b/build/win/toolset.props
@@ -4,9 +4,9 @@
     <_ProjectFileVersion>12.0.30501.0</_ProjectFileVersion>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Platform)'=='Win32'" Label="Configuration">
-    <PlatformToolset>v141_xp</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
   </PropertyGroup>
   <PropertyGroup Condition="'$(Platform)'=='x64'" Label="Configuration">
-    <PlatformToolset>v141</PlatformToolset>
+    <PlatformToolset>v142</PlatformToolset>
   </PropertyGroup>
 </Project>
\ No newline at end of file
diff --git a/doc/fmtconv.html b/doc/fmtconv.html
index 8cc575a..2a0b9fe 100644
--- a/doc/fmtconv.html
+++ b/doc/fmtconv.html
@@ -15,7 +15,7 @@ <h2>Abstract</h2>
 
 <table class="n">
 <tr><td class="n"><b>Authors:     </b></td><td class="n">&nbsp;</td><td class="n">Firesledge (aka Cretindesalpes)</td></tr>
-<tr><td class="n"><b>Version:     </b></td><td class="n">&nbsp;</td><td class="n">r22</td></tr>
+<tr><td class="n"><b>Version:     </b></td><td class="n">&nbsp;</td><td class="n">r23</td></tr>
 <tr><td class="n"><b>Download:    </b></td><td class="n">&nbsp;</td><td class="n"><a href="http://ldesoras.free.fr/prod.html#src_fmtconv">http://ldesoras.free.fr/prod.html</a></td></tr>
 <tr><td class="n"><b>Category:    </b></td><td class="n">&nbsp;</td><td class="n">Format tools</td></tr>
 <tr><td class="n"><b>Requirements:</b></td><td class="n">&nbsp;</td><td class="n"><a href="http://www.vapoursynth.com/">Vapoursynth</a></td></tr>
@@ -205,7 +205,7 @@ <h3><a id="compiling"></a>Compiling from the source code</h3>
 
 <h4>Visual C++</h4>
 
-<p>Visual Studio 2013 or later is required, previous versions are
+<p>Visual Studio 2019 or later is required, previous versions are
 not supported anymore.
 Just load <code>build/win/fmtconv.sln</code> and run the compiler.</p>
 
@@ -215,7 +215,6 @@ <h4>Visual C++</h4>
 
 <ul>
 <li>Add <code>.</code> (the <code>src</code> directory) as include path.</li>
-<li>Use the <code>v120_xp</code> toolset for the 32-bit version.</li>
 <li>For the whole project, enable the SS2 instruction set.</li>
 <li>Enable the AVX2 instruction set for the <code>*.cpp</code> files containing <code>avx2</code> in their name, and the AVX set for the <code>avx</code> files.</li>
 <li>Enable optimizations maximizing speed and “any suitable” functions for inlining.</li>
@@ -223,13 +222,18 @@ <h4>Visual C++</h4>
 
 <h4>GNU/Linux and other Unix-like systems</h4>
 
-<p>On Linux and similar GNU-based systems, the <code>build</code> directory
-contains autotools settings:</p>
+<p>On Linux and similar GNU-based systems (including MSYS2 and Cygwin), the
+<code>build</code> directory contains autotools settings:</p>
 <pre class="src">cd build/unix
 ./autogen.sh
 ./configure
 make
 make install</pre>
+<p>You can add some options to the <code>configure</code> command:</p>
+<ul>
+<li><code>--enable-debug</code> to activate debugging code</li>
+<li><code>--enable-clang</code> to use Clang instead of the default compiler, usually GCC</li>
+</ul>
 
 <h4>GCC</h4>
 
@@ -247,7 +251,7 @@ <h4>GCC</h4>
 <p>Add <code>AvstpFinder.cpp</code> on Windows.
 Use the following options (on a single line):</p>
 
-<pre class="src">-std=c++11 -shared -fabi-version=6 -msse2 -mcx16 -O3 -DNDEBUG -I.
+<pre class="src">-std=c++14 -shared -fabi-version=6 -msse2 -mcx16 -O3 -DNDEBUG -I.
 -Wall -Wextra -Wno-unused-parameter -Wno-unused-result -Wno-missing-field-initializers -Wshadow
 -Wno-unused-private-field</pre>
 
@@ -276,7 +280,7 @@ <h4>GCC</h4>
 <p>Link with <code>-latomic -lpthread</code>.</p>
 
 <p>With MinGW, it seems you will need a specific MinGW-64 build supporting
-C++11 threading (not tested here yet, please report if you find something
+C++14 threading (not tested here yet, please report if you find something
 useful).</p>
 
 <h2><a id="description"></a>III) Filters description</h2>
@@ -298,6 +302,9 @@ <h3><a id="bitdepth"></a>bitdepth</h3>
 	staticnoise: int  : opt; (False)
 	cpuopt     : int  : opt; (-1)
 	patsize    : int  : opt; (32)
+	tpdfo      : int  : opt; (0)
+	tpdfn      : int  : opt; (0)
+	corplane   : int  : opt; (0)
 )
 </pre>
 
@@ -309,14 +316,14 @@ <h3><a id="bitdepth"></a>bitdepth</h3>
 <li>Doing a full-range ↔ TV-range conversion between integer formats, because the resulting values haven’t an exact representation.</li>
 </ul>
 
-<p>Pure ordered dithering seems to be retained better than noise or error
-diffusion by video compression in 8 bits.
-Therefore this is the recommended method to avoid color banding, unless you
-encode at very high bitrates.
-If you don’t care about video compression, error diffusion gives the most
-accurate results.
-To avoid discontinuities between purely flat areas and dithered areas, you
-can add a bit of noise.</p>
+<p>Video compression seems to retain pure ordered (Bayer) dithering better than other dithering methods.
+Therefore this is the recommended method to avoid color banding in 8-bit
+signals, unless you encode at high bitrates.
+If you don’t care about video compression, error diffusion, void and cluster
+and quasirandom sequence methods give the most accurate results.
+To avoid discontinuities between purely flat areas and dithered areas (also
+called noise modulation), you can add a bit of noise, ideally in triangular
+distribution.</p>
 
 <p>The internal noise generator is deterministic and will give the same result
 each run.</p>
@@ -381,10 +388,11 @@ <h4>Parameters</h4>
 <tr><td><b>2</b></td><td>Round, may be a bit faster but possibly less accurate.</td></tr>
 <tr><td><b>3</b></td><td>Sierra-2-4A error diffusion, aka “Filter Lite”. Quick and excellent quality, similar to Floyd-Steinberg.</td></tr>
 <tr><td><b>4</b></td><td>Stucki error diffusion. Preserves delicate edges better but distorts gradients.</td></tr>
-<tr><td><b>5</b></td><td>Atkinson error diffusion. Generates distinct patterns but keeps clean the flat areas.</td></tr>
-<tr><td><b>6</b></td><td>Floyd-Steinberg error diffusion. Classic.</td></tr>
+<tr><td><b>5</b></td><td>Atkinson error diffusion. Generates distinct patterns but keeps the flat areas clean (noise modulation).</td></tr>
+<tr><td><b>6</b></td><td>Classic Floyd-Steinberg error diffusion, modified for serpentine scan (avoids worm artefacts).</td></tr>
 <tr><td><b>7</b></td><td><a href="http://www.iro.umontreal.ca/~ostrom/publications/publications_abstracts.html#SIGGRAPH01_VarcoeffED">Ostromoukhov error diffusion</a>. Slow, available only for integer input at the moment. Avoids usual F-S artefacts.</td></tr>
-<tr><td><b>8</b></td><td>Void and cluster halftone dithering. Better visual aspect than ordered dithering.</td></tr>
+<tr><td><b>8</b></td><td>Void and cluster halftone dithering. This is a way to generate blue-noise dither and has a much better visual aspect than ordered dithering.</td></tr>
+<tr><td><b>9</b></td><td>Dither using <a href="http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/">quasirandom sequences</a>. Good intermediate between Void and cluster and error diffusion algorithms.</td></tr>
 </table>
 
 <p>When using error-diffusion dithering on interlaced content, you should
@@ -408,7 +416,7 @@ <h4>Parameters</h4>
 
 <p class="var">dyn</p>
 <p>Indicates if the ordered dither pattern is dynamic (True) or static (False).
-If dynamic, the pattern is rotated each frame.</p>
+If dynamic, the pattern is changed or rotated each frame.</p>
 
 <p class="var">staticnoise</p>
 <p>If set to 1, the noise generated with <var>ampn</var> is static
@@ -425,6 +433,27 @@ <h4>Parameters</h4>
 <p>Width of the pattern used in the Void and cluster algorithm.
 The only valid values are 4, 8, 16 and 32.</p>
 
+<p class="var">tpdfo</p>
+<p>Set it to 1 to enable the triangular probability distribution function
+(TPDF) for halftone-based dithering algorithms.
+It has no effect on error diffusion methods.
+0 is the standard rectangular distribution (RPDF).
+Note that when triangular distribution is enabled, the maximum halftone
+amplitude is multiplied by 1.414 at constant <var>ampo</var>.</p>
+
+<p class="var">tpdfn</p>
+<p>Same as <var>tpdfo</var>, but for the additive noise part.
+TPDF noise looks more natural than RPDF noise, and is a crude approximation of
+a gaussian noise, with a bounded amplitude.
+Maximum noise amplitude is multiplied by 1.414 at constant <var>ampn</var>,
+so the introduced noise power is kept approximately constant.</p>
+
+<p class="var">corplane</p>
+<p>Set it to 1 to keep the dither and noise patterns correlated for all the
+planes.
+When processing a RGB picture, it helps to prevent colored noise on grey
+features.</p>
+
 
 
 <h3><a id="convert"></a>convert</h3>
@@ -502,7 +531,7 @@ <h3><a id="convert"></a>convert</h3>
 	cplaced    : data   : opt; (cplace)
 	matd       : data   : opt;
 
-	# Transfert curve parameters
+	# Transfer curve parameters
 	transs     : data[] : opt;
 	transd     : data[] : opt;
 	cont       : float  : opt;
@@ -1361,6 +1390,8 @@ <h3><a id="transfer"></a>transfer</h3>
 	flt        : int    : opt;
 	fulls      : int    : opt; (True)
 	fulld      : int    : opt; (True)
+	logceis    : int    : opt; (800)
+	logceid    : int    : opt; (800)
 	cpuopt     : int    : opt; (-1)
 	blacklvl   : float  : opt; (0)
 )</pre>
@@ -1427,8 +1458,8 @@ <h4>Parameters</h4>
 <tr><td><b><code>&quot;slog&quot;      </code></b></td><td>&minus;0.006&hellip;10</td><td>Sony S-Log<br />Linear 1.0 is the reference white, peak white is at 10.0.</td></tr>
 <tr><td><b><code>&quot;slog2&quot;      </code></b></td><td>&minus;0.0085&hellip;14.13</td><td>Sony S-Log 2<br />Linear 1.0 is the reference white, peak white is at 14.13.</td></tr>
 <tr><td><b><code>&quot;slog3&quot;     </code></b></td><td>0&hellip;38.421</td><td>Sony S-Log3.</td></tr>
-<tr><td><b><code>&quot;logc2&quot;     </code></b></td><td>Unspecified</td><td>Arri Log C Alexa 2.x (800 EI), linear scene exposure<br />Peak white is 57.45 linear. The negative part of the range allows coding sensor noise.</td></tr>
-<tr><td><b><code>&quot;logc3&quot;     </code></b></td><td>Unspecified</td><td>Arri Log C Alexa 3.x (800 EI), linear scene exposure<br />Peak white is 55.08 linear. The negative part of the range allows coding sensor noise.</td></tr>
+<tr><td><b><code>&quot;logc2&quot;     </code></b></td><td>Unspecified</td><td>Arri Log C Alexa 2.x, linear scene exposure<br />Peak white is 57.45 linear. The negative part of the range allows coding sensor noise. <var>logceis</var> and <var>logceid</var> set the Exposure Index (EI).</td></tr>
+<tr><td><b><code>&quot;logc3&quot;     </code></b></td><td>Unspecified</td><td>Arri Log C Alexa 3.x, linear scene exposure<br />Peak white is 55.08 linear. The negative part of the range allows coding sensor noise. <var>logceis</var> and <var>logceid</var> set the Exposure Index (EI).</td></tr>
 <tr><td><b><code>&quot;canonlog&quot;  </code></b></td><td>0&hellip;8.00903</td><td>Canon-Log<br />Peak white is 8.00903 in linear scale and 1.08676 in compressed scale.</td></tr>
 <tr><td><b><code>&quot;adobergb&quot;  </code></b></td><td>0&hellip;1</td><td>Adobe RGB (1998 and Wide Gamut)</td></tr>
 <tr><td><b><code>&quot;romm&quot;      </code></b></td><td>0&hellip;1</td><td>ProPhoto, ROMM</td></tr>
@@ -1468,6 +1499,11 @@ <h4>Parameters</h4>
 in TV-range (16 to 240 for the Y’Cb’Cr’ chroma planes).
 This value has no meaning for float data.</p>
 
+<p class="var">logceis, logceid</p>
+<p>Exposure index (EI) for the Arri Log C Alexa 2.x and 3.x curves.
+Allowed values are:
+160, 200, 250, 320, 400, 500, 640, 800 (default), 1000, 1280 and 1600.</p>
+
 <p class="var">cpuopt</p>
 <p>Limits the CPU instruction set.
 &minus;1: automatic (no limitation),
@@ -1524,7 +1560,23 @@ <h2><a id="troubleshooting"></a>IV) Troubleshooting</h2>
 
 <h2><a id="changelog"></a>V) Changelog</h2>
 
-<p><b>r22, 2019.12.11</b></p>
+<p><b>r24, 202?-??-??</b></p>
+<ul>
+<li><code>bitdepth</code>: added dithering <var>mode</var> 9: quasirandom sequences.</li>
+<li><code>bitdepth</code>: added a triangular probability density function (TPDF) for the dithering patterns and noises, along with the associated parameters <var>tpdfo</var> and <var>tpdfn</var>.</li>
+<li><code>bitdepth</code>: added <var>corplane</var> parameter to prevent colored noise in RGB processing.</li>
+</ul>
+
+<p><b>r23, 2021-07-14</b></p>
+<ul>
+<li><code>transfer</code>: added an Exposure Index (EI) parameter for the Arri Log C Alexa 2.x and 3.x curves.</li>
+<li><code>bitdepth</code>: properly sets the <code>_ColorRange</code> attribute.</li>
+<li>Doesn’t output a debug message when AVSTP is not found.</li>
+<li>Fixed a concurrency issue by using a more recent toolkit when compiling with MSVC.</li>
+<li>Windows XP is not supported any more.</li>
+</ul>
+
+<p><b>r22, 2019-12-11</b></p>
 <ul>
 <li><code>bitdepth</code>: upconversions for full range data now scale to the maximum value instead of shifting bits. Thanks to Z4ST1N for the report.</li>
 <li><code>matrix</code>: added support for the YDzDx, ICtCp-PQ and ICtCp-HLG colorspaces.</li>
@@ -1543,25 +1595,25 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li>Fixed compilation for Linux on ARM or aarch64. Binaries not tested yet.</li>
 </ul>
 
-<p><b>r21, 2019.12.08</b></p>
+<p><b>r21, 2019-12-08</b></p>
 <ul>
 <li><code>transfer</code>: fixed highlight clipping for several high dynamic range transfer curves, thanks to groucho86 for the report.</li>
 </ul>
 
-<p><b>r20, 2016.03.25</b></p>
+<p><b>r20, 2016-03-25</b></p>
 <ul>
 <li><code>primaries</code>: fixed a bug preventing to set all primaries individually without specifying any preset.</li>
 <li><code>primaries</code>: fixed a bug in the color conversion, thanks to J1Man for having spotted it.</li>
 </ul>
 
-<p><b>r19, 2016.03.19</b></p>
+<p><b>r19, 2016-03-19</b></p>
 <ul>
 <li><code>primaries</code>: refined the values for the Adobe Wide gamut and BT.2020 primaries.</li>
 <li><code>primaries</code>: added DCI-P3, ACES AP0/AP1, S-Gamut, S-Gamut3.Cine, ALEXA and V-Gamut presets.</li>
 <li><code>transfer</code>: added ACEScc, ERIMM, S-Log2, S-Log3 and V-Log curves.</li>
 </ul>
 
-<p><b>r18, 2016.03.08</b></p>
+<p><b>r18, 2016-03-08</b></p>
 <ul>
 <li>Added the <code>primaries</code> function to convert between gamuts.</li>
 <li>The “full” range is now closer to what is specified in the standards.</li>
@@ -1569,7 +1621,7 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li><code>transfer</code>: added the Adobe RGB and ProPhoto / ROMM curves.</li>
 </ul>
 
-<p><b>r17, 2015.07.08</b></p>
+<p><b>r17, 2015-07-08</b></p>
 <ul>
 <li><code>bitdepth</code>: added “Void and cluster” dithering method and its <var>patsize</var> parameter.</li>
 <li><code>bitdepth</code>: added floating point implementation for the Ostromoukhov dithering</li>
@@ -1577,7 +1629,7 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li><code>bitdepth</code>: fixed incorrect conversion from float to 8-bit integer using the “fast” modes with SSE2 instruction set.</li>
 </ul>
 
-<p><b>r16, 2015.07.01</b></p>
+<p><b>r16, 2015-07-01</b></p>
 <ul>
 <li><code>bitdepth</code>: added support for 11-bit and 14-bit integer input.</li>
 <li><code>bitdepth</code>: fixed a slight plane inconsistency when dithering grey multi-plane pictures using an error diffusion algorithm.</li>
@@ -1586,18 +1638,18 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li><code>transfer</code>: added the <var>blacklvl</var> parameter.</li>
 </ul>
 
-<p><b>r15, 2015.05.22</b></p>
+<p><b>r15, 2015-05-22</b></p>
 <ul>
 <li><code>resample</code> and <code>bitdepth</code>: fixed a bug creating dark lines or weird patterns. Was introduced in r13 while trying to fix the buffer overflow problem. Thanks to feisty2 for spotting it.</li>
 <li><code>resample</code>: fixed the non-SIMD code path, causing crashes.</li>
 </ul>
 
-<p><b>r14, 2015.05.20</b></p>
+<p><b>r14, 2015-05-20</b></p>
 <ul>
 <li><code>matrix</code>: fixed a bug introducing wrong offsets in custom matrix coefficients, thanks to mawen1250 for the report.</li>
 </ul>
 
-<p><b>r13, 2015.05.18</b></p>
+<p><b>r13, 2015-05-18</b></p>
 <ul>
 <li><code>matrix</code>: optimized the SSE2 and AVX2 paths for integer data.</li>
 <li>Added <var>cpuopt</var> to some functions, to manually limit the instruction set optimizations.</li>
@@ -1606,23 +1658,23 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li>Removed the <code>int16tofloat</code> and <code>floattoint16</code> temporary functions.</li>
 </ul>
 
-<p><b>r12, 2015.05.08</b></p>
+<p><b>r12, 2015-05-08</b></p>
 <ul>
 <li><code>resample</code>: fixed a crash in the AVX2 code path, thanks to HolyWu for spotting it.</li>
 </ul>
 
-<p><b>r11, 2015.05.07</b></p>
+<p><b>r11, 2015-05-07</b></p>
 <ul>
 <li><code>transfer</code>: fixed a bug in the SSE2 code path.</li>
 </ul>
 
-<p><b>r10, 2015.05.06</b></p>
+<p><b>r10, 2015-05-06</b></p>
 <ul>
 <li>fmtconv is compatible with the older Vapoursynth versions again until API 3.2 is out.</li>
 <li>Source code: fixed compilation problems.</li>
 </ul>
 
-<p><b>r9, 2015.05.06</b></p>
+<p><b>r9, 2015-05-06</b></p>
 <ul>
 <li>Added the <code>transfer</code> function.</li>
 <li><code>resample</code>: Most kernel-related parameters are now arrays, allowing to specify different values for each plane.</li>
@@ -1632,25 +1684,25 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li><code>bitdepth</code>: SSE2 optimizations for the “fast” algorithm.</li>
 </ul>
 
-<p><b>r8, 2013.11.30</b></p>
+<p><b>r8, 2013-11-30</b></p>
 <ul>
 	<li><code>resample</code>: Fixed bugs introduced in r7.</li>
 	<li>Fixed a range conversion issue in “plane copy” modes with source and destination formats are the same.</li>
 </ul>
 
-<p><b>r7, 2013.11.27</b></p>
+<p><b>r7, 2013-11-27</b></p>
 <ul>
 <li>64-bit windows version.</li>
 <li><code>resample</code>: A few optimizations for special cases.</li>
 <li><code>resample</code>: fixed the coefficients used in integer resizing, whose sum was sometimes off by a few units.</li>
 </ul>
 
-<p><b>r6, 2013.08.24</b></p>
+<p><b>r6, 2013-08-24</b></p>
 <ul>
 	<li><code>matrix</code>: single-plane output now works correctly.</li>
 </ul>
 
-<p><b>r5, 2013.08.18</b></p>
+<p><b>r5, 2013-08-18</b></p>
 <ul>
 <li>Added 12-bit support for all the functions.</li>
 <li>Added <code>matrix2020cl</code> to convert between linear RGB and Y’Cb’Cr’ colorspaces using the BT.2020 constant luminance matrix.</li>
@@ -1660,7 +1712,7 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li><code>resample</code>: added SSE2 integer calculations for slight speed improvement. Activated by default, use <var>flt=1</var> to compute everything in float (previous operating mode).</li>
 </ul>
 
-<p><b>r4, 2012.12.09</b></p>
+<p><b>r4, 2012-12-09</b></p>
 <ul>
 <li>Added a documentation.</li>
 <li>Filters now write some frame properties when known.</li>
@@ -1677,7 +1729,7 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li>Added <code>nativetostack16</code>.</li>
 </ul>
 
-<p><b>r3, 2012.11.23</b></p>
+<p><b>r3, 2012-11-23</b></p>
 <ul>
 <li><code>bitdepth</code>: changed the <var>bitdepth</var> parameter to <var>bits</var>.</li>
 <li><code>bitdepth</code>: added SSE2 optimizations for upconversions.</li>
@@ -1686,7 +1738,7 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li><code>resample</code>: fixed the <var>planes</var> parameter previously interpreted as 0 (black or green screen).</li>
 </ul>
 
-<p><b>r2, 2012.11.18</b></p>
+<p><b>r2, 2012-11-18</b></p>
 <ul>
 <li><code>bitdepth</code>: implemented fast dither mode (but not in SSE2 yet).</li>
 <li><code>bitdepth</code>: optimized float-to-integer path.</li>
@@ -1696,7 +1748,7 @@ <h2><a id="changelog"></a>V) Changelog</h2>
 <li><code>resample</code>: fixed white/magenta screen with 8-bit input and float output.</li>
 </ul>
 
-<p><b>r1, 2012.11.16</b></p>
+<p><b>r1, 2012-11-16</b></p>
 <ul>
 <li>Initial release.</li>
 </ul>
diff --git a/src/AvstpWrapper.cpp b/src/AvstpWrapper.cpp
index b4f7d09..c7ced4c 100644
--- a/src/AvstpWrapper.cpp
+++ b/src/AvstpWrapper.cpp
@@ -22,6 +22,11 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 
 
+// Define this macro to output error messages
+#undef AvstpWrapper_DEBUG_VERBOSE
+
+
+
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 #if defined (_MSC_VER)
@@ -159,10 +164,12 @@ AvstpWrapper::AvstpWrapper ()
 #if defined (_MSC_VER) && defined (USE_AVSTP)
 	if (_dll_hnd == 0)
 	{
+#if defined (AvstpWrapper_DEBUG_VERBOSE)
 		::OutputDebugStringW (
 			L"AvstpWrapper: cannot find avstp.dll."
 			L"Usage restricted to single threading.\n"
 		);
+#endif
 //		throw std::runtime_error ("Cannot find avstp.dll.");
 #endif
 		assign_fallback ();
diff --git a/src/VapourSynth.h b/src/VapourSynth.h
index fec53ad..b4e7ff0 100644
--- a/src/VapourSynth.h
+++ b/src/VapourSynth.h
@@ -461,10 +461,6 @@ typedef const VSFrameRef *(VS_CC *VSFilterGetFrame)(int n, int activationReason,
 
 
 
-typedef int (VS_CC *VSGetOutputIndex)(VSFrameContext *frameCtx);
-
-
-
 /*
 ==============================================================================
 Name: *VSFilterFree
diff --git a/src/conc/AioAdd.h b/src/conc/AioAdd.h
index 45fa578..ad59ef9 100644
--- a/src/conc/AioAdd.h
+++ b/src/conc/AioAdd.h
@@ -46,7 +46,7 @@ class AioAdd
 	               AioAdd (T operand);
 	virtual        ~AioAdd () = default;
 
-	inline T       operator () (T old_val) const;
+	inline T       operator () (T old_val) const noexcept;
 
 
 
@@ -70,7 +70,9 @@ class AioAdd
 
 	               AioAdd ()                                   = delete;
 	               AioAdd (const AioAdd <T> &other)            = delete;
+	               AioAdd (const AioAdd <T> &&other)           = delete;
 	AioAdd <T> &   operator = (const AioAdd <T> &other)        = delete;
+	AioAdd <T> &   operator = (const AioAdd <T> &&other)       = delete;
 	bool           operator == (const AioAdd <T> &other) const = delete;
 	bool           operator != (const AioAdd <T> &other) const = delete;
 
diff --git a/src/conc/AioAdd.hpp b/src/conc/AioAdd.hpp
index 37be677..289b15e 100644
--- a/src/conc/AioAdd.hpp
+++ b/src/conc/AioAdd.hpp
@@ -43,9 +43,9 @@ AioAdd <T>::AioAdd (T operand)
 
 
 template <class T>
-T	AioAdd <T>::operator () (T old_val) const
+T	AioAdd <T>::operator () (T old_val) const noexcept
 {
-	return (old_val + _operand);
+	return old_val + _operand;
 }
 
 
diff --git a/src/conc/AioMax.h b/src/conc/AioMax.h
index 850a55e..054b6ff 100644
--- a/src/conc/AioMax.h
+++ b/src/conc/AioMax.h
@@ -44,9 +44,8 @@ class AioMax
 
 	explicit inline
 	               AioMax (T operand);
-	virtual        ~AioMax () = default;
 
-	inline T       operator () (T old_val) const;
+	inline T       operator () (T old_val) const noexcept;
 
 
 
@@ -70,7 +69,9 @@ class AioMax
 
 	               AioMax ()                                   = delete;
 	               AioMax (const AioMax <T> &other)            = delete;
+	               AioMax (const AioMax <T> &&other)           = delete;
 	AioMax <T> &   operator = (const AioMax <T> &other)        = delete;
+	AioMax <T> &   operator = (const AioMax <T> &&other)       = delete;
 	bool           operator == (const AioMax <T> &other) const = delete;
 	bool           operator != (const AioMax <T> &other) const = delete;
 
diff --git a/src/conc/AioMax.hpp b/src/conc/AioMax.hpp
index 1ac6a29..1ea8ddf 100644
--- a/src/conc/AioMax.hpp
+++ b/src/conc/AioMax.hpp
@@ -45,9 +45,9 @@ AioMax <T>::AioMax (T operand)
 
 
 template <class T>
-T	AioMax <T>::operator () (T old_val) const
+T	AioMax <T>::operator () (T old_val) const noexcept
 {
-	return (std::max (old_val, _operand));
+	return std::max (old_val, _operand);
 }
 
 
diff --git a/src/conc/AioSub.h b/src/conc/AioSub.h
index cce1528..64e5ed3 100644
--- a/src/conc/AioSub.h
+++ b/src/conc/AioSub.h
@@ -47,9 +47,8 @@ class AioSub
 
 	explicit inline
 	               AioSub (T operand);
-	virtual        ~AioSub () = default;
 
-	inline T       operator () (T old_val) const;
+	inline T       operator () (T old_val) const noexcept;
 
 
 
@@ -73,7 +72,9 @@ class AioSub
 
 	               AioSub ()                                   = delete;
 	               AioSub (const AioSub <T> &other)            = delete;
+	               AioSub (const AioSub <T> &&other)           = delete;
 	AioSub <T> &   operator = (const AioSub <T> &other)        = delete;
+	AioSub <T> &   operator = (const AioSub <T> &&other)       = delete;
 	bool           operator == (const AioSub <T> &other) const = delete;
 	bool           operator != (const AioSub <T> &other) const = delete;
 
diff --git a/src/conc/AioSub.hpp b/src/conc/AioSub.hpp
index 2208095..ebf99d8 100644
--- a/src/conc/AioSub.hpp
+++ b/src/conc/AioSub.hpp
@@ -43,9 +43,9 @@ AioSub <T>::AioSub (T operand)
 
 
 template <class T>
-T	AioSub <T>::operator () (T old_val) const
+T	AioSub <T>::operator () (T old_val) const noexcept
 {
-	return (old_val - _operand);
+	return old_val - _operand;
 }
 
 
diff --git a/src/conc/AtomicInt.h b/src/conc/AtomicInt.h
index 2117805..db88c17 100644
--- a/src/conc/AtomicInt.h
+++ b/src/conc/AtomicInt.h
@@ -41,6 +41,8 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 #include "conc/def.h"
 
+#include <type_traits>
+
 #include <cstdint>
 
 #if (conc_ARCHI == conc_ARCHI_X86)
@@ -59,6 +61,14 @@ namespace conc
 template <class T>
 class AtomicInt
 {
+	static_assert (
+		(   std::is_trivially_copyable <T>::value
+		&&  std::is_copy_constructible <T>::value
+		&&  std::is_move_constructible <T>::value
+		&&  std::is_copy_assignable <T>::value
+		&&  std::is_move_assignable <T>::value),
+		"Requirements on T"
+	);
 
 /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
@@ -66,30 +76,30 @@ class AtomicInt
 
 	typedef	T	DataType;
 
-	inline			AtomicInt ();
+	inline			AtomicInt () noexcept;
 	inline explicit
-						AtomicInt (T val);
-	inline			AtomicInt (const AtomicInt <T> &other);
+						AtomicInt (T val) noexcept;
+	inline			AtomicInt (const AtomicInt <T> &other) noexcept;
 	inline AtomicInt <T> &
-						operator = (T other);
+						operator = (T other) noexcept;
 
-	inline			operator T () const;
+	inline			operator T () const noexcept;
 
-	inline T			swap (T other);
-	inline T			cas (T other, T comp);
+	inline T			swap (T other) noexcept;
+	inline T			cas (T other, T comp) noexcept;
 
 	// Beware while using the result of these operators, modification and
 	// read is not atomic. Use directly the AtomicIntOp instead.
 	inline AtomicInt <T> &
-						operator += (const T &other);
+						operator += (const T &other) noexcept;
 	inline AtomicInt <T> &
-						operator -= (const T &other);
+						operator -= (const T &other) noexcept;
 	inline AtomicInt <T> &
-						operator ++ ();
-	inline T			operator ++ (int);
+						operator ++ () noexcept;
+	inline T			operator ++ (int) noexcept;
 	inline AtomicInt <T> &
-						operator -- ();
-	inline T			operator -- (int);
+						operator -- () noexcept;
+	inline T			operator -- (int) noexcept;
 
 
 
@@ -105,13 +115,14 @@ class AtomicInt
 
 #if (conc_ARCHI == conc_ARCHI_X86)
 
-	enum {			SZ  = sizeof (T)	};
-	enum {			SL2 =    (SZ > 16) ? -1
-						      : ((SZ >  8) ?  4
-						      : ((SZ >  4) ?  3
-						      : ((SZ >  2) ?  2
-						      : ((SZ >  1) ?  1
-						      :               0))))	};
+	static constexpr int SZ  = int (sizeof (T));
+	static constexpr int SL2 =
+		   (SZ > 16) ? -1
+		: ((SZ >  8) ?  4
+		: ((SZ >  4) ?  3
+		: ((SZ >  2) ?  2
+		: ((SZ >  1) ?  1
+		:               0))));
 
 	typedef	AtomicMem <SL2>	StoredTypeWrapper;
 	typedef	typename StoredTypeWrapper::DataType	StoredType;
diff --git a/src/conc/AtomicInt.hpp b/src/conc/AtomicInt.hpp
index 490a4a7..a5f7e43 100644
--- a/src/conc/AtomicInt.hpp
+++ b/src/conc/AtomicInt.hpp
@@ -52,7 +52,7 @@ namespace conc
 
 
 template <class T>
-AtomicInt <T>::AtomicInt ()
+AtomicInt <T>::AtomicInt () noexcept
 :	_val ()
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
@@ -63,7 +63,7 @@ AtomicInt <T>::AtomicInt ()
 
 
 template <class T>
-AtomicInt <T>::AtomicInt (T val)
+AtomicInt <T>::AtomicInt (T val) noexcept
 :	_val (val)
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
@@ -74,19 +74,18 @@ AtomicInt <T>::AtomicInt (T val)
 
 
 template <class T>
-AtomicInt <T>::AtomicInt (const AtomicInt <T> &other)
+AtomicInt <T>::AtomicInt (const AtomicInt <T> &other) noexcept
 :	_val (T (other))
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	assert (is_ptr_aligned_nz ((const void *) (&_val), sizeof (_val)));
 #endif // conc_ARCHI
-	assert (&other != 0);
 }
 
 
 
 template <class T>
-AtomicInt <T> &	AtomicInt <T>::operator = (T other)
+AtomicInt <T> &	AtomicInt <T>::operator = (T other) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	StoredTypeWrapper::swap (_val, other);
@@ -100,7 +99,7 @@ AtomicInt <T> &	AtomicInt <T>::operator = (T other)
 
 
 template <class T>
-AtomicInt <T>::operator T () const
+AtomicInt <T>::operator T () const noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	return (T (_val));
@@ -112,7 +111,7 @@ AtomicInt <T>::operator T () const
 
 
 template <class T>
-T	AtomicInt <T>::swap (T other)
+T	AtomicInt <T>::swap (T other) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	return (T (StoredTypeWrapper::swap (_val, other)));
@@ -124,12 +123,14 @@ T	AtomicInt <T>::swap (T other)
 
 
 template <class T>
-T	AtomicInt <T>::cas (T other, T comp)
+T	AtomicInt <T>::cas (T other, T comp) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	return (T (StoredTypeWrapper::cas (_val, other, comp)));
 #else  // conc_ARCHI
-	_val.compare_exchange_weak (comp, other);
+	// compare_exchange_weak may fail spuriously; some algorithms act
+	// specifically on a failed CAS, so the strong version is required.
+	_val.compare_exchange_strong (comp, other);
 	return (comp);
 #endif // conc_ARCHI
 }
@@ -137,7 +138,7 @@ T	AtomicInt <T>::cas (T other, T comp)
 
 
 template <class T>
-AtomicInt <T> &	AtomicInt <T>::operator += (const T &other)
+AtomicInt <T> &	AtomicInt <T>::operator += (const T &other) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	AioAdd <T>	ftor (other);
@@ -152,7 +153,7 @@ AtomicInt <T> &	AtomicInt <T>::operator += (const T &other)
 
 
 template <class T>
-AtomicInt <T> &	AtomicInt <T>::operator -= (const T &other)
+AtomicInt <T> &	AtomicInt <T>::operator -= (const T &other) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	AioSub <T>	ftor (other);
@@ -167,7 +168,7 @@ AtomicInt <T> &	AtomicInt <T>::operator -= (const T &other)
 
 
 template <class T>
-AtomicInt <T> &	AtomicInt <T>::operator ++ ()
+AtomicInt <T> &	AtomicInt <T>::operator ++ () noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	return ((*this) += 1);
@@ -180,7 +181,7 @@ AtomicInt <T> &	AtomicInt <T>::operator ++ ()
 
 
 template <class T>
-T	AtomicInt <T>::operator ++ (int)
+T	AtomicInt <T>::operator ++ (int) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	const T        prev = _val;
@@ -194,7 +195,7 @@ T	AtomicInt <T>::operator ++ (int)
 
 
 template <class T>
-AtomicInt <T> &	AtomicInt <T>::operator -- ()
+AtomicInt <T> &	AtomicInt <T>::operator -- () noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	return ((*this) -= 1);
@@ -207,7 +208,7 @@ AtomicInt <T> &	AtomicInt <T>::operator -- ()
 
 
 template <class T>
-T	AtomicInt <T>::operator -- (int)
+T	AtomicInt <T>::operator -- (int) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	const T        prev = _val;
diff --git a/src/conc/AtomicIntOp.h b/src/conc/AtomicIntOp.h
index b17811b..93f23ed 100644
--- a/src/conc/AtomicIntOp.h
+++ b/src/conc/AtomicIntOp.h
@@ -61,16 +61,16 @@ class AtomicIntOp
 
 	template <class T, class F>
 	static inline void
-	               exec (AtomicInt <T> &atom, F &ftor);
+	               exec (AtomicInt <T> &atom, F &ftor) noexcept;
 	template <class T, class F>
 	static inline T
-	               exec_old (AtomicInt <T> &atom, F &ftor);
+	               exec_old (AtomicInt <T> &atom, F &ftor) noexcept;
 	template <class T, class F>
 	static inline T
-	               exec_new (AtomicInt <T> &atom, F &ftor);
+	               exec_new (AtomicInt <T> &atom, F &ftor) noexcept;
 	template <class T, class F>
 	static inline void
-	               exec_both (AtomicInt <T> &atom, F &ftor, T &val_old, T &val_new);
+	               exec_both (AtomicInt <T> &atom, F &ftor, T &val_old, T &val_new) noexcept;
 
 
 
diff --git a/src/conc/AtomicIntOp.hpp b/src/conc/AtomicIntOp.hpp
index bb3030a..673531a 100644
--- a/src/conc/AtomicIntOp.hpp
+++ b/src/conc/AtomicIntOp.hpp
@@ -34,7 +34,7 @@ namespace conc
 
 
 template <class T, class F>
-void	AtomicIntOp::exec (AtomicInt <T> &atom, F &ftor)
+void	AtomicIntOp::exec (AtomicInt <T> &atom, F &ftor) noexcept
 {
 	T              val_new;
 	T              val_old;
@@ -44,7 +44,7 @@ void	AtomicIntOp::exec (AtomicInt <T> &atom, F &ftor)
 
 
 template <class T, class F>
-T	AtomicIntOp::exec_old (AtomicInt <T> &atom, F &ftor)
+T	AtomicIntOp::exec_old (AtomicInt <T> &atom, F &ftor) noexcept
 {
 	T              val_new;
 	T              val_old;
@@ -56,7 +56,7 @@ T	AtomicIntOp::exec_old (AtomicInt <T> &atom, F &ftor)
 
 
 template <class T, class F>
-T	AtomicIntOp::exec_new (AtomicInt <T> &atom, F &ftor)
+T	AtomicIntOp::exec_new (AtomicInt <T> &atom, F &ftor) noexcept
 {
 	T              val_new;
 	T              val_old;
@@ -68,7 +68,7 @@ T	AtomicIntOp::exec_new (AtomicInt <T> &atom, F &ftor)
 
 
 template <class T, class F>
-void	AtomicIntOp::exec_both (AtomicInt <T> &atom, F &ftor, T &val_old, T &val_new)
+void	AtomicIntOp::exec_both (AtomicInt <T> &atom, F &ftor, T &val_old, T &val_new) noexcept
 {
 	T              val_cur;
 	do
diff --git a/src/conc/AtomicMem.h b/src/conc/AtomicMem.h
index 2975666..ad985ec 100644
--- a/src/conc/AtomicMem.h
+++ b/src/conc/AtomicMem.h
@@ -53,9 +53,9 @@ class AtomicMem
 	conc_TYPEDEF_ALIGN (4, DataType, DataTypeAlign);
 
 	conc_FORCEINLINE static DataType
-	               swap (volatile DataType &dest, DataType excg);
+	               swap (volatile DataType &dest, DataType excg) noexcept;
 	conc_FORCEINLINE static DataType
-	               cas (volatile DataType &dest, DataType excg, DataType comp);
+	               cas (volatile DataType &dest, DataType excg, DataType comp) noexcept;
 };	// class AtomicMem
 
 
@@ -68,9 +68,9 @@ class AtomicMem <3>
 	conc_TYPEDEF_ALIGN (8, DataType, DataTypeAlign);
 
 	conc_FORCEINLINE static DataType
-	               swap (volatile DataType &dest, DataType excg);
+	               swap (volatile DataType &dest, DataType excg) noexcept;
 	conc_FORCEINLINE static DataType
-	               cas (volatile DataType &dest, DataType excg, DataType comp);
+	               cas (volatile DataType &dest, DataType excg, DataType comp) noexcept;
 };	// class AtomicMem <3>
 
 
@@ -85,9 +85,9 @@ class AtomicMem <4>
 	conc_TYPEDEF_ALIGN (16, DataType, DataTypeAlign);
 
 	conc_FORCEINLINE static DataType
-	               swap (volatile DataType &dest, DataType excg);
+	               swap (volatile DataType &dest, DataType excg) noexcept;
 	conc_FORCEINLINE static DataType
-	               cas (volatile DataType &dest, DataType excg, DataType comp);
+	               cas (volatile DataType &dest, DataType excg, DataType comp) noexcept;
 };	// class AtomicMem <4>
 
 #endif	// conc_HAS_CAS_128
diff --git a/src/conc/AtomicMem.hpp b/src/conc/AtomicMem.hpp
index e639a6b..dbdfa24 100644
--- a/src/conc/AtomicMem.hpp
+++ b/src/conc/AtomicMem.hpp
@@ -36,7 +36,7 @@ namespace conc
 
 
 template <int SL2>
-typename AtomicMem <SL2>::DataType	AtomicMem <SL2>::swap (volatile DataType &dest, DataType excg)
+typename AtomicMem <SL2>::DataType	AtomicMem <SL2>::swap (volatile DataType &dest, DataType excg) noexcept
 {
 	static_assert ((SL2 >= 0 && SL2 <= 2), "");
 
@@ -46,7 +46,7 @@ typename AtomicMem <SL2>::DataType	AtomicMem <SL2>::swap (volatile DataType &des
 
 
 template <int SL2>
-typename AtomicMem <SL2>::DataType	AtomicMem <SL2>::cas (volatile DataType &dest, DataType excg, DataType comp)
+typename AtomicMem <SL2>::DataType	AtomicMem <SL2>::cas (volatile DataType &dest, DataType excg, DataType comp) noexcept
 {
 	static_assert ((SL2 >= 0 && SL2 <= 2), "");
 
@@ -55,14 +55,14 @@ typename AtomicMem <SL2>::DataType	AtomicMem <SL2>::cas (volatile DataType &dest
 
 
 
-AtomicMem <3>::DataType	AtomicMem <3>::swap (volatile DataType &dest, DataType excg)
+AtomicMem <3>::DataType	AtomicMem <3>::swap (volatile DataType &dest, DataType excg) noexcept
 {
 	return (Interlocked::swap (dest, excg));
 }
 
 
 
-AtomicMem <3>::DataType	AtomicMem <3>::cas (volatile DataType &dest, DataType excg, DataType comp)
+AtomicMem <3>::DataType	AtomicMem <3>::cas (volatile DataType &dest, DataType excg, DataType comp) noexcept
 {
 	return (Interlocked::cas (dest, excg, comp));
 }
@@ -73,7 +73,7 @@ AtomicMem <3>::DataType	AtomicMem <3>::cas (volatile DataType &dest, DataType ex
 
 
 
-AtomicMem <4>::DataType	AtomicMem <4>::swap (volatile DataType &dest, DataType excg)
+AtomicMem <4>::DataType	AtomicMem <4>::swap (volatile DataType &dest, DataType excg) noexcept
 {
 	Interlocked::Data128 old;
 
@@ -90,7 +90,7 @@ AtomicMem <4>::DataType	AtomicMem <4>::swap (volatile DataType &dest, DataType e
 
 
 
-AtomicMem <4>::DataType	AtomicMem <4>::cas (volatile DataType &dest, DataType excg, DataType comp)
+AtomicMem <4>::DataType	AtomicMem <4>::cas (volatile DataType &dest, DataType excg, DataType comp) noexcept
 {
 	Interlocked::Data128 old;
 
diff --git a/src/conc/AtomicPtr.h b/src/conc/AtomicPtr.h
index 293431a..85bdca8 100644
--- a/src/conc/AtomicPtr.h
+++ b/src/conc/AtomicPtr.h
@@ -48,18 +48,18 @@ class AtomicPtr
 
 public:
 
-	inline         AtomicPtr ();
-	inline         AtomicPtr (T *ptr);
+	inline         AtomicPtr () noexcept;
+	inline         AtomicPtr (T *ptr) noexcept;
 	inline AtomicPtr <T> &
-	               operator = (T *other_ptr);
+	               operator = (T *other_ptr) noexcept;
 
-	inline         operator T * () const;
+	inline         operator T * () const noexcept;
 
-	bool           operator == (T *other_ptr) const;
-	bool           operator != (T *other_ptr) const;
+	bool           operator == (T *other_ptr) const noexcept;
+	bool           operator != (T *other_ptr) const noexcept;
 
-	inline T *     swap (T *other_ptr);
-	inline T *     cas (T *other_ptr, T *comp_ptr);
+	inline T *     swap (T *other_ptr) noexcept;
+	inline T *     cas (T *other_ptr, T *comp_ptr) noexcept;
 
 
 
@@ -73,7 +73,7 @@ class AtomicPtr
 
 private:
 
-	inline T *     read_ptr () const;
+	inline T *     read_ptr () const noexcept;
 
 #if (conc_ARCHI == conc_ARCHI_X86)
 
diff --git a/src/conc/AtomicPtr.hpp b/src/conc/AtomicPtr.hpp
index c6f83bc..f20c63e 100644
--- a/src/conc/AtomicPtr.hpp
+++ b/src/conc/AtomicPtr.hpp
@@ -41,7 +41,7 @@ namespace conc
 
 
 template <class T>
-AtomicPtr <T>::AtomicPtr ()
+AtomicPtr <T>::AtomicPtr () noexcept
 :	_ptr ()
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
@@ -52,18 +52,23 @@ AtomicPtr <T>::AtomicPtr ()
 
 
 template <class T>
-AtomicPtr <T>::AtomicPtr (T *ptr)
+AtomicPtr <T>::AtomicPtr (T *ptr) noexcept
+#if (conc_ARCHI == conc_ARCHI_X86)
+:	_ptr ()
+#else  // conc_ARCHI
 :	_ptr (ptr)
+#endif // conc_ARCHI
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	assert (is_ptr_aligned_nz ((const void *) (&_ptr), sizeof (_ptr)));
+	_ptr._void_ptr = ptr;
 #endif // conc_ARCHI
 }
 
 
 
 template <class T>
-AtomicPtr <T> &	AtomicPtr <T>::operator = (T *other_ptr)
+AtomicPtr <T> &	AtomicPtr <T>::operator = (T *other_ptr) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	Interlocked::swap (_ptr._void_ptr, other_ptr);
@@ -77,7 +82,7 @@ AtomicPtr <T> &	AtomicPtr <T>::operator = (T *other_ptr)
 
 
 template <class T>
-AtomicPtr <T>::operator T * () const
+AtomicPtr <T>::operator T * () const noexcept
 {
 	return (read_ptr ());
 }
@@ -85,7 +90,7 @@ AtomicPtr <T>::operator T * () const
 
 
 template <class T>
-bool	AtomicPtr <T>::operator == (T *other_ptr) const
+bool	AtomicPtr <T>::operator == (T *other_ptr) const noexcept
 {
 	const T *      ptr = read_ptr ();
 
@@ -95,7 +100,7 @@ bool	AtomicPtr <T>::operator == (T *other_ptr) const
 
 
 template <class T>
-bool	AtomicPtr <T>::operator != (T *other_ptr) const
+bool	AtomicPtr <T>::operator != (T *other_ptr) const noexcept
 {
 	return (! ((*this) == other_ptr));
 }
@@ -103,7 +108,7 @@ bool	AtomicPtr <T>::operator != (T *other_ptr) const
 
 
 template <class T>
-T *	AtomicPtr <T>::swap (T *other_ptr)
+T *	AtomicPtr <T>::swap (T *other_ptr) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	return (static_cast <T *> (Interlocked::swap (
@@ -118,7 +123,7 @@ T *	AtomicPtr <T>::swap (T *other_ptr)
 
 
 template <class T>
-T *	AtomicPtr <T>::cas (T *other_ptr, T *comp_ptr)
+T *	AtomicPtr <T>::cas (T *other_ptr, T *comp_ptr) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
 	return (static_cast <T *> (Interlocked::cas (
@@ -127,7 +132,9 @@ T *	AtomicPtr <T>::cas (T *other_ptr, T *comp_ptr)
 		comp_ptr
 	)));
 #else  // conc_ARCHI
-	_ptr.compare_exchange_weak (comp_ptr, other_ptr);
+	// compare_exchange_weak may fail spuriously; some algorithms act
+	// specifically on a failed CAS, so the strong version is required.
+	_ptr.compare_exchange_strong (comp_ptr, other_ptr);
 	return (comp_ptr);
 #endif // conc_ARCHI
 }
@@ -143,12 +150,12 @@ T *	AtomicPtr <T>::cas (T *other_ptr, T *comp_ptr)
 
 
 template <class T>
-T *	AtomicPtr <T>::read_ptr () const
+T *	AtomicPtr <T>::read_ptr () const noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86)
-	return (static_cast <T *> (_ptr._t_ptr));
+	return _ptr._t_ptr;
 #else  // conc_ARCHI
-	return (_ptr.load ());
+	return _ptr.load ();
 #endif // conc_ARCHI
 }
 
diff --git a/src/conc/AtomicPtrIntPair.h b/src/conc/AtomicPtrIntPair.h
index b466ade..293575b 100644
--- a/src/conc/AtomicPtrIntPair.h
+++ b/src/conc/AtomicPtrIntPair.h
@@ -60,13 +60,13 @@ class AtomicPtrIntPair
 
 public:
 
-	               AtomicPtrIntPair ();
+	               AtomicPtrIntPair () noexcept;
 
-	void           set (T * ptr, ptrdiff_t val);
-	void           get (T * &ptr, ptrdiff_t &val) const;
-	T *            get_ptr () const;
-	ptrdiff_t      get_val () const;
-	bool           cas2 (T *new_ptr, ptrdiff_t new_val, T *comp_ptr, ptrdiff_t comp_val);
+	void           set (T * ptr, intptr_t val) noexcept;
+	void           get (T * &ptr, intptr_t &val) const noexcept;
+	T *            get_ptr () const noexcept;
+	intptr_t       get_val () const noexcept;
+	bool           cas2 (T *new_ptr, intptr_t new_val, T *comp_ptr, intptr_t comp_val) noexcept;
 
 
 
@@ -101,9 +101,8 @@ class AtomicPtrIntPair
 	class RealContent
 	{
 	public:
-		T * volatile   _ptr;
-		volatile intptr_t
-		               _val;
+		T *            _ptr;
+		intptr_t       _val;
 	};
 	static_assert (sizeof (RealContent) <= sizeof (DataType), "");
 
@@ -113,7 +112,7 @@ class AtomicPtrIntPair
 		RealContent    _content;
 	};
 
-	static void    cas_combi (Combi &old, Combi &dest, const Combi &excg, const Combi &comp);
+	static void    cas_combi (Combi &old, Combi &dest, const Combi &excg, const Combi &comp) noexcept;
 
 	Combi          _data;
 
diff --git a/src/conc/AtomicPtrIntPair.hpp b/src/conc/AtomicPtrIntPair.hpp
index 850894d..0ea5cfc 100644
--- a/src/conc/AtomicPtrIntPair.hpp
+++ b/src/conc/AtomicPtrIntPair.hpp
@@ -36,24 +36,24 @@ namespace conc
 
 
 template <class T>
-AtomicPtrIntPair <T>::AtomicPtrIntPair ()
+AtomicPtrIntPair <T>::AtomicPtrIntPair () noexcept
 :	_data ()
 {
-	set (0, 0);
+	set (nullptr, 0);
 }
 
 
 template <class T>
-void	AtomicPtrIntPair <T>::set (T * ptr, ptrdiff_t val)
+void	AtomicPtrIntPair <T>::set (T * ptr, intptr_t val) noexcept
 {
+	const RealContent content = { ptr, val };
+
 #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS)
 
-	_data._content._ptr = ptr;
-	_data._content._val = val;
+	_data._content = content;
 
 #else  // conc_ARCHI
 
-	const RealContent content = { ptr, val };
 	_data.store (content);
 
 #endif // conc_ARCHI
@@ -62,7 +62,7 @@ void	AtomicPtrIntPair <T>::set (T * ptr, ptrdiff_t val)
 
 
 template <class T>
-void	AtomicPtrIntPair <T>::get (T * &ptr, ptrdiff_t &val) const
+void	AtomicPtrIntPair <T>::get (T * &ptr, intptr_t &val) const noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS)
 
@@ -90,7 +90,7 @@ void	AtomicPtrIntPair <T>::get (T * &ptr, ptrdiff_t &val) const
 
 
 template <class T>
-T *	AtomicPtrIntPair <T>::get_ptr () const
+T *	AtomicPtrIntPair <T>::get_ptr () const noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS)
 
@@ -108,7 +108,7 @@ T *	AtomicPtrIntPair <T>::get_ptr () const
 
 
 template <class T>
-ptrdiff_t	AtomicPtrIntPair <T>::get_val () const
+intptr_t	AtomicPtrIntPair <T>::get_val () const noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS)
 
@@ -126,7 +126,7 @@ ptrdiff_t	AtomicPtrIntPair <T>::get_val () const
 
 
 template <class T>
-bool	AtomicPtrIntPair <T>::cas2 (T *new_ptr, ptrdiff_t new_val, T *comp_ptr, ptrdiff_t comp_val)
+bool	AtomicPtrIntPair <T>::cas2 (T *new_ptr, intptr_t new_val, T *comp_ptr, intptr_t comp_val) noexcept
 {
 #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS)
 
@@ -148,7 +148,9 @@ bool	AtomicPtrIntPair <T>::cas2 (T *new_ptr, ptrdiff_t new_val, T *comp_ptr, ptr
 	const RealContent val      = { new_ptr , new_val  };
 	RealContent       expected = { comp_ptr, comp_val };
 
-	return (_data.compare_exchange_weak (expected, val));
+	// compare_exchange_weak may fail spuriously. Some algorithms do something
+	// specific upon failure, so we need to use the strong version.
+	return (_data.compare_exchange_strong (expected, val));
 
 #endif // conc_ARCHI
 }
@@ -166,7 +168,7 @@ bool	AtomicPtrIntPair <T>::cas2 (T *new_ptr, ptrdiff_t new_val, T *comp_ptr, ptr
 #if (conc_ARCHI == conc_ARCHI_X86 || ! conc_USE_STD_ATOMIC_128BITS)
 
 template <class T>
-void	AtomicPtrIntPair <T>::cas_combi (Combi &old, Combi &dest, const Combi &excg, const Combi &comp)
+void	AtomicPtrIntPair <T>::cas_combi (Combi &old, Combi &dest, const Combi &excg, const Combi &comp) noexcept
 {
 #if (conc_WORD_SIZE == 64)
 
diff --git a/src/conc/CellPool.h b/src/conc/CellPool.h
index ce14cb1..1d38cda 100644
--- a/src/conc/CellPool.h
+++ b/src/conc/CellPool.h
@@ -65,7 +65,7 @@ class CellPool
 
 	inline CellType *
 	               take_cell (bool autogrow_flag = false);
-	inline void    return_cell (CellType &cell);
+	inline void    return_cell (CellType &cell) noexcept;
 
 
 
@@ -126,7 +126,9 @@ class CellPool
 private:
 
 	               CellPool (const CellPool <T> &other)          = delete;
+	               CellPool (CellPool <T> &&other)               = delete;
 	CellPool <T> & operator = (const CellPool <T> &other)        = delete;
+	CellPool <T> & operator = (CellPool <T> &&other)             = delete;
 	bool           operator == (const CellPool <T> &other) const = delete;
 	bool           operator != (const CellPool <T> &other) const = delete;
 
diff --git a/src/conc/CellPool.hpp b/src/conc/CellPool.hpp
index 5bd6152..821b4fe 100644
--- a/src/conc/CellPool.hpp
+++ b/src/conc/CellPool.hpp
@@ -53,7 +53,7 @@ CellPool <T>::CellPool ()
 
 	for (int zone_index = 0; zone_index < MAX_NBR_ZONES; ++zone_index)
 	{
-		_m_ptr->_zone_list [zone_index] = 0;
+		_m_ptr->_zone_list [zone_index] = nullptr;
 	}
 }
 
@@ -73,13 +73,13 @@ template <class T>
 void	CellPool <T>::clear_all ()
 {
 #if !defined (NDEBUG)
-	size_t         nbr_total_cells =
+	const size_t   nbr_total_cells =
 		compute_total_size_for_zones (_m_ptr->_nbr_zones);
 	
 	assert (_m_ptr->_nbr_avail_cells == nbr_total_cells);
 #endif
 	
-	while (_cell_stack.pop () != 0)
+	while (_cell_stack.pop () != nullptr)
 	{
 		continue;
 	}
@@ -89,10 +89,10 @@ void	CellPool <T>::clear_all ()
 	{
 		AtomicPtr <CellType> &  zone_ptr_ref = _m_ptr->_zone_list [zone_index];
 		CellType *     zone_ptr = zone_ptr_ref;
-		if (zone_ptr != 0)
+		if (zone_ptr != nullptr)
 		{
 			dealloc_cells (zone_ptr);
-			zone_ptr_ref = 0;
+			zone_ptr_ref = nullptr;
 		}
 	}
 	_m_ptr->_nbr_zones       = 0;
@@ -113,8 +113,8 @@ void	CellPool <T>::expand_to (size_t nbr_cells)
 	while (total_size < nbr_cells && zone_index < MAX_NBR_ZONES)
 	{
 		AtomicPtr <CellType> &  zone_ptr_ref = _m_ptr->_zone_list [zone_index];
-		CellType *     zone_ptr = zone_ptr_ref;
-		if (zone_ptr == 0)
+		const CellType *  zone_ptr = zone_ptr_ref;
+		if (zone_ptr == nullptr)
 		{
 			allocate_zone (cur_size, zone_ptr_ref);
 		}
@@ -134,7 +134,7 @@ void	CellPool <T>::expand_to (size_t nbr_cells)
 template <class T>
 typename CellPool <T>::CellType *	CellPool <T>::take_cell (bool autogrow_flag)
 {
-	CellType *     cell_ptr = 0;
+	CellType *     cell_ptr = nullptr;
 	
 	const int      nbr_zones = _m_ptr->_nbr_zones;
 
@@ -142,27 +142,32 @@ typename CellPool <T>::CellType *	CellPool <T>::take_cell (bool autogrow_flag)
 	{
 		cell_ptr = _cell_stack.pop ();
 
-		if ((cell_ptr == 0) && autogrow_flag && (nbr_zones < MAX_NBR_ZONES))
+		if (   cell_ptr == nullptr
+		    && autogrow_flag
+		    && nbr_zones < MAX_NBR_ZONES)
 		{
-			const size_t	new_size = compute_total_size_for_zones (nbr_zones + 1);
+			const size_t	new_size =
+				compute_total_size_for_zones (nbr_zones + 1);
 			expand_to (new_size);
 		}
 	}
-	while ((cell_ptr == 0) && autogrow_flag && (nbr_zones < MAX_NBR_ZONES));
+	while (   cell_ptr == nullptr
+	       && autogrow_flag
+	       && nbr_zones < MAX_NBR_ZONES);
 
-	if (cell_ptr != 0)
+	if (cell_ptr != nullptr)
 	{
 		-- _m_ptr->_nbr_avail_cells;
 	}
 
-	return (cell_ptr);
+	return cell_ptr;
 }
 
 
 
 // Thread-safe
 template <class T>
-void	CellPool <T>::return_cell (CellType &cell)
+void	CellPool <T>::return_cell (CellType &cell) noexcept
 {
 	_cell_stack.push (cell);
 
@@ -186,7 +191,7 @@ void	CellPool <T>::allocate_zone (size_t cur_size, AtomicPtr <CellType> & zone_p
 
 	CellType *     zone_ptr = alloc_cells (cur_size);
 
-	if (zone_ptr_ref.cas (zone_ptr, 0) != (CellType *)0)
+	if (zone_ptr_ref.cas (zone_ptr, nullptr) != static_cast <CellType *> (nullptr))
 	{
 		// CAS has failed, meaning that another thread is allocating this zone.
 		dealloc_cells (zone_ptr);
@@ -235,7 +240,7 @@ size_t	CellPool <T>::compute_total_size_for_zones (int nbr_zones)
 		++ zone_index;
 	}
 
-	return (total_size);
+	return total_size;
 }
 
 
@@ -275,7 +280,7 @@ typename CellPool <T>::CellType *	CellPool <T>::alloc_cells (size_t n)
 		throw;
 	}
 
-	return (cell_ptr);
+	return cell_ptr;
 }
 
 
diff --git a/src/conc/Interlocked.h b/src/conc/Interlocked.h
index 1ad2d0a..f7cff4c 100644
--- a/src/conc/Interlocked.h
+++ b/src/conc/Interlocked.h
@@ -46,14 +46,14 @@ class Interlocked
 public:
 
 	static conc_FORCEINLINE int32_t
-	               swap (int32_t volatile &dest, int32_t excg);
+	               swap (int32_t volatile &dest, int32_t excg) noexcept;
 	static conc_FORCEINLINE int32_t
-	               cas (int32_t volatile &dest, int32_t excg, int32_t comp);
+	               cas (int32_t volatile &dest, int32_t excg, int32_t comp) noexcept;
 
 	static conc_FORCEINLINE int64_t
-	               swap (int64_t volatile &dest, int64_t excg);
+	               swap (int64_t volatile &dest, int64_t excg) noexcept;
 	static conc_FORCEINLINE int64_t
-	               cas (int64_t volatile &dest, int64_t excg, int64_t comp);
+	               cas (int64_t volatile &dest, int64_t excg, int64_t comp) noexcept;
 
 #if defined (conc_HAS_CAS_128)
 
@@ -67,9 +67,9 @@ class Interlocked
 	{
 	public:
 		conc_FORCEINLINE bool
-		               operator == (const Data128 & other) const;
+		               operator == (const Data128 & other) const noexcept;
 		conc_FORCEINLINE bool
-		               operator != (const Data128 & other) const;
+		               operator != (const Data128 & other) const noexcept;
 
 		int64_t        _data [2];
 	};
@@ -82,16 +82,16 @@ class Interlocked
  #endif
 
 	static conc_FORCEINLINE void
-	               swap (Data128 &old, volatile Data128 &dest, const Data128 &excg);
+	               swap (Data128 &old, volatile Data128 &dest, const Data128 &excg) noexcept;
 	static conc_FORCEINLINE void
-	               cas (Data128 &old, volatile Data128 &dest, const Data128 &excg, const Data128 &comp);
+	               cas (Data128 &old, volatile Data128 &dest, const Data128 &excg, const Data128 &comp) noexcept;
 
 #endif
 
 	static conc_FORCEINLINE void *
-	               swap (void * volatile &dest_ptr, void *excg_ptr);
+	               swap (void * volatile &dest_ptr, void *excg_ptr) noexcept;
 	static conc_FORCEINLINE void *
-	               cas (void * volatile &dest_ptr, void *excg_ptr, void *comp_ptr);
+	               cas (void * volatile &dest_ptr, void *excg_ptr, void *comp_ptr) noexcept;
 
 
 
diff --git a/src/conc/Interlocked.hpp b/src/conc/Interlocked.hpp
index 957e933..8a14af4 100644
--- a/src/conc/Interlocked.hpp
+++ b/src/conc/Interlocked.hpp
@@ -47,7 +47,7 @@ namespace conc
 
 
 
-int32_t	Interlocked::swap (int32_t volatile &dest, int32_t excg)
+int32_t	Interlocked::swap (int32_t volatile &dest, int32_t excg) noexcept
 {
 	assert (is_ptr_aligned_nz (&dest));
 
@@ -80,7 +80,7 @@ int32_t	Interlocked::swap (int32_t volatile &dest, int32_t excg)
 
 
 
-int32_t	Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp)
+int32_t	Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp) noexcept
 {
 	assert (is_ptr_aligned_nz (&dest));
 
@@ -100,6 +100,10 @@ int32_t	Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp)
 		::LONG (comp)
 	));
 
+#elif defined (__GNUC__)
+
+	return (__sync_val_compare_and_swap (&dest, comp, excg));
+
 #elif defined (__APPLE__)
 
 	return (::OSAtomicCompareAndSwap32Barrier (
@@ -108,10 +112,6 @@ int32_t	Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp)
 		const_cast <int32_t *> (reinterpret_cast <int32_t volatile *> (&dest))
 	) ? comp : excg);
 
-#elif defined (__GNUC__)
-
-	return (__sync_val_compare_and_swap (&dest, comp, excg));
-
 #else
 
 	#error Unknown platform
@@ -121,7 +121,7 @@ int32_t	Interlocked::cas (int32_t volatile &dest, int32_t excg, int32_t comp)
 
 
 
-int64_t	Interlocked::swap (int64_t volatile &dest, int64_t excg)
+int64_t	Interlocked::swap (int64_t volatile &dest, int64_t excg) noexcept
 {
 	assert (is_ptr_aligned_nz (&dest));
 
@@ -178,7 +178,7 @@ int64_t	Interlocked::swap (int64_t volatile &dest, int64_t excg)
 
 
 
-int64_t	Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp)
+int64_t	Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp) noexcept
 {
 	assert (is_ptr_aligned_nz (&dest));
 
@@ -214,6 +214,10 @@ int64_t	Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp)
 
 	return (old);
 
+#elif defined (__GNUC__)
+
+	return (__sync_val_compare_and_swap (&dest, comp, excg));
+
 #elif defined (__APPLE__)
 
 	return (::OSAtomicCompareAndSwap64Barrier (
@@ -222,10 +226,6 @@ int64_t	Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp)
 		const_cast <int64_t *> (reinterpret_cast <int64_t volatile *> (&dest))
 	) ? comp : excg);
 
-#elif defined (__GNUC__)
-
-	return (__sync_val_compare_and_swap (&dest, comp, excg));
-
 #else
 
 	#error Unknown platform
@@ -239,7 +239,7 @@ int64_t	Interlocked::cas (int64_t volatile &dest, int64_t excg, int64_t comp)
 
 
 
-void	Interlocked::swap (Data128 &old, volatile Data128 &dest, const Data128 &excg)
+void	Interlocked::swap (Data128 &old, volatile Data128 &dest, const Data128 &excg) noexcept
 {
 	assert (is_ptr_aligned_nz (&dest));
 
@@ -254,7 +254,7 @@ void	Interlocked::swap (Data128 &old, volatile Data128 &dest, const Data128 &exc
 
 
 
-void	Interlocked::cas (Data128 &old, volatile Data128 &dest, const Data128 &excg, const Data128 &comp)
+void	Interlocked::cas (Data128 &old, volatile Data128 &dest, const Data128 &excg, const Data128 &comp) noexcept
 {
 	assert (is_ptr_aligned_nz (&dest));
 
@@ -322,13 +322,13 @@ void	Interlocked::cas (Data128 &old, volatile Data128 &dest, const Data128 &excg
 
 #if defined (_MSC_VER)
 
-bool	Interlocked::Data128::operator == (const Data128 & other) const
+bool	Interlocked::Data128::operator == (const Data128 & other) const noexcept
 {
 	return (   _data [0] == other._data [0]
 	        && _data [1] == other._data [1]);
 }
 
-bool	Interlocked::Data128::operator != (const Data128 & other) const
+bool	Interlocked::Data128::operator != (const Data128 & other) const noexcept
 {
 	return (   _data [0] != other._data [0]
 	        || _data [1] != other._data [1]);
@@ -345,25 +345,41 @@ bool	Interlocked::Data128::operator != (const Data128 & other) const
 	#pragma warning (4 : 4311 4312)
 #endif
 
-void *	Interlocked::swap (void * volatile &dest_ptr, void *excg_ptr)
+void *	Interlocked::swap (void * volatile &dest_ptr, void *excg_ptr) noexcept
 {
+	// We cannot just cast void * to IntPtr and rely on it matching
+	// either int32_t or int64_t, because it's possible that we have
+	//   typedef long IntPtr;
+	//   typedef long long int64_t;
+	// on a 64-bit system (e.g. macOS), thus making them incompatible.
 	return (reinterpret_cast <void *> (
 		swap (
-			*reinterpret_cast <IntPtr volatile *> (&dest_ptr),
-			reinterpret_cast <IntPtr> (excg_ptr)
+#if conc_WORD_SIZE == 32
+			*reinterpret_cast <int32_t volatile *> (&dest_ptr),
+			reinterpret_cast <int32_t> (excg_ptr)
+#else
+			*reinterpret_cast <int64_t volatile *> (&dest_ptr),
+			reinterpret_cast <int64_t> (excg_ptr)
+#endif
 		)
 	));
 }
 
 
 
-void *	Interlocked::cas (void * volatile &dest_ptr, void *excg_ptr, void *comp_ptr)
+void *	Interlocked::cas (void * volatile &dest_ptr, void *excg_ptr, void *comp_ptr) noexcept
 {
 	return (reinterpret_cast <void *> (
 		cas (
-			*reinterpret_cast <IntPtr volatile *> (&dest_ptr),
-			reinterpret_cast <IntPtr> (excg_ptr),
-			reinterpret_cast <IntPtr> (comp_ptr)
+#if conc_WORD_SIZE == 32
+			*reinterpret_cast <int32_t volatile *> (&dest_ptr),
+			reinterpret_cast <int32_t> (excg_ptr),
+			reinterpret_cast <int32_t> (comp_ptr)
+#else
+			*reinterpret_cast <int64_t volatile *> (&dest_ptr),
+			reinterpret_cast <int64_t> (excg_ptr),
+			reinterpret_cast <int64_t> (comp_ptr)
+#endif
 		)
 	));
 }
diff --git a/src/conc/LockFreeCell.h b/src/conc/LockFreeCell.h
index 2d16236..d3ccb2f 100644
--- a/src/conc/LockFreeCell.h
+++ b/src/conc/LockFreeCell.h
@@ -54,7 +54,7 @@ class LockFreeCell
 	typedef	T	ValueType;
 
 	AtomicPtr <LockFreeCell <T> >
-	               _next_ptr;
+	               _next_ptr { nullptr };
 	T              _val;
 
 
diff --git a/src/conc/LockFreeQueue.h b/src/conc/LockFreeQueue.h
index f80aef0..bee4d49 100644
--- a/src/conc/LockFreeQueue.h
+++ b/src/conc/LockFreeQueue.h
@@ -75,8 +75,8 @@ class LockFreeQueue
 	               LockFreeQueue ();
 	virtual        ~LockFreeQueue () = default;
 
-	void           enqueue (CellType &cell);
-	CellType *     dequeue ();
+	void           enqueue (CellType &cell) noexcept;
+	CellType *     dequeue () noexcept;
 
 
 
@@ -115,8 +115,11 @@ class LockFreeQueue
 private:
 
 	               LockFreeQueue (const LockFreeQueue <T> &other)     = delete;
+	               LockFreeQueue (LockFreeQueue <T> &&other)          = delete;
 	LockFreeQueue <T> &
 	               operator = (const LockFreeQueue <T> &other)        = delete;
+	LockFreeQueue <T> &
+	               operator = (LockFreeQueue <T> &&other)             = delete;
 	bool           operator == (const LockFreeQueue <T> &other) const = delete;
 	bool           operator != (const LockFreeQueue <T> &other) const = delete;
 
diff --git a/src/conc/LockFreeQueue.hpp b/src/conc/LockFreeQueue.hpp
index c84b538..ca8b02a 100644
--- a/src/conc/LockFreeQueue.hpp
+++ b/src/conc/LockFreeQueue.hpp
@@ -39,7 +39,7 @@ template <class T>
 LockFreeQueue <T>::LockFreeQueue ()
 :	_m_ptr ()
 {
-	_m_ptr->_dummy._next_ptr = 0;
+	_m_ptr->_dummy._next_ptr = nullptr;
 	_m_ptr->_head.set (&_m_ptr->_dummy, 0);
 	_m_ptr->_tail.set (&_m_ptr->_dummy, 0);
 }
@@ -47,23 +47,24 @@ LockFreeQueue <T>::LockFreeQueue ()
 
 
 template <class T>
-void	LockFreeQueue <T>::enqueue (CellType &cell)
+void	LockFreeQueue <T>::enqueue (CellType &cell) noexcept
 {
-	cell._next_ptr = 0;	// set the cell next pointer to NULL
+	cell._next_ptr = nullptr;  // set the cell next pointer to NULL
 
-	CellType *     tail_ptr;
-	ptrdiff_t      icount;
+	CellType *     tail_ptr = nullptr;
+	intptr_t       icount   = 0;
 
 	bool           cont_flag = true;
 	do	// try until enqueue is done
 	{
-		icount   = _m_ptr->_tail.get_val ();	// read the tail modification count
-		tail_ptr = _m_ptr->_tail.get_ptr ();	// read the tail cell
+		// read the tail cell and the tail modification count
+		// with a single call
+		_m_ptr->_tail.get (tail_ptr, icount);
 
 		// try to link the cell to the tail cell
-		void *         old_ptr = tail_ptr->_next_ptr.cas (&cell, 0);
+		void *         old_ptr = tail_ptr->_next_ptr.cas (&cell, nullptr);
 
-		if (old_ptr == 0)
+		if (old_ptr == nullptr)
 		{
 			cont_flag = false;	// enqueue is done, exit the loop
 		}
@@ -82,32 +83,36 @@ void	LockFreeQueue <T>::enqueue (CellType &cell)
 
 // Returns 0 if the queue is empty.
 template <class T>
-typename LockFreeQueue <T>::CellType *	LockFreeQueue <T>::dequeue ()
+typename LockFreeQueue <T>::CellType *	LockFreeQueue <T>::dequeue () noexcept
 {
-	ptrdiff_t      ocount;
-	ptrdiff_t      icount;
-	CellType *     head_ptr;
-	CellType *     next_ptr;
+	constexpr int  max_loop = 100;
+	int            loop_cnt = 0;
+	intptr_t       ocount   = 0;
+	intptr_t       icount   = 0;
+	CellType *     head_ptr = nullptr;
+	CellType *     next_ptr = nullptr;
 
 	do	// try until dequeue is done
 	{
-		ocount   = _m_ptr->_head.get_val ();   // read the head modification count
+		// read the head cell and the head modification count
+		// with a single call
+		_m_ptr->_head.get (head_ptr, ocount);
 		icount   = _m_ptr->_tail.get_val ();   // read the tail modification count
-		head_ptr = _m_ptr->_head.get_ptr ();   // read the head cell
 		next_ptr = head_ptr->_next_ptr;        // read the next cell
 
-		if (ocount == _m_ptr->_head.get_val ())  // ensures that next is a valid pointer to avoid failure when reading next value
+		const intptr_t ocount_tst = _m_ptr->_head.get_val ();
+		if (ocount == ocount_tst)  // ensures that next is a valid pointer to avoid failure when reading next value
 		{
 			if (head_ptr == _m_ptr->_tail.get_ptr ())   // is queue empty or tail falling behind ?
 			{
-				if (next_ptr == 0)   // is queue empty ?
+				if (next_ptr == nullptr)   // is queue empty ?
 				{
-					return (0); // queue is empty: return NULL
+					return nullptr; // queue is empty: return NULL
 				}
 				// tail is pointing to head in a non empty queue, try to set tail to the next cell
 				_m_ptr->_tail.cas2 (next_ptr, icount + 1, head_ptr, icount);
 			}
-			else if (next_ptr != 0) // if we are not competing on the dummy next
+			else if (next_ptr != nullptr) // if we are not competing on the dummy next
 			{
 				// try to set tail to the next cell
 				if (_m_ptr->_head.cas2 (next_ptr, ocount + 1, head_ptr, ocount))
@@ -116,6 +121,16 @@ typename LockFreeQueue <T>::CellType *	LockFreeQueue <T>::dequeue ()
 				}
 			}
 		}
+
+		++ loop_cnt;
+		if (loop_cnt >= max_loop)
+		{
+			// This could indicate that the queue is:
+			// - corrupted
+			// - or in heavy contention
+			assert (false);
+			return nullptr;
+		}
 	}
 	while (true);
 
diff --git a/src/conc/LockFreeStack.h b/src/conc/LockFreeStack.h
index c3fb1f1..f00a8ae 100644
--- a/src/conc/LockFreeStack.h
+++ b/src/conc/LockFreeStack.h
@@ -67,8 +67,8 @@ class LockFreeStack
 	               LockFreeStack ();
 	virtual        ~LockFreeStack () = default;
 
-	void           push (CellType &cell);
-	CellType *     pop ();
+	void           push (CellType &cell) noexcept;
+	CellType *     pop () noexcept;
 
 
 
@@ -92,8 +92,11 @@ class LockFreeStack
 private:
 
 	               LockFreeStack (const LockFreeStack <T> &other)     = delete;
+	               LockFreeStack (LockFreeStack <T> &&other)          = delete;
 	LockFreeStack <T> &
 	               operator = (const LockFreeStack <T> &other)        = delete;
+	LockFreeStack <T> &
+	               operator = (LockFreeStack <T> &&other)             = delete;
 	bool           operator == (const LockFreeStack <T> &other) const = delete;
 	bool           operator != (const LockFreeStack <T> &other) const = delete;
 
diff --git a/src/conc/LockFreeStack.hpp b/src/conc/LockFreeStack.hpp
index 11e0ed3..45eeec9 100644
--- a/src/conc/LockFreeStack.hpp
+++ b/src/conc/LockFreeStack.hpp
@@ -37,16 +37,16 @@ template <class T>
 LockFreeStack <T>::LockFreeStack ()
 :	_head_ptr_ptr ()
 {
-	_head_ptr_ptr->set (0, 0);
+	_head_ptr_ptr->set (nullptr, 0);
 }
 
 
 
 template <class T>
-void	LockFreeStack <T>::push (CellType &cell)
+void	LockFreeStack <T>::push (CellType &cell) noexcept
 {
-	CellType *     head_ptr;
-	ptrdiff_t      count;
+	CellType *     head_ptr = nullptr;
+	intptr_t       count    = 0;
 	do
 	{
 		head_ptr = _head_ptr_ptr->get_ptr ();
@@ -60,28 +60,28 @@ void	LockFreeStack <T>::push (CellType &cell)
 
 // Returns 0 if the stack is empty.
 template <class T>
-typename LockFreeStack <T>::CellType *	LockFreeStack <T>::pop ()
+typename LockFreeStack <T>::CellType *	LockFreeStack <T>::pop () noexcept
 {
-	CellType *     cell_ptr;
+	CellType *     cell_ptr  = nullptr;
 	bool           cont_flag = true;
 	do
 	{
 		cell_ptr = _head_ptr_ptr->get_ptr ();
 
-		if (cell_ptr == 0)
+		if (cell_ptr == nullptr)
 		{
-			cont_flag = false;	// Empty stack.
+			cont_flag = false; // Empty stack.
 		}
 
 		else
 		{
-			const ptrdiff_t   count = _head_ptr_ptr->get_val ();
-			if (cell_ptr != 0)
+			const intptr_t    count = _head_ptr_ptr->get_val ();
+			if (cell_ptr != nullptr)
 			{
 				CellType *     next_ptr = cell_ptr->_next_ptr;
 				if (_head_ptr_ptr->cas2 (next_ptr, count + 1, cell_ptr, count))
 				{
-					cell_ptr->_next_ptr = 0;
+					cell_ptr->_next_ptr = nullptr;
 					cont_flag = false;
 				}
 			}
diff --git a/src/conc/ObjPool.h b/src/conc/ObjPool.h
index 2ce4056..81b14d3 100644
--- a/src/conc/ObjPool.h
+++ b/src/conc/ObjPool.h
@@ -67,9 +67,9 @@ class ObjPool
 	               ObjPool ();
 	virtual        ~ObjPool ();
 
-	void           set_factory (Factory &fact);
-	Factory &      use_factory () const;
-	void           cleanup ();
+	void           set_factory (Factory &fact) noexcept;
+	Factory &      use_factory () const noexcept;
+	void           cleanup () noexcept;
 
 	T *            take_obj ();
 	void           return_obj (T &obj);
@@ -90,7 +90,7 @@ class ObjPool
 	typedef	typename PtrPool::CellType	PtrCell;
 	typedef	LockFreeStack <ObjType *>	PtrStack;
 
-	int            delete_obj_stack (PtrStack &ptr_stack, bool destroy_flag);
+	int            delete_obj_stack (PtrStack &ptr_stack, bool destroy_flag) noexcept;
 
 	Factory *      _factory_ptr = 0;    // 0 = not set
 	PtrStack       _stack_free;
diff --git a/src/conc/ObjPool.hpp b/src/conc/ObjPool.hpp
index d808aa6..2504104 100644
--- a/src/conc/ObjPool.hpp
+++ b/src/conc/ObjPool.hpp
@@ -86,7 +86,7 @@ Throws: Nothing
 */
 
 template <class T>
-void	ObjPool <T>::set_factory (Factory &fact)
+void	ObjPool <T>::set_factory (Factory &fact) noexcept
 {
 	_factory_ptr = &fact;
 }
@@ -94,7 +94,7 @@ void	ObjPool <T>::set_factory (Factory &fact)
 
 
 template <class T>
-typename ObjPool <T>::Factory &	ObjPool <T>::use_factory () const
+typename ObjPool <T>::Factory &	ObjPool <T>::use_factory () const noexcept
 {
 	assert (_factory_ptr != 0);
 
@@ -103,6 +103,36 @@ typename ObjPool <T>::Factory &	ObjPool <T>::use_factory () const
 
 
 
+/*
+==============================================================================
+Name: cleanup
+Description:
+	Preliminary deletion of the pool content, also used during the pool
+	destruction.
+	Do not call it if some objects are still out of the pool!
+	Use with care.
+Throws: Nothing
+==============================================================================
+*/
+
+template <class T>
+void	ObjPool <T>::cleanup () noexcept
+{
+#if ! defined (NDEBUG)
+	const int      count_free =
+#endif
+		delete_obj_stack  (_stack_free, false);
+#if ! defined (NDEBUG)
+	const int      count_all  =
+#endif
+		delete_obj_stack  (_stack_all,  true);
+
+	// False would mean that some cells are still out, in use.
+	assert (count_free == count_all);
+}
+
+
+
 /*
 ==============================================================================
 Name: take_obj
@@ -113,7 +143,7 @@ Name: take_obj
 Returns:
 	A pointer on the object, or 0 if no object is available and cannot be
 	created for any reason.
-Throws: Nothing
+Throws: Depends on the factory
 ==============================================================================
 */
 
@@ -175,7 +205,7 @@ Name: return_obj
 	- Do not return an object you didn't get from take_obj()
 Input parameters:
 	- obj: Reference on the returned object.
-Throws: Nothing
+Throws: std::runtime_error
 ==============================================================================
 */
 
@@ -205,36 +235,6 @@ void	ObjPool <T>::return_obj (T &obj)
 
 
 
-/*
-==============================================================================
-Name: cleanup
-Description:
-	Preliminary deletion of the pool content, also used during the pool
-	destruction.
-	Do not call it if some objects are still out of the pool!
-	Use with care.
-Throws: Nothing
-==============================================================================
-*/
-
-template <class T>
-void	ObjPool <T>::cleanup ()
-{
-#if ! defined (NDEBUG)
-	const int      count_free =
-#endif
-		delete_obj_stack  (_stack_free, false);
-#if ! defined (NDEBUG)
-	const int      count_all  =
-#endif
-		delete_obj_stack  (_stack_all,  true);
-
-	// False would mean that some cells are still out, in use.
-	assert (count_free == count_all);
-}
-
-
-
 /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 
@@ -244,7 +244,7 @@ void	ObjPool <T>::cleanup ()
 
 
 template <class T>
-int	ObjPool <T>::delete_obj_stack (PtrStack &ptr_stack, bool destroy_flag)
+int	ObjPool <T>::delete_obj_stack (PtrStack &ptr_stack, bool destroy_flag) noexcept
 {
 	typename PtrStack::CellType *   cell_ptr = 0;
 	int            count = 0;
diff --git a/src/conc/fnc.h b/src/conc/fnc.h
index 83934bd..a51d9ab 100644
--- a/src/conc/fnc.h
+++ b/src/conc/fnc.h
@@ -35,11 +35,11 @@ namespace conc
 
 
 template <class T>
-bool	is_ptr_aligned (const T *ptr, int align);
+bool	is_ptr_aligned (const T *ptr, int align) noexcept;
 template <class T>
-bool	is_ptr_aligned_nz (const T *ptr, int align);
+bool	is_ptr_aligned_nz (const T *ptr, int align) noexcept;
 template <class T>
-bool	is_ptr_aligned_nz (const T *ptr);
+bool	is_ptr_aligned_nz (const T *ptr) noexcept;
 
 
 
diff --git a/src/conc/fnc.hpp b/src/conc/fnc.hpp
index 573e356..b280e9b 100644
--- a/src/conc/fnc.hpp
+++ b/src/conc/fnc.hpp
@@ -23,7 +23,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 #include <cassert>
-#include <cstddef>
+#include <cstdint>
 
 
 
@@ -33,25 +33,25 @@ namespace conc
 
 
 template <class T>
-bool	is_ptr_aligned (const T *ptr, int align)
+bool	is_ptr_aligned (const T *ptr, int align) noexcept
 {
 	assert (align > 0);
 	assert ((align & -align) == align);
 
-	return ((reinterpret_cast <ptrdiff_t> (ptr) & (align - 1)) == 0);
+	return ((reinterpret_cast <intptr_t> (ptr) & (align - 1)) == 0);
 }
 
 template <class T>
-bool	is_ptr_aligned_nz (const T *ptr, int align)
+bool	is_ptr_aligned_nz (const T *ptr, int align) noexcept
 {
 	assert (align > 0);
 	assert ((align & -align) == align);
 
-	return (ptr != 0 && is_ptr_aligned (ptr, align));
+	return (ptr != nullptr && is_ptr_aligned (ptr, align));
 }
 
 template <class T>
-bool	is_ptr_aligned_nz (const T *ptr)
+bool	is_ptr_aligned_nz (const T *ptr) noexcept
 {
 	return (is_ptr_aligned_nz (ptr, sizeof (T)));
 }
diff --git a/src/ffft/DynArray.h b/src/ffft/DynArray.h
deleted file mode 100644
index ae36415..0000000
--- a/src/ffft/DynArray.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*****************************************************************************
-
-        DynArray.h
-        By Laurent de Soras
-
---- Legal stuff ---
-
-This program is free software. It comes without any warranty, to
-the extent permitted by applicable law. You can redistribute it
-and/or modify it under the terms of the Do What The Fuck You Want
-To Public License, Version 2, as published by Sam Hocevar. See
-http://sam.zoy.org/wtfpl/COPYING for more details.
-
-*Tab=3***********************************************************************/
-
-
-
-#if ! defined (ffft_DynArray_HEADER_INCLUDED)
-#define	ffft_DynArray_HEADER_INCLUDED
-
-#if defined (_MSC_VER)
-	#pragma once
-	#pragma warning (4 : 4250) // "Inherits via dominance."
-#endif
-
-
-
-/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-
-
-namespace ffft
-{
-
-
-
-template <class T>
-class DynArray
-{
-
-/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-public:
-
-	typedef	T	DataType;
-
-	               DynArray ();
-	explicit       DynArray (long sz);
-	               ~DynArray ();
-
-	inline long    size () const;
-	inline void    resize (long sz);
-
-	inline const DataType &
-	               operator [] (long pos) const;
-	inline DataType &
-	               operator [] (long pos);
-
-
-
-/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-protected:
-
-
-
-/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-private:
-
-	DataType *     _data_ptr;
-	long           _len;
-
-
-
-/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-private:
-
-	               DynArray (const DynArray &other);
-	DynArray &     operator = (const DynArray &other);
-	bool           operator == (const DynArray &other);
-	bool           operator != (const DynArray &other);
-
-};	// class DynArray
-
-
-
-}	// namespace ffft
-
-
-
-#include "ffft/DynArray.hpp"
-
-
-
-#endif	// ffft_DynArray_HEADER_INCLUDED
-
-
-
-/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/ffft/DynArray.hpp b/src/ffft/DynArray.hpp
deleted file mode 100644
index 42d57ba..0000000
--- a/src/ffft/DynArray.hpp
+++ /dev/null
@@ -1,144 +0,0 @@
-/*****************************************************************************
-
-        DynArray.hpp
-        By Laurent de Soras
-
---- Legal stuff ---
-
-This program is free software. It comes without any warranty, to
-the extent permitted by applicable law. You can redistribute it
-and/or modify it under the terms of the Do What The Fuck You Want
-To Public License, Version 2, as published by Sam Hocevar. See
-http://sam.zoy.org/wtfpl/COPYING for more details.
-
-*Tab=3***********************************************************************/
-
-
-
-#if defined (ffft_DynArray_CURRENT_CODEHEADER)
-	#error Recursive inclusion of DynArray code header.
-#endif
-#define	ffft_DynArray_CURRENT_CODEHEADER
-
-#if ! defined (ffft_DynArray_CODEHEADER_INCLUDED)
-#define	ffft_DynArray_CODEHEADER_INCLUDED
-
-
-
-/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-#include <cassert>
-
-
-
-namespace ffft
-{
-
-
-
-/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-
-
-template <class T>
-DynArray <T>::DynArray ()
-:	_data_ptr (0)
-,	_len (0)
-{
-	// Nothing
-}
-
-
-
-template <class T>
-DynArray <T>::DynArray (long sz)
-:	_data_ptr (0)
-,	_len (0)
-{
-	assert (sz >= 0);
-	if (sz > 0)
-	{
-		_data_ptr = new DataType [sz];
-		_len      = sz;
-	}
-}
-
-
-
-template <class T>
-DynArray <T>::~DynArray ()
-{
-	delete [] _data_ptr;
-	_data_ptr = 0;
-	_len      = 0;
-}
-
-
-
-template <class T>
-long	DynArray <T>::size () const
-{
-	return (_len);
-}
-
-
-
-template <class T>
-void	DynArray <T>::resize (long sz)
-{
-	assert (sz >= 0);
-	if (sz > 0)
-	{
-		DataType *     old_data_ptr = _data_ptr;
-		DataType *     tmp_data_ptr = new DataType [sz];
-
-		_data_ptr = tmp_data_ptr;
-		_len      = sz;
-
-		delete [] old_data_ptr;
-	}
-}
-
-
-
-template <class T>
-const typename DynArray <T>::DataType &	DynArray <T>::operator [] (long pos) const
-{
-	assert (pos >= 0);
-	assert (pos < _len);
-
-	return (_data_ptr [pos]);
-}
-
-
-
-template <class T>
-typename DynArray <T>::DataType &	DynArray <T>::operator [] (long pos)
-{
-	assert (pos >= 0);
-	assert (pos < _len);
-
-	return (_data_ptr [pos]);
-}
-
-
-
-/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-
-
-/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-
-
-}	// namespace ffft
-
-
-
-#endif	// ffft_DynArray_CODEHEADER_INCLUDED
-
-#undef ffft_DynArray_CURRENT_CODEHEADER
-
-
-
-/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/ffft/FFTReal.h b/src/ffft/FFTReal.h
index d0ca448..b799ebe 100644
--- a/src/ffft/FFTReal.h
+++ b/src/ffft/FFTReal.h
@@ -27,9 +27,10 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
-#include "ffft/def.h"
-#include "ffft/DynArray.h"
-#include "ffft/OscSinCos.h"
+#include	"ffft/def.h"
+#include	"ffft/OscSinCos.h"
+
+#include <vector>
 
 
 
@@ -46,18 +47,25 @@ class FFTReal
 
 public:
 
-	enum {         MAX_BIT_DEPTH = 30 };   // So length can be represented as long int
+	// So length can be represented as long int
+	static constexpr int MAX_BIT_DEPTH = 30;
 
 	typedef	DT	DataType;
 
-	explicit       FFTReal (long length);
-	virtual        ~FFTReal () {}
+	explicit			FFTReal (long length);
+						FFTReal (const FFTReal &other)    = default;
+						FFTReal (FFTReal &&other)         = default;
+
+	virtual			~FFTReal ()                       = default;
+	// Not assignable: the const members _length and _nbr_bits make "= default" implicitly deleted.
+	FFTReal &		operator = (const FFTReal &other) = delete;
+	FFTReal &		operator = (FFTReal &&other)      = delete;
 
-	long           get_length () const;
-	void           do_fft (DataType f [], const DataType x []) const;
-	void           do_ifft (const DataType f [], DataType x []) const;
-	void           rescale (DataType x []) const;
-	DataType *     use_buffer () const;
+	long				get_length () const noexcept;
+	void				do_fft (DataType f [], const DataType x []) const noexcept;
+	void				do_ifft (const DataType f [], DataType x []) const noexcept;
+	void				rescale (DataType x []) const noexcept;
+	DataType *		use_buffer () const noexcept;
 
 
 
@@ -72,45 +80,45 @@ class FFTReal
 private:
 
    // Over this bit depth, we use direct calculation for sin/cos
-   enum {         TRIGO_BD_LIMIT = 12 };
+   static constexpr int TRIGO_BD_LIMIT	= 12;
 
 	typedef	OscSinCos <DataType>	OscType;
 
-	void           init_br_lut ();
-	void           init_trigo_lut ();
-	void           init_trigo_osc ();
+	void				init_br_lut ();
+	void				init_trigo_lut ();
+	void				init_trigo_osc ();
 
 	ffft_FORCEINLINE const long *
-	               get_br_ptr () const;
+						get_br_ptr () const noexcept;
 	ffft_FORCEINLINE const DataType	*
-	               get_trigo_ptr (int level) const;
+						get_trigo_ptr (int level) const noexcept;
 	ffft_FORCEINLINE long
-	               get_trigo_level_index (int level) const;
-
-	inline void    compute_fft_general (DataType f [], const DataType x []) const;
-	inline void    compute_direct_pass_1_2 (DataType df [], const DataType x []) const;
-	inline void    compute_direct_pass_3 (DataType df [], const DataType sf []) const;
-	inline void    compute_direct_pass_n (DataType df [], const DataType sf [], int pass) const;
-	inline void    compute_direct_pass_n_lut (DataType df [], const DataType sf [], int pass) const;
-	inline void    compute_direct_pass_n_osc (DataType df [], const DataType sf [], int pass) const;
-
-	inline void    compute_ifft_general (const DataType f [], DataType x []) const;
-	inline void    compute_inverse_pass_n (DataType df [], const DataType sf [], int pass) const;
-	inline void    compute_inverse_pass_n_osc (DataType df [], const DataType sf [], int pass) const;
-	inline void    compute_inverse_pass_n_lut (DataType df [], const DataType sf [], int pass) const;
-	inline void    compute_inverse_pass_3 (DataType df [], const DataType sf []) const;
-	inline void    compute_inverse_pass_1_2 (DataType x [], const DataType sf []) const;
-
-	const long     _length;
-	const int      _nbr_bits;
-	DynArray <long>
-	               _br_lut;
-	DynArray <DataType>
-	               _trigo_lut;
-	mutable DynArray <DataType>
-	               _buffer;
-   mutable DynArray <OscType>
-	               _trigo_osc;
+						get_trigo_level_index (int level) const noexcept;
+
+	inline void		compute_fft_general (DataType f [], const DataType x []) const noexcept;
+	inline void		compute_direct_pass_1_2 (DataType df [], const DataType x []) const noexcept;
+	inline void		compute_direct_pass_3 (DataType df [], const DataType sf []) const noexcept;
+	inline void		compute_direct_pass_n (DataType df [], const DataType sf [], int pass) const noexcept;
+	inline void		compute_direct_pass_n_lut (DataType df [], const DataType sf [], int pass) const noexcept;
+	inline void		compute_direct_pass_n_osc (DataType df [], const DataType sf [], int pass) const noexcept;
+
+	inline void		compute_ifft_general (const DataType f [], DataType x []) const noexcept;
+	inline void		compute_inverse_pass_n (DataType df [], const DataType sf [], int pass) const noexcept;
+	inline void		compute_inverse_pass_n_osc (DataType df [], const DataType sf [], int pass) const noexcept;
+	inline void		compute_inverse_pass_n_lut (DataType df [], const DataType sf [], int pass) const noexcept;
+	inline void		compute_inverse_pass_3 (DataType df [], const DataType sf []) const noexcept;
+	inline void		compute_inverse_pass_1_2 (DataType x [], const DataType sf []) const noexcept;
+
+	const long		_length;
+	const int		_nbr_bits;
+	std::vector <long>
+						_br_lut;
+	std::vector <DataType>
+						_trigo_lut;
+	mutable std::vector <DataType>
+						_buffer;
+   mutable std::vector <OscType>
+						_trigo_osc;
 
 
 
@@ -118,11 +126,9 @@ class FFTReal
 
 private:
 
-	               FFTReal ();
-	               FFTReal (const FFTReal &other);
-	FFTReal &      operator = (const FFTReal &other);
-	bool           operator == (const FFTReal &other);
-	bool           operator != (const FFTReal &other);
+						FFTReal ()                         = delete;
+	bool				operator == (const FFTReal &other) = delete;
+	bool				operator != (const FFTReal &other) = delete;
 
 };	// class FFTReal
 
@@ -132,7 +138,7 @@ class FFTReal
 
 
 
-#include "ffft/FFTReal.hpp"
+#include	"ffft/FFTReal.hpp"
 
 
 
diff --git a/src/ffft/FFTReal.hpp b/src/ffft/FFTReal.hpp
index 6b19cd2..37c0ef0 100644
--- a/src/ffft/FFTReal.hpp
+++ b/src/ffft/FFTReal.hpp
@@ -27,8 +27,8 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
-#include <cassert>
-#include <cmath>
+#include	<cassert>
+#include	<cmath>
 
 
 
@@ -37,7 +37,15 @@ namespace ffft
 
 
 
-static inline bool	FFTReal_is_pow2 (long x)
+#if (__cplusplus >= 201402L)	// C++14 or later: constexpr functions may contain loops and assert()
+	#define ffft_CONSTEXPR14 constexpr
+#else	// Older standard: functions tagged with ffft_CONSTEXPR14 stay plain inline functions
+	#define ffft_CONSTEXPR14
+#endif
+
+
+
+static inline ffft_CONSTEXPR14 bool	FFTReal_is_pow2 (long x) noexcept
 {
 	assert (x > 0);
 
@@ -46,11 +54,11 @@ static inline bool	FFTReal_is_pow2 (long x)
 
 
 
-static inline int	FFTReal_get_next_pow2 (long x)
+static inline ffft_CONSTEXPR14 int	FFTReal_get_next_pow2 (long x) noexcept
 {
 	--x;
 
-	int            p = 0;
+	int				p = 0;
 	while ((x & ~0xFFFFL) != 0)
 	{
 		p += 16;
@@ -116,9 +124,9 @@ Throws: Nothing
 */
 
 template <class DT>
-long	FFTReal <DT>::get_length () const
+long	FFTReal <DT>::get_length () const noexcept
 {
-	return (_length);
+	return _length;
 }
 
 
@@ -140,11 +148,11 @@ Throws: Nothing
 */
 
 template <class DT>
-void	FFTReal <DT>::do_fft (DataType f [], const DataType x []) const
+void	FFTReal <DT>::do_fft (DataType f [], const DataType x []) const noexcept
 {
-	assert (f != 0);
+	assert (f != nullptr);
 	assert (f != use_buffer ());
-	assert (x != 0);
+	assert (x != nullptr);
 	assert (x != use_buffer ());
 	assert (x != f);
 
@@ -160,8 +168,8 @@ void	FFTReal <DT>::do_fft (DataType f [], const DataType x []) const
 		f [1] = x [0] - x [2];
 		f [3] = x [1] - x [3];
 
-		const DataType b_0 = x [0] + x [2];
-		const DataType b_2 = x [1] + x [3];
+		const DataType	b_0 = x [0] + x [2];
+		const DataType	b_2 = x [1] + x [3];
 		
 		f [0] = b_0 + b_2;
 		f [2] = b_0 - b_2;
@@ -201,11 +209,11 @@ Throws: Nothing
 */
 
 template <class DT>
-void	FFTReal <DT>::do_ifft (const DataType f [], DataType x []) const
+void	FFTReal <DT>::do_ifft (const DataType f [], DataType x []) const noexcept
 {
-	assert (f != 0);
+	assert (f != nullptr);
 	assert (f != use_buffer ());
-	assert (x != 0);
+	assert (x != nullptr);
 	assert (x != use_buffer ());
 	assert (x != f);
 
@@ -218,8 +226,8 @@ void	FFTReal <DT>::do_ifft (const DataType f [], DataType x []) const
 	// 4-point IFFT
 	else if (_nbr_bits == 2)
 	{
-		const DataType b_0 = f [0] + f [2];
-		const DataType b_2 = f [0] - f [2];
+		const DataType	b_0 = f [0] + f [2];
+		const DataType	b_2 = f [0] - f [2];
 
 		x [0] = b_0 + f [1] * 2;
 		x [2] = b_0 - f [1] * 2;
@@ -256,13 +264,13 @@ Throws: Nothing
 */
 
 template <class DT>
-void	FFTReal <DT>::rescale (DataType x []) const
+void	FFTReal <DT>::rescale (DataType x []) const noexcept
 {
 	const DataType	mul = DataType (1.0 / _length);
 
 	if (_length < 4)
 	{
-		long           i = _length - 1;
+		long				i = _length - 1;
 		do
 		{
 			x [i] *= mul;
@@ -276,7 +284,7 @@ void	FFTReal <DT>::rescale (DataType x []) const
 		assert ((_length & 3) == 0);
 
 		// Could be optimized with SIMD instruction sets (needs alignment check)
-		long           i = _length - 4;
+		long				i = _length - 4;
 		do
 		{
 			x [i + 0] *= mul;
@@ -307,13 +315,18 @@ Throws: Nothing
 */
 
 template <class DT>
-typename FFTReal <DT>::DataType *	FFTReal <DT>::use_buffer () const
+typename FFTReal <DT>::DataType *	FFTReal <DT>::use_buffer () const noexcept
 {
-	return (&_buffer [0]);
+	return _buffer.data ();
 }
 
 
 
+template <class DT>
+constexpr int	FFTReal <DT>::MAX_BIT_DEPTH;	// Out-of-class definition; required before C++17 if the constant is odr-used
+
+
+
 /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 
@@ -325,15 +338,15 @@ typename FFTReal <DT>::DataType *	FFTReal <DT>::use_buffer () const
 template <class DT>
 void	FFTReal <DT>::init_br_lut ()
 {
-	const long     length = 1L << _nbr_bits;
+	const long		length = 1L << _nbr_bits;
 	_br_lut.resize (length);
 
 	_br_lut [0] = 0;
-	long           br_index = 0;
+	long				br_index = 0;
 	for (long cnt = 1; cnt < length; ++cnt)
 	{
 		// ++br_index (bit reversed)
-		long           bit = length >> 1;
+		long				bit = length >> 1;
 		while (((br_index ^= bit) & bit) == 0)
 		{
 			bit >>= 1;
@@ -348,23 +361,21 @@ void	FFTReal <DT>::init_br_lut ()
 template <class DT>
 void	FFTReal <DT>::init_trigo_lut ()
 {
-	using namespace std;
-
 	if (_nbr_bits > 3)
 	{
-		const long     total_len = (1L << (_nbr_bits - 1)) - 4;
+		const long		total_len = (1L << (_nbr_bits - 1)) - 4;
 		_trigo_lut.resize (total_len);
 
 		for (int level = 3; level < _nbr_bits; ++level)
 		{
-			const long     level_len = 1L << (level - 1);
-			DataType	* const  level_ptr =
+			const long		level_len = 1L << (level - 1);
+			DataType	* const	level_ptr =
 				&_trigo_lut [get_trigo_level_index (level)];
-			const double   mul = PI / (level_len << 1);
+			const double	mul = PI / double (level_len << 1);
 
 			for (long i = 0; i < level_len; ++ i)
 			{
-				level_ptr [i] = static_cast <DataType> (cos (i * mul));
+				level_ptr [i] = static_cast <DataType> (cos (double (i) * mul));
 			}
 		}
 	}
@@ -375,17 +386,17 @@ void	FFTReal <DT>::init_trigo_lut ()
 template <class DT>
 void	FFTReal <DT>::init_trigo_osc ()
 {
-	const int      nbr_osc = _nbr_bits - TRIGO_BD_LIMIT;
+	const int		nbr_osc = _nbr_bits - TRIGO_BD_LIMIT;
 	if (nbr_osc > 0)
 	{
 		_trigo_osc.resize (nbr_osc);
 
 		for (int osc_cnt = 0; osc_cnt < nbr_osc; ++osc_cnt)
 		{
-			OscType &      osc = _trigo_osc [osc_cnt];
+			OscType &		osc = _trigo_osc [osc_cnt];
 
-			const long     len = 1L << (TRIGO_BD_LIMIT + osc_cnt);
-			const double   mul = (0.5 * PI) / len;
+			const long		len = 1L << (TRIGO_BD_LIMIT + osc_cnt);
+			const double	mul = (0.5 * PI) / double (len);
 			osc.set_step (mul);
 		}
 	}
@@ -394,7 +405,7 @@ void	FFTReal <DT>::init_trigo_osc ()
 
 
 template <class DT>
-const long *	FFTReal <DT>::get_br_ptr () const
+const long *	FFTReal <DT>::get_br_ptr () const noexcept
 {
 	return (&_br_lut [0]);
 }
@@ -402,7 +413,7 @@ const long *	FFTReal <DT>::get_br_ptr () const
 
 
 template <class DT>
-const typename FFTReal <DT>::DataType *	FFTReal <DT>::get_trigo_ptr (int level) const
+const typename FFTReal <DT>::DataType *	FFTReal <DT>::get_trigo_ptr (int level) const noexcept
 {
 	assert (level >= 3);
 
@@ -412,7 +423,7 @@ const typename FFTReal <DT>::DataType *	FFTReal <DT>::get_trigo_ptr (int level)
 
 
 template <class DT>
-long	FFTReal <DT>::get_trigo_level_index (int level) const
+long	FFTReal <DT>::get_trigo_level_index (int level) const noexcept
 {
 	assert (level >= 3);
 
@@ -423,16 +434,16 @@ long	FFTReal <DT>::get_trigo_level_index (int level) const
 
 // Transform in several passes
 template <class DT>
-void	FFTReal <DT>::compute_fft_general (DataType f [], const DataType x []) const
+void	FFTReal <DT>::compute_fft_general (DataType f [], const DataType x []) const noexcept
 {
-	assert (f != 0);
+	assert (f != nullptr);
 	assert (f != use_buffer ());
-	assert (x != 0);
+	assert (x != nullptr);
 	assert (x != use_buffer ());
 	assert (x != f);
 
-	DataType *     sf;
-	DataType *     df;
+	DataType *		sf;
+	DataType *		df;
 
 	if ((_nbr_bits & 1) != 0)
 	{
@@ -452,7 +463,7 @@ void	FFTReal <DT>::compute_fft_general (DataType f [], const DataType x []) cons
 	{
 		compute_direct_pass_n (df, sf, pass);
 
-		DataType * const  temp_ptr = df;
+		DataType * const	temp_ptr = df;
 		df = sf;
 		sf = temp_ptr;
 	}
@@ -461,27 +472,27 @@ void	FFTReal <DT>::compute_fft_general (DataType f [], const DataType x []) cons
 
 
 template <class DT>
-void	FFTReal <DT>::compute_direct_pass_1_2 (DataType df [], const DataType x []) const
+void	FFTReal <DT>::compute_direct_pass_1_2 (DataType df [], const DataType x []) const noexcept
 {
-	assert (df != 0);
-	assert (x != 0);
+	assert (df != nullptr);
+	assert (x  != nullptr);
 	assert (df != x);
 
-	const long* const bit_rev_lut_ptr = get_br_ptr ();
-	long              coef_index      = 0;
+	const long * const	bit_rev_lut_ptr = get_br_ptr ();
+	long				coef_index = 0;
 	do
 	{
-		const long     rev_index_0 = bit_rev_lut_ptr [coef_index    ];
-		const long     rev_index_1 = bit_rev_lut_ptr [coef_index + 1];
-		const long     rev_index_2 = bit_rev_lut_ptr [coef_index + 2];
-		const long     rev_index_3 = bit_rev_lut_ptr [coef_index + 3];
+		const long		rev_index_0 = bit_rev_lut_ptr [coef_index];
+		const long		rev_index_1 = bit_rev_lut_ptr [coef_index + 1];
+		const long		rev_index_2 = bit_rev_lut_ptr [coef_index + 2];
+		const long		rev_index_3 = bit_rev_lut_ptr [coef_index + 3];
 
 		DataType	* const	df2 = df + coef_index;
 		df2 [1] = x [rev_index_0] - x [rev_index_1];
 		df2 [3] = x [rev_index_2] - x [rev_index_3];
 
-		const DataType sf_0 = x [rev_index_0] + x [rev_index_1];
-		const DataType sf_2 = x [rev_index_2] + x [rev_index_3];
+		const DataType	sf_0 = x [rev_index_0] + x [rev_index_1];
+		const DataType	sf_2 = x [rev_index_2] + x [rev_index_3];
 
 		df2 [0] = sf_0 + sf_2;
 		df2 [2] = sf_0 - sf_2;
@@ -494,20 +505,20 @@ void	FFTReal <DT>::compute_direct_pass_1_2 (DataType df [], const DataType x [])
 
 
 template <class DT>
-void	FFTReal <DT>::compute_direct_pass_3 (DataType df [], const DataType sf []) const
+void	FFTReal <DT>::compute_direct_pass_3 (DataType df [], const DataType sf []) const noexcept
 {
-	assert (df != 0);
-	assert (sf != 0);
+	assert (df != nullptr);
+	assert (sf != nullptr);
 	assert (df != sf);
 
-	const DataType sqrt2_2    = DataType (SQRT2 * 0.5);
-	long           coef_index = 0;
+	const DataType	sqrt2_2 = DataType (SQRT2 * 0.5);
+	long				coef_index = 0;
 	do
 	{
-		DataType       v;
+		DataType			v;
 
-		df [coef_index]     = sf [coef_index    ] + sf [coef_index + 4];
-		df [coef_index + 4] = sf [coef_index    ] - sf [coef_index + 4];
+		df [coef_index] = sf [coef_index] + sf [coef_index + 4];
+		df [coef_index + 4] = sf [coef_index] - sf [coef_index + 4];
 		df [coef_index + 2] = sf [coef_index + 2];
 		df [coef_index + 6] = sf [coef_index + 6];
 
@@ -527,10 +538,10 @@ void	FFTReal <DT>::compute_direct_pass_3 (DataType df [], const DataType sf [])
 
 
 template <class DT>
-void	FFTReal <DT>::compute_direct_pass_n (DataType df [], const DataType sf [], int pass) const
+void	FFTReal <DT>::compute_direct_pass_n (DataType df [], const DataType sf [], int pass) const noexcept
 {
-	assert (df != 0);
-	assert (sf != 0);
+	assert (df != nullptr);
+	assert (sf != nullptr);
 	assert (df != sf);
 	assert (pass >= 3);
 	assert (pass < _nbr_bits);
@@ -548,25 +559,25 @@ void	FFTReal <DT>::compute_direct_pass_n (DataType df [], const DataType sf [],
 
 
 template <class DT>
-void	FFTReal <DT>::compute_direct_pass_n_lut (DataType df [], const DataType sf [], int pass) const
+void	FFTReal <DT>::compute_direct_pass_n_lut (DataType df [], const DataType sf [], int pass) const noexcept
 {
-	assert (df != 0);
-	assert (sf != 0);
+	assert (df != nullptr);
+	assert (sf != nullptr);
 	assert (df != sf);
 	assert (pass >= 3);
 	assert (pass < _nbr_bits);
 
-	const long     nbr_coef   = 1 << pass;
-	const long     h_nbr_coef = nbr_coef >> 1;
-	const long     d_nbr_coef = nbr_coef << 1;
-	long           coef_index = 0;
+	const long		nbr_coef = 1 << pass;
+	const long		h_nbr_coef = nbr_coef >> 1;
+	const long		d_nbr_coef = nbr_coef << 1;
+	long				coef_index = 0;
 	const DataType	* const	cos_ptr = get_trigo_ptr (pass);
 	do
 	{
-		const DataType * const  sf1r = sf + coef_index;
-		const DataType * const  sf2r = sf1r + nbr_coef;
-		DataType       * const  dfr  = df + coef_index;
-		DataType       * const  dfi  = dfr + nbr_coef;
+		const DataType	* const	sf1r = sf + coef_index;
+		const DataType	* const	sf2r = sf1r + nbr_coef;
+		DataType			* const	dfr = df + coef_index;
+		DataType			* const	dfi = dfr + nbr_coef;
 
 		// Extreme coefficients are always real
 		dfr [0] = sf1r [0] + sf2r [0];
@@ -575,20 +586,20 @@ void	FFTReal <DT>::compute_direct_pass_n_lut (DataType df [], const DataType sf
 		dfi [h_nbr_coef] = sf2r [h_nbr_coef];
 
 		// Others are conjugate complex numbers
-		const DataType * const  sf1i = sf1r + h_nbr_coef;
-		const DataType * const  sf2i = sf1i + nbr_coef;
+		const DataType * const	sf1i = sf1r + h_nbr_coef;
+		const DataType * const	sf2i = sf1i + nbr_coef;
 		for (long i = 1; i < h_nbr_coef; ++ i)
 		{
-			const DataType c = cos_ptr [             i]; // cos (i*PI/nbr_coef);
-			const DataType s = cos_ptr [h_nbr_coef - i]; // sin (i*PI/nbr_coef);
-			DataType       v;
+			const DataType	c = cos_ptr [i];					// cos (i*PI/nbr_coef);
+			const DataType	s = cos_ptr [h_nbr_coef - i];	// sin (i*PI/nbr_coef);
+			DataType	 		v;
 
 			v = sf2r [i] * c - sf2i [i] * s;
-			dfr [ i] = sf1r [i] + v;
+			dfr [i] = sf1r [i] + v;
 			dfi [-i] = sf1r [i] - v;	// dfr [nbr_coef - i] =
 
 			v = sf2r [i] * s + sf2i [i] * c;
-			dfi [           i] = v + sf1i [i];
+			dfi [i] = v + sf1i [i];
 			dfi [nbr_coef - i] = v - sf1i [i];
 		}
 
@@ -600,25 +611,25 @@ void	FFTReal <DT>::compute_direct_pass_n_lut (DataType df [], const DataType sf
 
 
 template <class DT>
-void	FFTReal <DT>::compute_direct_pass_n_osc (DataType df [], const DataType sf [], int pass) const
+void	FFTReal <DT>::compute_direct_pass_n_osc (DataType df [], const DataType sf [], int pass) const noexcept
 {
-	assert (df != 0);
-	assert (sf != 0);
+	assert (df != nullptr);
+	assert (sf != nullptr);
 	assert (df != sf);
 	assert (pass > TRIGO_BD_LIMIT);
 	assert (pass < _nbr_bits);
 
-	const long     nbr_coef    = 1 << pass;
-	const long     h_nbr_coef = nbr_coef >> 1;
-	const long     d_nbr_coef = nbr_coef << 1;
-	long           coef_index = 0;
-	OscType &      osc = _trigo_osc [pass - (TRIGO_BD_LIMIT + 1)];
+	const long		nbr_coef = 1 << pass;
+	const long		h_nbr_coef = nbr_coef >> 1;
+	const long		d_nbr_coef = nbr_coef << 1;
+	long				coef_index = 0;
+	OscType &		osc = _trigo_osc [pass - (TRIGO_BD_LIMIT + 1)];
 	do
 	{
-		const DataType * const  sf1r = sf   + coef_index;
-		const DataType * const  sf2r = sf1r + nbr_coef;
-		DataType       * const  dfr  = df   + coef_index;
-		DataType       * const  dfi  = dfr  + nbr_coef;
+		const DataType	* const	sf1r = sf + coef_index;
+		const DataType	* const	sf2r = sf1r + nbr_coef;
+		DataType			* const	dfr = df + coef_index;
+		DataType			* const	dfi = dfr + nbr_coef;
 
 		osc.clear_buffers ();
 
@@ -629,21 +640,21 @@ void	FFTReal <DT>::compute_direct_pass_n_osc (DataType df [], const DataType sf
 		dfi [h_nbr_coef] = sf2r [h_nbr_coef];
 
 		// Others are conjugate complex numbers
-		const DataType * const  sf1i = sf1r + h_nbr_coef;
-		const DataType * const  sf2i = sf1i + nbr_coef;
+		const DataType * const	sf1i = sf1r + h_nbr_coef;
+		const DataType * const	sf2i = sf1i + nbr_coef;
 		for (long i = 1; i < h_nbr_coef; ++ i)
 		{
 			osc.step ();
-			const DataType c = osc.get_cos ();
-			const DataType s = osc.get_sin ();
-			DataType       v;
+			const DataType	c = osc.get_cos ();
+			const DataType	s = osc.get_sin ();
+			DataType	 		v;
 
 			v = sf2r [i] * c - sf2i [i] * s;
-			dfr [ i] = sf1r [i] + v;
+			dfr [i] = sf1r [i] + v;
 			dfi [-i] = sf1r [i] - v;	// dfr [nbr_coef - i] =
 
 			v = sf2r [i] * s + sf2i [i] * c;
-			dfi [           i] = v + sf1i [i];
+			dfi [i] = v + sf1i [i];
 			dfi [nbr_coef - i] = v - sf1i [i];
 		}
 
@@ -656,26 +667,26 @@ void	FFTReal <DT>::compute_direct_pass_n_osc (DataType df [], const DataType sf
 
 // Transform in several pass
 template <class DT>
-void	FFTReal <DT>::compute_ifft_general (const DataType f [], DataType x []) const
+void	FFTReal <DT>::compute_ifft_general (const DataType f [], DataType x []) const noexcept
 {
-	assert (f != 0);
+	assert (f != nullptr);
 	assert (f != use_buffer ());
-	assert (x != 0);
+	assert (x != nullptr);
 	assert (x != use_buffer ());
 	assert (x != f);
 
-	DataType *     sf = const_cast <DataType *> (f);
-	DataType *     df;
-	DataType *     df_temp;
+	DataType *		sf = const_cast <DataType *> (f);
+	DataType *		df;
+	DataType *		df_temp;
 
 	if (_nbr_bits & 1)
 	{
-		df      = use_buffer ();
+		df = use_buffer ();
 		df_temp = x;
 	}
 	else
 	{
-		df      = x;
+		df = x;
 		df_temp = use_buffer ();
 	}
 
@@ -685,7 +696,7 @@ void	FFTReal <DT>::compute_ifft_general (const DataType f [], DataType x []) con
 
 		if (pass < _nbr_bits - 1)
 		{
-			DataType	* const  temp_ptr = df;
+			DataType	* const	temp_ptr = df;
 			df = sf;
 			sf = temp_ptr;
 		}
@@ -703,10 +714,10 @@ void	FFTReal <DT>::compute_ifft_general (const DataType f [], DataType x []) con
 
 
 template <class DT>
-void	FFTReal <DT>::compute_inverse_pass_n (DataType df [], const DataType sf [], int pass) const
+void	FFTReal <DT>::compute_inverse_pass_n (DataType df [], const DataType sf [], int pass) const noexcept
 {
-	assert (df != 0);
-	assert (sf != 0);
+	assert (df != nullptr);
+	assert (sf != nullptr);
 	assert (df != sf);
 	assert (pass >= 3);
 	assert (pass < _nbr_bits);
@@ -724,25 +735,25 @@ void	FFTReal <DT>::compute_inverse_pass_n (DataType df [], const DataType sf [],
 
 
 template <class DT>
-void	FFTReal <DT>::compute_inverse_pass_n_lut (DataType df [], const DataType sf [], int pass) const
+void	FFTReal <DT>::compute_inverse_pass_n_lut (DataType df [], const DataType sf [], int pass) const noexcept
 {
-	assert (df != 0);
-	assert (sf != 0);
+	assert (df != nullptr);
+	assert (sf != nullptr);
 	assert (df != sf);
 	assert (pass >= 3);
 	assert (pass < _nbr_bits);
 
-	const long     nbr_coef   = 1 << pass;
-	const long     h_nbr_coef = nbr_coef >> 1;
-	const long     d_nbr_coef = nbr_coef << 1;
-	long           coef_index = 0;
+	const long		nbr_coef = 1 << pass;
+	const long		h_nbr_coef = nbr_coef >> 1;
+	const long		d_nbr_coef = nbr_coef << 1;
+	long				coef_index = 0;
 	const DataType * const	cos_ptr = get_trigo_ptr (pass);
 	do
 	{
-		const DataType * const  sfr  = sf   + coef_index;
-		const DataType * const  sfi  = sfr  + nbr_coef;
-		DataType       * const  df1r = df   + coef_index;
-		DataType       * const  df2r = df1r + nbr_coef;
+		const DataType	* const	sfr = sf + coef_index;
+		const DataType	* const	sfi = sfr + nbr_coef;
+		DataType			* const	df1r = df + coef_index;
+		DataType			* const	df2r = df1r + nbr_coef;
 
 		// Extreme coefficients are always real
 		df1r [0] = sfr [0] + sfi [0];		// + sfr [nbr_coef]
@@ -751,16 +762,16 @@ void	FFTReal <DT>::compute_inverse_pass_n_lut (DataType df [], const DataType sf
 		df2r [h_nbr_coef] = sfi [h_nbr_coef] * 2;
 
 		// Others are conjugate complex numbers
-		DataType * const  df1i = df1r + h_nbr_coef;
-		DataType * const  df2i = df1i + nbr_coef;
+		DataType * const	df1i = df1r + h_nbr_coef;
+		DataType * const	df2i = df1i + nbr_coef;
 		for (long i = 1; i < h_nbr_coef; ++ i)
 		{
-			df1r [i] = sfr [i] + sfi [          -i];     // + sfr [nbr_coef - i]
+			df1r [i] = sfr [i] + sfi [-i];		// + sfr [nbr_coef - i]
 			df1i [i] = sfi [i] - sfi [nbr_coef - i];
 
-			const DataType	c = cos_ptr [i             ]; // cos (i*PI/nbr_coef);
-			const DataType	s = cos_ptr [h_nbr_coef - i]; // sin (i*PI/nbr_coef);
-			const DataType	vr = sfr [i] - sfi [          -i]; // - sfr [nbr_coef - i]
+			const DataType	c = cos_ptr [i];					// cos (i*PI/nbr_coef);
+			const DataType	s = cos_ptr [h_nbr_coef - i];	// sin (i*PI/nbr_coef);
+			const DataType	vr = sfr [i] - sfi [-i];		// - sfr [nbr_coef - i]
 			const DataType	vi = sfi [i] + sfi [nbr_coef - i];
 
 			df2r [i] = vr * c + vi * s;
@@ -775,25 +786,25 @@ void	FFTReal <DT>::compute_inverse_pass_n_lut (DataType df [], const DataType sf
 
 
 template <class DT>
-void	FFTReal <DT>::compute_inverse_pass_n_osc (DataType df [], const DataType sf [], int pass) const
+void	FFTReal <DT>::compute_inverse_pass_n_osc (DataType df [], const DataType sf [], int pass) const noexcept
 {
-	assert (df != 0);
-	assert (sf != 0);
+	assert (df != nullptr);
+	assert (sf != nullptr);
 	assert (df != sf);
 	assert (pass > TRIGO_BD_LIMIT);
 	assert (pass < _nbr_bits);
 
-	const long     nbr_coef   = 1 << pass;
-	const long     h_nbr_coef = nbr_coef >> 1;
-	const long     d_nbr_coef = nbr_coef << 1;
-	long           coef_index = 0;
-	OscType &      osc = _trigo_osc [pass - (TRIGO_BD_LIMIT + 1)];
+	const long		nbr_coef = 1 << pass;
+	const long		h_nbr_coef = nbr_coef >> 1;
+	const long		d_nbr_coef = nbr_coef << 1;
+	long				coef_index = 0;
+	OscType &		osc = _trigo_osc [pass - (TRIGO_BD_LIMIT + 1)];
 	do
 	{
-		const DataType * const  sfr  = sf   + coef_index;
-		const DataType * const  sfi  = sfr  + nbr_coef;
-		DataType       * const  df1r = df   + coef_index;
-		DataType       * const  df2r = df1r + nbr_coef;
+		const DataType	* const	sfr = sf + coef_index;
+		const DataType	* const	sfi = sfr + nbr_coef;
+		DataType			* const	df1r = df + coef_index;
+		DataType			* const	df2r = df1r + nbr_coef;
 
 		osc.clear_buffers ();
 
@@ -808,13 +819,13 @@ void	FFTReal <DT>::compute_inverse_pass_n_osc (DataType df [], const DataType sf
 		DataType * const	df2i = df1i + nbr_coef;
 		for (long i = 1; i < h_nbr_coef; ++ i)
 		{
-			df1r [i] = sfr [i] + sfi [          -i];     // + sfr [nbr_coef - i]
+			df1r [i] = sfr [i] + sfi [-i];		// + sfr [nbr_coef - i]
 			df1i [i] = sfi [i] - sfi [nbr_coef - i];
 
 			osc.step ();
 			const DataType	c = osc.get_cos ();
 			const DataType	s = osc.get_sin ();
-			const DataType	vr = sfr [i] - sfi [          -i]; // - sfr [nbr_coef - i]
+			const DataType	vr = sfr [i] - sfi [-i];		// - sfr [nbr_coef - i]
 			const DataType	vi = sfi [i] + sfi [nbr_coef - i];
 
 			df2r [i] = vr * c + vi * s;
@@ -829,26 +840,26 @@ void	FFTReal <DT>::compute_inverse_pass_n_osc (DataType df [], const DataType sf
 
 
 template <class DT>
-void	FFTReal <DT>::compute_inverse_pass_3 (DataType df [], const DataType sf []) const
+void	FFTReal <DT>::compute_inverse_pass_3 (DataType df [], const DataType sf []) const noexcept
 {
-	assert (df != 0);
-	assert (sf != 0);
+	assert (df != nullptr);
+	assert (sf != nullptr);
 	assert (df != sf);
 
-	const DataType sqrt2_2    = DataType (SQRT2 * 0.5);
-	long           coef_index = 0;
+	const DataType	sqrt2_2 = DataType (SQRT2 * 0.5);
+	long				coef_index = 0;
 	do
 	{
-		df [coef_index    ] = sf [coef_index    ] + sf [coef_index + 4];
-		df [coef_index + 4] = sf [coef_index    ] - sf [coef_index + 4];
+		df [coef_index] = sf [coef_index] + sf [coef_index + 4];
+		df [coef_index + 4] = sf [coef_index] - sf [coef_index + 4];
 		df [coef_index + 2] = sf [coef_index + 2] * 2;
 		df [coef_index + 6] = sf [coef_index + 6] * 2;
 
 		df [coef_index + 1] = sf [coef_index + 1] + sf [coef_index + 3];
 		df [coef_index + 3] = sf [coef_index + 5] - sf [coef_index + 7];
 
-		const DataType vr   = sf [coef_index + 1] - sf [coef_index + 3];
-		const DataType vi   = sf [coef_index + 5] + sf [coef_index + 7];
+		const DataType	vr = sf [coef_index + 1] - sf [coef_index + 3];
+		const DataType	vi = sf [coef_index + 5] + sf [coef_index + 7];
 
 		df [coef_index + 5] = (vr + vi) * sqrt2_2;
 		df [coef_index + 7] = (vi - vr) * sqrt2_2;
@@ -861,22 +872,22 @@ void	FFTReal <DT>::compute_inverse_pass_3 (DataType df [], const DataType sf [])
 
 
 template <class DT>
-void	FFTReal <DT>::compute_inverse_pass_1_2 (DataType x [], const DataType sf []) const
+void	FFTReal <DT>::compute_inverse_pass_1_2 (DataType x [], const DataType sf []) const noexcept
 {
-	assert (x != 0);
-	assert (sf != 0);
-	assert (x != sf);
+	assert (x  != nullptr);
+	assert (sf != nullptr);
+	assert (x  != sf);
 
-	const long *   bit_rev_lut_ptr = get_br_ptr ();
-	const DataType *  sf2 = sf;
-	long           coef_index = 0;
+	const long *	bit_rev_lut_ptr = get_br_ptr ();
+	const DataType *	sf2 = sf;
+	long				coef_index = 0;
 	do
 	{
 		{
-			const DataType b_0 = sf2 [0] + sf2 [2];
-			const DataType b_2 = sf2 [0] - sf2 [2];
-			const DataType b_1 = sf2 [1] * 2;
-			const DataType b_3 = sf2 [3] * 2;
+			const DataType	b_0 = sf2 [0] + sf2 [2];
+			const DataType	b_2 = sf2 [0] - sf2 [2];
+			const DataType	b_1 = sf2 [1] * 2;
+			const DataType	b_3 = sf2 [3] * 2;
 
 			x [bit_rev_lut_ptr [0]] = b_0 + b_1;
 			x [bit_rev_lut_ptr [1]] = b_0 - b_1;
@@ -884,10 +895,10 @@ void	FFTReal <DT>::compute_inverse_pass_1_2 (DataType x [], const DataType sf []
 			x [bit_rev_lut_ptr [3]] = b_2 - b_3;
 		}
 		{
-			const DataType b_0 = sf2 [4] + sf2 [6];
-			const DataType b_2 = sf2 [4] - sf2 [6];
-			const DataType b_1 = sf2 [5] * 2;
-			const DataType b_3 = sf2 [7] * 2;
+			const DataType	b_0 = sf2 [4] + sf2 [6];
+			const DataType	b_2 = sf2 [4] - sf2 [6];
+			const DataType	b_1 = sf2 [5] * 2;
+			const DataType	b_3 = sf2 [7] * 2;
 
 			x [bit_rev_lut_ptr [4]] = b_0 + b_1;
 			x [bit_rev_lut_ptr [5]] = b_0 - b_1;
@@ -895,8 +906,8 @@ void	FFTReal <DT>::compute_inverse_pass_1_2 (DataType x [], const DataType sf []
 			x [bit_rev_lut_ptr [7]] = b_2 - b_3;
 		}
 
-		sf2             += 8;
-		coef_index      += 8;
+		sf2 += 8;
+		coef_index += 8;
 		bit_rev_lut_ptr += 8;
 	}
 	while (coef_index < _length);
@@ -904,6 +915,11 @@ void	FFTReal <DT>::compute_inverse_pass_1_2 (DataType x [], const DataType sf []
 
 
 
+template <class DT>
+constexpr int	FFTReal <DT>::TRIGO_BD_LIMIT;
+
+
+
 }	// namespace ffft
 
 
diff --git a/src/ffft/OscSinCos.h b/src/ffft/OscSinCos.h
index afaf1d5..6c535b9 100644
--- a/src/ffft/OscSinCos.h
+++ b/src/ffft/OscSinCos.h
@@ -27,7 +27,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
-#include "ffft/def.h"
+#include	"ffft/def.h"
 
 
 
@@ -46,19 +46,17 @@ class OscSinCos
 
 	typedef	T	DataType;
 
-	               OscSinCos ();
-
 	ffft_FORCEINLINE void
-	               set_step (double angle_rad);
+						set_step (double angle_rad) noexcept;
 
 	ffft_FORCEINLINE DataType
-	               get_cos () const;
+						get_cos () const noexcept;
 	ffft_FORCEINLINE DataType
-	               get_sin () const;
+						get_sin () const noexcept;
 	ffft_FORCEINLINE void
-	               step ();
+						step () noexcept;
 	ffft_FORCEINLINE void
-	               clear_buffers ();
+						clear_buffers () noexcept;
 
 
 
@@ -72,10 +70,10 @@ class OscSinCos
 
 private:
 
-	DataType       _pos_cos;      // Current phase expressed with sin and cos. [-1 ; 1]
-	DataType       _pos_sin;      // -
-	DataType       _step_cos;     // Phase increment per step, [-1 ; 1]
-	DataType       _step_sin;     // -
+	DataType       _pos_cos  { 1 };  // Current phase expressed with sin and cos. [-1 ; 1]
+	DataType       _pos_sin  { 0 };  // -
+	DataType       _step_cos { 1 };  // Phase increment per step, [-1 ; 1]
+	DataType       _step_sin { 0 };  // -
 
 
 
@@ -83,10 +81,8 @@ class OscSinCos
 
 private:
 
-	               OscSinCos (const OscSinCos &other);
-	OscSinCos &    operator = (const OscSinCos &other);
-	bool           operator == (const OscSinCos &other);
-	bool           operator != (const OscSinCos &other);
+	bool           operator == (const OscSinCos &other) const = delete;
+	bool           operator != (const OscSinCos &other) const = delete;
 
 };	// class OscSinCos
 
@@ -96,7 +92,7 @@ class OscSinCos
 
 
 
-#include "ffft/OscSinCos.hpp"
+#include	"ffft/OscSinCos.hpp"
 
 
 
diff --git a/src/ffft/OscSinCos.hpp b/src/ffft/OscSinCos.hpp
index 04fe703..05f4202 100644
--- a/src/ffft/OscSinCos.hpp
+++ b/src/ffft/OscSinCos.hpp
@@ -27,7 +27,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
-#include <cmath>
+#include	<cmath>
 
 namespace std { }
 
@@ -43,22 +43,8 @@ namespace ffft
 
 
 template <class T>
-OscSinCos <T>::OscSinCos ()
-:	_pos_cos (1)
-,	_pos_sin (0)
-,	_step_cos (1)
-,	_step_sin (0)
+void	OscSinCos <T>::set_step (double angle_rad) noexcept
 {
-	// Nothing
-}
-
-
-
-template <class T>
-void	OscSinCos <T>::set_step (double angle_rad)
-{
-	using namespace std;
-
 	_step_cos = static_cast <DataType> (cos (angle_rad));
 	_step_sin = static_cast <DataType> (sin (angle_rad));
 }
@@ -66,7 +52,7 @@ void	OscSinCos <T>::set_step (double angle_rad)
 
 
 template <class T>
-typename OscSinCos <T>::DataType	OscSinCos <T>::get_cos () const
+typename OscSinCos <T>::DataType	OscSinCos <T>::get_cos () const noexcept
 {
 	return (_pos_cos);
 }
@@ -74,7 +60,7 @@ typename OscSinCos <T>::DataType	OscSinCos <T>::get_cos () const
 
 
 template <class T>
-typename OscSinCos <T>::DataType	OscSinCos <T>::get_sin () const
+typename OscSinCos <T>::DataType	OscSinCos <T>::get_sin () const noexcept
 {
 	return (_pos_sin);
 }
@@ -82,10 +68,10 @@ typename OscSinCos <T>::DataType	OscSinCos <T>::get_sin () const
 
 
 template <class T>
-void	OscSinCos <T>::step ()
+void	OscSinCos <T>::step () noexcept
 {
-	const DataType old_cos = _pos_cos;
-	const DataType old_sin = _pos_sin;
+	const DataType	old_cos = _pos_cos;
+	const DataType	old_sin = _pos_sin;
 
 	_pos_cos = old_cos * _step_cos - old_sin * _step_sin;
 	_pos_sin = old_cos * _step_sin + old_sin * _step_cos;
@@ -94,7 +80,7 @@ void	OscSinCos <T>::step ()
 
 
 template <class T>
-void	OscSinCos <T>::clear_buffers ()
+void	OscSinCos <T>::clear_buffers () noexcept
 {
 	_pos_cos = static_cast <DataType> (1);
 	_pos_sin = static_cast <DataType> (0);
diff --git a/src/ffft/def.h b/src/ffft/def.h
index a1e6ff8..2f7f8ee 100644
--- a/src/ffft/def.h
+++ b/src/ffft/def.h
@@ -34,8 +34,8 @@ namespace ffft
 
 
 
-const double   PI    = 3.1415926535897932384626433832795;
-const double   SQRT2 = 1.41421356237309514547462185873883;
+constexpr double  PI    = 3.1415926535897932384626433832795;
+constexpr double  SQRT2 = 1.41421356237309514547462185873883;
 
 #if defined (_MSC_VER)
 
@@ -47,6 +47,20 @@ const double   SQRT2 = 1.41421356237309514547462185873883;
 
 #endif
 
+// Compiler type
+#define ffft_COMPILER_UNKNOWN (-1)
+#define ffft_COMPILER_GCC     (1)
+#define ffft_COMPILER_MSVC    (2)
+
+#if defined (__GNUC__) || defined (__clang__)
+	#define ffft_COMPILER ffft_COMPILER_GCC
+#elif defined (_MSC_VER)
+	#define ffft_COMPILER ffft_COMPILER_MSVC
+#else
+	#define ffft_COMPILER ffft_COMPILER_UNKNOWN
+#endif
+
+
 
 
 }	// namespace ffft
diff --git a/src/fmtc/Bitdepth.cpp b/src/fmtc/Bitdepth.cpp
index 78f284a..2ec282a 100644
--- a/src/fmtc/Bitdepth.cpp
+++ b/src/fmtc/Bitdepth.cpp
@@ -25,11 +25,8 @@ To Public License, Version 2, as published by Sam Hocevar. See
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 #include "fmtc/Bitdepth.h"
+#include "fmtc/fnc.h"
 #include "fmtc/SplFmtUtl.h"
-#if (fstb_ARCHI == fstb_ARCHI_X86)
-	#include "fmtcl/ProxyRwSse2.h"
-#endif
-#include "fmtcl/VoidAndCluster.h"
 #include "fstb/def.h"
 #include "fstb/fnc.h"
 #include "vsutl/CpuOpt.h"
@@ -41,6 +38,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 #include <cassert>
 
 
+
 namespace fmtc
 {
 
@@ -52,7 +50,7 @@ namespace fmtc
 
 Bitdepth::Bitdepth (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCore &core, const ::VSAPI &vsapi)
 :	vsutl::FilterBase (vsapi, "bitdepth", ::fmParallel, 0)
-,	_clip_src_sptr (vsapi.propGetNode (&in, "clip", 0, 0), vsapi)
+,	_clip_src_sptr (vsapi.propGetNode (&in, "clip", 0, nullptr), vsapi)
 ,	_vi_in (*_vsapi.getVideoInfo (_clip_src_sptr.get ()))
 ,	_vi_out (_vi_in)
 #if defined (_MSC_VER)
@@ -63,40 +61,15 @@ Bitdepth::Bitdepth (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo
 #if defined (_MSC_VER)
 #pragma warning (pop)
 #endif
-,	_splfmt_src (fmtcl::SplFmt_ILLEGAL)
-,	_splfmt_dst (fmtcl::SplFmt_ILLEGAL)
-,	_scale_info_arr ()
-,	_upconv_flag (false)
-,	_sse2_flag (false)
-,	_avx2_flag (false)
-,	_full_range_in_flag (false)
-,	_full_range_out_flag (false)
-,	_dmode (get_arg_int (in, out, "dmode", DMode_FILTERLITE))
-,	_ampo (get_arg_flt (in, out, "ampo", 1.0))
-,	_ampn (get_arg_flt (in, out, "ampn", 0.0))
-,	_dyn_flag (get_arg_int (in, out, "dyn", 0) != 0)
-,	_static_noise_flag (get_arg_int (in, out, "staticnoise", 0) != 0)
-,	_pat_size (get_arg_int (in, out, "patsize", PAT_WIDTH))
-,	_ampo_i (0)
-,	_ampn_i (0)
-,	_ampe_i (0)
-,	_ampe_f (0)
-,	_ampn_f (0)
-,	_errdif_flag (false)
-,	_simple_flag (false)
-,	_dither_pat_arr ()
-,	_buf_factory_uptr ()
-,	_process_seg_int_int_ptr (0)
-,	_process_seg_flt_int_ptr (0)
 {
 	fstb::unused (user_data_ptr);
 
 	vsutl::CpuOpt  cpu_opt (*this, in, out);
-	_sse2_flag = cpu_opt.has_sse2 ();
-	_avx2_flag = cpu_opt.has_avx2 ();
+	const bool     sse2_flag = cpu_opt.has_sse2 ();
+	const bool     avx2_flag = cpu_opt.has_avx2 ();
 
 	// Checks the input clip
-	if (_vi_in.format == 0)
+	if (_vi_in.format == nullptr)
 	{
 		throw_inval_arg ("only constant pixel formats are supported.");
 	}
@@ -123,7 +96,8 @@ Bitdepth::Bitdepth (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo
 		}
 	}
 
-	_splfmt_src = SplFmtUtl::conv_from_vsformat (fmt_src);
+	const auto     splfmt_src = SplFmtUtl::conv_from_vsformat (fmt_src);
+	const auto     col_fam    = conv_colfam_to_fmtcl (fmt_src);
 
 	// Destination colorspace
 	const ::VSFormat& fmt_dst = get_output_colorspace (in, out, core, fmt_src);
@@ -155,119 +129,71 @@ Bitdepth::Bitdepth (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo
 
 	// Format is validated
 	_vi_out.format = &fmt_dst;
-	_splfmt_dst = SplFmtUtl::conv_from_vsformat (fmt_dst);
+	const auto     splfmt_dst = SplFmtUtl::conv_from_vsformat (fmt_dst);
+
+	const int      w = _vi_in.width; // May be <= 0
 
 	// Conversion-related things
+	bool           range_def_src_flag = false;
 	_full_range_in_flag  = (get_arg_int (
-		in, out, "fulls" , vsutl::is_full_range_default (fmt_src) ? 1 : 0
+		in, out, "fulls" , vsutl::is_full_range_default (fmt_src) ? 1 : 0,
+		0, &range_def_src_flag
 	) != 0);
+	bool           range_def_dst_flag = false;
 	_full_range_out_flag = (get_arg_int (
-		in, out, "fulld", (_full_range_in_flag) ? 1 : 0
+		in, out, "fulld", (_full_range_in_flag) ? 1 : 0,
+		0, &range_def_dst_flag
 	) != 0);
-
-	// No dithering required
-	if (   (   fmt_src.sampleType == ::stInteger
-	        && (    fmt_dst.sampleType == ::stFloat
-	            || (   fmt_src.bitsPerSample <= fmt_dst.bitsPerSample
-	                && ! _full_range_in_flag
-	                && ! _full_range_out_flag)))
-	    || (   fmt_src.sampleType == ::stFloat
-	        && fmt_dst.sampleType == ::stFloat))
-	{
-		_upconv_flag = true;
-	}
-
-	for (int plane_index = 0; plane_index < fmt_dst.numPlanes; ++plane_index)
-	{
-		SclInf &       scl_inf = _scale_info_arr [plane_index];
-		vsutl::compute_fmt_mac_cst (
-			scl_inf._info._gain,
-			scl_inf._info._add_cst,
-			*_vi_out.format, _full_range_out_flag,
-			fmt_src, _full_range_in_flag,
-			plane_index
-		);
-
-		if (   _upconv_flag
-		    && fmt_src.sampleType == ::stInteger
-		    && fmt_dst.sampleType == ::stFloat)
-		{
-			scl_inf._ptr = &scl_inf._info;
-		}
-		else
-		{
-			scl_inf._ptr = 0;
-		}
-	}
+	_range_def_flag = (range_def_src_flag || range_def_dst_flag);
 
 	// Dithering parameters
-	if (_dmode == DMode_ROUND_ALIAS)
+	fmtcl::Dither::DMode dmode = static_cast <fmtcl::Dither::DMode> (
+		get_arg_int (in, out, "dmode", fmtcl::Dither::DMode_FILTERLITE)
+	);
+	if (dmode == fmtcl::Dither::DMode_ROUND_ALIAS)
 	{
-		_dmode = DMode_ROUND;
+		dmode = fmtcl::Dither::DMode_ROUND;
 	}
-	if (   _dmode <  0
-	    || _dmode >= DMode_NBR_ELT)
+	if (   dmode <  0
+	    || dmode >= fmtcl::Dither::DMode_NBR_ELT)
 	{
 		throw_inval_arg ("invalid dmode.");
 	}
 
-	if (_ampo < 0)
+	const double   ampo = get_arg_flt (in, out, "ampo", 1.0);
+	if (ampo < 0)
 	{
 		throw_inval_arg ("ampo cannot be negative.");
 	}
-	if (_ampn < 0)
+
+	const double   ampn = get_arg_flt (in, out, "ampn", 0.0);
+	if (ampn < 0)
 	{
 		throw_inval_arg ("ampn cannot be negative.");
 	}
 
-	if (_pat_size < 4 || PAT_WIDTH % _pat_size != 0)
+	const int      pat_size =
+		get_arg_int (in, out, "patsize", fmtcl::Dither::_max_pat_width);
+	if (pat_size < 4 || fmtcl::Dither::_max_pat_width % pat_size != 0)
 	{
 		throw_inval_arg ("Wrong value for patsize.");
 	}
 
-	int            w = _vi_in.width;
-	if (_vi_in.width <= 0)
-	{
-		w = MAX_UNK_WIDTH;
-	}
-	_buf_factory_uptr =
-		std::unique_ptr <fmtcl::ErrDifBufFactory> (new fmtcl::ErrDifBufFactory (w));
-	_buf_pool.set_factory (*_buf_factory_uptr);
-
-	build_dither_pat ();
-
-	const int		amp_mul = 1 << AMP_BITS;
-	const int      ampo_i_raw = fstb::round_int (_ampo * amp_mul);
-	const int      ampn_i_raw = fstb::round_int (_ampn * amp_mul);
-	_ampo_i = std::min (ampo_i_raw, 127);
-	_ampn_i = std::min (ampn_i_raw, 127);
-	_ampn_f = float (_ampn * (1.0f / 4294967296.0f));  // / (2 ^ 32)
-
-	_simple_flag = (ampo_i_raw == amp_mul && ampn_i_raw == 0);
-
-	if (_errdif_flag)
-	{
-		_ampe_i = fstb::limit (
-			fstb::round_int ((_ampo - 1) * (128 << AMP_BITS)),
-			0,
-			(2048 << AMP_BITS) - 1
-		);
-		_ampe_f = fstb::limit (float (_ampo) - 1, 0.0f, 8.0f);
-	}
-
-	// Processing function initialisation
-	if (_errdif_flag)
-	{
-		init_fnc_errdiff ();
-	}
-	else if (_dmode == DMode_FAST)
-	{
-		init_fnc_fast ();
-	}
-	else
-	{
-		init_fnc_ordered ();
-	}
+	const bool     dyn_flag = (get_arg_int (in, out, "dyn", 0) != 0);
+	const bool     static_noise_flag = (get_arg_int (in, out, "staticnoise", 0) != 0);
+	const bool     correlated_planes_flag = (get_arg_int (in, out, "corplane", 0) != 0);
+	const bool     tpdfo_flag = (get_arg_int (in, out, "tpdfo", 0) != 0);
+	const bool     tpdfn_flag = (get_arg_int (in, out, "tpdfn", 0) != 0);
+
+	_engine_uptr = std::make_unique <fmtcl::Dither> (
+		splfmt_src, fmt_src.bitsPerSample, _full_range_in_flag,
+		splfmt_dst, fmt_dst.bitsPerSample, _full_range_out_flag,
+		col_fam, fmt_dst.numPlanes, w,
+		dmode, pat_size, ampo, ampn,
+		dyn_flag, static_noise_flag, correlated_planes_flag,
+		tpdfo_flag, tpdfn_flag,
+		sse2_flag, avx2_flag
+	);
 }
 
 
@@ -286,8 +212,8 @@ const ::VSFrameRef *	Bitdepth::get_frame (int n, int activation_reason, void * &
 {
 	assert (n >= 0);
 
-	::VSFrameRef *    dst_ptr = 0;
-	::VSNodeRef &     node = *_clip_src_sptr;
+	::VSFrameRef *    dst_ptr = nullptr;
+	::VSNodeRef &     node    = *_clip_src_sptr;
 
 	if (activation_reason == ::arInitial)
 	{
@@ -302,8 +228,8 @@ const ::VSFrameRef *	Bitdepth::get_frame (int n, int activation_reason, void * &
 		);
 		const ::VSFrameRef & src = *src_sptr;
 
-		const int         w = _vsapi.getFrameWidth (&src, 0);
-		const int         h = _vsapi.getFrameHeight (&src, 0);
+		const int      w = _vsapi.getFrameWidth (&src, 0);
+		const int      h = _vsapi.getFrameHeight (&src, 0);
 		dst_ptr = _vsapi.newVideoFrame (_vi_out.format, w, h, &src, &core);
 
 		const int      ret_val = _plane_processor.process_frame (
@@ -312,11 +238,19 @@ const ::VSFrameRef *	Bitdepth::get_frame (int n, int activation_reason, void * &
 		if (ret_val != 0)
 		{
 			_vsapi.freeFrame (dst_ptr);
-			dst_ptr = 0;
+			dst_ptr = nullptr;
+		}
+
+		// Output frame properties (skipped when processing failed: dst_ptr is nullptr then)
+		if (dst_ptr != nullptr && _range_def_flag)
+		{
+			::VSMap &      dst_prop = *(_vsapi.getFramePropsRW (dst_ptr));
+			const int      cr_val = (_full_range_out_flag) ? 0 : 1;
+			_vsapi.propSetInt (&dst_prop, "_ColorRange", cr_val, ::paReplace);
 		}
 	}
 
-	return (dst_ptr);
+	return dst_ptr;
 }
 
 
@@ -328,7 +262,7 @@ const ::VSFrameRef *	Bitdepth::get_frame (int n, int activation_reason, void * &
 int	Bitdepth::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void *frame_data_ptr, ::VSFrameContext &frame_ctx, ::VSCore &core, const vsutl::NodeRefSPtr &src_node1_sptr, const vsutl::NodeRefSPtr &src_node2_sptr, const vsutl::NodeRefSPtr &src_node3_sptr)
 {
 	fstb::unused (frame_data_ptr, core, src_node2_sptr, src_node3_sptr);
-	assert (src_node1_sptr.get () != 0);
+	assert (src_node1_sptr.get () != nullptr);
 
 	int            ret_val = 0;
 
@@ -353,43 +287,11 @@ int	Bitdepth::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void
 
 		try
 		{
-			if (_upconv_flag)
-			{
-				fmtcl::BitBltConv blitter (_sse2_flag, _avx2_flag);
-				blitter.bitblt (
-					_splfmt_dst, _vi_out.format->bitsPerSample,
-					data_dst_ptr, 0, stride_dst,
-					_splfmt_src, _vi_in.format->bitsPerSample,
-					data_src_ptr, 0, stride_src,
-					w, h,
-					_scale_info_arr [plane_index]._ptr
-				);
-			}
-			else
-			{
-				uint32_t       rnd_state = plane_index << 16;
-				if (_static_noise_flag)
-				{
-					rnd_state += 55555;
-				}
-				else
-				{
-					rnd_state += n;
-				}
-
-				const int      pat_index = (n + plane_index) & (PAT_PERIOD - 1);
-				const PatData& pattern = _dither_pat_arr [pat_index];
-
-				dither_plane (
-					_splfmt_dst, _vi_out.format->bitsPerSample,
-					data_dst_ptr, stride_dst,
-					_splfmt_src, _vi_in.format->bitsPerSample,
-					data_src_ptr, stride_src,
-					w, h,
-					_scale_info_arr [plane_index]._info,
-					pattern, rnd_state
-				);
-			}
+			_engine_uptr->process_plane (
+				data_dst_ptr, stride_dst,
+				data_src_ptr, stride_src,
+				w, h, n, plane_index
+			);
 		}
 
 		catch (std::exception &e)
@@ -404,7 +306,7 @@ int	Bitdepth::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void
 		}
 	}
 
-	return (ret_val);
+	return ret_val;
 }
 
 
@@ -433,7 +335,7 @@ const ::VSFormat &	Bitdepth::get_output_colorspace (const ::VSMap &in, ::VSMap &
 	if (dst_csp != undef)
 	{
 		fmt_dst_ptr = _vsapi.getFormatPreset (dst_csp, &core);
-		if (fmt_dst_ptr == 0)
+		if (fmt_dst_ptr == nullptr)
 		{
 			throw_inval_arg ("unknown output colorspace.");
 		}
@@ -492,9 +394,9 @@ const ::VSFormat &	Bitdepth::get_output_colorspace (const ::VSMap &in, ::VSMap &
 		}
 		catch (...)
 		{
-			fmt_dst_ptr = 0;
+			fmt_dst_ptr = nullptr;
 		}
-		if (fmt_dst_ptr == 0)
+		if (fmt_dst_ptr == nullptr)
 		{
 			throw_rt_err (
 				"couldn\'t get a pixel format identifier for the output clip."
@@ -502,1836 +404,10 @@ const ::VSFormat &	Bitdepth::get_output_colorspace (const ::VSMap &in, ::VSMap &
 		}
 	}
 
-	return (*fmt_dst_ptr);
-}
-
-
-
-void	Bitdepth::build_dither_pat ()
-{
-	_errdif_flag = false;
-
-	switch (_dmode)
-	{
-	case	DMode_BAYER:
-		build_dither_pat_bayer ();
-		break;
-
-	case	DMode_FILTERLITE:
-	case	DMode_STUCKI:
-	case	DMode_ATKINSON:
-	case	DMode_FLOYD:
-	case	DMode_OSTRO:
-		_errdif_flag = true;
-		break;
-
-	case	DMode_ROUND:
-	case	DMode_FAST:
-	default:
-		build_dither_pat_round ();
-		break;
-
-	case	DMode_VOIDCLUST:
-		build_dither_pat_void_and_cluster (_pat_size);
-		break;
-	}
-}
-
-
-
-void	Bitdepth::build_dither_pat_round ()
-{
-	PatData &      pat_data = _dither_pat_arr [0];
-	for (int y = 0; y < PAT_WIDTH; ++y)
-	{
-		for (int x = 0; x < PAT_WIDTH; ++x)
-		{
-			pat_data [y] [x] = 0;
-		}
-	}
-
-	build_next_dither_pat ();
-}
-
-
-
-void	Bitdepth::build_dither_pat_bayer ()
-{
-	assert (fstb::is_pow_2 (int (PAT_WIDTH)));
-
-	PatData &      pat_data = _dither_pat_arr [0];
-	for (int y = 0; y < PAT_WIDTH; ++y)
-	{
-		for (int x = 0; x < PAT_WIDTH; ++x)
-		{
-			pat_data [y] [x] = -128;
-		}
-	}
-
-	for (int dith_size = 2; dith_size <= PAT_WIDTH; dith_size <<= 1)
-	{
-		for (int y = 0; y < PAT_WIDTH; y += 2)
-		{
-			for (int x = 0; x < PAT_WIDTH; x += 2)
-			{
-				const int      xx = (x >> 1) + (PAT_WIDTH >> 1);
-				const int      yy = (y >> 1) + (PAT_WIDTH >> 1);
-				const int      val = (pat_data [yy] [xx] + 128) >> 2;
-				pat_data [y    ] [x    ] = int16_t (val +   0-128);
-				pat_data [y    ] [x + 1] = int16_t (val + 128-128);
-				pat_data [y + 1] [x    ] = int16_t (val + 192-128);
-				pat_data [y + 1] [x + 1] = int16_t (val +  64-128);
-			}
-		}
-	}
-
-	build_next_dither_pat ();
-}
-
-
-
-void	Bitdepth::build_dither_pat_void_and_cluster (int w)
-{
-	assert (PAT_WIDTH % w == 0);
-	fmtcl::VoidAndCluster   vc_gen;
-	fmtcl::MatrixWrap <uint16_t> pat_raw (w, w);
-	vc_gen.create_matrix (pat_raw);
-
-	PatData &      pat_data = _dither_pat_arr [0];
-	const int      area = w * w;
-	for (int y = 0; y < PAT_WIDTH; ++y)
-	{
-		for (int x = 0; x < PAT_WIDTH; ++x)
-		{
-			pat_data [y] [x] = int16_t (pat_raw (x, y) * 256 / area - 128);
-		}
-	}
-
-	build_next_dither_pat ();
-}
-
-
-
-void	Bitdepth::build_next_dither_pat ()
-{
-	for (int seq = 1; seq < PAT_PERIOD; ++seq)
-	{
-		const int      angle = (_dyn_flag) ? seq & 3 : 0;
-		copy_dither_pat_rotate (
-			_dither_pat_arr [seq],
-			_dither_pat_arr [0],
-			angle
-		);
-	}
-}
-
-
-
-void	Bitdepth::copy_dither_pat_rotate (PatData &dst, const PatData &src, int angle)
-{
-	assert (angle >= 0);
-	assert (angle < 4);
-
-	static const int  sin_arr [4] = { 0, 1, 0, -1 };
-	const int      s = sin_arr [ angle         ];
-	const int      c = sin_arr [(angle + 1) & 3];
-
-	assert (fstb::is_pow_2 (int (PAT_WIDTH)));
-	const int		mask = PAT_WIDTH - 1;
-
-	for (int y = 0; y < PAT_WIDTH; ++y)
-	{
-		for (int x = 0; x < PAT_WIDTH; ++x)
-		{
-			const int		xs = (x * c - y * s) & mask;
-			const int		ys = (x * s + y * c) & mask;
-
-			dst [y] [x] = src [ys] [xs];
-		}
-	}
-}
-
-
-
-// All possible combinations
-#define fmtc_Bitdepth_SPAN_INT(SETP, NAMP, NAMF, simple_flag, dst_res, dst_fmt, src_res, src_fmt) \
-	switch (  ((simple_flag) << 30) \
-	        + ((dst_res) << 24) + ((dst_fmt) << 16) \
-	        + ((src_res) <<  8) +  (src_fmt)) \
-	{ \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t,  9) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 10) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 11) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 12) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 10) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 11) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 12) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 11) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 12) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	}
-
-// All possible combinations using float as intermediary data
-#define fmtc_Bitdepth_SPAN_FLT(SETP, NAMP, NAMF, simple_flag, dst_res, dst_fmt, src_res, src_fmt) \
-	switch (  ((simple_flag) << 30) \
-	        + ((dst_res) << 24) + ((dst_fmt) << 16) \
-	        + ((src_res) <<  8) +  (src_fmt)) \
-	{ \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT8 , uint8_t ,  8) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t,  9) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 10) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 11) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 12) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT8 , uint8_t ,  8, fmtcl::SplFmt_FLOAT, float   , 32) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT8 , uint8_t ,  8) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t,  9) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 10) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 11) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 12) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t,  9, fmtcl::SplFmt_FLOAT, float   , 32) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT8 , uint8_t ,  8) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t,  9) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 10) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 11) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 12) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 10, fmtcl::SplFmt_FLOAT, float   , 32) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT8 , uint8_t ,  8) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t,  9) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 10) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 11) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 12) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 12, fmtcl::SplFmt_FLOAT, float   , 32) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT8 , uint8_t ,  8) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t,  9) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 10) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 11) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 12) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 14) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_INT16, uint16_t, 16) \
-	SETP (NAMP, NAMF, fmtcl::SplFmt_INT16, uint16_t, 16, fmtcl::SplFmt_FLOAT, float   , 32) \
-	}
-
-#define fmtc_Bitdepth_SET_FNC_INT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
-	case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_int_int_ptr = \
-			&ThisType::process_seg_##NAMF##_int_int_cpp <false, DT, DP, ST, SP>; \
-		break; \
-	case (true  << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_int_int_ptr = \
-			&ThisType::process_seg_##NAMF##_int_int_cpp <true, DT, DP, ST, SP>; \
-		break;
-
-#define fmtc_Bitdepth_SET_FNC_FLT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
-	case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_flt_int_ptr = \
-			&ThisType::process_seg_##NAMF##_flt_int_cpp <false, DT, DP, ST>; \
-		break; \
-	case (true  << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_flt_int_ptr = \
-			&ThisType::process_seg_##NAMF##_flt_int_cpp <true, DT, DP, ST>; \
-		break;
-
-#define fmtc_Bitdepth_SET_FNC_INT_SSE2(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
-	case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_int_int_ptr = \
-			&ThisType::process_seg_##NAMF##_int_int_sse2 <false, DF, DP, SF, SP>; \
-		break; \
-	case (true  << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_int_int_ptr = \
-			&ThisType::process_seg_##NAMF##_int_int_sse2 <true, DF, DP, SF, SP>; \
-		break;
-
-#define fmtc_Bitdepth_SET_FNC_FLT_SSE2(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
-	case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_flt_int_ptr = \
-			&ThisType::process_seg_##NAMF##_flt_int_sse2 <false, DF, DP, SF>; \
-		break; \
-	case (true  << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_flt_int_ptr = \
-			&ThisType::process_seg_##NAMF##_flt_int_sse2 <true, DF, DP, SF>; \
-		break;
-
-#define fmtc_Bitdepth_SET_FNC_ERRDIF_INT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
-	case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_int_int_ptr = \
-			&ThisType::process_seg_errdif_int_int_cpp <false, Diffuse##NAMF <DT, DP, ST, SP> >; \
-		break; \
-	case (true  << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_int_int_ptr = \
-			&ThisType::process_seg_errdif_int_int_cpp <true, Diffuse##NAMF <DT, DP, ST, SP> >; \
-		break;
-
-#define fmtc_Bitdepth_SET_FNC_ERRDIF_FLT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
-	case (false << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_flt_int_ptr = \
-			&ThisType::process_seg_errdif_flt_int_cpp <false, Diffuse##NAMF <DT, DP, ST, SP> >; \
-		break; \
-	case (true  << 30) + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
-		_process_seg_flt_int_ptr = \
-			&ThisType::process_seg_errdif_flt_int_cpp <true, Diffuse##NAMF <DT, DP, ST, SP> >; \
-		break;
-
-
-
-void	Bitdepth::init_fnc_fast ()
-{
-	const fmtcl::SplFmt  dst_fmt = _splfmt_dst;
-	const int            dst_res = _vi_out.format->bitsPerSample;
-	const fmtcl::SplFmt  src_fmt = _splfmt_src;
-	const int            src_res = _vi_in.format->bitsPerSample;
-
-	fmtc_Bitdepth_SPAN_INT (
-		fmtc_Bitdepth_SET_FNC_INT, fast, fast, false,
-		dst_res, dst_fmt, src_res, src_fmt
-	)
-	fmtc_Bitdepth_SPAN_FLT (
-		fmtc_Bitdepth_SET_FNC_FLT, fast, fast, false,
-		dst_res, dst_fmt, src_res, src_fmt
-	)
-
-#if (fstb_ARCHI == fstb_ARCHI_X86)
-	if (_sse2_flag)
-	{
-		fmtc_Bitdepth_SPAN_INT (
-			fmtc_Bitdepth_SET_FNC_INT_SSE2, fast, fast, false,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		fmtc_Bitdepth_SPAN_FLT (
-			fmtc_Bitdepth_SET_FNC_FLT_SSE2, fast, fast, false,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-	}
-#endif
-}
-
-
-
-void	Bitdepth::init_fnc_ordered ()
-{
-	assert (! _errdif_flag);
-
-	const fmtcl::SplFmt  dst_fmt = _splfmt_dst;
-	const int            dst_res = _vi_out.format->bitsPerSample;
-	const fmtcl::SplFmt  src_fmt = _splfmt_src;
-	const int            src_res = _vi_in.format->bitsPerSample;
-
-	fmtc_Bitdepth_SPAN_INT (
-		fmtc_Bitdepth_SET_FNC_INT, ord, ord, _simple_flag,
-		dst_res, dst_fmt, src_res, src_fmt
-	)
-	fmtc_Bitdepth_SPAN_FLT (
-		fmtc_Bitdepth_SET_FNC_FLT, ord, ord, _simple_flag,
-		dst_res, dst_fmt, src_res, src_fmt
-	)
-
-#if (fstb_ARCHI == fstb_ARCHI_X86)
-	if (_sse2_flag)
-	{
-		fmtc_Bitdepth_SPAN_INT (
-			fmtc_Bitdepth_SET_FNC_INT_SSE2, ord, ord, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		fmtc_Bitdepth_SPAN_FLT (
-			fmtc_Bitdepth_SET_FNC_FLT_SSE2, ord, ord, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-	}
-#endif
-}
-
-
-
-void	Bitdepth::init_fnc_errdiff ()
-{
-	assert (_errdif_flag);
-
-	const fmtcl::SplFmt  dst_fmt = _splfmt_dst;
-	const int            dst_res = _vi_out.format->bitsPerSample;
-	const fmtcl::SplFmt  src_fmt = _splfmt_src;
-	const int            src_res = _vi_in.format->bitsPerSample;
-
-	switch (_dmode)
-	{
-	case	DMode_FILTERLITE:
-		fmtc_Bitdepth_SPAN_INT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, FilterLite, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		fmtc_Bitdepth_SPAN_FLT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, FilterLite, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		break;
-
-	case	DMode_STUCKI:
-		fmtc_Bitdepth_SPAN_INT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, Stucki, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		fmtc_Bitdepth_SPAN_FLT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, Stucki, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		break;
-
-	case	DMode_ATKINSON:
-		fmtc_Bitdepth_SPAN_INT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, Atkinson, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		fmtc_Bitdepth_SPAN_FLT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, Atkinson, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		break;
-
-	case	DMode_FLOYD:
-		fmtc_Bitdepth_SPAN_INT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, FloydSteinberg, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		fmtc_Bitdepth_SPAN_FLT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, FloydSteinberg, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		break;
-
-	case	DMode_OSTRO:
-		fmtc_Bitdepth_SPAN_INT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_INT, errdif, Ostromoukhov, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		fmtc_Bitdepth_SPAN_FLT (
-			fmtc_Bitdepth_SET_FNC_ERRDIF_FLT, errdif, Ostromoukhov, _simple_flag,
-			dst_res, dst_fmt, src_res, src_fmt
-		)
-		break;
-
-	default:
-		break;
-	}
-
-}
-
-
-
-#undef fmtc_Bitdepth_SET_FNC_INT
-#undef fmtc_Bitdepth_SET_FNC_FLT
-#undef fmtc_Bitdepth_SPAN_INT
-#undef fmtc_Bitdepth_SPAN_FLT
-
-
-
-void	Bitdepth::dither_plane (fmtcl::SplFmt dst_fmt, int dst_res, uint8_t *dst_ptr, int dst_stride, fmtcl::SplFmt src_fmt, int src_res, const uint8_t *src_ptr, int src_stride, int w, int h, const fmtcl::BitBltConv::ScaleInfo &scale_info, const PatData &pattern, uint32_t rnd_state)
-{
-	fstb::unused (dst_fmt);
-	assert (dst_fmt >= 0);
-	assert (dst_fmt < fmtcl::SplFmt_NBR_ELT);
-	assert (dst_res >= 8);
-	assert (dst_ptr != 0);
-	assert (src_fmt >= 0);
-	assert (src_fmt < fmtcl::SplFmt_NBR_ELT);
-	assert (src_res >= 8);
-	assert (src_ptr != 0);
-	assert (w > 0);
-	assert (h > 0);
-
-	SegContext     ctx;
-	ctx._rnd_state      = rnd_state;
-	ctx._scale_info_ptr = &scale_info;
-
-	const bool     sc_flag =
-		(   src_fmt == fmtcl::SplFmt_FLOAT
-		 || ! fstb::is_eq (scale_info._gain * ((uint64_t (1)) << (src_res - dst_res)), 1.0, 1e-6)
-		 || ! fstb::is_null (scale_info._add_cst, 1e-6));
-
-	void (ThisType::* process_ptr) (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const =
-		  (sc_flag)
-		? _process_seg_flt_int_ptr
-		: _process_seg_int_int_ptr;
-	assert (process_ptr != 0);
-
-	fmtcl::ErrDifBuf *   ed_buf_ptr = 0;
-	if (_errdif_flag)
-	{
-		ed_buf_ptr = _buf_pool.take_obj ();
-		if (ed_buf_ptr == 0)
-		{
-			throw_rt_err ("cannot allocate memory for temporary buffer.");
-		}
-		ed_buf_ptr->clear ((sc_flag) ? sizeof (float) : sizeof (int16_t));
-	}
-
-	switch (_dmode)
-	{
-	case	DMode_BAYER:
-	case	DMode_ROUND:
-	case	DMode_VOIDCLUST:
-		ctx._pattern_ptr = &pattern;
-		break;
-
-	case	DMode_FAST:
-		// Nothing
-		break;
-
-	case	DMode_FILTERLITE:
-	case	DMode_STUCKI:
-	case	DMode_ATKINSON:
-	case	DMode_FLOYD:
-	case	DMode_OSTRO:
-		ctx._ed_buf_ptr = ed_buf_ptr;
-		break;
-
-	default:
-		assert (false);
-		throw_logic_err ("unexpected dithering algorithm");
-		break;
-	}
-
-	for (int y = 0; y < h; ++y)
-	{
-		ctx._y = y;
-
-		(this->*process_ptr) (dst_ptr, src_ptr, w, ctx);
-
-		src_ptr += src_stride;
-		dst_ptr += dst_stride;
-	}
-
-	if (ed_buf_ptr != 0)
-	{
-		_buf_pool.return_obj (*ed_buf_ptr);
-		ed_buf_ptr = 0;
-	}
-}
-
-
-
-template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-void	Bitdepth::process_seg_fast_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &/*ctx*/) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-
-	enum {         DIF_BITS = SRC_BITS - DST_BITS };
-	static_assert (DIF_BITS >= 0, "This function cannot increase bidepth.");
-
-	const SRC_TYPE *  src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
-	DST_TYPE *        dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
-
-	for (int pos = 0; pos < w; ++pos)
-	{
-		const int      s   = src_n_ptr [pos];
-		const int      pix = s >> DIF_BITS;
-		dst_n_ptr [pos] = static_cast <DST_TYPE> (pix);
-	}
-}
-
-
-
-template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
-void	Bitdepth::process_seg_fast_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-	assert (ctx._scale_info_ptr != 0);
-
-	const SRC_TYPE *  src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
-	DST_TYPE *        dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
-
-	const float    mul  = float (ctx._scale_info_ptr->_gain);
-	const float    add  = float (ctx._scale_info_ptr->_add_cst);
-	const int      vmax = (1 << DST_BITS) - 1;
-
-	for (int pos = 0; pos < w; ++pos)
-	{
-		float          s = float (src_n_ptr [pos]);
-		s = s * mul + add;
-		const int      quant = fstb::conv_int_fast (s);
-		const int      pix   = fstb::limit (quant, 0, vmax);
-		dst_n_ptr [pos] = static_cast <DST_TYPE> (pix);
-	}
-}
-
-
-
-#if (fstb_ARCHI == fstb_ARCHI_X86)
-
-
-
-template <bool S_FLAG, fmtcl::SplFmt DST_FMT, int DST_BITS, fmtcl::SplFmt SRC_FMT, int SRC_BITS>
-void	Bitdepth::process_seg_fast_int_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &/*ctx*/) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-
-	enum {         DIF_BITS = SRC_BITS - DST_BITS };
-	static_assert (DIF_BITS >= 0, "This function cannot increase bidepth.");
-
-	typedef typename  fmtcl::ProxyRwSse2 <SRC_FMT>::PtrConst::Type SrcPtr;
-	typedef typename  fmtcl::ProxyRwSse2 <DST_FMT>::Ptr::Type      DstPtr;
-	SrcPtr         src_n_ptr = reinterpret_cast <SrcPtr> (src_ptr);
-	DstPtr         dst_n_ptr = reinterpret_cast <DstPtr> (dst_ptr);
-	const __m128i  zero      = _mm_setzero_si128 ();
-	const __m128i  mask_lsb  = _mm_set1_epi16 (0x00FF);
-
-	for (int pos = 0; pos < w; pos += 8)
-	{
-		const __m128i  s   =
-			fmtcl::ProxyRwSse2 <SRC_FMT>::read_i16 (src_n_ptr + pos, zero);
-		const __m128i  pix = _mm_srli_epi16 (s, DIF_BITS);
-		fmtcl::ProxyRwSse2 <DST_FMT>::write_i16 (dst_n_ptr + pos, pix, mask_lsb);
-	}
-}
-
-
-
-template <bool S_FLAG, fmtcl::SplFmt DST_FMT, int DST_BITS, fmtcl::SplFmt SRC_FMT>
-void	Bitdepth::process_seg_fast_flt_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-	assert (ctx._scale_info_ptr != 0);
-
-	typedef typename  fmtcl::ProxyRwSse2 <SRC_FMT>::PtrConst::Type  SrcPtr;
-	typedef typename  fmtcl::ProxyRwSse2 <DST_FMT>::Ptr::Type       DstPtr;
-	SrcPtr         src_n_ptr = reinterpret_cast <SrcPtr> (src_ptr);
-	DstPtr         dst_n_ptr = reinterpret_cast <DstPtr> (dst_ptr);
-
-	const __m128   mul      = _mm_set1_ps (float (ctx._scale_info_ptr->_gain));
-	const __m128   add      = _mm_set1_ps (float (ctx._scale_info_ptr->_add_cst));
-	const __m128   vmax     = _mm_set1_ps (float ((1 << DST_BITS) - 1));
-	const __m128   zero_f   = _mm_setzero_ps ();
-	const __m128i  zero_i   = _mm_setzero_si128 ();
-	const __m128i  mask_lsb = _mm_set1_epi16 (0x00FF);
-	const __m128i  sign_bit = _mm_set1_epi16 (-0x8000);
-	const __m128   offset   = _mm_set1_ps (-32768);
-
-	for (int pos = 0; pos < w; pos += 8)
-	{
-		__m128         s0;
-		__m128         s1;
-		fmtcl::ProxyRwSse2 <SRC_FMT>::read_flt (
-			src_n_ptr + pos, s0, s1, zero_i
-		);
-		s0 = _mm_add_ps (_mm_mul_ps (s0, mul), add);
-		s1 = _mm_add_ps (_mm_mul_ps (s1, mul), add);
-		s0 = _mm_max_ps (_mm_min_ps (s0, vmax), zero_f);
-		s1 = _mm_max_ps (_mm_min_ps (s1, vmax), zero_f);
-		fmtcl::ProxyRwSse2 <DST_FMT>::write_flt (
-			dst_n_ptr + pos, s0, s1, mask_lsb, sign_bit, offset
-		);
-	}
-}
-
-
-
-#endif
-
-
-
-template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-void	Bitdepth::process_seg_ord_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-
-	enum {         DIF_BITS = SRC_BITS - DST_BITS };
-	static_assert (DIF_BITS >= 1, "This function must reduce bidepth.");
-
-	const PatRow & pattern   = ctx.extract_pattern_row ();
-	uint32_t &     rnd_state = ctx._rnd_state;
-
-	const SRC_TYPE *  src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
-	DST_TYPE *        dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
-
-	const int      rcst     = 1 << (DIF_BITS - 1);
-	const int      vmax     = (1 << DST_BITS) - 1;
-
-	const int      ao = _ampo_i;				// s8
-	const int      an = _ampn_i;				// s8
-
-	for (int pos = 0; pos < w; ++pos)
-	{
-		if (! S_FLAG)
-		{
-			generate_rnd (rnd_state);
-		}
-
-		const int      s = src_n_ptr [pos];
-
-		const int      dith_o = pattern [pos & (PAT_WIDTH - 1)];	// s8
-		int            dither;
-		if (S_FLAG)
-		{
-			enum {         DIT_SHFT = 8 - DIF_BITS };
-			dither = fstb::sshift_r <int, DIT_SHFT> (dith_o);
-		}
-		else
-		{
-			const int      dith_n = int8_t (rnd_state >> 24);			// s8
-
-			enum {         DIT_SHFT = AMP_BITS + 8 - DIF_BITS };
-			dither = fstb::sshift_r <int, DIT_SHFT> (dith_o * ao + dith_n * an);	// s16 = s8 * s8 // s16 = s16 >> cst
-		}
-		const int      sum    = s + dither;	// s16+
-		const int      quant  = (sum + rcst) >> DIF_BITS;	// s16
-
-		const int      pix = fstb::limit (quant, 0, vmax);
-		dst_n_ptr [pos] = static_cast <DST_TYPE> (pix);
-	}
-
-	if (! S_FLAG)
-	{
-		generate_rnd_eol (rnd_state);
-	}
+	return *fmt_dst_ptr;
 }
 
 
-template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
-void	Bitdepth::process_seg_ord_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-
-	const SRC_TYPE *  src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
-	DST_TYPE *        dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
-
-	const PatRow & pattern   = ctx.extract_pattern_row ();
-	uint32_t &     rnd_state = ctx._rnd_state;
-
-	const int      ao = _ampo_i;				// s8
-	const int      an = _ampn_i;				// s8
-
-	const float    mul  = float (ctx._scale_info_ptr->_gain);
-	const float    add  = float (ctx._scale_info_ptr->_add_cst);
-	const float    qt   = 1.0f / (1 << ((S_FLAG ? 0 : AMP_BITS) + 8));
-	const int      vmax = (1 << DST_BITS) - 1;
-
-	for (int pos = 0; pos < w; ++pos)
-	{
-		if (! S_FLAG)
-		{
-			generate_rnd (rnd_state);
-		}
-
-		float          s = float (src_n_ptr [pos]);
-		s = s * mul + add;
-
-		const int      dith_o = pattern [pos & (PAT_WIDTH - 1)];	// s8
-		float          dither;
-		if (S_FLAG)
-		{
-			dither = dith_o * qt;
-		}
-		else
-		{
-			const int      dith_n = int8_t (rnd_state >> 24);			// s8
-			dither = (dith_o * ao + dith_n * an) * qt;
-		}
-		const float    sum    = s + dither;
-		const int      quant  = fstb::round_int (sum);
-
-		const int      pix = fstb::limit (quant, 0, vmax);
-		dst_n_ptr [pos] = static_cast <DST_TYPE> (pix);
-	}
-
-	if (! S_FLAG)
-	{
-		generate_rnd_eol (rnd_state);
-	}
-}
-
-
-
-#if (fstb_ARCHI == fstb_ARCHI_X86)
-
-
-
-template <bool S_FLAG, fmtcl::SplFmt DST_FMT, int DST_BITS, fmtcl::SplFmt SRC_FMT, int SRC_BITS>
-void	Bitdepth::process_seg_ord_int_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-
-	enum {         DIF_BITS = SRC_BITS - DST_BITS };
-	static_assert (DIF_BITS >= 0, "This function cannot increase bidepth.");
-
-	const PatRow & pattern   = ctx.extract_pattern_row ();
-	uint32_t &     rnd_state = ctx._rnd_state;
-
-	typedef typename  fmtcl::ProxyRwSse2 <SRC_FMT>::PtrConst::Type SrcPtr;
-	typedef typename  fmtcl::ProxyRwSse2 <DST_FMT>::Ptr::Type      DstPtr;
-	SrcPtr         src_n_ptr = reinterpret_cast <SrcPtr> (src_ptr);
-	DstPtr         dst_n_ptr = reinterpret_cast <DstPtr> (dst_ptr);
-	const __m128i  zero      = _mm_setzero_si128 ();
-	const __m128i  mask_lsb  = _mm_set1_epi16 (0x00FF);
-	const __m128i  c128_16   = _mm_set1_epi16 (0x80);
-	const __m128i  sign_bit  = _mm_set1_epi16 (-0x8000);
-	const __m128i  rcst      = _mm_set1_epi16 (1 << (DIF_BITS - 1));
-	const __m128i  vmax      = _mm_set1_epi16 ((1 << DST_BITS) - 1);
-
-	const __m128i  ampo_i    = _mm_set1_epi16 (int16_t (_ampo_i)); // 8 ?16 [0 ; 255]
-	const __m128i  ampn_i    = _mm_set1_epi16 (int16_t (_ampn_i)); // 8 ?16 [0 ; 255]
-
-	for (int pos = 0; pos < w; pos += 8)
-	{
-		const __m128i  s =	// 8 u16
-			fmtcl::ProxyRwSse2 <SRC_FMT>::read_i16 (src_n_ptr + pos, zero);
-
-		__m128i        dith_o = 
-			_mm_load_si128 (reinterpret_cast <const __m128i *> (
-				&pattern [pos & (PAT_WIDTH - 1)]
-			)
-		);
-
-		__m128i        dither;
-		if (S_FLAG)
-		{
-			enum {         DIT_SHFT = 8 - DIF_BITS };
-			dither = _mm_srai_epi16 (dith_o, DIT_SHFT);
-		}
-		else
-		{
-			// Random generation
-			generate_rnd (rnd_state);
-			const uint32_t rnd_03  = rnd_state;
-			generate_rnd (rnd_state);
-			const uint32_t rnd_47  = rnd_state;
-			const __m128i  rnd_val = _mm_set_epi32 (0, 0, rnd_47, rnd_03);
-
-			__m128i			dith_n =
-				_mm_unpacklo_epi8 (rnd_val, zero);           // 8 ?16 [0 ; 255]
-			dith_n = _mm_sub_epi16 (dith_n, c128_16);       // 8 s16 [-128 ; 127]
-
-			dith_o = _mm_mullo_epi16 (dith_o, ampo_i);      // 8 s16 (full range)
-			dith_n = _mm_mullo_epi16 (dith_n, ampn_i);      // 8 s16 (full range)
-			dither = _mm_adds_epi16 (dith_o, dith_n);       // 8 s16 = s8 * s8
-
-			enum {         DIT_SHFT = AMP_BITS + 8 - DIF_BITS };
-			dither = _mm_srai_epi16 (dither, DIT_SHFT);     // 8 s16 = s16 >> cst
-		}
-
-		const __m128i  dith_rcst = _mm_adds_epi16 (dither, rcst);
-
-		__m128i        quant;
-		if (S_FLAG && SRC_BITS < 16)
-		{
-			__m128i        sum = _mm_adds_epi16 (s, dith_rcst);
-			quant = _mm_srai_epi16 (sum, DIF_BITS);
-		}
-		else
-		{
-			__m128i        sum  = _mm_xor_si128 (s, sign_bit); // 8 s16
-			sum   = _mm_adds_epi16 (sum, dith_rcst);
-			sum   = _mm_xor_si128 (sum, sign_bit);          // 8 u16
-			quant = _mm_srli_epi16 (sum, DIF_BITS);
-		}
-
-		__m128i        pix = quant;
-		if (SRC_BITS < 16)
-		{
-			pix = _mm_max_epi16 (pix, zero);
-			pix = _mm_min_epi16 (pix, vmax);
-		}
-
-		fmtcl::ProxyRwSse2 <DST_FMT>::write_i16 (dst_n_ptr + pos, pix, mask_lsb);
-	}
-
-	if (! S_FLAG)
-	{
-		generate_rnd_eol (rnd_state);
-	}
-}
-
-
-
-template <bool S_FLAG, fmtcl::SplFmt DST_FMT, int DST_BITS, fmtcl::SplFmt SRC_FMT>
-void	Bitdepth::process_seg_ord_flt_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-	assert (((_mm_getcsr () >> 13) & 3) == 0);   // 00 = Round to nearest (even)
-
-	const PatRow & pattern   = ctx.extract_pattern_row ();
-	uint32_t &     rnd_state = ctx._rnd_state;
-
-	const float    qt_cst    = 1.0f / (
-		65536.0f * float (1 << ((S_FLAG ? 0 : AMP_BITS) + 8))
-	);
-
-	typedef typename  fmtcl::ProxyRwSse2 <SRC_FMT>::PtrConst::Type SrcPtr;
-	typedef typename  fmtcl::ProxyRwSse2 <DST_FMT>::Ptr::Type      DstPtr;
-	SrcPtr         src_n_ptr = reinterpret_cast <SrcPtr> (src_ptr);
-	DstPtr         dst_n_ptr = reinterpret_cast <DstPtr> (dst_ptr);
-	const __m128   zero_f    = _mm_setzero_ps ();
-	const __m128i  zero_i    = _mm_setzero_si128 ();
-	const __m128i  c128_16   = _mm_set1_epi16 (0x80);
-	const __m128   mul       = _mm_set1_ps (float (ctx._scale_info_ptr->_gain));
-	const __m128   add       = _mm_set1_ps (float (ctx._scale_info_ptr->_add_cst));
-	const __m128   qt        = _mm_set1_ps (qt_cst);
-	const __m128   vmax      = _mm_set1_ps ((1 << DST_BITS) - 1);
-	const __m128   offset    = _mm_set1_ps (-32768);
-	const __m128i  mask_lsb  = _mm_set1_epi16 (0x00FF);
-	const __m128i  sign_bit  = _mm_set1_epi16 (-0x8000);
-
-	const __m128i  ampo_i    = _mm_set1_epi16 (int16_t (_ampo_i)); // 8 ?16 [0 ; 255]
-	const __m128i  ampn_i    = _mm_set1_epi16 (int16_t (_ampn_i)); // 8 ?16 [0 ; 255]
-
-	for (int pos = 0; pos < w; pos += 8)
-	{
-		__m128         s0;
-		__m128         s1;
-		fmtcl::ProxyRwSse2 <SRC_FMT>::read_flt (
-			src_n_ptr + pos, s0, s1, zero_i
-		);
-		s0 = _mm_add_ps (_mm_mul_ps (s0, mul), add);
-		s1 = _mm_add_ps (_mm_mul_ps (s1, mul), add);
-
-		__m128i        dith_o = 
-			_mm_load_si128 (reinterpret_cast <const __m128i *> (
-				&pattern [pos & (PAT_WIDTH - 1)]
-			)
-		);
-
-		__m128i        dither;
-		if (S_FLAG)
-		{
-			dither = dith_o;
-		}
-		else
-		{
-			// Random generation
-			generate_rnd (rnd_state);
-			const uint32_t rnd_03  = rnd_state;
-			generate_rnd (rnd_state);
-			const uint32_t rnd_47  = rnd_state;
-			const __m128i  rnd_val = _mm_set_epi32 (0, 0, rnd_47, rnd_03);
-
-			__m128i			dith_n =
-				_mm_unpacklo_epi8 (rnd_val, zero_i);         // 8 ?16 [0 ; 255]
-			dith_n = _mm_sub_epi16 (dith_n, c128_16);       // 8 s16 [-128 ; 127]
-
-			dith_o = _mm_mullo_epi16 (dith_o, ampo_i);      // 8 s16 (full range)
-			dith_n = _mm_mullo_epi16 (dith_n, ampn_i);      // 8 s16 (full range)
-			dither = _mm_adds_epi16 (dith_o, dith_n);       // 8 s16 = s8 * s8
-		}
-
-		__m128i        dither_03i = _mm_unpacklo_epi16 (zero_i, dither);  // 4 s32 << 16
-		__m128i        dither_47i = _mm_unpackhi_epi16 (zero_i, dither);  // 4 s32 << 16
-		__m128         dither_03  = _mm_cvtepi32_ps (dither_03i);
-		__m128         dither_47  = _mm_cvtepi32_ps (dither_47i);
-		dither_03 = _mm_mul_ps (dither_03, qt);
-		dither_47 = _mm_mul_ps (dither_47, qt);
-
-		s0 = _mm_add_ps (s0, dither_03);
-		s1 = _mm_add_ps (s1, dither_47);
-
-		s0 = _mm_max_ps (_mm_min_ps (s0, vmax), zero_f);
-		s1 = _mm_max_ps (_mm_min_ps (s1, vmax), zero_f);
-
-		fmtcl::ProxyRwSse2 <DST_FMT>::write_flt (
-			dst_n_ptr + pos, s0, s1, mask_lsb, sign_bit, offset
-		);
-	}
-
-	if (! S_FLAG)
-	{
-		generate_rnd_eol (rnd_state);
-	}
-}
-
-
-
-#endif   // fstb_ARCHI_X86
-
-
-
-template <bool S_FLAG, class ERRDIF>
-void	Bitdepth::process_seg_errdif_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-	assert (ctx._y >= 0);
-
-	typedef typename ERRDIF::SrcType SRC_TYPE;
-	typedef typename ERRDIF::DstType DST_TYPE;
-	enum { SRC_BITS = ERRDIF::SRC_BITS };
-	enum { DST_BITS = ERRDIF::DST_BITS };
-
-	uint32_t &           rnd_state =  ctx._rnd_state;
-	fmtcl::ErrDifBuf &   ed_buf    = *ctx._ed_buf_ptr;
-
-	const SRC_TYPE *  src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
-	DST_TYPE *        dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
-
-	const int      ae = _ampe_i;
-
-	// Makes e1 point on the default buffer line for single-line
-	// error diffusor because we use it in prepare_next_line()
-	int            e0 = 0;
-	int            e1 = 0;
-	if (ERRDIF::NBR_ERR_LINES == 2)
-	{
-		e0 =      ctx._y & 1 ;
-		e1 = 1 - (ctx._y & 1);
-	}
-	int16_t *      err0_ptr = ed_buf.get_buf <int16_t> (e0);
-	int16_t *      err1_ptr = ed_buf.get_buf <int16_t> (e1);
-
-	int            err_nxt0 = ed_buf.use_mem <int16_t> (0);
-	int            err_nxt1 = ed_buf.use_mem <int16_t> (1);
-
-	// Forward
-	if ((ctx._y & 1) == 0)
-	{
-		for (int x = 0; x < w; ++x)
-		{
-			int            err = err_nxt0;
-			SRC_TYPE       src_raw;
-
-			quantize_pix_int <S_FLAG, DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS> (
-				dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, _ampn_i
-			);
-			ERRDIF::template diffuse <1> (
-				err, err_nxt0, err_nxt1,
-				err0_ptr + x, err1_ptr + x, src_raw
-			);
-		}
-		ERRDIF::prepare_next_line (err1_ptr + w);
-	}
-
-	// Backward
-	else
-	{
-		for (int x = w - 1; x >= 0; --x)
-		{
-			int            err = err_nxt0;
-			SRC_TYPE       src_raw;
-
-			quantize_pix_int <S_FLAG, DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS> (
-				dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, _ampn_i
-			);
-			ERRDIF::template diffuse <-1> (
-				err, err_nxt0, err_nxt1,
-				err0_ptr + x, err1_ptr + x, src_raw
-			);
-		}
-		ERRDIF::prepare_next_line (err1_ptr - 1);
-	}
-
-	ed_buf.use_mem <int16_t> (0) = int16_t (err_nxt0);
-	ed_buf.use_mem <int16_t> (1) = int16_t (err_nxt1);
-
-	if (! S_FLAG)
-	{
-		generate_rnd_eol (rnd_state);
-	}
-}
-
-
-
-template <bool S_FLAG, class ERRDIF>
-void	Bitdepth::process_seg_errdif_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const
-{
-	assert (dst_ptr != 0);
-	assert (src_ptr != 0);
-	assert (w > 0);
-	assert (ctx._y >= 0);
-
-	typedef typename ERRDIF::SrcType SRC_TYPE;
-	typedef typename ERRDIF::DstType DST_TYPE;
-	enum { SRC_BITS = ERRDIF::SRC_BITS };
-	enum { DST_BITS = ERRDIF::DST_BITS };
-
-	uint32_t &           rnd_state =  ctx._rnd_state;
-	fmtcl::ErrDifBuf &   ed_buf    = *ctx._ed_buf_ptr;
-
-	const SRC_TYPE *  src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
-	DST_TYPE *        dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
-
-	const float    mul = float (ctx._scale_info_ptr->_gain);
-	const float    add = float (ctx._scale_info_ptr->_add_cst);
-	const float    ae  = float (_ampe_f);
-	const float    an  = float (_ampn_f);
-
-	// Makes e1 point on the default buffer line for single-line
-	// error diffusor because we use it in prepare_next_line()
-	int            e0 = 0;
-	int            e1 = 0;
-	if (ERRDIF::NBR_ERR_LINES == 2)
-	{
-		e0 =      ctx._y & 1 ;
-		e1 = 1 - (ctx._y & 1);
-	}
-	float *        err0_ptr = ed_buf.get_buf <float> (e0);
-	float *        err1_ptr = ed_buf.get_buf <float> (e1);
-
-	float          err_nxt0 = ed_buf.use_mem <float> (0);
-	float          err_nxt1 = ed_buf.use_mem <float> (1);
-
-	// Forward
-	if ((ctx._y & 1) == 0)
-	{
-		for (int x = 0; x < w; ++x)
-		{
-			float          err = err_nxt0;
-			SRC_TYPE       src_raw;
-
-			quantize_pix_flt <S_FLAG, DST_TYPE, DST_BITS, SRC_TYPE> (
-				dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, an, mul, add
-			);
-			ERRDIF::template diffuse <1> (
-				err, err_nxt0, err_nxt1,
-				err0_ptr + x, err1_ptr + x, src_raw
-			);
-		}
-		ERRDIF::prepare_next_line (err1_ptr + w);
-	}
-
-	// Backward
-	else
-	{
-		for (int x = w - 1; x >= 0; --x)
-		{
-			float          err = err_nxt0;
-			SRC_TYPE       src_raw;
-
-			quantize_pix_flt <S_FLAG, DST_TYPE, DST_BITS, SRC_TYPE> (
-				dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, an, mul, add
-			);
-			ERRDIF::template diffuse <-1> (
-				err, err_nxt0, err_nxt1,
-				err0_ptr + x, err1_ptr + x, src_raw
-			);
-		}
-		ERRDIF::prepare_next_line (err1_ptr - 1);
-	}
-
-	ed_buf.use_mem <float> (0) = err_nxt0;
-	ed_buf.use_mem <float> (1) = err_nxt1;
-
-	if (! S_FLAG)
-	{
-		generate_rnd_eol (rnd_state);
-	}
-}
-
-
-
-void	Bitdepth::generate_rnd (uint32_t &state)
-{
-	state = state * uint32_t (1664525) + 1013904223;
-}
-
-
-
-void	Bitdepth::generate_rnd_eol (uint32_t &state)
-{
-	state = state * uint32_t (1103515245) + 12345;
-	if ((state & 0x2000000) != 0)
-	{
-		state = state * uint32_t (134775813) + 1;
-	}
-}
-
-
-
-Bitdepth::SegContext::SegContext ()
-:	_pattern_ptr (0)
-,	_rnd_state (0)
-,	_scale_info_ptr (0)
-,	_ed_buf_ptr (0)
-,	_y (-1)
-{
-	// Nothing
-}
-
-
-
-const Bitdepth::PatRow &	Bitdepth::SegContext::extract_pattern_row () const
-{
-	assert (_pattern_ptr != 0);
-	assert (_y >= 0);
-
-	return ((*_pattern_ptr) [_y & (PAT_WIDTH - 1)]);
-}
-
-
-
-template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-void	Bitdepth::quantize_pix_int (DST_TYPE *dst_ptr, const SRC_TYPE *src_ptr, SRC_TYPE &src_raw, int x, int &err, uint32_t &rnd_state, int ampe_i, int ampn_i)
-{
-	enum {         DIF_BITS = SRC_BITS - DST_BITS };
-	enum {         TMP_BITS =
-		(DIF_BITS < 6 && SRC_BITS < ERR_RES && DST_BITS < ERR_RES)
-		? ERR_RES
-		: SRC_BITS };
-	enum {         TMP_SHFT = TMP_BITS - SRC_BITS };
-	enum {         TMP_INVS = TMP_BITS - DST_BITS };
-
-	const int      rcst     = 1 << (TMP_INVS - 1);
-	const int      vmax     = (1 << DST_BITS) - 1;
-
-	src_raw = src_ptr [x];
-	const int		src     = src_raw << TMP_SHFT;
-	const int      preq    = src + err;
-
-	int            sum     = preq;
-	if (! S_FLAG)
-	{
-		enum {         DIT_SHFT = AMP_BITS + 8 - TMP_INVS };  // May be negative
-
-		generate_rnd (rnd_state);
-		const int		rnd_val = int8_t (rnd_state >> 24);			// s8
-		const int		err_add = (err < 0) ? -ampe_i : ampe_i;
-		const int		noise   =
-			fstb::sshift_r <int, DIT_SHFT> (rnd_val * ampn_i + err_add);	// s16 = s8 * s8 // s16 = s16 >> cst
-
-		sum += noise;
-	}
-
-	const int      quant   = (sum + rcst) >> TMP_INVS;
-
-	err = preq - (quant << TMP_INVS);
-	const int      pix     = fstb::limit (quant, 0, vmax);
-
-	dst_ptr [x] = static_cast <DST_TYPE> (pix);
-}
-
-
-
-template <class SRC_TYPE>
-static inline SRC_TYPE	Bitdepth_extract_src (SRC_TYPE src_read, float src)
-{
-	fstb::unused (src);
-
-	return (src_read);
-}
-
-static inline float	Bitdepth_extract_src (float src_read, float src)
-{
-	fstb::unused (src_read);
-
-	return (src);
-}
-
-template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
-void	Bitdepth::quantize_pix_flt (DST_TYPE *dst_ptr, const SRC_TYPE *src_ptr, SRC_TYPE &src_raw, int x, float &err, uint32_t &rnd_state, float ampe_f, float ampn_f, float mul, float add)
-{
-	const int      vmax = (1 << DST_BITS) - 1;
-
-	const SRC_TYPE src_read = src_ptr [x];
-	const float    src      = float (src_read) * mul + add;
-	src_raw = Bitdepth_extract_src (src_read, src);
-	const float    preq     = src + err;
-
-	float          sum      = preq;
-	if (! S_FLAG)
-	{
-		generate_rnd (rnd_state);
-		const int32_t  rnd_val = int32_t (rnd_state);   // Signed
-		const float    err_add = (err < 0) ? -ampe_f : (err > 0) ? ampe_f : 0;
-		const float    noise   = rnd_val * ampn_f + err_add;
-
-		sum += noise;
-	}
-
-	const int      quant   = fstb::round_int (sum);
-
-	err = preq - float (quant);
-	const int      pix = fstb::limit (quant, 0, vmax);
-
-	dst_ptr [x] = static_cast <DST_TYPE> (pix);
-}
-
-
-
-// Original coefficients                     : 7, 3, 5, 1
-// Optimised coefficients for serpentine scan: 7, 4, 5, 0
-// Source:
-// Sam Hocevar and Gary Niger,
-// Reinstating Floyd-Steinberg: Improved Metrics for Quality Assessment
-// of Error Diffusion Algorithms,
-// Lecture Notes in Computer Science LNCS 5099, pp. 38�45, 2008
-// (Proceedings of the International Conference on Image and Signal Processing
-// ICISP 2008) ISSN 0302-9743
-
-#define fmtc_Bitdepth_FS_OPTIMIZED_SERPENTINE_COEF
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseFloydSteinberg <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (err_nxt1, err1_ptr, src_raw);
-
-#if defined (fmtc_Bitdepth_FS_OPTIMIZED_SERPENTINE_COEF)
-	const int      e1 = 0;
-	const int      e3 = (err * 4 + 8) >> 4;
-#else
-	const int      e1 = (err     + 8) >> 4;
-	const int      e3 = (err * 3 + 8) >> 4;
-#endif
-	const int      e5 = (err * 5 + 8) >> 4;
-	const int      e7 = err - e1 - e3 - e5;
-	spread_error <DIR> (e1, e3, e5, e7, err_nxt0, err0_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseFloydSteinberg <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (err_nxt1, err1_ptr, src_raw);
-
-#if defined (fmtc_Bitdepth_FS_OPTIMIZED_SERPENTINE_COEF)
-	const float    e1 = 0;
-	const float    e3 = err * (4.0f / 16);
-#else
-	const float    e1 = err * (1.0f / 16);
-	const float    e3 = err * (3.0f / 16);
-#endif
-	const float    e5 = err * (5.0f / 16);
-	const float    e7 = err * (7.0f / 16);
-	spread_error <DIR> (e1, e3, e5, e7, err_nxt0, err0_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <typename EB>
-void	Bitdepth::DiffuseFloydSteinberg <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB *err_ptr)
-{
-	// Nothing
-	fstb::unused (err_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR, typename ET, typename EB>
-void	Bitdepth::DiffuseFloydSteinberg <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET e3, ET e5, ET e7, ET &err_nxt0, EB *err0_ptr)
-{
-	err_nxt0         = err0_ptr [DIR];
-	err0_ptr [-DIR] += EB (e3);
-	err0_ptr [   0] += EB (e5);
-	err0_ptr [ DIR]  = EB (e1);
-	err_nxt0        += e7;
-}
-
-
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseFilterLite <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (err_nxt1, err1_ptr, src_raw);
-
-	const int      e1 = (err + 2) >> 2;
-	const int      e2 = err - 2 * e1;
-	spread_error <DIR> (e1, e2, err_nxt0, err0_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseFilterLite <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (err_nxt1, err1_ptr, src_raw);
-
-	const float    e1 = err * (1.0f / 4);
-	const float    e2 = err * (2.0f / 4);
-	spread_error <DIR> (e1, e2, err_nxt0, err0_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <typename EB>
-void	Bitdepth::DiffuseFilterLite <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB *err_ptr)
-{
-	err_ptr [0] = EB (0);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR, typename ET, typename EB>
-void	Bitdepth::DiffuseFilterLite <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET e2, ET &err_nxt0, EB *err0_ptr)
-{
-	err_nxt0         = err0_ptr [DIR];
-	err0_ptr [-DIR] += EB (e1);
-	err0_ptr [   0]  = EB (e1);
-	err_nxt0        += e2;
-}
-
-
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseStucki <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (src_raw);
-
-	const int      m  = (err << 4) / 42;
-	const int      e1 = (m + 8) >> 4;
-	const int      e2 = (m + 4) >> 3;
-	const int      e4 = (m + 2) >> 2;
-//	const int      e8 = (m + 1) >> 1;
-	const int      sum = (e1 << 1) + ((e2 + e4) << 2);
-	const int      e8 = (err - sum + 1) >> 1;
-	spread_error <DIR> (e1, e2, e4, e8, err_nxt0, err_nxt1, err0_ptr, err1_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseStucki <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (src_raw);
-
-	const float    e1 = err * (1.0f / 42);
-	const float    e2 = err * (2.0f / 42);
-	const float    e4 = err * (4.0f / 42);
-	const float    e8 = err * (8.0f / 42);
-	spread_error <DIR> (e1, e2, e4, e8, err_nxt0, err_nxt1, err0_ptr, err1_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <typename EB>
-void	Bitdepth::DiffuseStucki <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB *err_ptr)
-{
-	// Nothing
-	fstb::unused (err_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR, typename ET, typename EB>
-void	Bitdepth::DiffuseStucki <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET e2, ET e4, ET e8, ET &err_nxt0, ET &err_nxt1, EB *err0_ptr, EB *err1_ptr)
-{
-	err_nxt0             = err_nxt1 + e8;
-	err_nxt1             = err1_ptr [DIR * 2] + e4;
-	err0_ptr [-DIR * 2] += EB (e2);
-	err0_ptr [-DIR    ] += EB (e4);
-	err0_ptr [   0    ] += EB (e8);
-	err0_ptr [ DIR    ] += EB (e4);
-	err0_ptr [ DIR * 2] += EB (e2);
-	err1_ptr [-DIR * 2] += EB (e1);
-	err1_ptr [-DIR    ] += EB (e2);
-	err1_ptr [   0    ] += EB (e4);
-	err1_ptr [ DIR    ] += EB (e2);
-	err1_ptr [ DIR * 2]  = EB (e1);
-}
-
-
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseAtkinson <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (src_raw);
-
-	const int      e1 = (err + 4) >> 3;
-	spread_error <DIR> (e1, err_nxt0, err_nxt1, err0_ptr, err1_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseAtkinson <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (src_raw);
-
-	const float    e1 = err * (1.0f / 8);
-	spread_error <DIR> (e1, err_nxt0, err_nxt1, err0_ptr, err1_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <typename EB>
-void	Bitdepth::DiffuseAtkinson <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB *err_ptr)
-{
-	err_ptr [0] = EB (0);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR, typename ET, typename EB>
-void	Bitdepth::DiffuseAtkinson <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET &err_nxt0, ET &err_nxt1, EB *err0_ptr, EB *err1_ptr)
-{
-	err_nxt0         = err_nxt1           + e1;
-	err_nxt1         = err1_ptr [2 * DIR] + e1;
-	err0_ptr [-DIR] += EB (e1);
-	err0_ptr [   0] += EB (e1);
-	err0_ptr [+DIR] += EB (e1);
-	err1_ptr [   0]  = EB (e1);
-}
-
-
-
-// Victor Ostromoukhov,
-// A Simple and Efficient Error-Diffusion Algorithm
-// Proceedings of SIGGRAPH 2001, in ACM Computer Graphics,
-// Annual Conference Series, pp. 567-572, 2001.
-// Not optimised at all
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (err_nxt1, err1_ptr);
-
-	enum {         DIF_BITS = SRC_BITS - DST_BITS };
-
-	const int      index    = fstb::sshift_l <
-		int,
-		DiffuseOstromoukhov::T_BITS - DIF_BITS
-	> (src_raw) & DiffuseOstromoukhov::T_MASK;
-	const typename DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::TableEntry & te =
-		DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::_table [index];
-	const int      d        = te._sum;
-
-	const int      e1 = err * te._c0 / d;
-	const int      e2 = err * te._c1 / d;
-	const int      e3 = err - e1 - e2;
-
-	spread_error <DIR> (e1, e2, e3, err_nxt0, err0_ptr);
-}
-
-template <int DST_BITS, int SRC_BITS>
-template <class SRC_TYPE>
-int	Bitdepth::DiffuseOstromoukhovBase2 <DST_BITS, SRC_BITS>::get_index (SRC_TYPE src_raw)
-{
-	enum {         DIF_BITS = SRC_BITS - DST_BITS };
-
-	return (fstb::sshift_l <
-		int,
-		DiffuseOstromoukhovBase::T_BITS - DIF_BITS
-	> (src_raw) & DiffuseOstromoukhovBase::T_MASK);
-}
-
-template <int DST_BITS, int SRC_BITS>
-int	Bitdepth::DiffuseOstromoukhovBase2 <DST_BITS, SRC_BITS>::get_index (float src_raw)
-{
-	return (  fstb::round_int (src_raw * DiffuseOstromoukhovBase::T_LEN)
-	        & DiffuseOstromoukhovBase::T_MASK);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR>
-void	Bitdepth::DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw)
-{
-	fstb::unused (err_nxt1, err1_ptr);
-
-	const int      index    = DiffuseOstromoukhov::get_index (src_raw);
-	const typename DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::TableEntry &   te =
-		DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::_table [index];
-	const float    invd     = te._inv_sum;
-
-	const float    e1 = err * te._c0 * invd;
-	const float    e2 = err * te._c1 * invd;
-	const float    e3 = err - e1 - e2;
-
-	spread_error <DIR> (e1, e2, e3, err_nxt0, err0_ptr);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <typename EB>
-void	Bitdepth::DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB *err_ptr)
-{
-	err_ptr [0] = EB (0);
-}
-
-template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-template <int DIR, typename ET, typename EB>
-void	Bitdepth::DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET e2, ET e3, ET &err_nxt0, EB *err0_ptr)
-{
-	err_nxt0         = err0_ptr [DIR];
-	err0_ptr [-DIR] += EB (e2);
-	err0_ptr [   0]  = EB (e3);
-	err_nxt0        += e1;
-}
-
-
-
-const Bitdepth::DiffuseOstromoukhovBase::TableEntry	Bitdepth::DiffuseOstromoukhovBase::_table [T_LEN] =
-{
-	{   13,    0,    5,   18, 1.0f /   18 },
-	{   13,    0,    5,   18, 1.0f /   18 },
-	{   21,    0,   10,   31, 1.0f /   31 },
-	{    7,    0,    4,   11, 1.0f /   11 },
-	{    8,    0,    5,   13, 1.0f /   13 },
-	{   47,    3,   28,   78, 1.0f /   78 },
-	{   23,    3,   13,   39, 1.0f /   39 },
-	{   15,    3,    8,   26, 1.0f /   26 },
-	{   22,    6,   11,   39, 1.0f /   39 },
-	{   43,   15,   20,   78, 1.0f /   78 },
-	{    7,    3,    3,   13, 1.0f /   13 },
-	{  501,  224,  211,  936, 1.0f /  936 },
-	{  249,  116,  103,  468, 1.0f /  468 },
-	{  165,   80,   67,  312, 1.0f /  312 },
-	{  123,   62,   49,  234, 1.0f /  234 },
-	{  489,  256,  191,  936, 1.0f /  936 },
-	{   81,   44,   31,  156, 1.0f /  156 },
-	{  483,  272,  181,  936, 1.0f /  936 },
-	{   60,   35,   22,  117, 1.0f /  117 },
-	{   53,   32,   19,  104, 1.0f /  104 },
-	{  237,  148,   83,  468, 1.0f /  468 },
-	{  471,  304,  161,  936, 1.0f /  936 },
-	{    3,    2,    1,    6, 1.0f /    6 },
-	{  481,  314,  185,  980, 1.0f /  980 },
-	{  354,  226,  155,  735, 1.0f /  735 },
-	{ 1389,  866,  685, 2940, 1.0f / 2940 },
-	{  227,  138,  125,  490, 1.0f /  490 },
-	{  267,  158,  163,  588, 1.0f /  588 },
-	{  327,  188,  220,  735, 1.0f /  735 },
-	{   61,   34,   45,  140, 1.0f /  140 },
-	{  627,  338,  505, 1470, 1.0f / 1470 },
-	{ 1227,  638, 1075, 2940, 1.0f / 2940 },
-
-	{   20,   10,   19,   49, 1.0f /   49 },
-	{ 1937, 1000, 1767, 4704, 1.0f / 4704 },
-	{  977,  520,  855, 2352, 1.0f / 2352 },
-	{  657,  360,  551, 1568, 1.0f / 1568 },
-	{   71,   40,   57,  168, 1.0f /  168 },
-	{ 2005, 1160, 1539, 4704, 1.0f / 4704 },
-	{  337,  200,  247,  784, 1.0f /  784 },
-	{ 2039, 1240, 1425, 4704, 1.0f / 4704 },
-	{  257,  160,  171,  588, 1.0f /  588 },
-	{  691,  440,  437, 1568, 1.0f / 1568 },
-	{ 1045,  680,  627, 2352, 1.0f / 2352 },
-	{  301,  200,  171,  672, 1.0f /  672 },
-	{  177,  120,   95,  392, 1.0f /  392 },
-	{ 2141, 1480, 1083, 4704, 1.0f / 4704 },
-	{ 1079,  760,  513, 2352, 1.0f / 2352 },
-	{  725,  520,  323, 1568, 1.0f / 1568 },
-	{  137,  100,   57,  294, 1.0f /  294 },
-	{ 2209, 1640,  855, 4704, 1.0f / 4704 },
-	{   53,   40,   19,  112, 1.0f /  112 },
-	{ 2243, 1720,  741, 4704, 1.0f / 4704 },
-	{  565,  440,  171, 1176, 1.0f / 1176 },
-	{  759,  600,  209, 1568, 1.0f / 1568 },
-	{ 1147,  920,  285, 2352, 1.0f / 2352 },
-	{ 2311, 1880,  513, 4704, 1.0f / 4704 },
-	{   97,   80,   19,  196, 1.0f /  196 },
-	{  335,  280,   57,  672, 1.0f /  672 },
-	{ 1181, 1000,  171, 2352, 1.0f / 2352 },
-	{  793,  680,   95, 1568, 1.0f / 1568 },
-	{  599,  520,   57, 1176, 1.0f / 1176 },
-	{ 2413, 2120,  171, 4704, 1.0f / 4704 },
-	{  405,  360,   19,  784, 1.0f /  784 },
-	{ 2447, 2200,   57, 4704, 1.0f / 4704 },
-
-	{   11,   10,    0,   21, 1.0f /   21 },
-	{  158,  151,    3,  312, 1.0f /  312 },
-	{  178,  179,    7,  364, 1.0f /  364 },
-	{ 1030, 1091,   63, 2184, 1.0f / 2184 },
-	{  248,  277,   21,  546, 1.0f /  546 },
-	{  318,  375,   35,  728, 1.0f /  728 },
-	{  458,  571,   63, 1092, 1.0f / 1092 },
-	{  878, 1159,  147, 2184, 1.0f / 2184 },
-	{    5,    7,    1,   13, 1.0f /   13 },
-	{  172,  181,   37,  390, 1.0f /  390 },
-	{   97,   76,   22,  195, 1.0f /  195 },
-	{   72,   41,   17,  130, 1.0f /  130 },
-	{  119,   47,   29,  195, 1.0f /  195 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{   65,   18,   17,  100, 1.0f /  100 },
-	{   95,   29,   26,  150, 1.0f /  150 },
-	{  185,   62,   53,  300, 1.0f /  300 },
-	{   30,   11,    9,   50, 1.0f /   50 },
-	{   35,   14,   11,   60, 1.0f /   60 },
-	{   85,   37,   28,  150, 1.0f /  150 },
-	{   55,   26,   19,  100, 1.0f /  100 },
-	{   80,   41,   29,  150, 1.0f /  150 },
-	{  155,   86,   59,  300, 1.0f /  300 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{  305,  176,  119,  600, 1.0f /  600 },
-	{  155,   86,   59,  300, 1.0f /  300 },
-	{  105,   56,   39,  200, 1.0f /  200 },
-	{   80,   41,   29,  150, 1.0f /  150 },
-	{   65,   32,   23,  120, 1.0f /  120 },
-	{   55,   26,   19,  100, 1.0f /  100 },
-	{  335,  152,  113,  600, 1.0f /  600 },
-	{   85,   37,   28,  150, 1.0f /  150 },
-	{  115,   48,   37,  200, 1.0f /  200 },
-	{   35,   14,   11,   60, 1.0f /   60 },
-	{  355,  136,  109,  600, 1.0f /  600 },
-	{   30,   11,    9,   50, 1.0f /   50 },
-	{  365,  128,  107,  600, 1.0f /  600 },
-	{  185,   62,   53,  300, 1.0f /  300 },
-	{   25,    8,    7,   40, 1.0f /   40 },
-	{   95,   29,   26,  150, 1.0f /  150 },
-	{  385,  112,  103,  600, 1.0f /  600 },
-	{   65,   18,   17,  100, 1.0f /  100 },
-	{  395,  104,  101,  600, 1.0f /  600 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-
-	// Symetric
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{  395,  104,  101,  600, 1.0f /  600 },
-	{   65,   18,   17,  100, 1.0f /  100 },
-	{  385,  112,  103,  600, 1.0f /  600 },
-	{   95,   29,   26,  150, 1.0f /  150 },
-	{   25,    8,    7,   40, 1.0f /   40 },
-	{  185,   62,   53,  300, 1.0f /  300 },
-	{  365,  128,  107,  600, 1.0f /  600 },
-	{   30,   11,    9,   50, 1.0f /   50 },
-	{  355,  136,  109,  600, 1.0f /  600 },
-	{   35,   14,   11,   60, 1.0f /   60 },
-	{  115,   48,   37,  200, 1.0f /  200 },
-	{   85,   37,   28,  150, 1.0f /  150 },
-	{  335,  152,  113,  600, 1.0f /  600 },
-	{   55,   26,   19,  100, 1.0f /  100 },
-	{   65,   32,   23,  120, 1.0f /  120 },
-	{   80,   41,   29,  150, 1.0f /  150 },
-	{  105,   56,   39,  200, 1.0f /  200 },
-	{  155,   86,   59,  300, 1.0f /  300 },
-	{  305,  176,  119,  600, 1.0f /  600 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{    5,    3,    2,   10, 1.0f /   10 },
-
-	{    5,    3,    2,   10, 1.0f /   10 },
-	{  155,   86,   59,  300, 1.0f /  300 },
-	{   80,   41,   29,  150, 1.0f /  150 },
-	{   55,   26,   19,  100, 1.0f /  100 },
-	{   85,   37,   28,  150, 1.0f /  150 },
-	{   35,   14,   11,   60, 1.0f /   60 },
-	{   30,   11,    9,   50, 1.0f /   50 },
-	{  185,   62,   53,  300, 1.0f /  300 },
-	{   95,   29,   26,  150, 1.0f /  150 },
-	{   65,   18,   17,  100, 1.0f /  100 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{    4,    1,    1,    6, 1.0f /    6 },
-	{  119,   47,   29,  195, 1.0f /  195 },
-	{   72,   41,   17,  130, 1.0f /  130 },
-	{   97,   76,   22,  195, 1.0f /  195 },
-	{  172,  181,   37,  390, 1.0f /  390 },
-	{    5,    7,    1,   13, 1.0f /   13 },
-	{  878, 1159,  147, 2184, 1.0f / 2184 },
-	{  458,  571,   63, 1092, 1.0f / 1092 },
-	{  318,  375,   35,  728, 1.0f /  728 },
-	{  248,  277,   21,  546, 1.0f /  546 },
-	{ 1030, 1091,   63, 2184, 1.0f / 2184 },
-	{  178,  179,    7,  364, 1.0f /  364 },
-	{  158,  151,    3,  312, 1.0f /  312 },
-	{   11,   10,    0,   21, 1.0f /   21 },
-
-	{ 2447, 2200,   57, 4704, 1.0f / 4704 },
-	{  405,  360,   19,  784, 1.0f /  784 },
-	{ 2413, 2120,  171, 4704, 1.0f / 4704 },
-	{  599,  520,   57, 1176, 1.0f / 1176 },
-	{  793,  680,   95, 1568, 1.0f / 1568 },
-	{ 1181, 1000,  171, 2352, 1.0f / 2352 },
-	{  335,  280,   57,  672, 1.0f /  672 },
-	{   97,   80,   19,  196, 1.0f /  196 },
-	{ 2311, 1880,  513, 4704, 1.0f / 4704 },
-	{ 1147,  920,  285, 2352, 1.0f / 2352 },
-	{  759,  600,  209, 1568, 1.0f / 1568 },
-	{  565,  440,  171, 1176, 1.0f / 1176 },
-	{ 2243, 1720,  741, 4704, 1.0f / 4704 },
-	{   53,   40,   19,  112, 1.0f /  112 },
-	{ 2209, 1640,  855, 4704, 1.0f / 4704 },
-	{  137,  100,   57,  294, 1.0f /  294 },
-	{  725,  520,  323, 1568, 1.0f / 1568 },
-	{ 1079,  760,  513, 2352, 1.0f / 2352 },
-	{ 2141, 1480, 1083, 4704, 1.0f / 4704 },
-	{  177,  120,   95,  392, 1.0f /  392 },
-	{  301,  200,  171,  672, 1.0f /  672 },
-	{ 1045,  680,  627, 2352, 1.0f / 2352 },
-	{  691,  440,  437, 1568, 1.0f / 1568 },
-	{  257,  160,  171,  588, 1.0f /  588 },
-	{ 2039, 1240, 1425, 4704, 1.0f / 4704 },
-	{  337,  200,  247,  784, 1.0f /  784 },
-	{ 2005, 1160, 1539, 4704, 1.0f / 4704 },
-	{   71,   40,   57,  168, 1.0f /  168 },
-	{  657,  360,  551, 1568, 1.0f / 1568 },
-	{  977,  520,  855, 2352, 1.0f / 2352 },
-	{ 1937, 1000, 1767, 4704, 1.0f / 4704 },
-	{   20,   10,   19,   49, 1.0f /   49 },
-
-	{ 1227,  638, 1075, 2940, 1.0f / 2940 },
-	{  627,  338,  505, 1470, 1.0f / 1470 },
-	{   61,   34,   45,  140, 1.0f /  140 },
-	{  327,  188,  220,  735, 1.0f /  735 },
-	{  267,  158,  163,  588, 1.0f /  588 },
-	{  227,  138,  125,  490, 1.0f /  490 },
-	{ 1389,  866,  685, 2940, 1.0f / 2940 },
-	{  354,  226,  155,  735, 1.0f /  735 },
-	{  481,  314,  185,  980, 1.0f /  980 },
-	{    3,    2,    1,    6, 1.0f /    6 },
-	{  471,  304,  161,  936, 1.0f /  936 },
-	{  237,  148,   83,  468, 1.0f /  468 },
-	{   53,   32,   19,  104, 1.0f /  104 },
-	{   60,   35,   22,  117, 1.0f /  117 },
-	{  483,  272,  181,  936, 1.0f /  936 },
-	{   81,   44,   31,  156, 1.0f /  156 },
-	{  489,  256,  191,  936, 1.0f /  936 },
-	{  123,   62,   49,  234, 1.0f /  234 },
-	{  165,   80,   67,  312, 1.0f /  312 },
-	{  249,  116,  103,  468, 1.0f /  468 },
-	{  501,  224,  211,  936, 1.0f /  936 },
-	{    7,    3,    3,   13, 1.0f /   13 },
-	{   43,   15,   20,   78, 1.0f /   78 },
-	{   22,    6,   11,   39, 1.0f /   39 },
-	{   15,    3,    8,   26, 1.0f /   26 },
-	{   23,    3,   13,   39, 1.0f /   39 },
-	{   47,    3,   28,   78, 1.0f /   78 },
-	{    8,    0,    5,   13, 1.0f /   13 },
-	{    7,    0,    4,   11, 1.0f /   11 },
-	{   21,    0,   10,   31, 1.0f /   31 },
-	{   13,    0,    5,   18, 1.0f /   18 },
-	{   13,    0,    5,   18, 1.0f /   18 }
-};
-
-
 
 }	// namespace fmtc
 
diff --git a/src/fmtc/Bitdepth.h b/src/fmtc/Bitdepth.h
index bae3667..b6aea37 100644
--- a/src/fmtc/Bitdepth.h
+++ b/src/fmtc/Bitdepth.h
@@ -27,21 +27,14 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
-#include "conc/ObjPool.h"
-#include "fmtcl/BitBltConv.h"
-#include "fmtcl/ErrDifBuf.h"
-#include "fmtcl/ErrDifBufFactory.h"
-#include "fmtcl/SplFmt.h"
-#include "fstb/ArrayAlign.h"
+#include "fmtcl/Dither.h"
 #include "vsutl/FilterBase.h"
 #include "vsutl/NodeRefSPtr.h"
 #include "vsutl/PlaneProcCbInterface.h"
 #include "vsutl/PlaneProcessor.h"
 #include "VapourSynth.h"
 
-#include <array>
 #include <memory>
-#include <vector>
 
 
 
@@ -84,256 +77,9 @@ class Bitdepth
 
 private:
 
-	static const int  MAX_NBR_PLANES =     3;
-	static const int  PAT_WIDTH      =    32; // Number of pixels for halftone dithering
-	static const int  PAT_PERIOD     =     4; // Must be a power of 2 (because cycled with & as modulo)
-	static const int  AMP_BITS       =     5; // Bit depth of the amplitude fractionnal part. The whole thing is 7 bits, and we need a few bits for the integer part.
-	static const int  ERR_RES        =    24; // Resolution (bits) of the temporary data for error diffusion when source bitdepth is not high enough (relative to the destination bitdepth) to guarantee an accurate error diffusion.
-	static const int  MAX_UNK_WIDTH  = 65536; // Maximum width (pixels) for variable formats
-
-	enum DMode
-	{
-		DMode_ROUND_ALIAS = -1,
-		DMode_BAYER = 0,
-		DMode_ROUND,      // 1
-		DMode_FAST,       // 2
-		DMode_FILTERLITE, // 3
-		DMode_STUCKI,     // 4
-		DMode_ATKINSON,   // 5
-		DMode_FLOYD,      // 6
-		DMode_OSTRO,      // 7
-		DMode_VOIDCLUST,  // 8
-
-		DMode_NBR_ELT
-	};
-
-	class SclInf
-	{
-	public:
-		fmtcl::BitBltConv::ScaleInfo
-		               _info;
-		fmtcl::BitBltConv::ScaleInfo *   // 0 if _info is not used.
-		               _ptr = 0;
-	};
-
-	typedef	int16_t	PatRow [PAT_WIDTH];  // Contains data in [-128; +127]
-	typedef	PatRow	PatData [PAT_WIDTH]; // [y] [x]
-	typedef	fstb::ArrayAlign <PatData, PAT_PERIOD, 16>	PatDataArray;
-
-	class SegContext
-	{
-	public:
-		inline         SegContext ();
-		inline const PatRow &
-		               extract_pattern_row () const;
-		const PatData* _pattern_ptr;           // Ordered dithering
-		uint32_t       _rnd_state;             // Anything excepted fast mode
-		const fmtcl::BitBltConv::ScaleInfo *   // Float processing
-		               _scale_info_ptr;
-		fmtcl::ErrDifBuf *                     // Error diffusion
-		               _ed_buf_ptr;
-		int            _y;                     // Ordered dithering and error diffusion
-	};
-
 	const ::VSFormat &
 	               get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src) const;
 
-	void           build_dither_pat ();
-	void           build_dither_pat_round ();
-	void           build_dither_pat_bayer ();
-	void           build_dither_pat_void_and_cluster (int w);
-	void           build_next_dither_pat ();
-	void           copy_dither_pat_rotate (PatData &dst, const PatData &src, int angle);
-	void           init_fnc_fast ();
-	void           init_fnc_ordered ();
-	void           init_fnc_errdiff ();
-
-	void           dither_plane (fmtcl::SplFmt dst_fmt, int dst_res, uint8_t *dst_ptr, int dst_stride, fmtcl::SplFmt src_fmt, int src_res, const uint8_t *src_ptr, int src_stride, int w, int h, const fmtcl::BitBltConv::ScaleInfo &scale_info, const PatData &pattern, uint32_t rnd_state);
-
-	template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-	void           process_seg_fast_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &/*ctx*/) const;
-	template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
-	void           process_seg_fast_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-
-#if (fstb_ARCHI == fstb_ARCHI_X86)
-	template <bool S_FLAG, fmtcl::SplFmt DST_FMT, int DST_BITS, fmtcl::SplFmt SRC_FMT, int SRC_BITS>
-	void           process_seg_fast_int_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &/*ctx*/) const;
-	template <bool S_FLAG, fmtcl::SplFmt DST_FMT, int DST_BITS, fmtcl::SplFmt SRC_FMT>
-	void           process_seg_fast_flt_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-#endif
-
-	template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-	void           process_seg_ord_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-	template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
-	void           process_seg_ord_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-
-#if (fstb_ARCHI == fstb_ARCHI_X86)
-	template <bool S_FLAG, fmtcl::SplFmt DST_FMT, int DST_BITS, fmtcl::SplFmt SRC_FMT, int SRC_BITS>
-	void           process_seg_ord_int_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-	template <bool S_FLAG, fmtcl::SplFmt DST_FMT, int DST_BITS, fmtcl::SplFmt SRC_FMT>
-	void           process_seg_ord_flt_int_sse2 (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-#endif
-
-	template <bool S_FLAG, class ERRDIF>
-	void           process_seg_errdif_int_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-	template <bool S_FLAG, class ERRDIF>
-	void           process_seg_errdif_flt_int_cpp (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-
-	static inline void
-	               generate_rnd (uint32_t &state);
-	static inline void
-	               generate_rnd_eol (uint32_t &state);
-
-	template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-	static inline void
-	               quantize_pix_int (DST_TYPE *dst_ptr, const SRC_TYPE *src_ptr, SRC_TYPE &src_raw, int x, int &err, uint32_t &rnd_state, int ampe_i, int ampn_i);
-	template <bool S_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
-	static inline void
-	               quantize_pix_flt (DST_TYPE *dst_ptr, const SRC_TYPE *src_ptr, SRC_TYPE &src_raw, int x, float &err, uint32_t &rnd_state, float ampe_f, float ampn_f, float mul, float add);
-
-	template <class DT, int DB, class ST, int SB, int EL>
-	class ErrDifAddParam
-	{
-	public:
-		typedef DT DstType;
-		typedef ST SrcType;
-		static const int  DST_BITS      = DB;
-		static const int  SRC_BITS      = SB;
-		static const int  NBR_ERR_LINES = EL;
-	};
-
-	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-	class DiffuseFloydSteinberg
-	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 1>
-	{
-	public:
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw);
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw);
-		template <typename EB>
-		static fstb_FORCEINLINE void
-		               prepare_next_line (EB *err_ptr);
-	private:
-		template <int DIR, typename ET, typename EB>
-		static fstb_FORCEINLINE void
-		               spread_error (ET e1, ET e3, ET e5, ET e7, ET &err_nxt0, EB *err0_ptr);
-	};
-
-	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-	class DiffuseFilterLite
-	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 1>
-	{
-	public:
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw);
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw);
-		template <typename EB>
-		static fstb_FORCEINLINE void
-		               prepare_next_line (EB *err_ptr);
-	private:
-		template <int DIR, typename ET, typename EB>
-		static fstb_FORCEINLINE void
-		               spread_error (ET e1, ET e2, ET &err_nxt0, EB *err0_ptr);
-	};
-
-	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-	class DiffuseStucki
-	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 2>
-	{
-	public:
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw);
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw);
-		template <typename EB>
-		static fstb_FORCEINLINE void
-		               prepare_next_line (EB *err_ptr);
-	private:
-		template <int DIR, typename ET, typename EB>
-		static fstb_FORCEINLINE void
-		               spread_error (ET e1, ET e2, ET e4, ET e8, ET &err_nxt0, ET &err_nxt1, EB *err0_ptr, EB *err1_ptr);
-	};
-
-	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-	class DiffuseAtkinson
-	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 2>
-	{
-	public:
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw);
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw);
-		template <typename EB>
-		static fstb_FORCEINLINE void
-		               prepare_next_line (EB *err_ptr);
-	private:
-		template <int DIR, typename ET, typename EB>
-		static fstb_FORCEINLINE void
-		               spread_error (ET e1, ET &err_nxt0, ET &err_nxt1, EB *err0_ptr, EB *err1_ptr);
-	};
-
-	class DiffuseOstromoukhovBase
-	{
-	public:
-		struct TableEntry
-		{
-			int            _c0;
-			int            _c1;
-			int            _c2;        // Actually not used
-			int            _sum;
-			float          _inv_sum;   // Possible optimization: store 1/_c0 and 1/_c1 instead of this field.
-		};
-		static const int  T_BITS = 8;
-		static const int  T_LEN  = 1 << T_BITS;
-		static const int  T_MASK = T_LEN - 1;
-
-		static const TableEntry
-		               _table [T_LEN];
-	};
-
-	template <int DST_BITS, int SRC_BITS>
-	class DiffuseOstromoukhovBase2
-	:	public DiffuseOstromoukhovBase
-	{
-	public:
-		template <class SRC_TYPE>
-		static inline int
-		               get_index (SRC_TYPE src_raw);
-		static inline int
-		               get_index (float src_raw);
-	};
-
-	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
-	class DiffuseOstromoukhov
-	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 1>
-	,	public DiffuseOstromoukhovBase2 <DST_BITS, SRC_BITS>
-	{
-	public:
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (int err, int &err_nxt0, int &err_nxt1, int16_t *err0_ptr, int16_t *err1_ptr, SRC_TYPE src_raw);
-		template <int DIR>
-		static fstb_FORCEINLINE void
-		               diffuse (float err, float &err_nxt0, float &err_nxt1, float *err0_ptr, float *err1_ptr, SRC_TYPE src_raw);
-		template <typename EB>
-		static fstb_FORCEINLINE void
-		               prepare_next_line (EB *err_ptr);
-	private:
-		template <int DIR, typename ET, typename EB>
-		static fstb_FORCEINLINE void
-		               spread_error (ET e1, ET e2, ET e3, ET &err_nxt0, EB *err0_ptr);
-	};
-
 	vsutl::NodeRefSPtr
 	               _clip_src_sptr;
 	const ::VSVideoInfo             
@@ -342,42 +88,13 @@ class Bitdepth
 
 	vsutl::PlaneProcessor
 	               _plane_processor;
-	fmtcl::SplFmt  _splfmt_src;
-	fmtcl::SplFmt  _splfmt_dst;
-
-	std::array <SclInf, MAX_NBR_PLANES>
-	               _scale_info_arr;
-	bool           _upconv_flag;
-	bool           _sse2_flag;
-	bool           _avx2_flag;
-	bool           _full_range_in_flag;
-	bool           _full_range_out_flag;
-
-	int            _dmode;
-	double         _ampo;
-	double         _ampn;
-	bool           _dyn_flag;
-	bool           _static_noise_flag;
-	int            _pat_size;        // Must be a divisor of PAT_WIDTH
-
-	int            _ampo_i;          // [0 ;  127], 1.0 = 1 << AMP_BITS
-	int            _ampn_i;          // [0 ;  127], 1.0 = 1 << AMP_BITS
-	int            _ampe_i;          // [0 ; 2047], 1.0 = 256
-	float          _ampe_f;
-	float          _ampn_f;
-	bool           _errdif_flag;     // Indicates a dithering method using error diffusion.
-	bool           _simple_flag;     // Simplified implementation for ampo == 1 and ampn == 0
-	PatDataArray   _dither_pat_arr;  // Contains levels for ordered dithering
-
-	conc::ObjPool <fmtcl::ErrDifBuf>
-						_buf_pool;
-	std::unique_ptr <fmtcl::ErrDifBufFactory>
-	               _buf_factory_uptr;
-
-	void (ThisType::*
-	               _process_seg_int_int_ptr) (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
-	void (ThisType::*
-	               _process_seg_flt_int_ptr) (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) const;
+
+	bool           _full_range_in_flag  = false;
+	bool           _full_range_out_flag = false;
+	bool           _range_def_flag      = false;
+
+	std::unique_ptr <fmtcl::Dither>
+	               _engine_uptr;
 
 
 
@@ -387,7 +104,9 @@ class Bitdepth
 
 	               Bitdepth ()                               = delete;
 	               Bitdepth (const Bitdepth &other)          = delete;
+	               Bitdepth (Bitdepth &&other)               = delete;
 	Bitdepth &     operator = (const Bitdepth &other)        = delete;
+	Bitdepth &     operator = (Bitdepth &&other)             = delete;
 	bool           operator == (const Bitdepth &other) const = delete;
 	bool           operator != (const Bitdepth &other) const = delete;
 
diff --git a/src/fmtc/Matrix.cpp b/src/fmtc/Matrix.cpp
index 2ec329d..b8d93e4 100644
--- a/src/fmtc/Matrix.cpp
+++ b/src/fmtc/Matrix.cpp
@@ -31,7 +31,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 #include "fmtc/Matrix.h"
 #include "fmtc/fnc.h"
-#include "fmtcl/Mat4.h"
+#include "fmtcl/MatrixUtil.h"
 #include "fstb/def.h"
 #include "fstb/fnc.h"
 #include "vsutl/CpuOpt.h"
@@ -77,9 +77,9 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC
 	_avx_flag  = cpu_opt.has_avx ();
 	_avx2_flag = cpu_opt.has_avx2 ();
 
-	_proc_uptr = std::unique_ptr <fmtcl::MatrixProc> (new fmtcl::MatrixProc (
+	_proc_uptr = std::make_unique <fmtcl::MatrixProc> (
 		_sse_flag, _sse2_flag, _avx_flag, _avx2_flag
-	));
+	);
 
 	// Checks the input clip
 	if (_vi_in.format == 0)
@@ -93,7 +93,7 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC
 	{
 		throw_inval_arg ("input must be 4:4:4.");
 	}
-	if (fmt_src.numPlanes != NBR_PLANES)
+	if (fmt_src.numPlanes != _nbr_planes)
 	{
 		throw_inval_arg ("greyscale format not supported as input.");
 	}
@@ -107,7 +107,7 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC
 		throw_inval_arg ("pixel bitdepth not supported.");
 	}
 
-	if (_plane_out >= NBR_PLANES)
+	if (_plane_out >= _nbr_planes)
 	{
 		throw_inval_arg (
 			"singleout is a plane index and must be -1 or ranging from 0 to 3."
@@ -172,7 +172,7 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC
 	_vi_out.format = fmt_dst_ptr;
 	const ::VSFormat &fmt_dst = *fmt_dst_ptr;
 
-	const int      nbr_expected_coef = NBR_PLANES * (NBR_PLANES + 1);
+	const int      nbr_expected_coef = _nbr_planes * (_nbr_planes + 1);
 
 	bool           mat_init_flag = false;
 
@@ -192,8 +192,14 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC
 
 		fmtcl::Mat4    m2s;
 		fmtcl::Mat4    m2d;
-		make_mat_from_str (m2s, mats, true);
-		make_mat_from_str (m2d, matd, false);
+		if (fmtcl::MatrixUtil::make_mat_from_str (m2s, mats, true) != 0)
+		{
+			throw_inval_arg ("unknown source matrix identifier.");
+		}
+		if (fmtcl::MatrixUtil::make_mat_from_str (m2d, matd, false) != 0)
+		{
+			throw_inval_arg ("unknown destination matrix identifier.");
+		}
 		_csp_out = find_cs_from_mat_str (*this, matd, false);
 
 		_mat_main = m2d * m2s;
@@ -223,13 +229,13 @@ Matrix::Matrix (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, ::VSC
 			throw_inval_arg ("coef has a wrong number of elements.");
 		}
 
-		for (int y = 0; y < NBR_PLANES + 1; ++y)
+		for (int y = 0; y < _nbr_planes + 1; ++y)
 		{
-			for (int x = 0; x < NBR_PLANES + 1; ++x)
+			for (int x = 0; x < _nbr_planes + 1; ++x)
 			{
 				_mat_main [y] [x] = (x == y) ? 1 : 0;
 
-				if (   (x < fmt_src.numPlanes || x == NBR_PLANES)
+				if (   (x < fmt_src.numPlanes || x == _nbr_planes)
 				    &&  y < fmt_dst.numPlanes)
 				{
 					int            err = 0;
@@ -319,26 +325,26 @@ const ::VSFrameRef *	Matrix::get_frame (int n, int activation_reason, void * &fr
 		const int         h  =  _vsapi.getFrameHeight (&src, 0);
 		dst_ptr = _vsapi.newVideoFrame (_vi_out.format, w, h, &src, &core);
 
-		uint8_t * const   dst_ptr_arr [fmtcl::MatrixProc::NBR_PLANES] =
+		uint8_t * const   dst_ptr_arr [fmtcl::MatrixProc::_nbr_planes] =
 		{
 			                        _vsapi.getWritePtr (dst_ptr, 0),
 			(_plane_out >= 0) ? 0 : _vsapi.getWritePtr (dst_ptr, 1),
 			(_plane_out >= 0) ? 0 : _vsapi.getWritePtr (dst_ptr, 2)
 		};
-		const int         dst_str_arr [fmtcl::MatrixProc::NBR_PLANES] =
+		const int         dst_str_arr [fmtcl::MatrixProc::_nbr_planes] =
 		{
 			                        _vsapi.getStride (dst_ptr, 0),
 			(_plane_out >= 0) ? 0 : _vsapi.getStride (dst_ptr, 1),
 			(_plane_out >= 0) ? 0 : _vsapi.getStride (dst_ptr, 2)
 		};
 		const uint8_t * const
-		                  src_ptr_arr [fmtcl::MatrixProc::NBR_PLANES] =
+		                  src_ptr_arr [fmtcl::MatrixProc::_nbr_planes] =
 		{
 			_vsapi.getReadPtr (&src, 0),
 			_vsapi.getReadPtr (&src, 1),
 			_vsapi.getReadPtr (&src, 2)
 		};
-		const int         src_str_arr [fmtcl::MatrixProc::NBR_PLANES] =
+		const int         src_str_arr [fmtcl::MatrixProc::_nbr_planes] =
 		{
 			_vsapi.getStride (&src, 0),
 			_vsapi.getStride (&src, 1),
@@ -371,7 +377,7 @@ const ::VSFrameRef *	Matrix::get_frame (int n, int activation_reason, void * &fr
 		}
 	}
 
-	return (dst_ptr);
+	return dst_ptr;
 }
 
 
@@ -406,62 +412,15 @@ void	Matrix::select_def_mat (std::string &mat, const ::VSFormat &fmt)
 // mat should be already converted to lower case
 fmtcl::ColorSpaceH265	Matrix::find_cs_from_mat_str (const vsutl::FilterBase &flt, const std::string &mat, bool allow_2020cl_flag)
 {
-	fmtcl::ColorSpaceH265   cs = fmtcl::ColorSpaceH265_UNSPECIFIED;
+	const auto     cs =
+		fmtcl::MatrixUtil::find_cs_from_mat_str (mat, allow_2020cl_flag);
 
-	if (mat.empty () || mat == "rgb")
-	{
-		cs = fmtcl::ColorSpaceH265_RGB;
-	}
-	else if (mat == "601")
-	{
-		cs = fmtcl::ColorSpaceH265_SMPTE170M;
-	}
-	else if (mat == "709")
-	{
-		cs = fmtcl::ColorSpaceH265_BT709;
-	}
-	else if (mat == "240")
-	{
-		cs = fmtcl::ColorSpaceH265_SMPTE240M;
-	}
-	else if (mat == "fcc")
-	{
-		cs = fmtcl::ColorSpaceH265_FCC;
-	}
-	else if (mat == "ycgco" || mat == "ycocg")
-	{
-		cs = fmtcl::ColorSpaceH265_YCGCO;
-	}
-	else if (mat == "2020")
-	{
-		cs = fmtcl::ColorSpaceH265_BT2020NCL;
-	}
-	else if (mat == "2020cl" && allow_2020cl_flag)
-	{
-		cs = fmtcl::ColorSpaceH265_BT2020CL;
-	}
-	else if (mat == "ydzdx")
-	{
-		cs = fmtcl::ColorSpaceH265_YDZDX;
-	}
-	else if (mat == "lms")
-	{
-		cs = fmtcl::ColorSpaceH265_LMS;
-	}
-	else if (mat == "ictcp_pq")
-	{
-		cs = fmtcl::ColorSpaceH265_ICTCP_PQ;
-	}
-	else if (mat == "ictcp_hlg")
-	{
-		cs = fmtcl::ColorSpaceH265_ICTCP_HLG;
-	}
-	else
+	if (cs == fmtcl::ColorSpaceH265_UNDEF)
 	{
 		flt.throw_inval_arg ("unknown matrix identifier.");
 	}
 
-	return (cs);
+	return cs;
 }
 
 
@@ -474,6 +433,10 @@ fmtcl::ColorSpaceH265	Matrix::find_cs_from_mat_str (const vsutl::FilterBase &flt
 
 
 
+constexpr int	Matrix::_nbr_planes;
+
+
+
 const ::VSFormat *	Matrix::get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src, int &plane_out, bool &force_col_fam_flag) const
 {
 	force_col_fam_flag = false;
@@ -485,9 +448,12 @@ const ::VSFormat *	Matrix::get_output_colorspace (const ::VSMap &in, ::VSMap &ou
 	if (csp_dst != ::pfNone)
 	{
 		fmt_dst_ptr = _vsapi.getFormatPreset (csp_dst, &core);
-		if (fmt_dst_ptr == 0)
+		if (fmt_dst_ptr == nullptr)
 		{
 			throw_inval_arg ("unknown output colorspace.");
+			// The following return statement is never reached; it only prevents
+			// a false positive when compiling with -Wnull-dereference.
+			return &fmt_src;
 		}
 		else
 		{
@@ -538,17 +504,17 @@ const ::VSFormat *	Matrix::get_output_colorspace (const ::VSMap &in, ::VSMap &ou
 	}
 	catch (...)
 	{
-		fmt_dst_ptr = 0;
+		fmt_dst_ptr = nullptr;
 	}
 
-	if (fmt_dst_ptr == 0)
+	if (fmt_dst_ptr == nullptr)
 	{
 		throw_rt_err (
 			"couldn\'t get a pixel format identifier for the output clip."
 		);
 	}
 
-	return (fmt_dst_ptr);
+	return fmt_dst_ptr;
 }
 
 
@@ -626,238 +592,7 @@ const ::VSFormat *	Matrix::find_dst_col_fam (fmtcl::ColorSpaceH265 tmp_csp, cons
 		}
 	}
 
-	return (fmt_dst_ptr);
-}
-
-
-
-void	Matrix::make_mat_from_str (fmtcl::Mat4 &m, const std::string &mat, bool to_rgb_flag) const
-{
-	if (mat.empty () || mat == "rgb")
-	{
-		m[0][0] = 1; m[0][1] = 0; m[0][2] = 0;
-		m[1][0] = 0; m[1][1] = 1; m[1][2] = 0;
-		m[2][0] = 0; m[2][1] = 0; m[2][2] = 1;
-		m.clean3 (1);
-	}
-	else if (mat == "601")
-	{
-		make_mat_yuv (m, 0.299, 0.587, 0.114, to_rgb_flag);
-	}
-	else if (mat == "709")
-	{
-		make_mat_yuv (m, 0.2126, 0.7152, 0.0722, to_rgb_flag);
-	}
-	else if (mat == "240")
-	{
-		make_mat_yuv (m, 0.212, 0.701, 0.087, to_rgb_flag);
-	}
-	else if (mat == "fcc")
-	{
-		make_mat_yuv (m, 0.30, 0.59, 0.11, to_rgb_flag);
-	}
-	else if (mat == "ycgco" || mat == "ycocg")
-	{
-		make_mat_ycgco (m, to_rgb_flag);
-	}
-	else if (mat == "2020")
-	{
-		make_mat_yuv (m, 0.2627, 0.678, 0.0593, to_rgb_flag);
-	}
-	else if (mat == "ydzdx")
-	{
-		make_mat_ydzdx (m, to_rgb_flag);
-	}
-	else if (mat == "lms")
-	{
-		make_mat_lms (m, to_rgb_flag);
-	}
-	else if (mat == "ictcp_pq")
-	{
-		make_mat_ictcp (m, false, to_rgb_flag);
-	}
-	else if (mat == "ictcp_hlg")
-	{
-		make_mat_ictcp (m, true, to_rgb_flag);
-	}
-	else
-	{
-		throw_inval_arg ("unknown matrix identifier.");
-	}
-}
-
-
-
-/*
-kr/kg/kb matrix (Rec. ITU-T H.265 2019-06, p. 413):
-
-R = Y                  + V*(1-Kr)
-G = Y - U*(1-Kb)*Kb/Kg - V*(1-Kr)*Kr/Kg
-B = Y + U*(1-Kb)
-
-Y =                  R * Kr        + G * Kg        + B * Kb
-U = (B-Y)/(1-Kb) = - R * Kr/(1-Kb) - G * Kg/(1-Kb) + B
-V = (R-Y)/(1-Kr) =   R             - G * Kg/(1-Kr) - B * Kb/(1-Kr)
-
-The given equations work for R, G, B in range [0 ; 1] and U and V in range
-[-1 ; 1]. Scaling must be applied to match the required range for U and V.
-
-R, G, B, Y range : [0 ; 1]
-U, V range : [-0.5 ; 0.5]
-*/
-
-void	Matrix::make_mat_yuv (fmtcl::Mat4 &m, double kr, double kg, double kb, bool to_rgb_flag)
-{
-	assert (! fstb::is_null (kg));
-	assert (! fstb::is_eq (kb, 1.0));
-	assert (! fstb::is_eq (kr, 1.0));
-
-	const double   r = 0.5;
-	const double   x = 1.0 / r;
-	if (to_rgb_flag)
-	{
-		m[0][0] = 1; m[0][1] =              0; m[0][2] = x*(1-kr)      ;
-		m[1][0] = 1; m[1][1] = x*(kb-1)*kb/kg; m[1][2] = x*(kr-1)*kr/kg;
-		m[2][0] = 1; m[2][1] = x*(1-kb)      ; m[2][2] =              0;
-	}
-
-	else
-	{
-		m[0][0] =     kr     ; m[0][1] =   kg       ; m[0][2] =   kb       ;
-		m[1][0] = r*kr/(kb-1); m[1][1] = r*kg/(kb-1); m[1][2] = r          ;
-		m[2][0] = r          ; m[2][1] = r*kg/(kr-1); m[2][2] = r*kb/(kr-1);
-	}
-
-	m.clean3 (1);
-}
-
-
-
-/*
-YCgCo matrix (Rec. ITU-T H.265 2019-06, p. 413):
-
-R  = Y - Cg + Co
-G  = Y + Cg
-B  = Y - Cg - Co
-
-Y  =  0.25 * R + 0.5  * G + 0.25 * B
-Cg = -0.25 * R + 0.5  * G - 0.25 * B
-Co =  0.5  * R            - 0.5  * B
-
-R, G, B, Y range : [0 ; 1]
-Cg, Co range : [-0.5 ; 0.5]
-
-Note: this implementation is not exactly the same as specified because the
-standard specifies specific steps to apply the RGB-to-YCgCo matrix, leading
-to different roundings.
-*/
-
-void	Matrix::make_mat_ycgco (fmtcl::Mat4 &m, bool to_rgb_flag)
-{
-	if (to_rgb_flag)
-	{
-		m[0][0] = 1; m[0][1] = -1; m[0][2] =  1;
-		m[1][0] = 1; m[1][1] =  1; m[1][2] =  0;
-		m[2][0] = 1; m[2][1] = -1; m[2][2] = -1;
-	}
-	else
-	{
-		m[0][0] =  0.25; m[0][1] = 0.5; m[0][2] =  0.25;
-		m[1][0] = -0.25; m[1][1] = 0.5; m[1][2] = -0.25;
-		m[2][0] =  0.5 ; m[2][1] = 0  ; m[2][2] = -0.5 ;
-	}
-
-	m.clean3 (1);
-}
-
-
-
-/*
-YDzDx transform (Rec. ITU-T H.265 2019-06, p. 414)
-
-Y  = G
-Dz = 0.5 * (0.986566 * B - Y)
-Dx = 0.5 * (R - 0.991902 * Y)
-
-Y  =                      G
-Dz =         - 0.5      * G + 0.493283 * B
-Dx = 0.5 * R - 0.495951 * G
-*/
-
-void	Matrix::make_mat_ydzdx (fmtcl::Mat4 &m, bool to_rgb_flag)
-{
-	fmtcl::Mat3    m3;
-	m3[0][0] = 0  ; m3[0][1] =  1       ; m3[0][2] = 0;
-	m3[1][0] = 0  ; m3[1][1] = -0.5     ; m3[1][2] = 0.493283;
-	m3[2][0] = 0.5; m3[2][1] = -0.495951; m3[2][2] = 0;
-
-	if (to_rgb_flag)
-	{
-		m3.invert ();
-	}
-
-	m.insert3 (m3);
-	m.clean3 (1);
-}
-
-
-
-/*
-LMS transform (Rec. ITU-T H.265 2019-06, p. 411)
-
-LMS is an intermediate colorspace for ICtCp transforms.
-LMS data are conveyed on RGB planes.
-Here, to_rgb_flag indicates real RGB target.
-*/
-
-void	Matrix::make_mat_lms (fmtcl::Mat4 &m, bool to_rgb_flag)
-{
-	fmtcl::Mat3    m3;
-	m3[0][0] = 1688; m3[0][1] = 2146; m3[0][2] =  262;
-	m3[1][0] =  683; m3[1][1] = 2951; m3[1][2] =  462;
-	m3[2][0] =   99; m3[2][1] =  309; m3[2][2] = 3688;
-	m3 *= 1.0 / 4096;
-
-	if (to_rgb_flag)
-	{
-		m3.invert ();
-	}
-
-	m.insert3 (m3);
-	m.clean3 (1);
-}
-
-
-
-/*
-ICtCp transfrom from and to LMS (Rec. ITU-T H.265 2019-06, p. 414)
-
-LMS data are conveyed on RGB planes.
-*/
-
-void	Matrix::make_mat_ictcp (fmtcl::Mat4 &m, bool hlg_flag, bool to_lms_flag)
-{
-	fmtcl::Mat3    m3;
-	m3[0][0] =  2048; m3[0][1] =   2048; m3[0][2] =    0;
-	if (hlg_flag)
-	{
-		m3[1][0] =  3625; m3[1][1] =  -7465; m3[1][2] = 3840;
-		m3[2][0] =  9500; m3[2][1] =  -9212; m3[2][2] = -288;
-	}
-	else
-	{
-		m3[1][0] =  6610; m3[1][1] = -13613; m3[1][2] = 7003;
-		m3[2][0] = 17933; m3[2][1] = -17390; m3[2][2] = -543;
-	}
-	m3 *= 1.0 / 4096;
-
-	if (to_lms_flag)
-	{
-		m3.invert ();
-	}
-
-	m.insert3 (m3);
-	m.clean3 (1);
+	return fmt_dst_ptr;
 }
 
 
diff --git a/src/fmtc/Matrix.h b/src/fmtc/Matrix.h
index 2f70c7d..b53aa8e 100644
--- a/src/fmtc/Matrix.h
+++ b/src/fmtc/Matrix.h
@@ -30,6 +30,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 #include "fstb/def.h"
 #include "fmtcl/CoefArrInt.h"
 #include "fmtcl/ColorSpaceH265.h"
+#include "fmtcl/Mat4.h"
 #include "fmtcl/MatrixProc.h"
 #include "fstb/AllocAlign.h"
 #include "vsutl/FilterBase.h"
@@ -47,11 +48,6 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 
 
-namespace fmtcl
-{
-	class Mat4;
-}
-
 namespace fmtc
 {
 
@@ -91,8 +87,7 @@ class Matrix
 
 private:
 
-	static const int  NBR_PLANES    = 3;
-	static const int  SHIFT_INT     = 12;  // Number of bits for the fractional part
+	static constexpr int _nbr_planes = 3;
 
 	enum Dir
 	{
@@ -107,13 +102,6 @@ class Matrix
 
 	const ::VSFormat *
 	               find_dst_col_fam (fmtcl::ColorSpaceH265 tmp_csp, const ::VSFormat *fmt_dst_ptr, const ::VSFormat &fmt_src, ::VSCore &core);
-	void           make_mat_from_str (fmtcl::Mat4 &m, const std::string &mat, bool to_rgb_flag) const;
-
-	static void    make_mat_yuv (fmtcl::Mat4 &m, double kr, double kg, double kb, bool to_rgb_flag);
-	static void    make_mat_ycgco (fmtcl::Mat4 &m, bool to_rgb_flag);
-	static void    make_mat_ydzdx (fmtcl::Mat4 &m, bool to_rgb_flag);
-	static void    make_mat_lms (fmtcl::Mat4 &m, bool to_rgb_flag);
-	static void    make_mat_ictcp (fmtcl::Mat4 &m, bool hlg_flag, bool to_lms_flag);
 
 	vsutl::NodeRefSPtr
 	               _clip_src_sptr;
@@ -146,7 +134,9 @@ class Matrix
 
 	               Matrix ()                               = delete;
 	               Matrix (const Matrix &other)            = delete;
+	               Matrix (Matrix &&other)                 = delete;
 	Matrix &       operator = (const Matrix &other)        = delete;
+	Matrix &       operator = (Matrix &&other)             = delete;
 	bool           operator == (const Matrix &other) const = delete;
 	bool           operator != (const Matrix &other) const = delete;
 
diff --git a/src/fmtc/Matrix2020CL.cpp b/src/fmtc/Matrix2020CL.cpp
index 54c88d3..9b86e76 100644
--- a/src/fmtc/Matrix2020CL.cpp
+++ b/src/fmtc/Matrix2020CL.cpp
@@ -268,7 +268,7 @@ const ::VSFrameRef *	Matrix2020CL::get_frame (int n, int activation_reason, void
 		}
 	}
 
-	return (dst_ptr);
+	return dst_ptr;
 }
 
 
@@ -350,7 +350,7 @@ const ::VSFormat &	Matrix2020CL::get_output_colorspace (const ::VSMap &in, ::VSM
 		);
 	}
 
-	return (*fmt_dst_ptr);
+	return *fmt_dst_ptr;
 }
 
 
diff --git a/src/fmtc/Matrix2020CL.h b/src/fmtc/Matrix2020CL.h
index 099dcbc..343c089 100644
--- a/src/fmtc/Matrix2020CL.h
+++ b/src/fmtc/Matrix2020CL.h
@@ -110,7 +110,9 @@ class Matrix2020CL
 
 	               Matrix2020CL ()                                = delete;
 	               Matrix2020CL (const Matrix2020CL &other)       = delete;
+	               Matrix2020CL (Matrix2020CL &&other)            = delete;
 	Matrix2020CL & operator = (const Matrix2020CL &other)         = delete;
+	Matrix2020CL & operator = (Matrix2020CL &&other)              = delete;
 	bool           operator == (const Matrix2020CL &other) const  = delete;
 	bool           operator != (const Matrix2020CL &other) const  = delete;
 
diff --git a/src/fmtc/Primaries.cpp b/src/fmtc/Primaries.cpp
index dd793bf..11a1724 100644
--- a/src/fmtc/Primaries.cpp
+++ b/src/fmtc/Primaries.cpp
@@ -26,7 +26,9 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 #include "fmtc/fnc.h"
 #include "fmtc/Primaries.h"
+#include "fmtcl/fnc.h"
 #include "fmtcl/Mat3.h"
+#include "fmtcl/PrimUtil.h"
 #include "fstb/def.h"
 #include "fstb/fnc.h"
 #include "vsutl/CpuOpt.h"
@@ -90,19 +92,20 @@ Primaries::Primaries (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VS
 	_vi_out.format = &fmt_dst;
 
 	// Primaries
-	_prim_s.init (*this, in, out, "prims");
-	_prim_s.init (*this, in, out, "rs", "gs", "bs", "ws");
+	init (_prim_s, *this, in, out, "prims");
+	init (_prim_s, *this, in, out, "rs", "gs", "bs", "ws");
 	if (! _prim_s.is_ready ())
 	{
 		throw_inval_arg ("input primaries not set.");
 	}
 
 	_prim_d = _prim_s;
-	_prim_d.init (*this, in, out, "primd");
-	_prim_d.init (*this, in, out, "rd", "gd", "bd", "wd");
+	init (_prim_d, *this, in, out, "primd");
+	init (_prim_d, *this, in, out, "rd", "gd", "bd", "wd");
 	assert (_prim_d.is_ready ());
 
-	const fmtcl::Mat3 mat_conv = compute_conversion_matrix ();
+	const fmtcl::Mat3 mat_conv =
+		fmtcl::PrimUtil::compute_conversion_matrix (_prim_s, _prim_d);
 	_mat_main.insert3 (mat_conv);
 	_mat_main.clean3 (1);
 
@@ -154,26 +157,26 @@ const ::VSFrameRef *	Primaries::get_frame (int n, int activation_reason, void *
 		const int         h = _vsapi.getFrameHeight (&src, 0);
 		dst_ptr = _vsapi.newVideoFrame (_vi_out.format, w, h, &src, &core);
 
-		uint8_t * const   dst_ptr_arr [fmtcl::MatrixProc::NBR_PLANES] =
+		uint8_t * const   dst_ptr_arr [fmtcl::MatrixProc::_nbr_planes] =
 		{
 			_vsapi.getWritePtr (dst_ptr, 0),
 			_vsapi.getWritePtr (dst_ptr, 1),
 			_vsapi.getWritePtr (dst_ptr, 2)
 		};
-		const int         dst_str_arr [fmtcl::MatrixProc::NBR_PLANES] =
+		const int         dst_str_arr [fmtcl::MatrixProc::_nbr_planes] =
 		{
 			_vsapi.getStride (dst_ptr, 0),
 			_vsapi.getStride (dst_ptr, 1),
 			_vsapi.getStride (dst_ptr, 2)
 		};
 		const uint8_t * const
-		                  src_ptr_arr [fmtcl::MatrixProc::NBR_PLANES] =
+		                  src_ptr_arr [fmtcl::MatrixProc::_nbr_planes] =
 		{
 			_vsapi.getReadPtr (&src, 0),
 			_vsapi.getReadPtr (&src, 1),
 			_vsapi.getReadPtr (&src, 2)
 		};
-		const int         src_str_arr [fmtcl::MatrixProc::NBR_PLANES] =
+		const int         src_str_arr [fmtcl::MatrixProc::_nbr_planes] =
 		{
 			_vsapi.getStride (&src, 0),
 			_vsapi.getStride (&src, 1),
@@ -213,92 +216,7 @@ const ::VSFrameRef *	Primaries::get_frame (int n, int activation_reason, void *
 
 
 
-void	Primaries::RgbSystem::init (const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *preset_0)
-{
-	assert (preset_0 != 0);
-
-	std::string    preset_str = filter.get_arg_str (in, out, preset_0, "");
-	fstb::conv_to_lower_case (preset_str);
-	_preset = conv_string_to_primaries (preset_str);
-	if (_preset >= 0)
-	{
-		set (_preset);
-	}
-}
-
-
-
-void	Primaries::RgbSystem::init (const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char r_0 [], const char g_0 [], const char b_0 [], const char w_0 [])
-{
-	assert (r_0 != 0);
-	assert (g_0 != 0);
-	assert (b_0 != 0);
-	assert (w_0 != 0);
-
-	const bool     ready_old_flag         = is_ready ();
-	std::array <Vec2, NBR_PLANES> rgb_old = _rgb;
-	Vec2                          w_old   = _white;
-
-	const char *   name_0_arr [NBR_PLANES] = { r_0, g_0, b_0 };
-	for (int k = 0; k < NBR_PLANES; ++k)
-	{
-		_init_flag_arr [k] |=
-			read_coord_tuple (_rgb [k], filter, in, out, name_0_arr [k]);
-	}
-
-	_init_flag_arr [NBR_PLANES] |=
-		read_coord_tuple (_white, filter, in, out, w_0);
-
-	if (ready_old_flag && is_ready () && (rgb_old != _rgb || w_old != _white))
-	{
-		_preset = fmtcl::PrimariesPreset_UNDEF;
-	}
-}
-
-
-
-bool	Primaries::RgbSystem::read_coord_tuple (Vec2 &c, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *name_0)
-{
-	bool           set_flag = false;
-	typedef std::vector <double> Vect;
-	Vect           v_def;
-
-	Vect           c_v = filter.get_arg_vflt (in, out, name_0, v_def);
-	if (c_v.size () != 0)
-	{
-		if (c_v.size () != c.size ())
-		{
-			fstb::snprintf4all (
-				filter._filter_error_msg_0,
-				filter._max_error_buf_len,
-				"%s: wrong number of coordinates (expected %d).",
-				name_0,
-				int (c.size ())
-			);
-			filter.throw_inval_arg (filter._filter_error_msg_0);
-		}
-		double            sum = 0;
-		for (size_t k = 0; k < c_v.size (); ++k)
-		{
-			sum += c_v [k];
-			c [k] = c_v [k];
-		}
-		if (c [1] == 0)
-		{
-			fstb::snprintf4all (
-				filter._filter_error_msg_0,
-				filter._max_error_buf_len,
-				"%s: y coordinate cannot be 0.",
-				name_0
-			);
-			filter.throw_inval_arg (filter._filter_error_msg_0);
-		}
-
-		set_flag = true;
-	}
-
-	return (set_flag);
-}
+constexpr int	Primaries::_nbr_planes;
 
 
 
@@ -339,239 +257,97 @@ void	Primaries::check_colorspace (const ::VSFormat &fmt, const char *inout_0) co
 		throw_inval_arg (_filter_error_msg_0);
 	}
 
-	assert (fmt.numPlanes == NBR_PLANES);
+	assert (fmt.numPlanes == _nbr_planes);
 }
 
 
 
-fmtcl::Mat3	Primaries::compute_conversion_matrix () const
+void	Primaries::init (fmtcl::RgbSystem &prim, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *preset_0)
 {
-	fmtcl::Mat3    rgb2xyz = compute_rgb2xyz (_prim_s);
-	fmtcl::Mat3    xyz2rgb = compute_rgb2xyz (_prim_d).invert ();
-	fmtcl::Mat3    adapt   = compute_chroma_adapt (_prim_s, _prim_d);
+	assert (preset_0 != 0);
 
-	return xyz2rgb * adapt * rgb2xyz;
+	std::string    preset_str = filter.get_arg_str (in, out, preset_0, "");
+	fstb::conv_to_lower_case (preset_str);
+	prim._preset = fmtcl::PrimUtil::conv_string_to_primaries (preset_str);
+	if (prim._preset >= 0)
+	{
+		prim.set (prim._preset);
+	}
 }
 
 
 
-// http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
-fmtcl::Mat3	Primaries::compute_rgb2xyz (const RgbSystem &prim)
+void	Primaries::init (fmtcl::RgbSystem &prim, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char r_0 [], const char g_0 [], const char b_0 [], const char w_0 [])
 {
-	fmtcl::Mat3    m;
+	assert (r_0 != 0);
+	assert (g_0 != 0);
+	assert (b_0 != 0);
+	assert (w_0 != 0);
 
-	if (prim._preset == fmtcl::PrimariesPreset_CIEXYZ)
-	{
-		m = fmtcl::Mat3 (1, fmtcl::Mat3::Preset_DIAGONAL);
-	}
+	const bool     ready_old_flag = prim.is_ready ();
+	std::array <fmtcl::RgbSystem::Vec2, _nbr_planes> rgb_old = prim._rgb;
+	fmtcl::RgbSystem::Vec2  w_old = prim._white;
 
-	else
+	const char *   name_0_arr [_nbr_planes] = { r_0, g_0, b_0 };
+	for (int k = 0; k < _nbr_planes; ++k)
 	{
-		const fmtcl::Vec3 white = conv_xy_to_xyz (prim._white);
-
-		fmtcl::Mat3    xyzrgb;
-		for (int k = 0; k < NBR_PLANES; ++k)
-		{
-			fmtcl::Vec3    comp_xyz = conv_xy_to_xyz (prim._rgb [k]);
-			xyzrgb.set_col (k, comp_xyz);
-		}
-
-		fmtcl::Vec3    s = xyzrgb.compute_inverse () * white;
-
-		for (int u = 0; u < NBR_PLANES; ++u)
-		{
-			m.set_col (u, xyzrgb.get_col (u) * s [u]);
-		}
+		prim._init_flag_arr [k] |=
+			read_coord_tuple (prim._rgb [k], filter, in, out, name_0_arr [k]);
 	}
 
-	return m;
-}
-
-
+	prim._init_flag_arr [_nbr_planes] |=
+		read_coord_tuple (prim._white, filter, in, out, w_0);
 
-// http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html
-fmtcl::Mat3	Primaries::compute_chroma_adapt (const RgbSystem &prim_s, const RgbSystem &prim_d)
-{
-	fmtcl::Vec3    white_s = conv_xy_to_xyz (prim_s._white);
-	fmtcl::Vec3    white_d = conv_xy_to_xyz (prim_d._white);
-
-	// Bradford adaptation
-	const fmtcl::Mat3 ma ({
-		fmtcl::Vec3 ( 0.8951,  0.2664, -0.1614),
-		fmtcl::Vec3 (-0.7502,  1.7135,  0.0367),
-		fmtcl::Vec3 ( 0.0389, -0.0685,  1.0296)
-	});
-
-	fmtcl::Vec3    crd_s = ma * white_s;
-	fmtcl::Vec3    crd_d = ma * white_d;
-	fmtcl::Mat3    scale (0.0);
-	for (int k = 0; k < NBR_PLANES; ++k)
+	if (   ready_old_flag && prim.is_ready ()
+	    && (rgb_old != prim._rgb || w_old != prim._white))
 	{
-		assert (crd_s [k] != 0);
-		scale [k] [k] = crd_d [k] / crd_s [k];
+		prim._preset = fmtcl::PrimariesPreset_UNDEF;
 	}
-
-	return ma.compute_inverse () * scale * ma;
 }
 
 
 
-// Obtains X, Y, Z from (x, y)
-// Y is assumed to be 1.0
-// X =      x      / y
-// Z = (1 - x - y) / y
-// http://www.brucelindbloom.com/index.html?Eqn_xyY_to_XYZ.html
-fmtcl::Vec3	Primaries::conv_xy_to_xyz (const RgbSystem::Vec2 &xy)
+bool	Primaries::read_coord_tuple (fmtcl::RgbSystem::Vec2 &c, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *name_0)
 {
-	fmtcl::Vec3    xyz;
+	bool           set_flag = false;
+	typedef std::vector <double> Vect;
+	Vect           v_def;
 
-	// When y is null, X = Y = Z = 0.
-	if (fstb::is_null (xy [1]))
-	{
-		xyz [0] = 0;
-		xyz [1] = 0;
-		xyz [2] = 0;
-	}
-	else
+	Vect           c_v = filter.get_arg_vflt (in, out, name_0, v_def);
+	if (c_v.size () != 0)
 	{
-		xyz [0] =      xy [0]           / xy [1];
-		xyz [1] = 1;
-		xyz [2] = (1 - xy [0] - xy [1]) / xy [1];
-	}
-
-	return xyz;
-}
-
-
+		if (c_v.size () != c.size ())
+		{
+			fstb::snprintf4all (
+				filter._filter_error_msg_0,
+				filter._max_error_buf_len,
+				"%s: wrong number of coordinates (expected %d).",
+				name_0,
+				int (c.size ())
+			);
+			filter.throw_inval_arg (filter._filter_error_msg_0);
+		}
+		double            sum = 0;
+		for (size_t k = 0; k < c_v.size (); ++k)
+		{
+			sum += c_v [k];
+			c [k] = c_v [k];
+		}
+		if (c [1] == 0)
+		{
+			fstb::snprintf4all (
+				filter._filter_error_msg_0,
+				filter._max_error_buf_len,
+				"%s: y coordinate cannot be 0.",
+				name_0
+			);
+			filter.throw_inval_arg (filter._filter_error_msg_0);
+		}
 
-// str should be already converted to lower case
-fmtcl::PrimariesPreset	Primaries::conv_string_to_primaries (const std::string &str)
-{
-	fmtcl::PrimariesPreset  preset = fmtcl::PrimariesPreset_UNDEF;
-
-	if (        str == "709"
-	         || str == "1361"
-	         || str == "61966-2-1"
-	         || str == "61966-2-4"
-	         || str == "hdtv"
-	         || str == "srgb")
-	{
-		preset = fmtcl::PrimariesPreset_BT709;
-	}
-	else if (   str == "470m"
-	         || str == "ntsc")
-	{
-		preset = fmtcl::PrimariesPreset_FCC;
-	}
-	else if (   str == "470m93"
-	         || str == "ntscj")
-	{
-		preset = fmtcl::PrimariesPreset_NTSCJ;
-	}
-	else if (   str == "470bg"
-	         || str == "601-625"
-	         || str == "1358-625"
-	         || str == "1700-625"
-	         || str == "pal"
-	         || str == "secam")
-	{
-		preset = fmtcl::PrimariesPreset_BT470BG;
-	}
-	else if (   str == "170m"
-	         || str == "601-525"
-	         || str == "1358-525"
-	         || str == "1700-525")
-	{
-		preset = fmtcl::PrimariesPreset_SMPTE170M;
-	}
-	else if (   str == "240m")
-	{
-		preset = fmtcl::PrimariesPreset_SMPTE240M;
-	}
-	else if (   str == "filmc")
-	{
-		preset = fmtcl::PrimariesPreset_GENERIC_FILM;
-	}
-	else if (   str == "2020"
-	         || str == "2100"
-	         || str == "uhdtv")
-	{
-		preset = fmtcl::PrimariesPreset_BT2020;
-	}
-	else if (   str == "61966-2-2"
-	         || str == "scrgb")
-	{
-		preset = fmtcl::PrimariesPreset_SCRGB;
-	}
-	else if (   str == "adobe98")
-	{
-		preset = fmtcl::PrimariesPreset_ADOBE_RGB_98;
-	}
-	else if (   str == "adobewide")
-	{
-		preset = fmtcl::PrimariesPreset_ADOBE_RGB_WIDE;
-	}
-	else if (   str == "apple")
-	{
-		preset = fmtcl::PrimariesPreset_APPLE_RGB;
-	}
-	else if (   str == "photopro"
-	         || str == "romm")
-	{
-		preset = fmtcl::PrimariesPreset_ROMM;
-	}
-	else if (   str == "ciergb")
-	{
-		preset = fmtcl::PrimariesPreset_CIERGB;
-	}
-	else if (   str == "ciexyz")
-	{
-		preset = fmtcl::PrimariesPreset_CIEXYZ;
-	}
-	else if (   str == "p3d65"
-	         || str == "dcip3")
-	{
-		preset = fmtcl::PrimariesPreset_P3D65;
-	}
-	else if (   str == "aces")
-	{
-		preset = fmtcl::PrimariesPreset_ACES;
-	}
-	else if (   str == "ap1")
-	{
-		preset = fmtcl::PrimariesPreset_ACESAP1;
-	}
-	else if (   str == "sgamut"
-	         || str == "sgamut3")
-	{
-		preset = fmtcl::PrimariesPreset_SGAMUT;
-	}
-	else if (   str == "sgamut3cine")
-	{
-		preset = fmtcl::PrimariesPreset_SGAMUT3CINE;
-	}
-	else if (   str == "alexa")
-	{
-		preset = fmtcl::PrimariesPreset_ALEXA;
-	}
-	else if (   str == "vgamut")
-	{
-		preset = fmtcl::PrimariesPreset_VGAMUT;
-	}
-	else if (   str == "p3dci")
-	{
-		preset = fmtcl::PrimariesPreset_P3DCI;
-	}
-	else if (   str == "p3d60")
-	{
-		preset = fmtcl::PrimariesPreset_P3D60;
-	}
-	else if (   str == "3213")
-	{
-		preset = fmtcl::PrimariesPreset_EBU3213E;
+		set_flag = true;
 	}
 
-	return preset;
+	return (set_flag);
 }
 
 
diff --git a/src/fmtc/Primaries.h b/src/fmtc/Primaries.h
index 5a3546b..63ed96c 100644
--- a/src/fmtc/Primaries.h
+++ b/src/fmtc/Primaries.h
@@ -70,28 +70,13 @@ class Primaries
 
 private:
 
-	static const int  NBR_PLANES    = 3;
-
-	class RgbSystem
-	:	public fmtcl::RgbSystem
-	{
-	public:
-		               RgbSystem () = default;
-		void           init (const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *preset_0);
-		void           init (const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char r_0 [], const char g_0 [], const char b_0 [], const char w_0 []);
-		static bool    read_coord_tuple (Vec2 &c, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *name_0);
-	};
+	static constexpr int _nbr_planes = fmtcl::RgbSystem::_nbr_planes;
 
 	void           check_colorspace (const ::VSFormat &fmt, const char *inout_0) const;
-	fmtcl::Mat3    compute_conversion_matrix () const;
-	static fmtcl::Mat3
-	               compute_rgb2xyz (const RgbSystem &prim);
-	static fmtcl::Mat3
-	               compute_chroma_adapt (const RgbSystem &prim_s, const RgbSystem &prim_d);
-	static fmtcl::Vec3
-	               conv_xy_to_xyz (const RgbSystem::Vec2 &xy);
-	static fmtcl::PrimariesPreset
-	               conv_string_to_primaries (const std::string &preset);
+
+	static void    init (fmtcl::RgbSystem &prim, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *preset_0);
+	static void    init (fmtcl::RgbSystem &prim, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char r_0 [], const char g_0 [], const char b_0 [], const char w_0 []);
+	static bool    read_coord_tuple (fmtcl::RgbSystem::Vec2 &c, const vsutl::FilterBase &filter, const ::VSMap &in, ::VSMap &out, const char *name_0);
 
 	vsutl::NodeRefSPtr
 	               _clip_src_sptr;
@@ -104,8 +89,10 @@ class Primaries
 	bool           _avx_flag;
 	bool           _avx2_flag;
 
-	RgbSystem      _prim_s;
-	RgbSystem      _prim_d;
+	fmtcl::RgbSystem
+	               _prim_s;
+	fmtcl::RgbSystem
+	               _prim_d;
 
 	fmtcl::Mat4    _mat_main;
 
@@ -120,7 +107,9 @@ class Primaries
 
 	               Primaries ()                               = delete;
 	               Primaries (const Primaries &other)         = delete;
+	               Primaries (Primaries &&other)              = delete;
 	Primaries &    operator = (const Primaries &other)        = delete;
+	Primaries &    operator = (Primaries &&other)             = delete;
 	bool           operator == (const Primaries &other) const = delete;
 	bool           operator != (const Primaries &other) const = delete;
 
diff --git a/src/fmtc/Resample.cpp b/src/fmtc/Resample.cpp
index db7a62d..637c8e2 100644
--- a/src/fmtc/Resample.cpp
+++ b/src/fmtc/Resample.cpp
@@ -24,8 +24,10 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
+#include "fmtc/fnc.h"
 #include "fmtc/Resample.h"
 #include "fmtc/SplFmtUtl.h"
+#include "fmtcl/ResampleUtil.h"
 #include "fstb/def.h"
 #include "vsutl/CpuOpt.h"
 #include "vsutl/fnc.h"
@@ -203,7 +205,7 @@ Resample::Resample (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo
 
 	for (int plane_index = 0; plane_index < fmt_src.numPlanes; ++plane_index)
 	{
-		PlaneData &    plane_data = _plane_data_arr [plane_index];
+		auto &         plane_data = _plane_data_arr [plane_index];
 		vsutl::compute_fmt_mac_cst (
 			plane_data._gain,
 			plane_data._add_cst,
@@ -275,10 +277,10 @@ Resample::Resample (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo
 	const int      nbr_sh = _vsapi.propNumElements (&in, "sh");
 	for (int plane_index = 0; plane_index < fmt_src.numPlanes; ++plane_index)
 	{
-		PlaneData &    plane_data = _plane_data_arr [plane_index];
+		auto &         plane_data = _plane_data_arr [plane_index];
 
 		// Source window
-		Win &          s = plane_data._win;
+		auto &         s = plane_data._win;
 		if (plane_index > 0)
 		{
 			s = _plane_data_arr [plane_index - 1]._win;
@@ -430,7 +432,7 @@ Resample::Resample (const ::VSMap &in, ::VSMap &out, void *user_data_ptr, ::VSCo
 		);
 	}
 
-	create_plane_specs ();
+	create_all_plane_specs ();
 }
 
 
@@ -557,7 +559,7 @@ const ::VSFrameRef *	Resample::get_frame (int n, int activation_reason, void * &
 		}
 	}
 
-	return (dst_ptr);
+	return dst_ptr;
 }
 
 
@@ -567,15 +569,15 @@ fmtcl::ChromaPlacement	Resample::conv_str_to_chroma_placement (const vsutl::Filt
 	fmtcl::ChromaPlacement  cp_val = fmtcl::ChromaPlacement_MPEG1;
 
 	fstb::conv_to_lower_case (cplace);
-	if (strcmp (cplace.c_str (), "mpeg1") == 0)
+	if (cplace == "mpeg1")
 	{
 		cp_val = fmtcl::ChromaPlacement_MPEG1;
 	}
-	else if (strcmp (cplace.c_str (), "mpeg2") == 0)
+	else if (cplace == "mpeg2")
 	{
 		cp_val = fmtcl::ChromaPlacement_MPEG2;
 	}
-	else if (strcmp (cplace.c_str (), "dv") == 0)
+	else if (cplace == "dv")
 	{
 		cp_val = fmtcl::ChromaPlacement_DV;
 	}
@@ -584,7 +586,7 @@ fmtcl::ChromaPlacement	Resample::conv_str_to_chroma_placement (const vsutl::Filt
 		flt.throw_inval_arg ("unexpected cplace string.");
 	}
 
-	return (cp_val);
+	return cp_val;
 }
 
 
@@ -637,7 +639,7 @@ int	Resample::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void
 		_plane_processor.fill_plane (dst, val, plane_index);
 	}
 
-	return (ret_val);
+	return ret_val;
 }
 
 
@@ -646,6 +648,10 @@ int	Resample::do_process_plane (::VSFrameRef &dst, int n, int plane_index, void
 
 
 
+constexpr int	Resample::_max_nbr_planes;
+
+
+
 const ::VSFormat &	Resample::get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src) const
 {
 	const ::VSFormat *   fmt_dst_ptr = &fmt_src;
@@ -697,7 +703,7 @@ const ::VSFormat &	Resample::get_output_colorspace (const ::VSMap &in, ::VSMap &
 		);
 	}
 
-	return (*fmt_dst_ptr);
+	return *fmt_dst_ptr;
 }
 
 
@@ -712,7 +718,7 @@ bool	Resample::cumulate_flag (bool flag, const ::VSMap &in, ::VSMap &out, const
 		flag = (val != 0);
 	}
 
-	return (flag);
+	return flag;
 }
 
 
@@ -809,10 +815,12 @@ int	Resample::process_plane_proc (::VSFrameRef &dst, int n, int plane_index, voi
 
 	const FrameInfo & frame_info =
 		*reinterpret_cast <const FrameInfo *> (frame_data_ptr);
-	const InterlacingType   itl_s =
-		get_itl_type (frame_info._itl_s_flag, frame_info._top_s_flag);
-	const InterlacingType   itl_d =
-		get_itl_type (frame_info._itl_d_flag, frame_info._top_d_flag);
+	const fmtcl::InterlacingType  itl_s = fmtcl::InterlacingType_get (
+		frame_info._itl_s_flag, frame_info._top_s_flag
+	);
+	const fmtcl::InterlacingType  itl_d = fmtcl::InterlacingType_get (
+		frame_info._itl_d_flag, frame_info._top_d_flag
+	);
 
 	try
 	{
@@ -845,7 +853,7 @@ int	Resample::process_plane_proc (::VSFrameRef &dst, int n, int plane_index, voi
 		ret_val = -1;
 	}
 
-	return (ret_val);
+	return ret_val;
 }
 
 
@@ -882,7 +890,7 @@ int	Resample::process_plane_copy (::VSFrameRef &dst, int n, int plane_index, voi
 	const bool     src_flt_flag = (_src_type == fmtcl::SplFmt_FLOAT);
 	if (dst_flt_flag != src_flt_flag)
 	{
-		const PlaneData & plane_data = _plane_data_arr [plane_index];
+		const auto &   plane_data = _plane_data_arr [plane_index];
 		scale_info._gain    = plane_data._gain;
 		scale_info._add_cst = plane_data._add_cst;
 
@@ -896,20 +904,20 @@ int	Resample::process_plane_copy (::VSFrameRef &dst, int n, int plane_index, voi
 		w, h, scale_info_ptr
 	);
 
-	return (ret_val);
+	return ret_val;
 }
 
 
 
-fmtcl::FilterResize *	Resample::create_or_access_plane_filter (int plane_index, InterlacingType itl_d, InterlacingType itl_s)
+fmtcl::FilterResize *	Resample::create_or_access_plane_filter (int plane_index, fmtcl::InterlacingType itl_d, fmtcl::InterlacingType itl_s)
 {
 	assert (plane_index >= 0);
 	assert (itl_d >= 0);
-	assert (itl_d < InterlacingType_NBR_ELT);
+	assert (itl_d < fmtcl::InterlacingType_NBR_ELT);
 	assert (itl_s >= 0);
-	assert (itl_s < InterlacingType_NBR_ELT);
+	assert (itl_s < fmtcl::InterlacingType_NBR_ELT);
 
-	const PlaneData & plane_data         = _plane_data_arr [plane_index];
+	const auto &   plane_data = _plane_data_arr [plane_index];
 	const fmtcl::ResampleSpecPlane & key = plane_data._spec_arr [itl_d] [itl_s];
 
 	std::lock_guard <std::mutex>  autolock (_filter_mutex);
@@ -917,7 +925,7 @@ fmtcl::FilterResize *	Resample::create_or_access_plane_filter (int plane_index,
 	std::unique_ptr <fmtcl::FilterResize> &   filter_uptr = _filter_uptr_map [key];
 	if (filter_uptr.get () == 0)
 	{
-		filter_uptr = std::unique_ptr <fmtcl::FilterResize> (new fmtcl::FilterResize (
+		filter_uptr = std::make_unique <fmtcl::FilterResize> (
 			key,
 			*(plane_data._kernel_arr [fmtcl::FilterResize::Dir_H]._k_uptr),
 			*(plane_data._kernel_arr [fmtcl::FilterResize::Dir_V]._k_uptr),
@@ -925,98 +933,32 @@ fmtcl::FilterResize *	Resample::create_or_access_plane_filter (int plane_index,
 			plane_data._gain,
 			_src_type, _src_res, _dst_type, _dst_res,
 			_int_flag, _sse2_flag, _avx2_flag
-		));
+		);
 	}
 
-	return (filter_uptr.get ());
+	return filter_uptr.get ();
 }
 
 
 
-void	Resample::create_plane_specs ()
+void	Resample::create_all_plane_specs ()
 {
-	fmtcl::ResampleSpecPlane   spec;
-
-	const int      src_w = _vi_in.width;
-	const int      src_h = _vi_in.height;
-	const int      dst_w = _vi_out.width;
-	const int      dst_h = _vi_out.height;
+	const fmtcl::ColorFamily src_cf = fmtc::conv_colfam_to_fmtcl (*_vi_in.format);
+	const fmtcl::ColorFamily dst_cf = fmtc::conv_colfam_to_fmtcl (*_vi_out.format);
+	const int      src_ss_h   = _vi_in.format->subSamplingW;
+	const int      src_ss_v   = _vi_in.format->subSamplingH;
+	const int      dst_ss_h   = _vi_out.format->subSamplingW;
+	const int      dst_ss_v   = _vi_out.format->subSamplingH;
 	const int      nbr_planes = _vi_in.format->numPlanes;
-
 	for (int plane_index = 0; plane_index < nbr_planes; ++plane_index)
 	{
-		PlaneData &    plane_data = _plane_data_arr [plane_index];
-
-		spec._src_width  =
-			vsutl::compute_plane_width (*_vi_in.format, plane_index, src_w);
-		spec._src_height =
-			vsutl::compute_plane_height (*_vi_in.format, plane_index, src_h);
-		spec._dst_width  =
-			vsutl::compute_plane_width (*_vi_out.format, plane_index, dst_w);
-		spec._dst_height =
-			vsutl::compute_plane_height (*_vi_out.format, plane_index, dst_h);
-
-		const int      subspl_h = src_w / spec._src_width;
-		const int      subspl_v = src_h / spec._src_height;
-
-		const Win &    s = plane_data._win;
-		spec._win_x   = s._x / subspl_h;
-		spec._win_y   = s._y / subspl_v;
-		spec._win_w   = s._w / subspl_h;
-		spec._win_h   = s._h / subspl_v;
-
-		spec._add_cst        = plane_data._add_cst;
-		spec._kernel_scale_h = plane_data._kernel_scale_h;
-		spec._kernel_scale_v = plane_data._kernel_scale_v;
-		spec._kernel_hash_h  = plane_data._kernel_arr [fmtcl::FilterResize::Dir_H].get_hash ();
-		spec._kernel_hash_v  = plane_data._kernel_arr [fmtcl::FilterResize::Dir_V].get_hash ();
-
-		for (int itl_d = 0; itl_d < InterlacingType_NBR_ELT; ++itl_d)
-		{
-			for (int itl_s = 0; itl_s < InterlacingType_NBR_ELT; ++itl_s)
-			{
-				double         cp_s_h = 0;
-				double         cp_s_v = 0;
-				double         cp_d_h = 0;
-				double         cp_d_v = 0;
-				if (plane_data._preserve_center_flag)
-				{
-					fmtcl::ChromaPlacement_compute_cplace (
-						cp_s_h, cp_s_v, _cplace_s, plane_index,
-						_vi_in.format->subSamplingW, _vi_in.format->subSamplingH,
-						(_vi_in.format->colorFamily == ::cmRGB),
-						(itl_s != InterlacingType_FRAME),
-						(itl_s == InterlacingType_TOP)
-					);
-					fmtcl::ChromaPlacement_compute_cplace (
-						cp_d_h, cp_d_v, _cplace_d, plane_index,
-						_vi_out.format->subSamplingW, _vi_out.format->subSamplingH,
-						(_vi_out.format->colorFamily == ::cmRGB),
-						(itl_d != InterlacingType_FRAME),
-						(itl_d == InterlacingType_TOP)
-					);
-				}
-
-				spec._center_pos_src_h = cp_s_h;
-				spec._center_pos_src_v = cp_s_v;
-				spec._center_pos_dst_h = cp_d_h;
-				spec._center_pos_dst_v = cp_d_v;
-
-				plane_data._spec_arr [itl_d] [itl_s] = spec;
-			}  // for itl_s
-		}  // for itl_d
-	}  // for plane_index
-}
-
-
-
-Resample::InterlacingType	Resample::get_itl_type (bool itl_flag, bool top_flag)
-{
-	return (
-		(itl_flag) ? ((top_flag) ? InterlacingType_TOP
-		                         : InterlacingType_BOT)
-		           :               InterlacingType_FRAME
-	);
+		auto &         plane_data = _plane_data_arr [plane_index];
+		fmtcl::ResampleUtil::create_plane_specs (	// Per-plane spec computation is delegated to fmtcl::ResampleUtil
+			plane_data, plane_index,
+			src_cf, _src_width   , src_ss_h, _src_height   , src_ss_v, _cplace_s,	// Source side. NOTE(review): the replaced code read the size from _vi_in.width/height - confirm _src_width/_src_height hold the same values
+			dst_cf, _vi_out.width, dst_ss_h, _vi_out.height, dst_ss_v, _cplace_d	// Destination side, same argument order as the source line
+		);
+	}
 }
 
 
diff --git a/src/fmtc/Resample.h b/src/fmtc/Resample.h
index 66b44ae..e5a3070 100644
--- a/src/fmtc/Resample.h
+++ b/src/fmtc/Resample.h
@@ -28,8 +28,11 @@ To Public License, Version 2, as published by Sam Hocevar. See
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 #include "fmtcl/ChromaPlacement.h"
+#include "fmtcl/ColorFamily.h"
 #include "fmtcl/FilterResize.h"
+#include "fmtcl/InterlacingType.h"
 #include "fmtcl/KernelData.h"
+#include "fmtcl/ResamplePlaneData.h"
 #include "fmtcl/ResampleSpecPlane.h"
 #include "vsutl/FilterBase.h"
 #include "vsutl/NodeRefSPtr.h"
@@ -86,7 +89,7 @@ class Resample
 
 private:
 
-	static const int  MAX_NBR_PLANES = 3;
+	static constexpr int _max_nbr_planes = 3;
 
 	enum InterlacingParam
 	{
@@ -106,55 +109,16 @@ class Resample
 		FieldOrder_NBR_ELT
 	};
 
-	enum InterlacingType
-	{
-		InterlacingType_FRAME = 0,
-		InterlacingType_TOP,
-		InterlacingType_BOT,
-
-		InterlacingType_NBR_ELT
-	};
-
-	class Win
-	{
-	public:
-		double         _x;	// Data is in full coordinates whatever the plane (never subsampled)
-		double         _y;
-		double         _w;
-		double         _h;
-	};
-
 	class FrameInfo
 	{
 	public:
-		bool           _itl_s_flag;
-		bool           _top_s_flag;
-		bool           _itl_d_flag;
-		bool           _top_d_flag;
+		bool           _itl_s_flag = false;	// Source: interlacing flag, passed to fmtcl::InterlacingType_get () with _top_s_flag
+		bool           _top_s_flag = false;	// Source: top-field flag
+		bool           _itl_d_flag = false;	// Destination: interlacing flag, passed to fmtcl::InterlacingType_get () with _top_d_flag
+		bool           _top_d_flag = false;	// Destination: top-field flag
 	};
 
-	// Array order: [dest] [src]
-	typedef std::array <fmtcl::ResampleSpecPlane, InterlacingType_NBR_ELT> SpecSrcArray;
-	typedef std::array <SpecSrcArray,             InterlacingType_NBR_ELT> SpecArray;
-
-	class PlaneData
-	{
-	public:
-		typedef std::array <
-			fmtcl::KernelData,
-			fmtcl::FilterResize::Dir_NBR_ELT
-		>  KernelArray;
-		Win            _win;
-		SpecArray      _spec_arr;        // Contains the spec (used as a key) for each plane/interlacing combination
-		KernelArray    _kernel_arr;
-		double         _kernel_scale_h;  // Can be negative (forced scaling)
-		double         _kernel_scale_v;  // Can be negative (forced scaling)
-		double         _gain;
-		double         _add_cst;
-		bool           _preserve_center_flag;
-	};
-
-	typedef std::array <PlaneData, MAX_NBR_PLANES> PlaneDataArray;
+	typedef std::array <fmtcl::ResamplePlaneData, _max_nbr_planes> PlaneDataArray;
 
 	const ::VSFormat &
 	               get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src) const;
@@ -163,11 +127,8 @@ class Resample
 	int            process_plane_proc (::VSFrameRef &dst, int n, int plane_index, void *frame_data_ptr, ::VSFrameContext &frame_ctx, ::VSCore &core, const vsutl::NodeRefSPtr &src_node1_sptr);
 	int            process_plane_copy (::VSFrameRef &dst, int n, int plane_index, void *frame_data_ptr, ::VSFrameContext &frame_ctx, ::VSCore &core, const vsutl::NodeRefSPtr &src_node1_sptr);
 	fmtcl::FilterResize *
-	               create_or_access_plane_filter (int plane_index, InterlacingType itl_d, InterlacingType itl_s);
-	void           create_plane_specs ();
-
-	static InterlacingType
-	               get_itl_type (bool itl_flag, bool top_flag);
+	               create_or_access_plane_filter (int plane_index, fmtcl::InterlacingType itl_d, fmtcl::InterlacingType itl_s);
+	void           create_all_plane_specs ();
 
 	vsutl::NodeRefSPtr
 	               _clip_src_sptr;
@@ -222,7 +183,9 @@ class Resample
 
 	               Resample ()                               = delete;
 	               Resample (const Resample &other)          = delete;
+	               Resample (Resample &&other)               = delete;
 	Resample &     operator = (const Resample &other)        = delete;
+	Resample &     operator = (Resample &&other)             = delete;
 	bool           operator == (const Resample &other) const = delete;
 	bool           operator != (const Resample &other) const = delete;
 
diff --git a/src/fmtc/Transfer.cpp b/src/fmtc/Transfer.cpp
index 8dc5b8d..688f61f 100644
--- a/src/fmtc/Transfer.cpp
+++ b/src/fmtc/Transfer.cpp
@@ -28,22 +28,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 #include "fmtc/Transfer.h"
 #include "fmtc/fnc.h"
-#include "fmtcl/TransOp2084.h"
-#include "fmtcl/TransOpAcesCc.h"
-#include "fmtcl/TransOpAffine.h"
-#include "fmtcl/TransOpBypass.h"
-#include "fmtcl/TransOpCanonLog.h"
-#include "fmtcl/TransOpCompose.h"
-#include "fmtcl/TransOpContrast.h"
-#include "fmtcl/TransOpErimm.h"
-#include "fmtcl/TransOpFilmStream.h"
-#include "fmtcl/TransOpHlg.h"
-#include "fmtcl/TransOpLinPow.h"
-#include "fmtcl/TransOpLogC.h"
-#include "fmtcl/TransOpLogTrunc.h"
-#include "fmtcl/TransOpPow.h"
-#include "fmtcl/TransOpSLog.h"
-#include "fmtcl/TransOpSLog3.h"
+#include "fmtcl/TransUtil.h"
 #include "fstb/fnc.h"
 #include "vsutl/CpuOpt.h"
 #include "vsutl/fnc.h"
@@ -80,6 +65,8 @@ Transfer::Transfer (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, :
 ,	_full_range_dst_flag (get_arg_int (in, out, "fulld", 1) != 0)
 ,	_curve_s (fmtcl::TransCurve_UNDEF)
 ,	_curve_d (fmtcl::TransCurve_UNDEF)
+,	_logc_ei_s (fmtcl::TransOpLogC::ExpIdx_800)
+,	_logc_ei_d (fmtcl::TransOpLogC::ExpIdx_800)
 ,	_loglut_flag (false)
 #if defined (_MSC_VER)
 #pragma warning (push)
@@ -135,7 +122,56 @@ Transfer::Transfer (const ::VSMap &in, ::VSMap &out, void * /*user_data_ptr*/, :
 	// Output format is validated.
 	_vi_out.format = &fmt_dst;
 
-	init_table ();
+	// Other parameters: transfer curves, Log C exposure indexes, contrast, gamma correction, black level
+	_curve_s = fmtcl::TransUtil::conv_string_to_curve (_transs);
+	if (_curve_s == fmtcl::TransCurve_UNDEF)	// UNDEF is treated as an unrecognised curve name
+	{
+		throw_inval_arg ("invalid transs value.");
+	}
+	_curve_d = fmtcl::TransUtil::conv_string_to_curve (_transd);
+	if (_curve_d == fmtcl::TransCurve_UNDEF)	// Same check for the destination curve
+	{
+		throw_inval_arg ("invalid transd value.");
+	}
+
+	const int      logc_ei_raw_s = get_arg_int (in, out, "logceis", 800);	// 800 is presumably the default used when the argument is absent - TODO confirm
+	_logc_ei_s = fmtcl::TransOpLogC::conv_logc_ei (logc_ei_raw_s);	// Raw integer -> ExpIdx enumerator
+	if (_logc_ei_s == fmtcl::TransOpLogC::ExpIdx_INVALID)
+	{
+		throw_inval_arg ("invalid logceis value.");
+	}
+
+	const int      logc_ei_raw_d = get_arg_int (in, out, "logceid", 800);	// Destination counterpart of "logceis"
+	_logc_ei_d = fmtcl::TransOpLogC::conv_logc_ei (logc_ei_raw_d);
+	if (_logc_ei_d == fmtcl::TransOpLogC::ExpIdx_INVALID)
+	{
+		throw_inval_arg ("invalid logceid value.");
+	}
+
+	if (_contrast <= 0)	// Contrast must be strictly positive
+	{
+		throw_inval_arg ("invalid cont value.");
+	}
+	if (_gcor <= 0)	// Gamma correction must be strictly positive
+	{
+		throw_inval_arg ("invalid gcor value.");
+	}
+	if (_lvl_black < 0)	// Black level may be 0 but not negative
+	{
+		throw_inval_arg ("invalid blacklvl value.");
+	}
+
+	// Finally, build the transfer LUT from the validated settings
+	const fmtcl::PicFmt  src_fmt =
+		conv_vsfmt_to_picfmt (*_vi_in.format , _full_range_src_flag);
+	const fmtcl::PicFmt  dst_fmt =
+		conv_vsfmt_to_picfmt (*_vi_out.format, _full_range_dst_flag);
+	_lut_uptr = fmtcl::TransUtil::build_lut (
+		dst_fmt, _curve_d, _logc_ei_d,	// Destination description comes first
+		src_fmt, _curve_s, _logc_ei_s,	// Then the source description
+		_contrast, _gcor, _lvl_black,
+		_sse2_flag, _avx2_flag
+	);
 }
 
 
@@ -201,7 +237,7 @@ const ::VSFrameRef *	Transfer::get_frame (int n, int activation_reason, void * &
 		}
 	}
 
-	return (dst_ptr);
+	return dst_ptr;
 }
 
 
@@ -318,388 +354,7 @@ const ::VSFormat &	Transfer::get_output_colorspace (const ::VSMap &in, ::VSMap &
 		);
 	}
 
-	return (*fmt_dst_ptr);
-}
-
-
-
-void	Transfer::init_table ()
-{
-	_curve_s = conv_string_to_curve (*this, _transs);
-	_curve_d = conv_string_to_curve (*this, _transd);
-	OpSPtr         op_s = conv_curve_to_op (_curve_s, true );
-	OpSPtr         op_d = conv_curve_to_op (_curve_d, false);
-
-	// Linear or log LUT?
-	_loglut_flag = false;
-	if (   _vi_in.format->sampleType == ::stFloat
-	    && _curve_s == fmtcl::TransCurve_LINEAR)
-	{
-		// Curves with extended range or with fast-evolving slope at 0.
-		// Actually we could just use the log LUT for all the curves...?
-		// 10 bits per stop + interpolation should be enough for all of them.
-		// What about the speed?
-		if (   _curve_d == fmtcl::TransCurve_470BG
-		    || _curve_d == fmtcl::TransCurve_LINEAR
-		    || _curve_d == fmtcl::TransCurve_61966_2_4
-		    || _curve_d == fmtcl::TransCurve_2084
-		    || _curve_d == fmtcl::TransCurve_428
-		    || _curve_d == fmtcl::TransCurve_HLG
-		    || _curve_d == fmtcl::TransCurve_1886
-		    || _curve_d == fmtcl::TransCurve_1886A
-		    || _curve_d == fmtcl::TransCurve_SLOG
-		    || _curve_d == fmtcl::TransCurve_SLOG2
-		    || _curve_d == fmtcl::TransCurve_SLOG3
-		    || _curve_d == fmtcl::TransCurve_LOGC2
-		    || _curve_d == fmtcl::TransCurve_LOGC3
-		    || _curve_d == fmtcl::TransCurve_CANONLOG
-		    || _curve_d == fmtcl::TransCurve_ACESCC
-		    || _curve_d == fmtcl::TransCurve_ERIMM)
-		{
-			_loglut_flag = true;
-		}
-		if (_gcor < 0.5)
-		{
-			_loglut_flag = true;
-		}
-		if (fabs (_contrast) >= 3.0/2 || fabs (_contrast) <= 2.0/3)
-		{
-			_loglut_flag = true;
-		}
-	}
-
-	// Black level
-	const double   lw = op_s->get_max ();
-	if (_lvl_black > 0 && _lvl_black < lw)
-	{
-		/*
-		Black level (brightness) and contrast settings as defined
-		in ITU-R BT.1886:
-			L = a' * fi (V + b')
-
-		With:
-			fi = EOTF (gamma to linear)
-			L  = Lb for V = 0
-			L  = Lw for V = Vmax
-
-		For power functions, could be rewritten as:
-			L = fi (a * V + b)
-
-		Substitution:
-			Lb = fi (           b)
-			Lw = fi (a * Vmax + b)
-
-		Then, given:
-			f = OETF (linear to gamma)
-
-		We get:
-			f (Lb) = b
-			f (Lw) = a * Vmax + b
-
-			b =           f (Lb)
-			a = (f (Lw) - f (Lb)) / Vmax
-		*/
-		OpSPtr         oetf = conv_curve_to_op (_curve_s, false);
-		const double   lwg  = (*oetf) (lw        );
-		const double   lbg  = (*oetf) (_lvl_black);
-		const double   vmax =  lwg;
-		const double   a    = (lwg - lbg) / vmax;
-		const double   b    =        lbg;
-		OpSPtr         op_a (new fmtcl::TransOpAffine (a, b));
-		op_s = OpSPtr (new fmtcl::TransOpCompose (op_a, op_s));
-	}
-
-	// Gamma correction
-	if (! fstb::is_eq (_gcor, 1.0))
-	{
-		OpSPtr         op_g (new fmtcl::TransOpPow (true, _gcor, 1, 1e6));
-		op_d = OpSPtr (new fmtcl::TransOpCompose (op_g, op_d));
-	}
-
-	// Contrast
-	if (! fstb::is_eq (_contrast, 1.0))
-	{
-		OpSPtr         op_c (new fmtcl::TransOpContrast (_contrast));
-		op_d = OpSPtr (new fmtcl::TransOpCompose (op_c, op_d));
-	}
-
-	// LUTify
-	OpSPtr         op_f (new fmtcl::TransOpCompose (op_s, op_d));
-
-	const fmtcl::SplFmt  src_fmt = conv_vsfmt_to_splfmt (*_vi_in.format);
-	const fmtcl::SplFmt  dst_fmt = conv_vsfmt_to_splfmt (*_vi_out.format);
-	_lut_uptr = std::unique_ptr <fmtcl::TransLut> (new fmtcl::TransLut (
-		*op_f, _loglut_flag,
-		src_fmt, _vi_in.format->bitsPerSample, _full_range_src_flag,
-		dst_fmt, _vi_out.format->bitsPerSample, _full_range_dst_flag,
-		_sse2_flag, _avx2_flag
-	));
-}
-
-
-
-// str should be already converted to lower case
-fmtcl::TransCurve	Transfer::conv_string_to_curve (const vsutl::FilterBase &flt, const std::string &str)
-{
-	fmtcl::TransCurve c = fmtcl::TransCurve_UNDEF;
-	if (str == "709")
-	{
-		c = fmtcl::TransCurve_709;
-	}
-	else if (str == "470m")
-	{
-		c = fmtcl::TransCurve_470M;
-	}
-	else if (str == "470bg")
-	{
-		c = fmtcl::TransCurve_470BG;
-	}
-	else if (str == "601")
-	{
-		c = fmtcl::TransCurve_601;
-	}
-	else if (str == "240")
-	{
-		c = fmtcl::TransCurve_240;
-	}
-	else if (str.empty () || str == "linear")
-	{
-		c = fmtcl::TransCurve_LINEAR;
-	}
-	else if (str == "log100")
-	{
-		c = fmtcl::TransCurve_LOG100;
-	}
-	else if (str == "log316")
-	{
-		c = fmtcl::TransCurve_LOG316;
-	}
-	else if (str == "61966-2-4")
-	{
-		c = fmtcl::TransCurve_61966_2_4;
-	}
-	else if (str == "1361")
-	{
-		c = fmtcl::TransCurve_1361;
-	}
-	else if (str == "61966-2-1" || str == "srgb" || str == "sycc")
-	{
-		c = fmtcl::TransCurve_SRGB;
-	}
-	else if (str == "2020_10")
-	{
-		c = fmtcl::TransCurve_2020_10;
-	}
-	else if (str == "2020_12" || str == "2020")
-	{
-		c = fmtcl::TransCurve_2020_12;
-	}
-	else if (str == "2084")
-	{
-		c = fmtcl::TransCurve_2084;
-	}
-	else if (str == "428-1" || str == "428")
-	{
-		c = fmtcl::TransCurve_428;
-	}
-	else if (str == "hlg")
-	{
-		c = fmtcl::TransCurve_HLG;
-	}
-	else if (str == "1886")
-	{
-		c = fmtcl::TransCurve_1886;
-	}
-	else if (str == "1886a")
-	{
-		c = fmtcl::TransCurve_1886A;
-	}
-	else if (str == "filmstream")
-	{
-		c = fmtcl::TransCurve_FILMSTREAM;
-	}
-	else if (str == "slog")
-	{
-		c = fmtcl::TransCurve_SLOG;
-	}
-	else if (str == "logc2")
-	{
-		c = fmtcl::TransCurve_LOGC2;
-	}
-	else if (str == "logc3")
-	{
-		c = fmtcl::TransCurve_LOGC3;
-	}
-	else if (str == "canonlog")
-	{
-		c = fmtcl::TransCurve_CANONLOG;
-	}
-	else if (str == "adobergb")
-	{
-		c = fmtcl::TransCurve_ADOBE_RGB;
-	}
-	else if (str == "romm")
-	{
-		c = fmtcl::TransCurve_ROMM_RGB;
-	}
-	else if (str == "acescc")
-	{
-		c = fmtcl::TransCurve_ACESCC;
-	}
-	else if (str == "erimm")
-	{
-		c = fmtcl::TransCurve_ERIMM;
-	}
-	else if (str == "slog2")
-	{
-		c = fmtcl::TransCurve_SLOG2;
-	}
-	else if (str == "slog3")
-	{
-		c = fmtcl::TransCurve_SLOG3;
-	}
-	else if (str == "vlog")
-	{
-		c = fmtcl::TransCurve_VLOG;
-	}
-	else
-	{
-		flt.throw_inval_arg ("unknown matrix identifier.");
-	}
-
-
-	return (c);
-}
-
-
-
-Transfer::OpSPtr	Transfer::conv_curve_to_op (fmtcl::TransCurve c, bool inv_flag)
-{
-	assert (c >= 0);
-
-	OpSPtr         ptr;
-
-	switch (c)
-	{
-	case fmtcl::TransCurve_709:
-	case fmtcl::TransCurve_601:
-	case fmtcl::TransCurve_2020_10:
-		ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5));
-		break;
-	case fmtcl::TransCurve_470BG:
-		ptr = OpSPtr (new fmtcl::TransOpPow (inv_flag, 2.8));
-		break;
-	case fmtcl::TransCurve_240:
-		ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.1115, 0.0228, 0.45, 4.0));
-		break;
-	case fmtcl::TransCurve_LINEAR:
-		ptr = OpSPtr (new fmtcl::TransOpBypass);
-		break;
-	case fmtcl::TransCurve_LOG100:
-		ptr = OpSPtr (new fmtcl::TransOpLogTrunc (inv_flag, 0.5, 0.01));
-		break;
-	case fmtcl::TransCurve_LOG316:
-		ptr = OpSPtr (new fmtcl::TransOpLogTrunc (inv_flag, 0.4, sqrt (10) / 1000));
-		break;
-	case fmtcl::TransCurve_61966_2_4:
-		ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5, -1e9, 1e9));
-		break;
-	case fmtcl::TransCurve_1361:
-		ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.099, 0.018, 0.45, 4.5, -0.25, 1.33, 4));
-		break;
-	case fmtcl::TransCurve_470M:	// Assumed display gamma 2.2, almost like sRGB.
-	case fmtcl::TransCurve_SRGB:
-#if 1
-		{
-			// More exact formula giving C1 continuity
-			// https://en.wikipedia.org/wiki/SRGB#Theory_of_the_transformation
-			const double   gamma = 2.4;
-			const double   alpha = 1.055;
-			const double   k0    = (alpha - 1) / (gamma - 1);
-			const double   phi   =
-				  (pow (alpha, gamma) * pow (gamma - 1, gamma - 1))
-				/ (pow (alpha - 1, gamma - 1) * pow (gamma, gamma));
-			ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, alpha, k0 / phi, 1.0 / gamma, phi));
-		}
-#else
-		// Rounded constants used in IEC 61966-2-1
-		ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.055, 0.04045 / 12.92, 1.0 / 2.4, 12.92));
-#endif
-		break;
-	case fmtcl::TransCurve_2020_12:
-		ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1.09929682680944, 0.018053968510807, 0.45, 4.5));
-		break;
-	case fmtcl::TransCurve_2084:
-		ptr = OpSPtr (new fmtcl::TransOp2084 (inv_flag));
-		break;
-	case fmtcl::TransCurve_428:
-		ptr = OpSPtr (new fmtcl::TransOpPow (inv_flag, 2.6, 48.0 / 52.37));
-		break;
-	case fmtcl::TransCurve_HLG:
-		ptr = OpSPtr (new fmtcl::TransOpHlg (inv_flag));
-		break;
-	case fmtcl::TransCurve_1886:
-		ptr = OpSPtr (new fmtcl::TransOpPow (inv_flag, 2.4));
-		break;
-	case fmtcl::TransCurve_1886A:
-		{
-			const double   a1    = 2.6;
-			const double   a2    = 3.0;
-			const double   k0    = 0.35;
-			const double   slope = pow (k0, a2 - a1);
-			const double   beta  = pow (k0, a1);
-			ptr = OpSPtr (new fmtcl::TransOpLinPow (
-				inv_flag, 1, beta, 1.0 / a1, slope, 0, 1, 1, 1.0 / a2
-			));
-		}
-		break;
-	case fmtcl::TransCurve_FILMSTREAM:
-		ptr = OpSPtr (new fmtcl::TransOpFilmStream (inv_flag));
-		break;
-	case fmtcl::TransCurve_SLOG:
-		ptr = OpSPtr (new fmtcl::TransOpSLog (inv_flag, false));
-		break;
-	case fmtcl::TransCurve_LOGC2:
-		ptr = OpSPtr (new fmtcl::TransOpLogC (inv_flag, fmtcl::TransOpLogC::Type_LOGC_V2));
-		break;
-	case fmtcl::TransCurve_LOGC3:
-		ptr = OpSPtr (new fmtcl::TransOpLogC (inv_flag, fmtcl::TransOpLogC::Type_LOGC_V3));
-		break;
-	case fmtcl::TransCurve_CANONLOG:
-		ptr = OpSPtr (new fmtcl::TransOpCanonLog (inv_flag));
-		break;
-	case fmtcl::TransCurve_ADOBE_RGB:
-		ptr = OpSPtr (new fmtcl::TransOpPow (inv_flag, 563.0 / 256));
-		break;
-	case fmtcl::TransCurve_ROMM_RGB:
-		ptr = OpSPtr (new fmtcl::TransOpLinPow (inv_flag, 1, 0.001953, 1.0 / 1.8, 16));
-		break;
-	case fmtcl::TransCurve_ACESCC:
-		ptr = OpSPtr (new fmtcl::TransOpAcesCc (inv_flag));
-		break;
-	case fmtcl::TransCurve_ERIMM:
-		ptr = OpSPtr (new fmtcl::TransOpErimm (inv_flag));
-		break;
-	case fmtcl::TransCurve_SLOG2:
-		ptr = OpSPtr (new fmtcl::TransOpSLog (inv_flag, true));
-		break;
-	case fmtcl::TransCurve_SLOG3:
-		ptr = OpSPtr (new fmtcl::TransOpSLog3 (inv_flag));
-		break;
-	case fmtcl::TransCurve_VLOG:
-		ptr = OpSPtr (new fmtcl::TransOpLogC (inv_flag, fmtcl::TransOpLogC::Type_VLOG));
-		break;
-	default:
-		assert (false);
-		break;
-	}
-
-	if (ptr.get () == 0)
-	{
-		ptr = OpSPtr (new fmtcl::TransOpBypass);
-	}
-
-	return (ptr);
+	return *fmt_dst_ptr;
 }
 
 
diff --git a/src/fmtc/Transfer.h b/src/fmtc/Transfer.h
index 0597854..be3b009 100644
--- a/src/fmtc/Transfer.h
+++ b/src/fmtc/Transfer.h
@@ -32,6 +32,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 #include "fmtcl/TransCurve.h"
 #include "fmtcl/TransLut.h"
 #include "fmtcl/TransOpInterface.h"
+#include "fmtcl/TransOpLogC.h"
 #include "vsutl/FilterBase.h"
 #include "vsutl/NodeRefSPtr.h"
 #include "vsutl/PlaneProcCbInterface.h"
@@ -84,17 +85,11 @@ class Transfer
 	const ::VSFormat &
 	               get_output_colorspace (const ::VSMap &in, ::VSMap &out, ::VSCore &core, const ::VSFormat &fmt_src) const;
 
-	void           init_table ();
-
-	static fmtcl::TransCurve
-	               conv_string_to_curve (const vsutl::FilterBase &flt, const std::string &str);
-	static OpSPtr  conv_curve_to_op (fmtcl::TransCurve c, bool inv_flag);
-
 	vsutl::NodeRefSPtr
 	               _clip_src_sptr;
 	const ::VSVideoInfo             
-	               _vi_in;          // Input. Must be declared after _clip_src_sptr because of initialisation order.
-	::VSVideoInfo  _vi_out;         // Output. Must be declared after _vi_in.
+	               _vi_in;     // Input. Must be declared after _clip_src_sptr because of initialisation order.
+	::VSVideoInfo  _vi_out;    // Output. Must be declared after _vi_in.
 
 	bool           _sse2_flag;
 	bool           _avx2_flag;
@@ -109,6 +104,10 @@ class Transfer
 	               _curve_s;
 	fmtcl::TransCurve
 	               _curve_d;
+	fmtcl::TransOpLogC::ExpIdx // Exposure Index for the Arri Log C curves
+	               _logc_ei_s; // Source side, converted from the "logceis" argument
+	fmtcl::TransOpLogC::ExpIdx
+	               _logc_ei_d; // Destination side, converted from the "logceid" argument
 	bool           _loglut_flag;
 
 	vsutl::PlaneProcessor
@@ -125,7 +124,9 @@ class Transfer
 
 	               Transfer ()                               = delete;
 	               Transfer (const Transfer &other)          = delete;
+	               Transfer (Transfer &&other)               = delete;
 	Transfer &     operator = (const Transfer &other)        = delete;
+	Transfer &     operator = (Transfer &&other)             = delete;
 	bool           operator == (const Transfer &other) const = delete;
 	bool           operator != (const Transfer &other) const = delete;
 
diff --git a/src/fmtc/fnc.cpp b/src/fmtc/fnc.cpp
index 96d2343..f590fb3 100644
--- a/src/fmtc/fnc.cpp
+++ b/src/fmtc/fnc.cpp
@@ -25,7 +25,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 #include "fmtc/fnc.h"
-#include "fmtcl/Mat4.h"
+#include "fmtcl/fnc.h"
 #include "fmtcl/MatrixProc.h"
 #include "vsutl/FilterBase.h"
 #include "vsutl/fnc.h"
@@ -46,65 +46,23 @@ namespace fmtc
 
 	
 	
-static void	override_fmt_with_csp (::VSFormat &fmt, fmtcl::ColorSpaceH265 csp_out, int plane_out)
-{
-	if (plane_out >= 0)
-	{
-		fmt.numPlanes = 3;
-		if (csp_out == fmtcl::ColorSpaceH265_RGB)
-		{
-			fmt.colorFamily = ::cmRGB;
-		}
-		else if (csp_out == fmtcl::ColorSpaceH265_YCGCO)
-		{
-			fmt.colorFamily = ::cmYCoCg;
-		}
-		else
-		{
-			fmt.colorFamily = ::cmYUV;
-		}
-	}
-}
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 
 
-// Int: depends on the input format (may be float too)
-// R, G, B, Y: [0 ; 1]
-// U, V, Cg, Co : [-0.5 ; 0.5]
-static void	make_mat_flt_int (fmtcl::Mat4 &m, bool to_flt_flag, const ::VSFormat &fmt, bool full_flag)
+fmtcl::PicFmt	conv_vsfmt_to_picfmt (const ::VSFormat &fmt, bool full_flag)
 {
-	::VSFormat     fmt2 (fmt);
-	fmt2.sampleType = ::stFloat;
-
-	const ::VSFormat* fmt_src_ptr = &fmt2;
-	const ::VSFormat* fmt_dst_ptr = &fmt;
-	if (to_flt_flag)
-	{
-		std::swap (fmt_src_ptr, fmt_dst_ptr);
-	}
-
-	double         ay, by;
-	double         ac, bc;
-	const int      ch_plane = (fmt_dst_ptr->numPlanes > 1) ? 1 : 0;
-	vsutl::compute_fmt_mac_cst (
-		ay, by, *fmt_dst_ptr, full_flag, *fmt_src_ptr, full_flag, 0
-	);
-	vsutl::compute_fmt_mac_cst (
-		ac, bc, *fmt_dst_ptr, full_flag, *fmt_src_ptr, full_flag, ch_plane
-	);
+	fmtcl::PicFmt  pic_fmt;	// fmtcl-side description of the VapourSynth format fmt
+	pic_fmt._sf        = conv_vsfmt_to_splfmt (fmt);	// Sample format (fmtcl::SplFmt)
+	pic_fmt._res       = fmt.bitsPerSample;	// Bits per sample
+	pic_fmt._col_fam   = conv_colfam_to_fmtcl (fmt);	// Color family (fmtcl::ColorFamily)
+	pic_fmt._full_flag = full_flag;	// Range flag, forwarded unchanged from the caller
 
-	m[0][0] = ay; m[0][1] =  0; m[0][2] =  0; m[0][3] = by;
-	m[1][0] =  0; m[1][1] = ac; m[1][2] =  0; m[1][3] = bc;
-	m[2][0] =  0; m[2][1] =  0; m[2][2] = ac; m[2][3] = bc;
-	m[3][0] =  0; m[3][1] =  0; m[3][2] =  0; m[3][3] =  1;
+	return pic_fmt;
 }
 
 
 
-/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
-
-
-
 fmtcl::SplFmt	conv_vsfmt_to_splfmt (const ::VSFormat &fmt)
 {
 	fmtcl::SplFmt  splfmt = fmtcl::SplFmt_ILLEGAL;
@@ -128,56 +86,40 @@ fmtcl::SplFmt	conv_vsfmt_to_splfmt (const ::VSFormat &fmt)
 		}
 	}
 
-	return (splfmt);
+	return splfmt;
 }
 
 
 
-void	prepare_matrix_coef (const vsutl::FilterBase &filter, fmtcl::MatrixProc &mat_proc, const fmtcl::Mat4 &mat_main, const ::VSFormat &fmt_dst, bool full_range_dst_flag, const ::VSFormat &fmt_src, bool full_range_src_flag, fmtcl::ColorSpaceH265 csp_out, int plane_out)
+fmtcl::ColorFamily	conv_colfam_to_fmtcl (const ::VSFormat &fmt)
 {
-	const bool     int_proc_flag =
-		(   fmt_src.sampleType == ::stInteger
-		 && fmt_dst.sampleType == ::stInteger);
+	auto          col_fam = fmtcl::ColorFamily_INVALID;
 
-	fmtcl::Mat4    m (1, fmtcl::Mat4::Preset_DIAGONAL);
-
-	::VSFormat     fmt_dst2 = fmt_dst;
-	if (int_proc_flag)
+	switch (fmt.colorFamily)
 	{
-		// For the coefficient calculation, use the same output bitdepth
-		// as the input. The bitdepth change will be done separately with
-		// a simple bitshift.
-		fmt_dst2.bitsPerSample = fmt_src.bitsPerSample;
+	case cmGray:  col_fam = fmtcl::ColorFamily_GRAY;  break;
+	case cmRGB:   col_fam = fmtcl::ColorFamily_RGB;   break;
+	case cmYUV:   col_fam = fmtcl::ColorFamily_YUV;   break;
+	case cmYCoCg: col_fam = fmtcl::ColorFamily_YCGCO; break;
+	default:      assert (false);                     break;	// Unhandled family: asserts when assertions are enabled, otherwise col_fam keeps its initial ColorFamily_INVALID
 	}
 
-	override_fmt_with_csp (fmt_dst2, csp_out, plane_out);
+	return col_fam;
+}
 
-	fmtcl::Mat4    m1s;
-	fmtcl::Mat4    m1d;
-	make_mat_flt_int (m1s, true , fmt_src , full_range_src_flag);
-	make_mat_flt_int (m1d, false, fmt_dst2, full_range_dst_flag);
-	m *= m1d;
-	if (! int_proc_flag)
-	{
-		if (plane_out > 0 && vsutl::is_chroma_plane (fmt_dst2, plane_out))
-		{
-			// When we extract a single plane, it's a conversion to R or
-			// to Y, so the outout range is always [0; 1]. Therefore we
-			// need to offset the chroma planes.
-			m [plane_out] [fmtcl::MatrixProc::NBR_PLANES] += 0.5;
-		}
-	}
-	m *= mat_main;
-	m *= m1s;
-
-	const fmtcl::SplFmt  splfmt_src = conv_vsfmt_to_splfmt (fmt_src);
-	const fmtcl::SplFmt  splfmt_dst = conv_vsfmt_to_splfmt (fmt_dst);
-	const fmtcl::MatrixProc::Err  ret_val = mat_proc.configure (
-		m, int_proc_flag,
-		splfmt_src, fmt_src.bitsPerSample,
-		splfmt_dst, fmt_dst.bitsPerSample,
-		plane_out
+
+
+void	prepare_matrix_coef (const vsutl::FilterBase &filter, fmtcl::MatrixProc &mat_proc, const fmtcl::Mat4 &mat_main, const ::VSFormat &fmt_dst, bool full_range_dst_flag, const ::VSFormat &fmt_src, bool full_range_src_flag, fmtcl::ColorSpaceH265 csp_out, int plane_out)
+{
+	const fmtcl::PicFmt  fmt_src_fmtcl =
+		conv_vsfmt_to_picfmt (fmt_src, full_range_src_flag);
+	const fmtcl::PicFmt  fmt_dst_fmtcl =
+		conv_vsfmt_to_picfmt (fmt_dst, full_range_dst_flag);
+
+	const int      ret_val = fmtcl::prepare_matrix_coef (
+		mat_proc, mat_main, fmt_dst_fmtcl, fmt_src_fmtcl, csp_out, plane_out
 	);
+
 	if (ret_val != fmtcl::MatrixProc::Err_OK)
 	{
 		if (ret_val == fmtcl::MatrixProc::Err_POSSIBLE_OVERFLOW)
diff --git a/src/fmtc/fnc.h b/src/fmtc/fnc.h
index 5403664..fc86889 100644
--- a/src/fmtc/fnc.h
+++ b/src/fmtc/fnc.h
@@ -27,7 +27,9 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
+#include "fmtcl/ColorFamily.h"
 #include "fmtcl/ColorSpaceH265.h"
+#include "fmtcl/PicFmt.h"
 #include "fmtcl/SplFmt.h"
 
 
@@ -49,7 +51,10 @@ namespace fmtc
 
 
 
+fmtcl::PicFmt  conv_vsfmt_to_picfmt (const ::VSFormat &fmt, bool full_flag);
 fmtcl::SplFmt  conv_vsfmt_to_splfmt (const ::VSFormat &fmt);
+fmtcl::ColorFamily
+               conv_colfam_to_fmtcl (const ::VSFormat &fmt);
 void           prepare_matrix_coef (const vsutl::FilterBase &filter, fmtcl::MatrixProc &mat_proc, const fmtcl::Mat4 &mat_main, const ::VSFormat &fmt_dst, bool full_range_dst_flag, const ::VSFormat &fmt_src, bool full_range_src_flag, fmtcl::ColorSpaceH265 csp_out = fmtcl::ColorSpaceH265_UNSPECIFIED, int plane_out = -1);
 
 
diff --git a/src/fmtc/version.h b/src/fmtc/version.h
index 12e40c1..3bc8b7a 100644
--- a/src/fmtc/version.h
+++ b/src/fmtc/version.h
@@ -1,5 +1,5 @@
 #pragma once
 
-#define fmtc_VERSION     "r22"
+#define fmtc_VERSION     "r23"
 #define fmtc_PLUGIN_NAME "fmtconv"
 #define fmtc_NAMESPACE   "fmtc"
diff --git a/src/fmtcl/Dither.cpp b/src/fmtcl/Dither.cpp
new file mode 100644
index 0000000..77a30f5
--- /dev/null
+++ b/src/fmtcl/Dither.cpp
@@ -0,0 +1,2605 @@
+/*****************************************************************************
+
+        Dither.cpp
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#if defined (_MSC_VER)
+	#pragma warning (1 : 4130 4223 4705 4706)
+	#pragma warning (4 : 4355 4786 4800)
+#endif
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fstb/def.h"
+
+#include "fmtcl/Dither.h"
+#include "fmtcl/fnc.h"
+#include "fmtcl/PicFmt.h"
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	#include "fmtcl/ProxyRwSse2.h"
+#endif
+#include "fmtcl/VoidAndCluster.h"
+#include "fstb/fnc.h"
+
+#include <algorithm>
+#include <stdexcept>
+
+#include <cassert>
+#include <cmath>
+
+
+
+namespace fmtcl
+{
+
+
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+// Out-of-class definitions of the public static constexpr members; their values come from the class declaration (not shown here).
+constexpr int	Dither::_max_nbr_planes;
+constexpr int	Dither::_max_pat_width;
+
+// Sets up a dithering engine converting from (src_fmt, src_res, src_full_flag) to (dst_fmt, dst_res, dst_full_flag):
+// stores the settings, precomputes scaling, pattern and amplitude data, then selects the row-processing functions.
+Dither::Dither (
+	SplFmt src_fmt, int src_res, bool src_full_flag,
+	SplFmt dst_fmt, int dst_res, bool dst_full_flag,
+	ColorFamily color_fam, int nbr_planes, int w, // w <= 0 is replaced with _max_unk_width, see below
+	DMode dmode, int pat_size, double ampo, double ampn,
+	bool dyn_flag, bool static_noise_flag, bool correlated_planes_flag,
+	bool tpdfo_flag, bool tpdfn_flag,
+	bool sse2_flag, bool avx2_flag
+)
+:	_splfmt_src (src_fmt)
+,	_splfmt_dst (dst_fmt)
+,	_src_res (src_res)
+,	_dst_res (dst_res)
+,	_full_range_in_flag (src_full_flag)
+,	_full_range_out_flag (dst_full_flag)
+,	_color_fam (color_fam)
+,	_nbr_planes (nbr_planes)
+,	_sse2_flag (sse2_flag)
+,	_avx2_flag (avx2_flag)
+,	_dmode (dmode)
+,	_pat_size (pat_size)
+,	_ampo (ampo)
+,	_ampn (ampn)
+,	_dyn_flag (dyn_flag)
+,	_static_noise_flag (static_noise_flag)
+,	_correlated_planes_flag (correlated_planes_flag)
+,	_tpdfo_flag (tpdfo_flag)
+,	_tpdfn_flag (tpdfn_flag)
+{
+	assert (src_fmt >= 0);
+	assert (src_fmt < SplFmt::SplFmt_NBR_ELT);
+	assert (dst_fmt >= 0);
+	assert (dst_fmt < SplFmt::SplFmt_NBR_ELT);
+	assert ( // Integer source: 8 to 12, 14 or 16 bits. Float source: 32 bits.
+		   (SplFmt_is_int (src_fmt)   && (   (   src_res >=  8
+		                                      && src_res <= 12)
+		                                  ||     src_res == 14
+		                                  ||     src_res == 16))
+		|| (SplFmt_is_float (src_fmt) &&         src_res == 32 )
+	);
+	assert ( // Integer destination: 8 to 10, 12 or 16 bits. Float destination: 32 bits.
+		   (SplFmt_is_int (dst_fmt)   && (   (   dst_res >=  8
+		                                      && dst_res <= 10)
+		                                  ||     dst_res == 12
+		                                  ||     dst_res == 16))
+		|| (SplFmt_is_float (dst_fmt) &&         dst_res == 32 )
+	);
+	assert (color_fam >= 0);
+	assert (color_fam < ColorFamily_NBR_ELT);
+	assert (nbr_planes > 0);
+	assert (nbr_planes <= _max_nbr_planes);
+	assert (dmode >= 0);
+	assert (dmode < DMode_NBR_ELT);
+	assert (pat_size >= 4);
+	assert (_max_pat_width % pat_size == 0);
+	assert (ampo >= 0);
+	assert (ampn >= 0);
+
+	// No dithering required: int -> float, float -> float, or int -> int without bitdepth reduction when both ranges are limited
+	if (   (   SplFmt_is_int (src_fmt)
+	        && (    SplFmt_is_float (dst_fmt)
+	            || (   _src_res <= _dst_res
+	                && ! _full_range_in_flag
+	                && ! _full_range_out_flag)))
+	    || (   SplFmt_is_float (src_fmt)
+	        && SplFmt_is_float (dst_fmt)))
+	{
+		_upconv_flag = true; // NOTE(review): never set to false here; presumably defaults to false in the class declaration - confirm
+	}
+
+	// Data scaling parameters: gain and additive constant of each plane, filled by compute_fmt_mac_cst ()
+	for (int plane_index = 0; plane_index < nbr_planes; ++plane_index)
+	{
+		SclInf &       scl_inf = _scale_info_arr [plane_index];
+		fmtcl::compute_fmt_mac_cst (
+			scl_inf._info._gain,
+			scl_inf._info._add_cst,
+			PicFmt { dst_fmt, dst_res, color_fam, _full_range_out_flag },
+			PicFmt { src_fmt, src_res, color_fam, _full_range_in_flag  },
+			plane_index
+		);
+		// _ptr is what process_plane () hands to the blitter; it only points to the scaling data for int -> float conversions
+		scl_inf._ptr = nullptr;
+		if (   _upconv_flag
+		    && SplFmt_is_int (src_fmt)
+		    && SplFmt_is_float (dst_fmt))
+		{
+			scl_inf._ptr = &scl_inf._info;
+		}
+	}
+	// A non-positive width is replaced with _max_unk_width before building the error-diffusion buffer factory
+	if (w <= 0)
+	{
+		w = _max_unk_width;
+	}
+	_buf_factory_uptr = std::make_unique <fmtcl::ErrDifBufFactory> (w);
+	_buf_pool.set_factory (*_buf_factory_uptr);
+	// Must come before the amplitude code: it sets _errdif_flag and may clear _tpdfo_flag, both read below
+	build_dither_pat ();
+
+	// Amplitude precalculations
+
+	// In case of TPDF, rescales the amplitude so the power is kept constant.
+	// Sum of two noises (uncorrelated signals) -> +3 dB
+	if (_tpdfo_flag)
+	{
+		ampo *= fstb::SQRT2 * 0.5; // Local copy only, _ampo keeps the value given by the caller
+	}
+	if (_tpdfn_flag)
+	{
+		ampn *= fstb::SQRT2 * 0.5; // Local copy only, _ampn keeps the value given by the caller
+	}
+	// Fixed-point amplitudes with _amp_bits fractional bits, limited to 127
+	const int		amp_mul = 1 << _amp_bits;
+	const int      ampo_i_raw = fstb::round_int (ampo * amp_mul);
+	const int      ampn_i_raw = fstb::round_int (ampn * amp_mul);
+	_amp._o_i = std::min (ampo_i_raw, 127);
+	_amp._n_i = std::min (ampn_i_raw, 127);
+	_amp._n_f = float (ampn * (1.f / 256.f));
+
+	if (_errdif_flag)
+	{
+		_amp._e_i = fstb::limit (
+			fstb::round_int ((ampo - 1) * (128 << _amp_bits)),
+			0,
+			(2048 << _amp_bits) - 1
+		);
+		_amp._e_f = fstb::limit (float (ampo - 1), 0.f, 8.f);
+	}
+	// "Simple" case: the pattern amplitude rounds to exactly 1 in fixed point and there is no noise
+	_simple_flag = (ampo_i_raw == amp_mul && ampn_i_raw == 0);
+
+	// Processing function initialisation
+	if (_errdif_flag)
+	{
+		init_fnc_errdiff ();
+	}
+	else if (_dmode == DMode_QUASIRND)
+	{
+		init_fnc_quasirandom ();
+	}
+	else if (_dmode == DMode_FAST)
+	{
+		init_fnc_fast ();
+	}
+	else
+	{
+		init_fnc_ordered ();
+	}
+}
+
+
+// Converts one plane: plain format conversion by BitBltConv when no dithering is required, dither_plane () otherwise.
+void	Dither::process_plane (uint8_t *dst_ptr, int dst_stride, const uint8_t *src_ptr, int src_stride, int w, int h, int frame_index, int plane_index)
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	assert (h > 0);
+	assert (frame_index >= 0);
+	assert (plane_index >= 0);
+	assert (plane_index < _nbr_planes);
+	// _scale_info_arr is only filled for the _nbr_planes planes given to the constructor, hence the bound above.
+	if (_upconv_flag)
+	{
+		BitBltConv blitter (_sse2_flag, _avx2_flag);
+		blitter.bitblt (
+			_splfmt_dst, _dst_res, dst_ptr, nullptr, dst_stride,
+			_splfmt_src, _src_res, src_ptr, nullptr, src_stride,
+			w, h,
+			_scale_info_arr [plane_index]._ptr
+		);
+	}
+	else
+	{
+		dither_plane (
+			dst_ptr, dst_stride,
+			src_ptr, src_stride,
+			w, h,
+			_scale_info_arr [plane_index]._info,
+			frame_index, plane_index
+		);
+	}
+}
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+// Out-of-class definitions of the private static constexpr members; their values come from the class declaration (not shown here).
+constexpr int	Dither::_pat_period;
+constexpr int	Dither::_amp_bits;
+constexpr int	Dither::_err_res;
+constexpr int	Dither::_max_unk_width;
+
+
+
+void	Dither::build_dither_pat ()
+{
+	_errdif_flag = false;
+
+	switch (_dmode)
+	{
+	case DMode_BAYER:
+		build_dither_pat_bayer ();
+		break;
+
+	case DMode_FILTERLITE:
+	case DMode_STUCKI:
+	case DMode_ATKINSON:
+	case DMode_FLOYD:
+	case DMode_OSTRO:
+		_errdif_flag = true;
+		_tpdfo_flag  = false;
+		break;
+
+	case DMode_ROUND:
+	case DMode_FAST:
+	default:
+		build_dither_pat_round ();
+		break;
+
+	case DMode_VOIDCLUST:
+		build_dither_pat_void_and_cluster (_pat_size);
+		break;
+
+	case DMode_QUASIRND:
+		// Nothing
+		break;
+	}
+}
+
+
+// Fills the base pattern with zeros, then derives the other patterns of the period from it.
+void	Dither::build_dither_pat_round ()
+{
+	PatData &      base_pat = _dither_pat_arr [0];
+	for (int row = 0; row < _max_pat_width; ++row)
+	{
+		auto &         line = base_pat [row];
+		for (int col = 0; col < _max_pat_width; ++col)
+		{
+			line [col] = 0;
+		}
+	}
+	build_next_dither_pat ();
+}
+
+// Builds the base pattern of the Bayer mode in place, with values biased by -128, then derives the other patterns
+// of the period. Statement and loop order matter: each pass reads cells that the same pass also overwrites.
+void	Dither::build_dither_pat_bayer ()
+{
+	assert (fstb::is_pow_2 (int (_max_pat_width)));
+
+	PatData &      pat_data = _dither_pat_arr [0];
+	for (int y = 0; y < _max_pat_width; ++y)
+	{
+		for (int x = 0; x < _max_pat_width; ++x)
+		{
+			pat_data [y] [x] = -128; // Biased zero
+		}
+	}
+	// One pass per power of 2 up to _max_pat_width; dith_size is not used in the body, it only counts the passes
+	for (int dith_size = 2; dith_size <= _max_pat_width; dith_size <<= 1)
+	{
+		for (int y = 0; y < _max_pat_width; y += 2)
+		{
+			for (int x = 0; x < _max_pat_width; x += 2)
+			{
+				const int      xx = (x >> 1) + (_max_pat_width >> 1); // Source cell, always in the second half of the rows
+				const int      yy = (y >> 1) + (_max_pat_width >> 1); // and of the columns
+				const int      val = (pat_data [yy] [xx] + 128) >> 2; // Bias removed, then divided by 4
+				pat_data [y    ] [x    ] = int16_t (val +   0-128); // The 2x2 cell receives val plus 0, 128, 192 and 64,
+				pat_data [y    ] [x + 1] = int16_t (val + 128-128); // each time with the -128 bias restored
+				pat_data [y + 1] [x    ] = int16_t (val + 192-128);
+				pat_data [y + 1] [x + 1] = int16_t (val +  64-128);
+			}
+		}
+	}
+
+	build_next_dither_pat ();
+}
+
+
+// Builds the base pattern from a w x w void-and-cluster matrix, then derives the other patterns of the period.
+void	Dither::build_dither_pat_void_and_cluster (int w)
+{
+	assert (_max_pat_width % w == 0);
+	VoidAndCluster   generator;
+	MatrixWrap <uint16_t> ranks (w, w);
+	generator.create_matrix (ranks);
+	// Matrix values are rescaled by 256 / (w * w) and biased by -128. The matrix is read beyond w in both directions.
+	const int      nbr_cells = w * w;
+	PatData &      base_pat  = _dither_pat_arr [0];
+	for (int row = 0; row < _max_pat_width; ++row)
+	{
+		for (int col = 0; col < _max_pat_width; ++col)
+		{
+			const int      rank = ranks (col, row);
+			base_pat [row] [col] = int16_t (rank * 256 / nbr_cells - 128);
+		}
+	}
+	build_next_dither_pat ();
+}
+
+
+// Finalises pattern 0 (TPDF remapping when _tpdfo_flag is set), then fills patterns 1 to _pat_period - 1 from it.
+void	Dither::build_next_dither_pat ()
+{
+	PatData &      base_pat = _dither_pat_arr [0];
+	if (_tpdfo_flag)
+	{
+		for (int row = 0; row < _max_pat_width; ++row)
+		{
+			for (int col = 0; col < _max_pat_width; ++col)
+			{
+				auto &         cell = base_pat [row] [col];
+				cell = int16_t (remap_tpdf_scalar (int (cell)));
+			}
+		}
+	}
+	// The rotation angle cycles through 0 to 3 with the pattern index in dynamic mode and stays 0 otherwise
+	for (int seq = 1; seq < _pat_period; ++seq)
+	{
+		int            angle = 0;
+		if (_dyn_flag)
+		{
+			angle = seq & 3;
+		}
+		copy_dither_pat_rotate (_dither_pat_arr [seq], base_pat, angle);
+	}
+}
+
+
+// Copies src to dst rotated by angle quarter turns (0 to 3), source coordinates wrapping modulo _max_pat_width.
+void	Dither::copy_dither_pat_rotate (PatData &dst, const PatData &src, int angle) noexcept
+{
+	assert (angle >= 0);
+	assert (angle < 4);
+	assert (fstb::is_pow_2 (int (_max_pat_width)));
+	// Sine of the quarter turns; the cosine is the same table read one step ahead
+	static const int  quarter_sin [4] = { 0, 1, 0, -1 };
+	const int      sin_a = quarter_sin [angle];
+	const int      cos_a = quarter_sin [(angle + 1) & 3];
+	const int      wrap  = _max_pat_width - 1;
+
+	for (int row = 0; row < _max_pat_width; ++row)
+	{
+		const int      row_s = row * sin_a;
+		const int      row_c = row * cos_a;
+		for (int col = 0; col < _max_pat_width; ++col)
+		{
+			const int      src_col = (col * cos_a - row_s) & wrap;
+			const int      src_row = (col * sin_a + row_c) & wrap;
+			dst [row] [col] = src [src_row] [src_col];
+		}
+	}
+}
+
+
+// Dispatch switch of the integer path: one SETP (NAMP, NAMF, dst fmt, dst type, dst bits, src fmt, src type, src bits) per bitdepth reduction.
+// Key: src_fmt + (simple_flag << 7) + (src_res << 8) + (dst_fmt << 16) + (tpdfn_flag << 22) + (tpdfo_flag << 23) + (dst_res << 24)
+#define fmtcl_Dither_SPAN_INT(SETP, NAMP, NAMF, simple_flag, tpdfo_flag, tpdfn_flag, dst_res, dst_fmt, src_res, src_fmt) \
+	switch (  ((simple_flag) << 7) \
+	        + ((tpdfo_flag) << 23) + ((tpdfn_flag) << 22) \
+	        + ((dst_res) << 24) + ((dst_fmt) << 16) \
+	        + ((src_res) <<  8) +  (src_fmt)) \
+	{ \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t,  9) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 10) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 11) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 12) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 16) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 10) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 11) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 12) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 16) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 11) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 12) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 16) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 16) \
+	}
+// Dispatch switch of the path using float as intermediary data: every integer destination combined with every source, float included.
+// Same key layout and same SETP arguments (destination first, then source) as the integer version of this macro.
+#define fmtcl_Dither_SPAN_FLT(SETP, NAMP, NAMF, simple_flag, tpdfo_flag, tpdfn_flag, dst_res, dst_fmt, src_res, src_fmt) \
+	switch (  ((simple_flag) << 7) \
+	        + ((tpdfo_flag) << 23) + ((tpdfn_flag) << 22) \
+	        + ((dst_res) << 24) + ((dst_fmt) << 16) \
+	        + ((src_res) <<  8) +  (src_fmt)) \
+	{ \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT8 , uint8_t ,  8) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t,  9) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 10) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 11) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 12) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_INT16, uint16_t, 16) \
+	SETP (NAMP, NAMF, SplFmt_INT8 , uint8_t ,  8, SplFmt_FLOAT, float   , 32) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT8 , uint8_t ,  8) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t,  9) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 10) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 11) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 12) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_INT16, uint16_t, 16) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t,  9, SplFmt_FLOAT, float   , 32) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT8 , uint8_t ,  8) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t,  9) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 10) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 11) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 12) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_INT16, uint16_t, 16) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 10, SplFmt_FLOAT, float   , 32) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT8 , uint8_t ,  8) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t,  9) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 10) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 11) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 12) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_INT16, uint16_t, 16) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 12, SplFmt_FLOAT, float   , 32) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT8 , uint8_t ,  8) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t,  9) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 10) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 11) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 12) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 14) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_INT16, uint16_t, 16) \
+	SETP (NAMP, NAMF, SplFmt_INT16, uint16_t, 16, SplFmt_FLOAT, float   , 32) \
+	}
+
+
+// Expands FCASE once for each of the 8 combinations of (simple_flag, tpdfo_flag, tpdfn_flag). DF/DT/DP and SF/ST/SP: SplFmt, C type and bitdepth.
+#define fmtcl_Dither_SET_FNC_MULTI(FCASE, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	FCASE (false, false, false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	FCASE (false, false, true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	FCASE (false, true , false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	FCASE (false, true , true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	FCASE (true , false, false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	FCASE (true , false, true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	FCASE (true , true , false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	FCASE (true , true , true , NAMP, NAMF, DF, DT, DP, SF, ST, SP)
+// One case label of the integer path: same key as the SPAN switches, selects the C++ process_seg_<NAMF>_int_int_cpp instantiation
+#define fmtcl_Dither_SET_FNC_INT_CASE(simple_flag, tpdfo_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	case   (simple_flag << 7) + (tpdfn_flag << 22) + (tpdfo_flag << 23) \
+	     + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
+		_process_seg_int_int_ptr = &process_seg_##NAMF##_int_int_cpp < \
+			simple_flag, tpdfo_flag, tpdfn_flag, DT, DP, ST, SP \
+		>; \
+		break;
+
+#define fmtcl_Dither_SET_FNC_INT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_MULTI (fmtcl_Dither_SET_FNC_INT_CASE, \
+		NAMP, NAMF, DF, DT, DP, SF, ST, SP)
+// Same for the float-intermediary path: sets _process_seg_flt_int_ptr, the source bitdepth is not a template argument
+#define fmtcl_Dither_SET_FNC_FLT_CASE(simple_flag, tpdfo_flag, tpdfn_flag,NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	case   (simple_flag << 7) + (tpdfn_flag << 22) + (tpdfo_flag << 23) \
+	     + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
+		_process_seg_flt_int_ptr = &process_seg_##NAMF##_flt_int_cpp < \
+			simple_flag, tpdfo_flag, tpdfn_flag, DT, DP, ST \
+		>; \
+		break;
+
+#define fmtcl_Dither_SET_FNC_FLT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_MULTI (fmtcl_Dither_SET_FNC_FLT_CASE, \
+		NAMP, NAMF, DF, DT, DP, SF, ST, SP)
+// SSE2 variants: the selected functions take the SplFmt values (DF, SF) as template arguments instead of the C types (DT, ST)
+#define fmtcl_Dither_SET_FNC_INT_SSE2_CASE(simple_flag, tpdfo_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	case   (simple_flag << 7) + (tpdfn_flag << 22) + (tpdfo_flag << 23) \
+	     + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
+		_process_seg_int_int_ptr = &process_seg_##NAMF##_int_int_sse2 < \
+			simple_flag, tpdfo_flag, tpdfn_flag, DF, DP, SF, SP \
+		>; \
+		break;
+
+#define fmtcl_Dither_SET_FNC_INT_SSE2(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_MULTI (fmtcl_Dither_SET_FNC_INT_SSE2_CASE, \
+		NAMP, NAMF, DF, DT, DP, SF, ST, SP)
+
+#define fmtcl_Dither_SET_FNC_FLT_SSE2_CASE(simple_flag, tpdfo_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	case   (simple_flag << 7) + (tpdfn_flag << 22) + (tpdfo_flag << 23) \
+	     + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
+		_process_seg_flt_int_ptr = &process_seg_##NAMF##_flt_int_sse2 < \
+			simple_flag, tpdfo_flag, tpdfn_flag, DF, DP, SF \
+		>; \
+		break;
+
+#define fmtcl_Dither_SET_FNC_FLT_SSE2(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_MULTI (fmtcl_Dither_SET_FNC_FLT_SSE2_CASE, \
+		NAMP, NAMF, DF, DT, DP, SF, ST, SP)
+
+
+// Selects the row functions of the fast mode; the three flags are fixed to false, so a single instantiation per format pair is reachable.
+void	Dither::init_fnc_fast () noexcept
+{
+	fmtcl_Dither_SPAN_INT (
+		fmtcl_Dither_SET_FNC_INT, fast, fast, false, false, false,
+		_dst_res, _splfmt_dst, _src_res, _splfmt_src
+	)
+	fmtcl_Dither_SPAN_FLT (
+		fmtcl_Dither_SET_FNC_FLT, fast, fast, false, false, false,
+		_dst_res, _splfmt_dst, _src_res, _splfmt_src
+	)
+	// With _sse2_flag set, the SSE2 versions replace the pointers selected above
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	if (_sse2_flag)
+	{
+		fmtcl_Dither_SPAN_INT (
+			fmtcl_Dither_SET_FNC_INT_SSE2, fast, fast, false, false, false,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		fmtcl_Dither_SPAN_FLT (
+			fmtcl_Dither_SET_FNC_FLT_SSE2, fast, fast, false, false, false,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+	}
+#endif
+}
+
+
+// Selects the row functions of the pattern-based (ordered) modes from the formats and the simple/TPDF flags.
+void	Dither::init_fnc_ordered () noexcept
+{
+	assert (! _errdif_flag);
+	// Plain C++ versions first, for the integer path then for the float-intermediary path
+	fmtcl_Dither_SPAN_INT (
+		fmtcl_Dither_SET_FNC_INT,
+		ord, ord, _simple_flag, _tpdfo_flag, _tpdfn_flag,
+		_dst_res, _splfmt_dst, _src_res, _splfmt_src
+	)
+	fmtcl_Dither_SPAN_FLT (
+		fmtcl_Dither_SET_FNC_FLT,
+		ord, ord, _simple_flag, _tpdfo_flag, _tpdfn_flag,
+		_dst_res, _splfmt_dst, _src_res, _splfmt_src
+	)
+	// With _sse2_flag set, the SSE2 versions replace the pointers selected above
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	if (_sse2_flag)
+	{
+		fmtcl_Dither_SPAN_INT (
+			fmtcl_Dither_SET_FNC_INT_SSE2,
+			ord, ord, _simple_flag, _tpdfo_flag, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		fmtcl_Dither_SPAN_FLT (
+			fmtcl_Dither_SET_FNC_FLT_SSE2,
+			ord, ord, _simple_flag, _tpdfo_flag, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+	}
+#endif
+}
+
+
+// Selects the row functions of the quasirandom mode (qrs) from the formats and the simple/TPDF flags.
+void	Dither::init_fnc_quasirandom () noexcept
+{
+	assert (! _errdif_flag);
+	// Plain C++ versions first, for the integer path then for the float-intermediary path
+	fmtcl_Dither_SPAN_INT (
+		fmtcl_Dither_SET_FNC_INT,
+		qrs, qrs, _simple_flag, _tpdfo_flag, _tpdfn_flag,
+		_dst_res, _splfmt_dst, _src_res, _splfmt_src
+	)
+	fmtcl_Dither_SPAN_FLT (
+		fmtcl_Dither_SET_FNC_FLT,
+		qrs, qrs, _simple_flag, _tpdfo_flag, _tpdfn_flag,
+		_dst_res, _splfmt_dst, _src_res, _splfmt_src
+	)
+	// With _sse2_flag set, the SSE2 versions replace the pointers selected above
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	if (_sse2_flag)
+	{
+		fmtcl_Dither_SPAN_INT (
+			fmtcl_Dither_SET_FNC_INT_SSE2,
+			qrs, qrs, _simple_flag, _tpdfo_flag, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		fmtcl_Dither_SPAN_FLT (
+			fmtcl_Dither_SET_FNC_FLT_SSE2,
+			qrs, qrs, _simple_flag, _tpdfo_flag, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+	}
+#endif
+}
+
+
+// The selection macros of the fast, ordered and quasirandom modes are not used past this point
+#undef fmtcl_Dither_SET_FNC_MULTI
+#undef fmtcl_Dither_SET_FNC_INT_CASE
+#undef fmtcl_Dither_SET_FNC_INT
+#undef fmtcl_Dither_SET_FNC_FLT_CASE
+#undef fmtcl_Dither_SET_FNC_FLT
+#undef fmtcl_Dither_SET_FNC_INT_SSE2_CASE
+#undef fmtcl_Dither_SET_FNC_INT_SSE2
+#undef fmtcl_Dither_SET_FNC_FLT_SSE2_CASE
+#undef fmtcl_Dither_SET_FNC_FLT_SSE2
+
+
+// Error diffusion, integer path: the key has no tpdfo bit, and the kernel Diffuse<NAMF> is passed as a template argument
+#define fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE(simple_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	case   (simple_flag << 7) + (tpdfn_flag << 22) \
+	     + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
+		_process_seg_int_int_ptr = &process_seg_errdif_int_int_cpp < \
+			simple_flag, tpdfn_flag, Diffuse##NAMF <DT, DP, ST, SP> \
+		>; \
+		break;
+// The 4 combinations of (simple_flag, tpdfn_flag) for one format pair
+#define fmtcl_Dither_SET_FNC_ERRDIF_INT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE (false, false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE (false, true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE (true , false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE (true , true , NAMP, NAMF, DF, DT, DP, SF, ST, SP)
+// Error diffusion, float-intermediary path: same key, sets _process_seg_flt_int_ptr
+#define fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE(simple_flag, tpdfn_flag, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	case   (simple_flag << 7) + (tpdfn_flag << 22) \
+	     + (DP << 24) + (DF << 16) + (SP << 8) + SF: \
+		_process_seg_flt_int_ptr = &process_seg_errdif_flt_int_cpp < \
+			simple_flag, tpdfn_flag, Diffuse##NAMF <DT, DP, ST, SP> \
+		>; \
+		break;
+// The 4 combinations of (simple_flag, tpdfn_flag) for one format pair
+#define fmtcl_Dither_SET_FNC_ERRDIF_FLT(NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE (false, false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE (false, true , NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE (true , false, NAMP, NAMF, DF, DT, DP, SF, ST, SP) \
+	fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE (true , true , NAMP, NAMF, DF, DT, DP, SF, ST, SP)
+
+
+// Selects the row functions of the error-diffusion modes: one kernel per _dmode, C++ versions only.
+void	Dither::init_fnc_errdiff () noexcept
+{
+	assert (_errdif_flag);
+	// tpdfo is always passed as false: the error-diffusion case labels have no bit for it
+	switch (_dmode)
+	{
+	case DMode_FILTERLITE:
+		fmtcl_Dither_SPAN_INT (
+			fmtcl_Dither_SET_FNC_ERRDIF_INT,
+			errdif, FilterLite, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		fmtcl_Dither_SPAN_FLT (
+			fmtcl_Dither_SET_FNC_ERRDIF_FLT,
+			errdif, FilterLite, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		break;
+
+	case DMode_STUCKI:
+		fmtcl_Dither_SPAN_INT (
+			fmtcl_Dither_SET_FNC_ERRDIF_INT,
+			errdif, Stucki, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		fmtcl_Dither_SPAN_FLT (
+			fmtcl_Dither_SET_FNC_ERRDIF_FLT,
+			errdif, Stucki, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		break;
+
+	case DMode_ATKINSON:
+		fmtcl_Dither_SPAN_INT (
+			fmtcl_Dither_SET_FNC_ERRDIF_INT,
+			errdif, Atkinson, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		fmtcl_Dither_SPAN_FLT (
+			fmtcl_Dither_SET_FNC_ERRDIF_FLT,
+			errdif, Atkinson, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		break;
+
+	case DMode_FLOYD:
+		fmtcl_Dither_SPAN_INT (
+			fmtcl_Dither_SET_FNC_ERRDIF_INT,
+			errdif, FloydSteinberg, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		fmtcl_Dither_SPAN_FLT (
+			fmtcl_Dither_SET_FNC_ERRDIF_FLT,
+			errdif, FloydSteinberg, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		break;
+
+	case DMode_OSTRO:
+		fmtcl_Dither_SPAN_INT (
+			fmtcl_Dither_SET_FNC_ERRDIF_INT,
+			errdif, Ostromoukhov, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		fmtcl_Dither_SPAN_FLT (
+			fmtcl_Dither_SET_FNC_ERRDIF_FLT,
+			errdif, Ostromoukhov, _simple_flag, false, _tpdfn_flag,
+			_dst_res, _splfmt_dst, _src_res, _splfmt_src
+		)
+		break;
+
+	default: // Not an error-diffusion mode: the function pointers are left untouched
+		break;
+	}
+}
+
+
+// The error-diffusion selection macros and both SPAN switches are not used past this point
+#undef fmtcl_Dither_SET_FNC_ERRDIF_INT_CASE
+#undef fmtcl_Dither_SET_FNC_ERRDIF_INT
+#undef fmtcl_Dither_SET_FNC_ERRDIF_FLT_CASE
+#undef fmtcl_Dither_SET_FNC_ERRDIF_FLT
+
+
+
+#undef fmtcl_Dither_SPAN_INT
+#undef fmtcl_Dither_SPAN_FLT
+
+
+// Dithers one plane row by row after selecting the integer or float row function and preparing the per-plane/per-frame context.
+void	Dither::dither_plane (uint8_t *dst_ptr, int dst_stride, const uint8_t *src_ptr, int src_stride, int w, int h, const BitBltConv::ScaleInfo &scale_info, int frame_index, int plane_index)
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	assert (h > 0);
+
+	SegContext     ctx;
+	ctx._scale_info_ptr = &scale_info;
+	ctx._amp            = _amp;
+	// Random generator state: depends on the plane unless planes are correlated, and on the frame unless the noise is static
+	uint32_t       rnd_state = 0;
+	if (! _correlated_planes_flag)
+	{
+		rnd_state += plane_index << 16;
+	}
+	if (_static_noise_flag)
+	{
+		rnd_state += 55555;
+	}
+	else
+	{
+		rnd_state += frame_index;
+	}
+	ctx._rnd_state = rnd_state;
+	// The integer functions only exist for strict bitdepth reductions done as a pure shift; anything else goes through float.
+	const bool     sc_flag =
+		(   _splfmt_src == SplFmt_FLOAT
+		 || _src_res <= _dst_res // Must be tested before the line below, so the shift count is never negative
+		 || ! fstb::is_eq (scale_info._gain * double ((uint64_t (1)) << (_src_res - _dst_res)), 1.0, 1e-6)
+		 || ! fstb::is_null (scale_info._add_cst, 1e-6));
+	void (* process_ptr) (uint8_t *dst_ptr, const uint8_t *src_ptr, int w, SegContext &ctx) =
+		  (sc_flag)
+		? _process_seg_flt_int_ptr
+		: _process_seg_int_int_ptr;
+	assert (process_ptr != nullptr);
+
+	ErrDifBuf *   ed_buf_ptr = nullptr;
+	if (_errdif_flag)
+	{
+		ed_buf_ptr = _buf_pool.take_obj ();
+		if (ed_buf_ptr == nullptr)
+		{
+			throw std::runtime_error (
+				"cannot allocate memory for temporary buffer."
+			);
+		}
+		ed_buf_ptr->clear ((sc_flag) ? sizeof (float) : sizeof (int16_t));
+	}
+	// Mode-specific part of the context
+	switch (_dmode)
+	{
+	case DMode_BAYER:
+	case DMode_ROUND:
+	case DMode_VOIDCLUST:
+		{
+			int            pat_index = 0;
+			if (! _correlated_planes_flag)
+			{
+				pat_index += plane_index;
+			}
+			if (_dyn_flag)
+			{
+				pat_index += frame_index;
+			}
+			pat_index &= _pat_period - 1;
+			const PatData& pattern   = _dither_pat_arr [pat_index];
+			ctx._pattern_ptr = &pattern;
+		}
+		break;
+
+	case DMode_FAST:
+		// Nothing
+		break;
+
+	case DMode_QUASIRND:
+		ctx._qrs_seed = 0;
+		if (_dyn_flag)
+		{
+			ctx._qrs_seed += uint32_t (frame_index) * 73u; // Unsigned product: wraps instead of overflowing a signed int
+		}
+		if (! _correlated_planes_flag)
+		{
+			ctx._qrs_seed += uint32_t (plane_index) * 263u;
+		}
+		break;
+
+	case DMode_FILTERLITE:
+	case DMode_STUCKI:
+	case DMode_ATKINSON:
+	case DMode_FLOYD:
+	case DMode_OSTRO:
+		ctx._ed_buf_ptr = ed_buf_ptr;
+		break;
+
+	default:
+		assert (false);
+		throw std::logic_error ("unexpected dithering algorithm");
+		break;
+	}
+
+	for (int y = 0; y < h; ++y)
+	{
+		ctx._y = y;
+
+		(*process_ptr) (dst_ptr, src_ptr, w, ctx);
+
+		src_ptr += src_stride;
+		dst_ptr += dst_stride;
+	}
+
+	if (ed_buf_ptr != nullptr)
+	{
+		_buf_pool.return_obj (*ed_buf_ptr);
+		ed_buf_ptr = nullptr;
+	}
+}
+
+
+// Fast mode, integer path: each sample is reduced from SRC_BITS to DST_BITS by dropping its lowest bits.
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+void	Dither::process_seg_fast_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	static_assert (SRC_BITS - DST_BITS >= 0, "This function cannot increase bidepth.");
+	constexpr int  shift = SRC_BITS - DST_BITS;
+
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	fstb::unused (ctx);
+
+	const SRC_TYPE * fstb_RESTRICT in_ptr  = reinterpret_cast <const SRC_TYPE *> (src_ptr);
+	DST_TYPE * fstb_RESTRICT       out_ptr = reinterpret_cast <DST_TYPE *> (dst_ptr);
+
+	// Truncation only: neither pattern nor noise is applied, and the flag
+	// template parameters are not used.
+	for (int idx = 0; idx < w; ++idx)
+	{
+		const int      val = in_ptr [idx];
+		out_ptr [idx] = static_cast <DST_TYPE> (val >> shift);
+	}
+}
+
+
+// Fast mode, float path: scales each sample with the gain and additive constant of ctx, converts it to integer and clips it to DST_BITS bits.
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
+void	Dither::process_seg_fast_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	assert (ctx._scale_info_ptr != nullptr);
+
+	const float    gain    = float (ctx._scale_info_ptr->_gain);
+	const float    add_cst = float (ctx._scale_info_ptr->_add_cst);
+	const int      max_val = (1 << DST_BITS) - 1;
+
+	const SRC_TYPE * fstb_RESTRICT in_ptr  = reinterpret_cast <const SRC_TYPE *> (src_ptr);
+	DST_TYPE * fstb_RESTRICT       out_ptr = reinterpret_cast <DST_TYPE *> (dst_ptr);
+
+	// No pattern nor noise here, the flag template parameters are not used
+	for (int idx = 0; idx < w; ++idx)
+	{
+		const float    scaled = float (in_ptr [idx]) * gain + add_cst;
+		const int      quant  = fstb::conv_int_fast (scaled);
+		const int      clipped = fstb::limit (quant, 0, max_val);
+		out_ptr [idx] = static_cast <DST_TYPE> (clipped);
+	}
+}
+
+
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+
+
+// Fast mode, integer path, SSE2: logical right shift by SRC_BITS - DST_BITS, 8 samples per iteration.
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, int SRC_BITS>
+void	Dither::process_seg_fast_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	static_assert (SRC_BITS - DST_BITS >= 0, "This function cannot increase bidepth.");
+	constexpr int  shift = SRC_BITS - DST_BITS;
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	fstb::unused (ctx);
+
+	using SrcProxy = ProxyRwSse2 <SRC_FMT>;
+	using DstProxy = ProxyRwSse2 <DST_FMT>;
+	using SrcPtr   = typename SrcProxy::PtrConst::Type;
+	using DstPtr   = typename DstProxy::Ptr::Type;
+	SrcPtr         in_ptr   = reinterpret_cast <SrcPtr> (src_ptr);
+	DstPtr         out_ptr  = reinterpret_cast <DstPtr> (dst_ptr);
+	const __m128i  zero     = _mm_setzero_si128 ();
+	const __m128i  lsb_mask = _mm_set1_epi16 (0x00FF);
+
+	// Whole groups of 8 samples: up to 7 samples past w are processed
+	for (int idx = 0; idx < w; idx += 8)
+	{
+		const __m128i  val = SrcProxy::read_i16 (in_ptr + idx, zero);
+		DstProxy::write_i16 (out_ptr + idx, _mm_srli_epi16 (val, shift), lsb_mask);
+	}
+}
+
+
+// Fast mode, float path, SSE2: scales the samples with the gain and additive constant of ctx and clips them to DST_BITS bits, 8 per iteration.
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT>
+void	Dither::process_seg_fast_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	assert (ctx._scale_info_ptr != nullptr);
+
+	using SrcProxy = ProxyRwSse2 <SRC_FMT>;
+	using DstProxy = ProxyRwSse2 <DST_FMT>;
+	using SrcPtr   = typename SrcProxy::PtrConst::Type;
+	using DstPtr   = typename DstProxy::Ptr::Type;
+	SrcPtr         in_ptr  = reinterpret_cast <SrcPtr> (src_ptr);
+	DstPtr         out_ptr = reinterpret_cast <DstPtr> (dst_ptr);
+	// Scaling and clipping constants
+	const __m128   gain     = _mm_set1_ps (float (ctx._scale_info_ptr->_gain));
+	const __m128   add_cst  = _mm_set1_ps (float (ctx._scale_info_ptr->_add_cst));
+	const __m128   val_max  = _mm_set1_ps (float ((1 << DST_BITS) - 1));
+	const __m128   val_min  = _mm_setzero_ps ();
+	// Constants handed to the read and write proxies
+	const __m128i  zero_i   = _mm_setzero_si128 ();
+	const __m128i  mask_lsb = _mm_set1_epi16 (0x00FF);
+	const __m128i  sign_bit = _mm_set1_epi16 (-0x8000);
+	const __m128   offset   = _mm_set1_ps (-32768);
+
+	for (int idx = 0; idx < w; idx += 8)
+	{
+		__m128         v0;
+		__m128         v1;
+		SrcProxy::read_flt (in_ptr + idx, v0, v1, zero_i);
+		v0 = _mm_add_ps (_mm_mul_ps (v0, gain), add_cst);
+		v1 = _mm_add_ps (_mm_mul_ps (v1, gain), add_cst);
+		v0 = _mm_max_ps (_mm_min_ps (v0, val_max), val_min);
+		v1 = _mm_max_ps (_mm_min_ps (v1, val_max), val_min);
+		DstProxy::write_flt (out_ptr + idx, v0, v1, mask_lsb, sign_bit, offset);
+	}
+}
+
+
+
+#endif
+
+
+
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+void	Dither::process_seg_ord_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	// Ordered dither, scalar integer path. The dither value for column x is
+	// read from the current pattern row, wrapping at the pattern width.
+	constexpr auto col_mask = _max_pat_width - 1;
+	const PatRow & fstb_RESTRICT  pat_row = ctx.extract_pattern_row ();
+	const auto     fetch_dither =
+		[&] (int x) { return pat_row [x & col_mask]; };
+
+	process_seg_common_int_int_cpp <
+		S_FLAG, TN_FLAG, DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS
+	> (dst_ptr, src_ptr, w, ctx, fetch_dither);
+}
+
+
+
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
+void	Dither::process_seg_ord_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	// Ordered dither, scalar path with scaled (float-processed) source. The
+	// dither value for column x comes from the current pattern row, cyclically.
+	constexpr auto col_mask = _max_pat_width - 1;
+	const PatRow & fstb_RESTRICT  pat_row = ctx.extract_pattern_row ();
+	const auto     fetch_dither =
+		[&] (int x) { return pat_row [x & col_mask]; };
+
+	process_seg_common_flt_int_cpp <
+		S_FLAG, TN_FLAG, DST_TYPE, DST_BITS, SRC_TYPE
+	> (dst_ptr, src_ptr, w, ctx, fetch_dither);
+}
+
+
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+
+
+
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, int SRC_BITS>
+void	Dither::process_seg_ord_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	// Ordered dither, SSE2 integer path: 8 consecutive pattern cells are loaded
+	// at once from the current pattern row, wrapping at the pattern width.
+	constexpr auto col_mask = _max_pat_width - 1;
+	const PatRow & fstb_RESTRICT  pat_row = ctx.extract_pattern_row ();
+	const auto     load_dither = [&] (int x)
+	{
+		const auto *   cell_ptr = &pat_row [x & col_mask];
+		// 8 s16 [-128 ; +127], aligned load
+		return _mm_load_si128 (reinterpret_cast <const __m128i *> (cell_ptr));
+	};
+	process_seg_common_int_int_sse2 <
+		S_FLAG, TN_FLAG, DST_FMT, DST_BITS, SRC_FMT, SRC_BITS
+	> (dst_ptr, src_ptr, w, ctx, load_dither);
+}
+
+
+
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT>
+void	Dither::process_seg_ord_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	// Ordered dither, SSE2 path with scaled (float-processed) source: 8 pattern
+	// cells are loaded at once from the current row, wrapping at its width.
+	constexpr auto col_mask = _max_pat_width - 1;
+	const PatRow & fstb_RESTRICT  pat_row = ctx.extract_pattern_row ();
+	const auto     load_dither = [&] (int x)
+	{
+		const auto *   cell_ptr = &pat_row [x & col_mask];
+		// 8 s16 [-128 ; +127], aligned load
+		return _mm_load_si128 (reinterpret_cast <const __m128i *> (cell_ptr));
+	};
+	process_seg_common_flt_int_sse2 <
+		S_FLAG, TN_FLAG, DST_FMT, DST_BITS, SRC_FMT
+	> (dst_ptr, src_ptr, w, ctx, load_dither);
+}
+
+
+
+#endif   // fstb_ARCHI_X86
+
+
+// "qrs" dither, scalar integer path: a fixed-point phase advancing by alpha1 per pixel is folded into a triangle wave used as ordered dither.
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+void	Dither::process_seg_qrs_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	// alpha1 = 1 / x, with x real solution of: x^3 - x - 1 = 0
+	// Also:
+	// alpha1 =   (curt (2) * sq (curt (3)))
+	//          / (curt (9 - sqrt (69)) + curt (9 + sqrt (69)))
+	constexpr double  alpha1  = 1.0 / 1.3247179572447460259609088544781;
+	constexpr double  alpha2  = alpha1 * alpha1;
+	constexpr int     sc_l2   = 16; // 16 bits of fractional values
+	constexpr float   sc_mul  = float (1 << sc_l2);
+	constexpr int     qrs_shf = sc_l2 - 9; // Shift keeping the 9 upper fractional bits
+	constexpr int     qrs_inc = int (alpha1 * sc_mul + 0.5f); // alpha1, rounded, 16 fractional bits
+	uint32_t          qrs_cnt = uint32_t (std::llrint ( // Phase at the start of the row, reduced to 32 bits
+		(alpha2 * double (ctx._y + ctx._qrs_seed)) * sc_mul
+	));
+
+	process_seg_common_int_int_cpp <
+		S_FLAG, TN_FLAG, DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS
+	> (dst_ptr, src_ptr, w, ctx,
+		[&] (int /*pos*/)
+		{
+			const int      p      = (qrs_cnt >> qrs_shf) & 0x1FF; // 9-bit phase, [0 ; 511]
+			int            dith_o = (p > 255) ? 512 - 128 - p : p - 128; // s8. NOTE(review): p == 256 gives +128, one above s8; confirm intent
+			qrs_cnt += qrs_inc; // The state advances once per call, i.e. per pixel
+
+			if (TO_FLAG)
+			{
+				dith_o = remap_tpdf_scalar (dith_o);
+			}
+
+			return dith_o;
+		}
+	);
+}
+
+
+// "qrs" dither, scalar path with scaled source: a fixed-point phase advancing by alpha1 per pixel is folded into a triangle wave used as ordered dither.
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
+void	Dither::process_seg_qrs_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	// alpha1 = 1 / x, with x real solution of: x^3 - x - 1 = 0
+	// Also:
+	// alpha1 =   (curt (2) * sq (curt (3)))
+	//          / (curt (9 - sqrt (69)) + curt (9 + sqrt (69)))
+	constexpr double  alpha1  = 1.0 / 1.3247179572447460259609088544781;
+	constexpr double  alpha2  = alpha1 * alpha1;
+	constexpr int     sc_l2   = 16; // 16 bits of fractional values
+	constexpr float   sc_mul  = float (1 << sc_l2);
+	constexpr int     qrs_shf = sc_l2 - 9; // Shift keeping the 9 upper fractional bits
+	constexpr int     qrs_inc = int (alpha1 * sc_mul + 0.5f); // alpha1, rounded, 16 fractional bits
+	uint32_t          qrs_cnt = uint32_t (std::llrint ( // Phase at the start of the row, reduced to 32 bits
+		(alpha2 * double (ctx._y + ctx._qrs_seed)) * sc_mul
+	));
+
+	process_seg_common_flt_int_cpp <
+		S_FLAG, TN_FLAG, DST_TYPE, DST_BITS, SRC_TYPE
+	> (dst_ptr, src_ptr, w, ctx,
+		[&] (int /*pos*/)
+		{
+			const int      p      = (qrs_cnt >> qrs_shf) & 0x1FF; // 9-bit phase, [0 ; 511]
+			int            dith_o = (p > 255) ? 512 - 128 - p : p - 128; // s8. NOTE(review): p == 256 gives +128, one above s8; confirm intent
+			qrs_cnt += qrs_inc; // The state advances once per call, i.e. per pixel
+
+			if (TO_FLAG)
+			{
+				dith_o = remap_tpdf_scalar (dith_o);
+			}
+
+			return dith_o;
+		}
+	);
+}
+
+
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+
+
+// "qrs" dither, SSE2 integer path: same phase/triangle-wave generator as the scalar version, producing 8 dither values per call.
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, int SRC_BITS>
+void	Dither::process_seg_qrs_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	// alpha1 = 1 / x, with x real solution of: x^3 - x - 1 = 0
+	// Also:
+	// alpha1 =   (curt (2) * sq (curt (3)))
+	//          / (curt (9 - sqrt (69)) + curt (9 + sqrt (69)))
+	constexpr double  alpha1  = 1.0 / 1.3247179572447460259609088544781;
+	constexpr double  alpha2  = alpha1 * alpha1;
+	constexpr int     sc_l2   = 16; // 16 bits of fractional values
+	constexpr float   sc_mul  = float (1 << sc_l2);
+	constexpr int     qrs_shf = sc_l2 - 9; // Shift keeping the 9 upper fractional bits
+	constexpr int     qrs_inc = int (alpha1 * sc_mul + 0.5f); // alpha1, rounded, 16 fractional bits
+	uint32_t          qrs_cnt = uint32_t (std::llrint ( // Phase at the start of the row, reduced to 32 bits
+		(alpha2 * double (ctx._y + ctx._qrs_seed)) * sc_mul
+	));
+
+	const __m128i     qrs_inc_4 = _mm_set1_epi32 (4 * qrs_inc); // Step for 4 pixels
+	__m128i           qrs_cnt_4 = _mm_set1_epi32 (qrs_cnt);
+	const __m128i     qrs_ofs   = _mm_set_epi32 (qrs_inc * 3, qrs_inc * 2, qrs_inc, 0);
+	qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_ofs); // Lanes 0-3: phases of 4 consecutive pixels
+	const __m128i     qrs_msk   = _mm_set1_epi32 (0x1FF);
+	const __m128i     c128      = _mm_set1_epi16 (128);
+	const __m128i     c256      = _mm_set1_epi16 (256);
+	const __m128i     c384      = _mm_set1_epi16 (384);
+
+	process_seg_common_int_int_sse2 <
+		S_FLAG, TN_FLAG, DST_FMT, DST_BITS, SRC_FMT, SRC_BITS
+	> (dst_ptr, src_ptr, w, ctx,
+		[&] (int /*pos*/)
+		{
+			auto           p03    = _mm_srli_epi32 (qrs_cnt_4, qrs_shf);
+			p03 = _mm_and_si128 (p03, qrs_msk); // 9-bit phases, pixels 0-3
+			qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_inc_4);
+			auto           p47    = _mm_srli_epi32 (qrs_cnt_4, qrs_shf);
+			p47 = _mm_and_si128 (p47, qrs_msk); // 9-bit phases, pixels 4-7
+			qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_inc_4);
+			const auto     p      = _mm_packs_epi32 (p03, p47); // 8 s16 [0 ; 511]
+			const auto     tri_a  = _mm_sub_epi16 (p, c128); // Rising half: p - 128
+			const auto     tri_d  = _mm_sub_epi16 (c384, p); // Falling half: 384 - p. NOTE(review): p == 256 gives +128; confirm intent
+			const auto     cond   = _mm_cmplt_epi16 (p, c256);
+			auto           dith_o = _mm_or_si128 ( // Per lane: cond ? tri_a : tri_d
+				_mm_and_si128 (cond, tri_a),
+				_mm_andnot_si128 (cond, tri_d)
+			);
+
+			if (TO_FLAG)
+			{
+				dith_o = remap_tpdf_vec (dith_o);
+			}
+
+			return dith_o; // 8 s16 [-128 ; +127] or [-256 ; +255]
+		}
+	);
+}
+
+
+// "qrs" dither, SSE2 path with scaled source: same phase/triangle-wave generator as the scalar version, producing 8 dither values per call.
+template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT>
+void	Dither::process_seg_qrs_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	// alpha1 = 1 / x, with x real solution of: x^3 - x - 1 = 0
+	// Also:
+	// alpha1 =   (curt (2) * sq (curt (3)))
+	//          / (curt (9 - sqrt (69)) + curt (9 + sqrt (69)))
+	constexpr double  alpha1  = 1.0 / 1.3247179572447460259609088544781;
+	constexpr double  alpha2  = alpha1 * alpha1;
+	constexpr int     sc_l2   = 16; // 16 bits of fractional values
+	constexpr float   sc_mul  = float (1 << sc_l2);
+	constexpr int     qrs_shf = sc_l2 - 9; // Shift keeping the 9 upper fractional bits
+	constexpr int     qrs_inc = int (alpha1 * sc_mul + 0.5f); // alpha1, rounded, 16 fractional bits
+	uint32_t          qrs_cnt = uint32_t (std::llrint ( // Phase at the start of the row, reduced to 32 bits
+		(alpha2 * double (ctx._y + ctx._qrs_seed)) * sc_mul
+	));
+
+	const __m128i     qrs_inc_4 = _mm_set1_epi32 (4 * qrs_inc); // Step for 4 pixels
+	__m128i           qrs_cnt_4 = _mm_set1_epi32 (qrs_cnt);
+	const __m128i     qrs_ofs   = _mm_set_epi32 (qrs_inc * 3, qrs_inc * 2, qrs_inc, 0);
+	qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_ofs); // Lanes 0-3: phases of 4 consecutive pixels
+	const __m128i     qrs_msk   = _mm_set1_epi32 (0x1FF);
+	const __m128i     c128      = _mm_set1_epi16 (128);
+	const __m128i     c256      = _mm_set1_epi16 (256);
+	const __m128i     c384      = _mm_set1_epi16 (384);
+
+	process_seg_common_flt_int_sse2 <
+		S_FLAG, TN_FLAG, DST_FMT, DST_BITS, SRC_FMT
+	> (dst_ptr, src_ptr, w, ctx,
+		[&] (int /*pos*/)
+		{
+			auto           p03    = _mm_srli_epi32 (qrs_cnt_4, qrs_shf);
+			p03 = _mm_and_si128 (p03, qrs_msk); // 9-bit phases, pixels 0-3
+			qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_inc_4);
+			auto           p47    = _mm_srli_epi32 (qrs_cnt_4, qrs_shf);
+			p47 = _mm_and_si128 (p47, qrs_msk); // 9-bit phases, pixels 4-7
+			qrs_cnt_4 = _mm_add_epi32 (qrs_cnt_4, qrs_inc_4);
+			const auto     p      = _mm_packs_epi32 (p03, p47); // 8 s16 [0 ; 511]
+			const auto     tri_a  = _mm_sub_epi16 (p, c128); // Rising half: p - 128
+			const auto     tri_d  = _mm_sub_epi16 (c384, p); // Falling half: 384 - p. NOTE(review): p == 256 gives +128; confirm intent
+			const auto     cond   = _mm_cmplt_epi16 (p, c256);
+			auto           dith_o = _mm_or_si128 ( // Per lane: cond ? tri_a : tri_d
+				_mm_and_si128 (cond, tri_a),
+				_mm_andnot_si128 (cond, tri_d)
+			);
+
+			if (TO_FLAG)
+			{
+				dith_o = remap_tpdf_vec (dith_o);
+			}
+
+			return dith_o; // 8 s16 [-128 ; +127] or [-256 ; +255]
+		}
+	);
+}
+
+
+
+#endif   // fstb_ARCHI_X86
+// Common scalar kernel, integer source: adds ordered dither and, unless S_FLAG
+// is set, amplitude-weighted noise, then rounds to DST_BITS and clips.
+// int dither_fnc (int pos) noexcept; must return the ordered dither value.
+template <bool S_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS, typename DFNC>
+void	Dither::process_seg_common_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+
+	constexpr int  dif_bits = SRC_BITS - DST_BITS;
+	static_assert (dif_bits >= 1, "This function must reduce bitdepth.");
+
+	uint32_t &     rnd_state = ctx._rnd_state;
+
+	const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
+	DST_TYPE * fstb_RESTRICT       dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
+
+	const int      rcst = 1 << (dif_bits - 1); // Rounding constant: half an output step
+	const int      vmax = (1 << DST_BITS) - 1;
+
+	const int      ao   = ctx._amp._o_i; // s8
+	const int      an   = ctx._amp._n_i; // s8
+
+	for (int pos = 0; pos < w; ++pos)
+	{
+		const int      s = src_n_ptr [pos];
+
+		const int      dith_o = dither_fnc (pos); // s8
+		int            dither;
+		if (S_FLAG)
+		{
+			constexpr int  dit_shft = 8 - dif_bits; // Rescales the 8-bit dither to dif_bits bits
+			dither = fstb::sshift_r <int, dit_shft> (dith_o);
+		}
+		else
+		{
+			const int      dith_n = generate_dith_n_scalar <TN_FLAG> (rnd_state); // s8
+
+			constexpr int  dit_shft = _amp_bits + 8 - dif_bits;
+			dither = fstb::sshift_r <int, dit_shft> (dith_o * ao + dith_n * an);	// s16 = s8 * s8 // s16 = s16 >> cst
+		}
+		const int      sum   = s + dither;	// s16+
+		const int      quant = (sum + rcst) >> dif_bits;	// s16
+
+		const int      pix   = fstb::limit (quant, 0, vmax);
+		dst_n_ptr [pos] = static_cast <DST_TYPE> (pix);
+	}
+
+	if (! S_FLAG)
+	{
+		generate_rnd_eol (rnd_state); // Extra state update once the segment is done
+	}
+}
+
+
+
+// Common scalar kernel with scaled source (s * gain + add_cst), float math.
+// int dither_fnc (int pos) noexcept; must provide the ordered dither value,
+// in [-128 ; +127] nominal range (doubled for TPDF)
+template <bool S_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, typename DFNC>
+void	Dither::process_seg_common_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+
+	const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
+	DST_TYPE * fstb_RESTRICT       dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
+
+	uint32_t &     rnd_state = ctx._rnd_state;
+
+	const int      ao   = ctx._amp._o_i; // s8
+	const int      an   = ctx._amp._n_i; // s8
+
+	const float    mul  = float (ctx._scale_info_ptr->_gain);
+	const float    add  = float (ctx._scale_info_ptr->_add_cst);
+	const float    qt   = 1.0f / (1 << ((S_FLAG ? 0 : _amp_bits) + 8)); // Scales the integer dither down to output steps
+	const int      vmax = (1 << DST_BITS) - 1;
+
+	for (int pos = 0; pos < w; ++pos)
+	{
+		float          s = float (src_n_ptr [pos]);
+		s = s * mul + add;
+
+		const int      dith_o = dither_fnc (pos); // s8
+
+		float          dither;
+		if (S_FLAG)
+		{
+			dither = float (dith_o) * qt; // Ordered dither only, no amplitude, no noise
+		}
+		else
+		{
+			const int      dith_n = generate_dith_n_scalar <TN_FLAG> (rnd_state); // s8
+			dither = float (dith_o * ao + dith_n * an) * qt;
+		}
+		const float    sum    = s + dither;
+		const int      quant  = fstb::round_int (sum);
+
+		const int      pix = fstb::limit (quant, 0, vmax);
+		dst_n_ptr [pos] = static_cast <DST_TYPE> (pix);
+	}
+
+	if (! S_FLAG)
+	{
+		generate_rnd_eol (rnd_state); // Extra state update once the segment is done
+	}
+}
+
+
+
+template <bool T_FLAG>
+int	Dither::generate_dith_n_scalar (uint32_t &rnd_state) noexcept
+{
+	// Sums the signed top byte of the state over one draw, or two if T_FLAG.
+	constexpr int  nbr_draws = T_FLAG ? 2 : 1;
+	int            noise_sum = 0;
+	for (int draw = 0; draw < nbr_draws; ++draw)
+	{
+		generate_rnd (rnd_state);
+		noise_sum += int8_t (rnd_state >> 24);
+	}
+	return noise_sum;
+}
+
+// Scalar counterpart of remap_tpdf_vec(): returns d * (1 + 5/8 * x^2 + 3/8 * x^32)
+// with x = d / 128, computed in fixed point. d is nominally in [-128 ; 127].
+int	Dither::remap_tpdf_scalar (int d) noexcept
+{
+	// [-128 ; 127] to [-32767 ; +32767], representing [-1 ; 1] (15-bit scale)
+	auto           x2   = d * d;
+	x2 += x2;
+	x2 = std::min (x2, 0x7FFF); // Saturated to the s16 maximum, as _mm_adds_epi16 does in the vector version
+	auto           x4   = (x2  * x2 ) >> 15;
+	auto           x8   = (x4  * x4 ) >> 15;
+	auto           x16  = (x8  * x8 ) >> 15;
+	auto           x32  = (x16 * x16) >> 15;
+
+	// 15-bit scale
+	constexpr int  c3  = 0x8000 * 5 / 8;
+	constexpr int  c33 = 0x8000 * 3 / 8;
+
+	// 15-bit scale
+	auto           sum_s15 = (x2 * c3 + x32 * c33) >> 15;
+	const auto     x_s15   = d * (1 << 8); // Multiplication: left-shifting a negative int is undefined before C++20
+	const auto     sum_s7  = (sum_s15 * x_s15) >> (30 - 7);
+
+	d += sum_s7; // d + d * (5/8 * x^2 + 3/8 * x^32)
+
+	return d;
+}
+
+
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+
+
+
+// __m128i dither_fnc (int pos) noexcept;
+// Must provide the ordered dither values as a vector of 8 x int16_t,
+// in [-128 ; +127] nominal range (doubled for TPDF)
+template <bool S_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, int SRC_BITS, typename DFNC>
+void	Dither::process_seg_common_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+
+	constexpr int  dif_bits = SRC_BITS - DST_BITS;
+	static_assert (dif_bits >= 0, "This function cannot increase bitdepth.");
+
+	uint32_t &     rnd_state = ctx._rnd_state;
+
+	typedef typename  ProxyRwSse2 <SRC_FMT>::PtrConst::Type SrcPtr;
+	typedef typename  ProxyRwSse2 <DST_FMT>::Ptr::Type      DstPtr;
+	SrcPtr         src_n_ptr = reinterpret_cast <SrcPtr> (src_ptr);
+	DstPtr         dst_n_ptr = reinterpret_cast <DstPtr> (dst_ptr);
+	const __m128i  zero      = _mm_setzero_si128 ();
+	const __m128i  mask_lsb  = _mm_set1_epi16 (0x00FF);
+	const __m128i  sign_bit  = _mm_set1_epi16 (-0x8000);
+	const __m128i  rcst      = _mm_set1_epi16 ((1 << dif_bits) >> 1); // Half a step; 0 when dif_bits == 0 (no negative shift)
+	const __m128i  vmax      = _mm_set1_epi16 ((1 << DST_BITS) - 1);
+
+	const __m128i  ampo_i    = _mm_set1_epi16 (int16_t (ctx._amp._o_i)); // 8 ?16 [0 ; 255]
+	const __m128i  ampn_i    = _mm_set1_epi16 (int16_t (ctx._amp._n_i)); // 8 ?16 [0 ; 255]
+
+	for (int pos = 0; pos < w; pos += 8)
+	{
+		const __m128i  s =	// 8 u16
+			ProxyRwSse2 <SRC_FMT>::read_i16 (src_n_ptr + pos, zero);
+
+		// 8 s16 [-128 ; +127] or [-256 ; 255]
+		__m128i        dith_o = dither_fnc (pos);
+
+		__m128i        dither;
+		if (S_FLAG)
+		{
+			constexpr int  dit_shft = 8 - dif_bits;
+			dither = _mm_srai_epi16 (dith_o, dit_shft);
+		}
+		else
+		{
+			// Random generation. 8 s16 [-128 ; 127] or [-256 ; 255]
+			__m128i			dith_n = generate_dith_n_vec <TN_FLAG> (rnd_state);
+
+			dith_o = _mm_mullo_epi16 (dith_o, ampo_i);      // 8 s16 (full range)
+			dith_n = _mm_mullo_epi16 (dith_n, ampn_i);      // 8 s16 (full range)
+			dither = _mm_adds_epi16 (dith_o, dith_n);       // 8 s16 = s8 * s8
+
+			constexpr int  dit_shft = _amp_bits + 8 - dif_bits;
+			dither = _mm_srai_epi16 (dither, dit_shft);     // 8 s16 = s16 >> cst
+		}
+
+		const __m128i  dith_rcst = _mm_adds_epi16 (dither, rcst);
+
+		__m128i        quant;
+		if (S_FLAG && SRC_BITS < 16)
+		{
+			__m128i        sum = _mm_adds_epi16 (s, dith_rcst);
+			quant = _mm_srai_epi16 (sum, dif_bits);
+		}
+		else
+		{
+			__m128i        sum  = _mm_xor_si128 (s, sign_bit); // 8 s16
+			sum   = _mm_adds_epi16 (sum, dith_rcst);        // Saturating add done in the biased (signed) domain
+			sum   = _mm_xor_si128 (sum, sign_bit);          // 8 u16
+			quant = _mm_srli_epi16 (sum, dif_bits);
+		}
+
+		__m128i        pix = quant;
+		if (SRC_BITS < 16)
+		{
+			pix = _mm_max_epi16 (pix, zero);
+			pix = _mm_min_epi16 (pix, vmax);
+		}
+
+		ProxyRwSse2 <DST_FMT>::write_i16 (dst_n_ptr + pos, pix, mask_lsb);
+	}
+
+	if (! S_FLAG)
+	{
+		generate_rnd_eol (rnd_state); // Extra state update once the segment is done
+	}
+}
+
+// Common SSE2 kernel with scaled source, float math, 8 pixels per iteration.
+// __m128i dither_fnc (int pos) noexcept; must provide 8 x int16_t ordered dither values.
+template <bool S_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, typename DFNC>
+void	Dither::process_seg_common_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	assert (((_mm_getcsr () >> 13) & 3) == 0);   // 00 = Round to nearest (even)
+
+	uint32_t &     rnd_state = ctx._rnd_state;
+	// The 65536 factor undoes the << 16 introduced by the unpack-with-zero below.
+	const float    qt_cst    = 1.0f / (
+		65536.0f * float (1 << ((S_FLAG ? 0 : _amp_bits) + 8))
+	);
+
+	typedef typename  ProxyRwSse2 <SRC_FMT>::PtrConst::Type SrcPtr;
+	typedef typename  ProxyRwSse2 <DST_FMT>::Ptr::Type      DstPtr;
+	SrcPtr         src_n_ptr = reinterpret_cast <SrcPtr> (src_ptr);
+	DstPtr         dst_n_ptr = reinterpret_cast <DstPtr> (dst_ptr);
+	const __m128   zero_f    = _mm_setzero_ps ();
+	const __m128i  zero_i    = _mm_setzero_si128 ();
+	const __m128   mul       = _mm_set1_ps (float (ctx._scale_info_ptr->_gain));
+	const __m128   add       = _mm_set1_ps (float (ctx._scale_info_ptr->_add_cst));
+	const __m128   qt        = _mm_set1_ps (qt_cst);
+	const __m128   vmax      = _mm_set1_ps ((1 << DST_BITS) - 1);
+	const __m128   offset    = _mm_set1_ps (-32768);
+	const __m128i  mask_lsb  = _mm_set1_epi16 (0x00FF);
+	const __m128i  sign_bit  = _mm_set1_epi16 (-0x8000);
+
+	const __m128i  ampo_i    = _mm_set1_epi16 (int16_t (ctx._amp._o_i)); // 8 ?16 [0 ; 255]
+	const __m128i  ampn_i    = _mm_set1_epi16 (int16_t (ctx._amp._n_i)); // 8 ?16 [0 ; 255]
+
+	for (int pos = 0; pos < w; pos += 8)
+	{
+		__m128         s0; // Pixels 0-3 of the group
+		__m128         s1; // Pixels 4-7 of the group
+		ProxyRwSse2 <SRC_FMT>::read_flt (
+			src_n_ptr + pos, s0, s1, zero_i
+		);
+		s0 = _mm_add_ps (_mm_mul_ps (s0, mul), add);
+		s1 = _mm_add_ps (_mm_mul_ps (s1, mul), add);
+
+		// 8 s16 [-128 ; +127] or [-256 ; 255]
+		__m128i        dith_o = dither_fnc (pos);
+
+		__m128i        dither;
+		if (S_FLAG)
+		{
+			dither = dith_o; // Ordered dither only, no amplitude, no noise
+		}
+		else
+		{
+			// Random generation. 8 s16 [-128 ; 127] or [-256 ; 255]
+			__m128i			dith_n = generate_dith_n_vec <TN_FLAG> (rnd_state);
+
+			dith_o = _mm_mullo_epi16 (dith_o, ampo_i);      // 8 s16 (full range)
+			dith_n = _mm_mullo_epi16 (dith_n, ampn_i);      // 8 s16 (full range)
+			dither = _mm_adds_epi16 (dith_o, dith_n);       // 8 s16 = s8 * s8
+		}
+		// Sign-preserving widening: each s16 ends up in the upper half of a 32-bit lane.
+		__m128i        dither_03i = _mm_unpacklo_epi16 (zero_i, dither);  // 4 s32 << 16
+		__m128i        dither_47i = _mm_unpackhi_epi16 (zero_i, dither);  // 4 s32 << 16
+		__m128         dither_03  = _mm_cvtepi32_ps (dither_03i);
+		__m128         dither_47  = _mm_cvtepi32_ps (dither_47i);
+		dither_03 = _mm_mul_ps (dither_03, qt);
+		dither_47 = _mm_mul_ps (dither_47, qt);
+
+		s0 = _mm_add_ps (s0, dither_03);
+		s1 = _mm_add_ps (s1, dither_47);
+
+		s0 = _mm_max_ps (_mm_min_ps (s0, vmax), zero_f); // Clamps to [0 ; vmax]
+		s1 = _mm_max_ps (_mm_min_ps (s1, vmax), zero_f);
+		// NOTE(review): write_flt() presumably rounds using the MXCSR mode asserted above; confirm.
+		ProxyRwSse2 <DST_FMT>::write_flt (
+			dst_n_ptr + pos, s0, s1, mask_lsb, sign_bit, offset
+		);
+	}
+
+	if (! S_FLAG)
+	{
+		generate_rnd_eol (rnd_state); // Extra state update once the segment is done
+	}
+}
+
+// Generates 8 noise values as s16. Unlike the scalar version, all four bytes
+// of each generated state are used, read as unsigned and then re-centred.
+template <bool T_FLAG>
+__m128i	Dither::generate_dith_n_vec (uint32_t &rnd_state) noexcept
+{
+	generate_rnd (rnd_state);
+	const uint32_t rnd_03  = rnd_state; // 4 random bytes
+	generate_rnd (rnd_state);
+	const uint32_t rnd_47  = rnd_state; // 4 more random bytes
+	const auto        zero = _mm_setzero_si128 ();
+
+	if (T_FLAG)
+	{
+		generate_rnd (rnd_state);
+		const uint32_t rnd_03x = rnd_state;
+		generate_rnd (rnd_state);
+		const uint32_t rnd_47x = rnd_state;
+		const auto     rnd_val = _mm_set_epi32 (rnd_47x, rnd_03x, rnd_47, rnd_03);
+		const auto     c256_16 = _mm_set1_epi16 (0x100);
+		const auto     x0      = _mm_unpacklo_epi8 (rnd_val, zero); // 8 ?16 [0 ; 255], first two draws
+		const auto     x1      = _mm_unpackhi_epi8 (rnd_val, zero); // 8 ?16 [0 ; 255], last two draws
+		const auto     dith_n  = _mm_sub_epi16 (_mm_add_epi16 (x0, x1), c256_16);
+		return dith_n; // 8 s16 [-256 ; 255]
+	}
+
+	else
+	{
+		const auto     rnd_val = _mm_set_epi32 (0, 0, rnd_47, rnd_03);
+		const auto     c128_16 = _mm_set1_epi16 (0x80);
+		const auto     x0      = _mm_unpacklo_epi8 (rnd_val, zero); // 8 ?16 [0 ; 255]
+		const auto     dith_n  = _mm_sub_epi16 (x0, c128_16);       
+
+		return dith_n; // 8 s16 [-128 ; 127]
+	}
+}
+
+
+
+// d: 8 s16 [-128 ; 127]
+// Returns: 8 s16 [-256 ; 255]
+// Formula:
+// f: [-1 ; +1] -> [-2 ; +2]
+//            x -> x + 5/8 * x^3 + 3/8 * x^33
+// as an approximation of:
+//            x -> 2 * sign (x) * (1 - sqrt (1 - abs (x)))
+__m128i	Dither::remap_tpdf_vec (__m128i d) noexcept
+{
+	// [-128 ; 127] to [-32767 ; +32767], representing [-1 ; 1] (15-bit scale)
+	auto           x2   = _mm_mullo_epi16 (d  , d  );
+	x2  = _mm_adds_epi16 (x2 , x2 ); // Saturated here because of the -min * -min overflow
+	auto           x4   = _mm_mulhi_epi16 (x2 , x2 ); // Upper 16 bits of the product: 14-bit scale...
+	x4  = _mm_add_epi16 (x4 , x4 );                   // ...doubled to get back to the 15-bit scale
+	auto           x8   = _mm_mulhi_epi16 (x4 , x4 );
+	x8  = _mm_add_epi16 (x8 , x8 );
+	auto           x16  = _mm_mulhi_epi16 (x8 , x8 );
+	x16 = _mm_add_epi16 (x16, x16);
+	auto           x32  = _mm_mulhi_epi16 (x16, x16);
+	x32 = _mm_add_epi16 (x32, x32);
+
+	// 15-bit scale
+	const auto     c3  = _mm_set1_epi16 (0x8000 * 5 / 8);
+	const auto     c33 = _mm_set1_epi16 (0x8000 * 3 / 8);
+
+	// 14-bit scale, losing a bit of precision at each mul
+	auto           sum_s14 = _mm_mulhi_epi16 (x2, c3);
+	sum_s14 = _mm_add_epi16 (sum_s14, _mm_mulhi_epi16 (x32, c33));
+	// NOTE(review): the shift below wraps to -32768 for d == +128, which the qrs generators can emit; confirm.
+	const auto     x_s15 = _mm_slli_epi16 (d, 8);
+	const auto     sum_s13 = _mm_mulhi_epi16 (sum_s14, x_s15);
+
+	const auto     sum_s7  = _mm_srai_epi16 (sum_s13, 13 - 7);
+
+	d = _mm_add_epi16 (d, sum_s7); // x + (5/8 * x^3 + 3/8 * x^33), 7-bit scale
+
+	return d;
+}
+
+
+
+#endif
+
+// Out-of-class definitions of the static constexpr members of ErrDifAddParam
+// (needed before C++17 when the members are odr-used).
+template <class DT, int DB, class ST, int SB, int EL>
+constexpr int	Dither::ErrDifAddParam <DT, DB, ST, SB, EL>::_dst_bits;
+template <class DT, int DB, class ST, int SB, int EL>
+constexpr int	Dither::ErrDifAddParam <DT, DB, ST, SB, EL>::_src_bits;
+template <class DT, int DB, class ST, int SB, int EL>
+constexpr int	Dither::ErrDifAddParam <DT, DB, ST, SB, EL>::_nbr_err_lines;
+
+// Error-diffusion dithering of one line segment, integer source, scalar code.
+// ERRDIF supplies the sample types, the bitdepths and the diffusion kernel.
+template <bool S_FLAG, bool T_FLAG, class ERRDIF>
+void	Dither::process_seg_errdif_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	assert (ctx._y >= 0);
+
+	typedef typename ERRDIF::SrcType SRC_TYPE;
+	typedef typename ERRDIF::DstType DST_TYPE;
+	constexpr int  src_bits = ERRDIF::_src_bits;
+	constexpr int  dst_bits = ERRDIF::_dst_bits;
+
+	uint32_t &                rnd_state =  ctx._rnd_state;
+	ErrDifBuf & fstb_RESTRICT ed_buf    = *ctx._ed_buf_ptr;
+
+	const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
+	DST_TYPE * fstb_RESTRICT       dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
+
+	const int      ae = ctx._amp._e_i;
+
+	// Makes e1 point on the default buffer line for single-line
+	// error diffusor because we use it in prepare_next_line()
+	int            e0 = 0;
+	int            e1 = 0;
+	if (ERRDIF::_nbr_err_lines == 2)
+	{
+		e0 =      ctx._y & 1 ; // The two error lines swap roles on every row
+		e1 = 1 - (ctx._y & 1);
+	}
+	int16_t *      err0_ptr = ed_buf.get_buf <int16_t> (e0);
+	int16_t *      err1_ptr = ed_buf.get_buf <int16_t> (e1);
+	// Running errors, reloaded here and saved back at the end of the function
+	int            err_nxt0 = ed_buf.use_mem <int16_t> (0);
+	int            err_nxt1 = ed_buf.use_mem <int16_t> (1);
+
+	// Forward
+	if ((ctx._y & 1) == 0)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			int            err = err_nxt0;
+			SRC_TYPE       src_raw; // Filled by quantize_pix_int()
+
+			quantize_pix_int <
+				S_FLAG, T_FLAG, DST_TYPE, dst_bits, SRC_TYPE, src_bits
+			> (
+				dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, ctx._amp._n_i
+			);
+			ERRDIF::template diffuse <1> (
+				err, err_nxt0, err_nxt1,
+				err0_ptr + x, err1_ptr + x, src_raw
+			);
+		}
+		ERRDIF::prepare_next_line (err1_ptr + w);
+	}
+
+	// Backward
+	else
+	{
+		for (int x = w - 1; x >= 0; --x)
+		{
+			int            err = err_nxt0;
+			SRC_TYPE       src_raw; // Filled by quantize_pix_int()
+
+			quantize_pix_int <
+				S_FLAG, T_FLAG, DST_TYPE, dst_bits, SRC_TYPE, src_bits
+			> (
+				dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, ctx._amp._n_i
+			);
+			ERRDIF::template diffuse <-1> (
+				err, err_nxt0, err_nxt1,
+				err0_ptr + x, err1_ptr + x, src_raw
+			);
+		}
+		ERRDIF::prepare_next_line (err1_ptr - 1); // NOTE(review): index -1, presumably inside a margin of the buffer; confirm
+	}
+
+	ed_buf.use_mem <int16_t> (0) = int16_t (err_nxt0);
+	ed_buf.use_mem <int16_t> (1) = int16_t (err_nxt1);
+
+	if (! S_FLAG)
+	{
+		generate_rnd_eol (rnd_state); // Extra state update once the segment is done
+	}
+}
+
+// Error-diffusion dithering of one line segment with scaled source, float
+// math. ERRDIF supplies the sample types and the diffusion kernel.
+template <bool S_FLAG, bool T_FLAG, class ERRDIF>
+void	Dither::process_seg_errdif_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (w > 0);
+	assert (ctx._y >= 0);
+
+	typedef typename ERRDIF::SrcType SRC_TYPE;
+	typedef typename ERRDIF::DstType DST_TYPE;
+	constexpr int  dst_bits = ERRDIF::_dst_bits;
+
+	uint32_t &                rnd_state =  ctx._rnd_state;
+	ErrDifBuf & fstb_RESTRICT ed_buf    = *ctx._ed_buf_ptr;
+
+	const SRC_TYPE * fstb_RESTRICT src_n_ptr = reinterpret_cast <const SRC_TYPE *> (src_ptr);
+	DST_TYPE * fstb_RESTRICT       dst_n_ptr = reinterpret_cast <      DST_TYPE *> (dst_ptr);
+
+	const float    mul = float (ctx._scale_info_ptr->_gain);
+	const float    add = float (ctx._scale_info_ptr->_add_cst);
+	const float    ae  = float (ctx._amp._e_f);
+	const float    an  = float (ctx._amp._n_f);
+
+	// Makes e1 point on the default buffer line for single-line
+	// error diffusor because we use it in prepare_next_line()
+	int            e0 = 0;
+	int            e1 = 0;
+	if (ERRDIF::_nbr_err_lines == 2)
+	{
+		e0 =      ctx._y & 1 ; // The two error lines swap roles on every row
+		e1 = 1 - (ctx._y & 1);
+	}
+	float *        err0_ptr = ed_buf.get_buf <float> (e0);
+	float *        err1_ptr = ed_buf.get_buf <float> (e1);
+	// Running errors, reloaded here and saved back at the end of the function
+	float          err_nxt0 = ed_buf.use_mem <float> (0);
+	float          err_nxt1 = ed_buf.use_mem <float> (1);
+
+	// Forward
+	if ((ctx._y & 1) == 0)
+	{
+		for (int x = 0; x < w; ++x)
+		{
+			float          err = err_nxt0;
+			SRC_TYPE       src_raw; // Filled by quantize_pix_flt()
+
+			quantize_pix_flt <S_FLAG, T_FLAG, DST_TYPE, dst_bits, SRC_TYPE> (
+				dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, an, mul, add
+			);
+			ERRDIF::template diffuse <1> (
+				err, err_nxt0, err_nxt1,
+				err0_ptr + x, err1_ptr + x, src_raw
+			);
+		}
+		ERRDIF::prepare_next_line (err1_ptr + w);
+	}
+
+	// Backward
+	else
+	{
+		for (int x = w - 1; x >= 0; --x)
+		{
+			float          err = err_nxt0;
+			SRC_TYPE       src_raw; // Filled by quantize_pix_flt()
+
+			quantize_pix_flt <S_FLAG, T_FLAG, DST_TYPE, dst_bits, SRC_TYPE> (
+				dst_n_ptr, src_n_ptr, src_raw, x, err, rnd_state, ae, an, mul, add
+			);
+			ERRDIF::template diffuse <-1> (
+				err, err_nxt0, err_nxt1,
+				err0_ptr + x, err1_ptr + x, src_raw
+			);
+		}
+		ERRDIF::prepare_next_line (err1_ptr - 1); // NOTE(review): index -1, presumably inside a margin of the buffer; confirm
+	}
+
+	ed_buf.use_mem <float> (0) = err_nxt0;
+	ed_buf.use_mem <float> (1) = err_nxt1;
+
+	if (! S_FLAG)
+	{
+		generate_rnd_eol (rnd_state); // Extra state update once the segment is done
+	}
+}
+
+
+
+void	Dither::generate_rnd (uint32_t &state) noexcept
+{
+	state = uint32_t (1013904223) + uint32_t (1664525) * state; // One linear congruential step, modulo 2^32
+}
+
+
+
+void	Dither::generate_rnd_eol (uint32_t &state) noexcept
+{
+	// One step of a second congruential generator, followed by a step of a
+	// third one when bit 25 of the intermediate state is set.
+	const uint32_t mixed = uint32_t (1103515245) * state + 12345;
+	const bool     extra_step_flag = (((mixed >> 25) & 1) == 1);
+	state = extra_step_flag ? uint32_t (134775813) * mixed + 1 : mixed;
+}
+
+
+
+const Dither::PatRow &	Dither::SegContext::extract_pattern_row () const noexcept
+{
+	assert (_y >= 0);
+	assert (_pattern_ptr != nullptr);
+	const auto     row_idx = _y & (_max_pat_width - 1); // Rows repeat cyclically
+	return (*_pattern_ptr) [row_idx];
+}
+
+// Quantizes pixel x for integer error diffusion: writes the clipped result to
+// dst_ptr [x], the source sample to src_raw, and the new quantization error to err.
+template <bool S_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+void	Dither::quantize_pix_int (DST_TYPE * fstb_RESTRICT dst_ptr, const SRC_TYPE * fstb_RESTRICT src_ptr, SRC_TYPE &src_raw, int x, int & fstb_RESTRICT err, uint32_t &rnd_state, int ampe_i, int ampn_i) noexcept
+{
+	constexpr int  dif_bits = SRC_BITS - DST_BITS;
+	constexpr int  tmp_bits = // Working resolution: _err_res for small reductions of low bitdepths, else SRC_BITS
+		  (dif_bits < 6 && SRC_BITS < _err_res && DST_BITS < _err_res)
+		? _err_res
+		: SRC_BITS;
+	constexpr int  tmp_shft = tmp_bits - SRC_BITS; // Source to working resolution
+	constexpr int  tmp_invs = tmp_bits - DST_BITS; // Working to destination resolution
+
+	const int      rcst     = 1 << (tmp_invs - 1);
+	const int      vmax     = (1 << DST_BITS) - 1;
+
+	src_raw = src_ptr [x];
+	const int		src     = src_raw << tmp_shft;
+	const int      preq    = src + err; // Value to quantize: source plus incoming error
+
+	int            sum     = preq;
+	if (! S_FLAG)
+	{
+		constexpr int  dit_shft = _amp_bits + 8 - tmp_invs;  // May be negative
+
+		const int      dith_n  = generate_dith_n_scalar <TN_FLAG> (rnd_state); // s8
+		const int		err_add = (err < 0) ? -ampe_i : ampe_i; // NOTE(review): err == 0 gets +ampe_i here but 0 in quantize_pix_flt(); confirm
+		const int		noise   =
+			fstb::sshift_r <int, dit_shft> (dith_n * ampn_i + err_add);	// s16 = s8 * s8 // s16 = s16 >> cst
+
+		sum += noise;
+	}
+
+	const int      quant   = (sum + rcst) >> tmp_invs;
+	// The error is measured against preq, so the added noise is not part of it
+	err = preq - (quant << tmp_invs);
+	const int      pix     = fstb::limit (quant, 0, vmax); // Clipping happens after the error is computed
+
+	dst_ptr [x] = static_cast <DST_TYPE> (pix);
+}
+
+
+
+template <class SRC_TYPE>
+static inline SRC_TYPE	Dither_extract_src (SRC_TYPE src_read, float src) noexcept
+{
+	// Generic overload: keeps the sample exactly as it was read.
+	static_cast <void> (src);
+	return src_read;
+}
+
+static inline float	Dither_extract_src (float src_read, float src) noexcept
+{
+	// float overload: keeps the second argument instead of the read sample.
+	static_cast <void> (src_read);
+	return src;
+}
+// Float version of quantize_pix_int(): the sample is first scaled (s * mul + add); the error is kept as a float.
+template <bool S_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
+void	Dither::quantize_pix_flt (DST_TYPE * fstb_RESTRICT dst_ptr, const SRC_TYPE * fstb_RESTRICT src_ptr, SRC_TYPE &src_raw, int x, float & fstb_RESTRICT err, uint32_t &rnd_state, float ampe_f, float ampn_f, float mul, float add) noexcept
+{
+	const int      vmax = (1 << DST_BITS) - 1;
+
+	const SRC_TYPE src_read = src_ptr [x];
+	const float    src      = float (src_read) * mul + add;
+	src_raw = Dither_extract_src (src_read, src); // Scaled value for float sources, raw sample otherwise
+	const float    preq     = src + err; // Value to quantize: source plus incoming error
+
+	float          sum      = preq;
+	if (! S_FLAG)
+	{
+		const int      dith_n  = generate_dith_n_scalar <TN_FLAG> (rnd_state); // s8
+		const float    err_add = (err < 0) ? -ampe_f : (err > 0) ? ampe_f : 0; // Follows the sign of the error, 0 for no error
+		const float    noise   = float (dith_n) * ampn_f + err_add;
+
+		sum += noise;
+	}
+
+	const int      quant   = fstb::round_int (sum);
+	// The error is measured against preq, so the added noise is not part of it
+	err = preq - float (quant);
+	const int      pix = fstb::limit (quant, 0, vmax); // Clipping happens after the error is computed
+
+	dst_ptr [x] = static_cast <DST_TYPE> (pix);
+}
+
+
+
+// Original coefficients                     : 7, 3, 5, 1
+// Optimised coefficients for serpentine scan: 7, 4, 5, 0
+// Source:
+// Sam Hocevar and Gary Niger,
+// Reinstating Floyd-Steinberg: Improved Metrics for Quality Assessment
+// of Error Diffusion Algorithms,
+// Lecture Notes in Computer Science LNCS 5099, pp. 38-45, 2008
+// (Proceedings of the International Conference on Image and Signal Processing
+// ICISP 2008) ISSN 0302-9743
+
+#define fmtcl_Dither_FS_OPTIMIZED_SERPENTINE_COEF	// When defined, DiffuseFloydSteinberg::diffuse () uses the 7, 4, 5, 0 coefficients
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseFloydSteinberg <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (err_nxt1, err1_ptr, src_raw);
+
+#if defined (fmtcl_Dither_FS_OPTIMIZED_SERPENTINE_COEF)
+	const int      e1 = 0;
+	const int      e3 = (err * 4 + 8) >> 4;
+#else
+	const int      e1 = (err     + 8) >> 4;
+	const int      e3 = (err * 3 + 8) >> 4;
+#endif
+	const int      e5 = (err * 5 + 8) >> 4;
+	const int      e7 = err - e1 - e3 - e5;
+	spread_error <DIR> (e1, e3, e5, e7, err_nxt0, err0_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseFloydSteinberg <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (err_nxt1, err1_ptr, src_raw);
+
+#if defined (fmtcl_Dither_FS_OPTIMIZED_SERPENTINE_COEF)
+	const float    e1 = 0;
+	const float    e3 = err * (4.0f / 16);
+#else
+	const float    e1 = err * (1.0f / 16);
+	const float    e3 = err * (3.0f / 16);
+#endif
+	const float    e5 = err * (5.0f / 16);
+	const float    e7 = err * (7.0f / 16);
+	spread_error <DIR> (e1, e3, e5, e7, err_nxt0, err0_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <typename EB>
+void	Dither::DiffuseFloydSteinberg <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept
+{
+	// Nothing
+	fstb::unused (err_ptr);
+}
+
+// Writes the four Floyd-Steinberg shares.
+// e3 and e5 are accumulated into err0_ptr [-DIR] and err0_ptr [0], e1
+// overwrites err0_ptr [DIR], and err_nxt0 becomes the previous content of
+// err0_ptr [DIR] plus e7.
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR, typename ET, typename EB>
+void	Dither::DiffuseFloydSteinberg <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET e3, ET e5, ET e7, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept
+{
+	// Must be fetched before the slot is overwritten with e1 below
+	const ET       pending = err0_ptr [DIR];
+
+	err0_ptr [-DIR] += static_cast <EB> (e3);
+	err0_ptr [   0] += static_cast <EB> (e5);
+	err0_ptr [ DIR]  = static_cast <EB> (e1);
+
+	err_nxt0 = pending + e7;
+}
+
+
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseFilterLite <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (err_nxt1, err1_ptr, src_raw);
+
+	const int      e1 = (err + 2) >> 2;
+	const int      e2 = err - 2 * e1;
+	spread_error <DIR> (e1, e2, err_nxt0, err0_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseFilterLite <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (err_nxt1, err1_ptr, src_raw);
+
+	const float    e1 = err * (1.0f / 4);
+	const float    e2 = err * (2.0f / 4);
+	spread_error <DIR> (e1, e2, err_nxt0, err0_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <typename EB>
+void	Dither::DiffuseFilterLite <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept
+{
+	err_ptr [0] = EB (0);
+}
+
+// Writes the Filter Lite shares.
+// e1 is accumulated into err0_ptr [-DIR] and overwrites err0_ptr [0];
+// err_nxt0 becomes err0_ptr [DIR] plus e2.
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR, typename ET, typename EB>
+void	Dither::DiffuseFilterLite <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET e2, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept
+{
+	const ET       pending = err0_ptr [DIR];
+	const EB       share   = static_cast <EB> (e1);
+
+	err0_ptr [-DIR] += share;
+	err0_ptr [   0]  = share;
+
+	err_nxt0 = pending + e2;
+}
+
+
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseStucki <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (src_raw);
+
+	const int      m  = (err << 4) / 42;
+	const int      e1 = (m + 8) >> 4;
+	const int      e2 = (m + 4) >> 3;
+	const int      e4 = (m + 2) >> 2;
+//	const int      e8 = (m + 1) >> 1;
+	const int      sum = (e1 << 1) + ((e2 + e4) << 2);
+	const int      e8 = (err - sum + 1) >> 1;
+	spread_error <DIR> (e1, e2, e4, e8, err_nxt0, err_nxt1, err0_ptr, err1_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseStucki <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (src_raw);
+
+	const float    e1 = err * (1.0f / 42);
+	const float    e2 = err * (2.0f / 42);
+	const float    e4 = err * (4.0f / 42);
+	const float    e8 = err * (8.0f / 42);
+	spread_error <DIR> (e1, e2, e4, e8, err_nxt0, err_nxt1, err0_ptr, err1_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <typename EB>
+void	Dither::DiffuseStucki <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept
+{
+	// Nothing
+	fstb::unused (err_ptr);
+}
+
+// Writes the Stucki shares.
+// err_nxt0 takes the previous err_nxt1 plus e8, err_nxt1 takes
+// err1_ptr [2 * DIR] plus e4. The five slots around err0_ptr [0] are
+// accumulated with e2, e4, e8, e4, e2. The slots around err1_ptr [0] are
+// accumulated with e1, e2, e4, e2, except the last one at 2 * DIR which is
+// overwritten with e1.
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR, typename ET, typename EB>
+void	Dither::DiffuseStucki <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET e2, ET e4, ET e8, ET & fstb_RESTRICT err_nxt0, ET & fstb_RESTRICT err_nxt1, EB * fstb_RESTRICT err0_ptr, EB * fstb_RESTRICT err1_ptr) noexcept
+{
+	constexpr int  d1 = DIR;
+	constexpr int  d2 = DIR * 2;
+
+	const EB       s1 = static_cast <EB> (e1);
+	const EB       s2 = static_cast <EB> (e2);
+	const EB       s4 = static_cast <EB> (e4);
+	const EB       s8 = static_cast <EB> (e8);
+
+	// The carried values are read before err1_ptr [d2] is overwritten
+	err_nxt0 = err_nxt1 + e8;
+	err_nxt1 = err1_ptr [d2] + e4;
+
+	err0_ptr [-d2] += s2;
+	err0_ptr [-d1] += s4;
+	err0_ptr [  0] += s8;
+	err0_ptr [ d1] += s4;
+	err0_ptr [ d2] += s2;
+
+	err1_ptr [-d2] += s1;
+	err1_ptr [-d1] += s2;
+	err1_ptr [  0] += s4;
+	err1_ptr [ d1] += s2;
+	err1_ptr [ d2]  = s1;
+}
+
+
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseAtkinson <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (src_raw);
+
+	const int      e1 = (err + 4) >> 3;
+	spread_error <DIR> (e1, err_nxt0, err_nxt1, err0_ptr, err1_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseAtkinson <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (src_raw);
+
+	const float    e1 = err * (1.0f / 8);
+	spread_error <DIR> (e1, err_nxt0, err_nxt1, err0_ptr, err1_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <typename EB>
+void	Dither::DiffuseAtkinson <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept
+{
+	err_ptr [0] = EB (0);
+}
+
+// Writes the Atkinson shares, all equal to e1.
+// err_nxt0 takes the previous err_nxt1 plus e1, err_nxt1 takes
+// err1_ptr [2 * DIR] plus e1. The three slots around err0_ptr [0] are
+// accumulated with e1 and err1_ptr [0] is overwritten with e1.
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR, typename ET, typename EB>
+void	Dither::DiffuseAtkinson <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET & fstb_RESTRICT err_nxt0, ET & fstb_RESTRICT err_nxt1, EB * fstb_RESTRICT err0_ptr, EB * fstb_RESTRICT err1_ptr) noexcept
+{
+	const EB       share = static_cast <EB> (e1);
+
+	err_nxt0 = err_nxt1 + e1;
+	err_nxt1 = err1_ptr [DIR * 2] + e1;
+
+	err0_ptr [-DIR] += share;
+	err0_ptr [   0] += share;
+	err0_ptr [ DIR] += share;
+	err1_ptr [   0]  = share;
+}
+
+
+
+constexpr int	Dither::DiffuseOstromoukhovBase::_t_bits;	// Namespace-scope definitions of the constexpr static members
+constexpr int	Dither::DiffuseOstromoukhovBase::_t_len;	// (they carry no initializer here; redundant since C++17,
+constexpr int	Dither::DiffuseOstromoukhovBase::_t_mask;	// required before that if the members are odr-used)
+
+
+
+// Table index for a non-float source sample: the sample is shifted left by
+// _t_bits - (SRC_BITS - DST_BITS) bits (a signed amount handled by
+// fstb::sshift_l) and masked with _t_mask.
+template <int DST_BITS, int SRC_BITS>
+template <class SRC_TYPE>
+int	Dither::DiffuseOstromoukhovBase2 <DST_BITS, SRC_BITS>::get_index (SRC_TYPE src_raw) noexcept
+{
+	constexpr int  shift =
+		DiffuseOstromoukhovBase::_t_bits - (SRC_BITS - DST_BITS);
+
+	const int      scaled = fstb::sshift_l <int, shift> (src_raw);
+
+	return scaled & DiffuseOstromoukhovBase::_t_mask;
+}
+
+// Table index for a float sample: the sample is multiplied by _t_len, rounded
+// to the nearest integer and masked with _t_mask.
+template <int DST_BITS, int SRC_BITS>
+int	Dither::DiffuseOstromoukhovBase2 <DST_BITS, SRC_BITS>::get_index (float src_raw) noexcept
+{
+	const int      pos =
+		fstb::round_int (src_raw * DiffuseOstromoukhovBase::_t_len);
+
+	return pos & DiffuseOstromoukhovBase::_t_mask;
+}
+
+// Victor Ostromoukhov,
+// A Simple and Efficient Error-Diffusion Algorithm
+// Proceedings of SIGGRAPH 2001, in ACM Computer Graphics,
+// Annual Conference Series, pp. 567-572, 2001.
+// Not optimised at all
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (err_nxt1, err1_ptr);
+
+	constexpr int  dif_bits = SRC_BITS - DST_BITS;
+
+	const int      index    = fstb::sshift_l <
+		int,
+		DiffuseOstromoukhov::_t_bits - dif_bits
+	> (src_raw) & DiffuseOstromoukhov::_t_mask;
+	const typename ThisType::TableEntry & fstb_RESTRICT te = ThisType::_table [index];
+	const int      d        = te._sum;
+
+	const int      e1 = err * te._c0 / d;
+	const int      e2 = err * te._c1 / d;
+	const int      e3 = err - e1 - e2;
+
+	spread_error <DIR> (e1, e2, e3, err_nxt0, err0_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR>
+void	Dither::DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept
+{
+	fstb::unused (err_nxt1, err1_ptr);
+
+	const int      index    = DiffuseOstromoukhov::get_index (src_raw);
+	const typename ThisType::TableEntry & fstb_RESTRICT te = ThisType::_table [index];
+	const float    invd     = te._inv_sum;
+
+	const float    e1 = err * float (te._c0) * invd;
+	const float    e2 = err * float (te._c1) * invd;
+	const float    e3 = err - e1 - e2;
+
+	spread_error <DIR> (e1, e2, e3, err_nxt0, err0_ptr);
+}
+
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <typename EB>
+void	Dither::DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept
+{
+	err_ptr [0] = EB (0);
+}
+
+// Writes the three shares.
+// e2 is accumulated into err0_ptr [-DIR], e3 overwrites err0_ptr [0] and
+// err_nxt0 becomes err0_ptr [DIR] plus e1.
+template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+template <int DIR, typename ET, typename EB>
+void	Dither::DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS>::spread_error (ET e1, ET e2, ET e3, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept
+{
+	const ET       pending = err0_ptr [DIR];
+
+	err0_ptr [-DIR] += static_cast <EB> (e2);
+	err0_ptr [   0]  = static_cast <EB> (e3);
+
+	err_nxt0 = pending + e1;
+}
+
+
+
+const std::array <
+	Dither::DiffuseOstromoukhovBase::TableEntry,
+	Dither::DiffuseOstromoukhovBase::_t_len
+>	Dither::DiffuseOstromoukhovBase::_table =
+{{	// Per entry: three coefficients, their sum, then 1 / sum. Presumably _c0, _c1, a third coefficient, _sum, _inv_sum -- confirm against TableEntry
+	{   13,    0,    5,   18, 1.0f /   18 },
+	{   13,    0,    5,   18, 1.0f /   18 },
+	{   21,    0,   10,   31, 1.0f /   31 },
+	{    7,    0,    4,   11, 1.0f /   11 },
+	{    8,    0,    5,   13, 1.0f /   13 },
+	{   47,    3,   28,   78, 1.0f /   78 },
+	{   23,    3,   13,   39, 1.0f /   39 },
+	{   15,    3,    8,   26, 1.0f /   26 },
+	{   22,    6,   11,   39, 1.0f /   39 },
+	{   43,   15,   20,   78, 1.0f /   78 },
+	{    7,    3,    3,   13, 1.0f /   13 },
+	{  501,  224,  211,  936, 1.0f /  936 },
+	{  249,  116,  103,  468, 1.0f /  468 },
+	{  165,   80,   67,  312, 1.0f /  312 },
+	{  123,   62,   49,  234, 1.0f /  234 },
+	{  489,  256,  191,  936, 1.0f /  936 },
+	{   81,   44,   31,  156, 1.0f /  156 },
+	{  483,  272,  181,  936, 1.0f /  936 },
+	{   60,   35,   22,  117, 1.0f /  117 },
+	{   53,   32,   19,  104, 1.0f /  104 },
+	{  237,  148,   83,  468, 1.0f /  468 },
+	{  471,  304,  161,  936, 1.0f /  936 },
+	{    3,    2,    1,    6, 1.0f /    6 },
+	{  481,  314,  185,  980, 1.0f /  980 },
+	{  354,  226,  155,  735, 1.0f /  735 },
+	{ 1389,  866,  685, 2940, 1.0f / 2940 },
+	{  227,  138,  125,  490, 1.0f /  490 },
+	{  267,  158,  163,  588, 1.0f /  588 },
+	{  327,  188,  220,  735, 1.0f /  735 },
+	{   61,   34,   45,  140, 1.0f /  140 },
+	{  627,  338,  505, 1470, 1.0f / 1470 },
+	{ 1227,  638, 1075, 2940, 1.0f / 2940 },
+
+	{   20,   10,   19,   49, 1.0f /   49 },
+	{ 1937, 1000, 1767, 4704, 1.0f / 4704 },
+	{  977,  520,  855, 2352, 1.0f / 2352 },
+	{  657,  360,  551, 1568, 1.0f / 1568 },
+	{   71,   40,   57,  168, 1.0f /  168 },
+	{ 2005, 1160, 1539, 4704, 1.0f / 4704 },
+	{  337,  200,  247,  784, 1.0f /  784 },
+	{ 2039, 1240, 1425, 4704, 1.0f / 4704 },
+	{  257,  160,  171,  588, 1.0f /  588 },
+	{  691,  440,  437, 1568, 1.0f / 1568 },
+	{ 1045,  680,  627, 2352, 1.0f / 2352 },
+	{  301,  200,  171,  672, 1.0f /  672 },
+	{  177,  120,   95,  392, 1.0f /  392 },
+	{ 2141, 1480, 1083, 4704, 1.0f / 4704 },
+	{ 1079,  760,  513, 2352, 1.0f / 2352 },
+	{  725,  520,  323, 1568, 1.0f / 1568 },
+	{  137,  100,   57,  294, 1.0f /  294 },
+	{ 2209, 1640,  855, 4704, 1.0f / 4704 },
+	{   53,   40,   19,  112, 1.0f /  112 },
+	{ 2243, 1720,  741, 4704, 1.0f / 4704 },
+	{  565,  440,  171, 1176, 1.0f / 1176 },
+	{  759,  600,  209, 1568, 1.0f / 1568 },
+	{ 1147,  920,  285, 2352, 1.0f / 2352 },
+	{ 2311, 1880,  513, 4704, 1.0f / 4704 },
+	{   97,   80,   19,  196, 1.0f /  196 },
+	{  335,  280,   57,  672, 1.0f /  672 },
+	{ 1181, 1000,  171, 2352, 1.0f / 2352 },
+	{  793,  680,   95, 1568, 1.0f / 1568 },
+	{  599,  520,   57, 1176, 1.0f / 1176 },
+	{ 2413, 2120,  171, 4704, 1.0f / 4704 },
+	{  405,  360,   19,  784, 1.0f /  784 },
+	{ 2447, 2200,   57, 4704, 1.0f / 4704 },
+
+	{   11,   10,    0,   21, 1.0f /   21 },
+	{  158,  151,    3,  312, 1.0f /  312 },
+	{  178,  179,    7,  364, 1.0f /  364 },
+	{ 1030, 1091,   63, 2184, 1.0f / 2184 },
+	{  248,  277,   21,  546, 1.0f /  546 },
+	{  318,  375,   35,  728, 1.0f /  728 },
+	{  458,  571,   63, 1092, 1.0f / 1092 },
+	{  878, 1159,  147, 2184, 1.0f / 2184 },
+	{    5,    7,    1,   13, 1.0f /   13 },
+	{  172,  181,   37,  390, 1.0f /  390 },
+	{   97,   76,   22,  195, 1.0f /  195 },
+	{   72,   41,   17,  130, 1.0f /  130 },
+	{  119,   47,   29,  195, 1.0f /  195 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{   65,   18,   17,  100, 1.0f /  100 },
+	{   95,   29,   26,  150, 1.0f /  150 },
+	{  185,   62,   53,  300, 1.0f /  300 },
+	{   30,   11,    9,   50, 1.0f /   50 },
+	{   35,   14,   11,   60, 1.0f /   60 },
+	{   85,   37,   28,  150, 1.0f /  150 },
+	{   55,   26,   19,  100, 1.0f /  100 },
+	{   80,   41,   29,  150, 1.0f /  150 },
+	{  155,   86,   59,  300, 1.0f /  300 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{  305,  176,  119,  600, 1.0f /  600 },
+	{  155,   86,   59,  300, 1.0f /  300 },
+	{  105,   56,   39,  200, 1.0f /  200 },
+	{   80,   41,   29,  150, 1.0f /  150 },
+	{   65,   32,   23,  120, 1.0f /  120 },
+	{   55,   26,   19,  100, 1.0f /  100 },
+	{  335,  152,  113,  600, 1.0f /  600 },
+	{   85,   37,   28,  150, 1.0f /  150 },
+	{  115,   48,   37,  200, 1.0f /  200 },
+	{   35,   14,   11,   60, 1.0f /   60 },
+	{  355,  136,  109,  600, 1.0f /  600 },
+	{   30,   11,    9,   50, 1.0f /   50 },
+	{  365,  128,  107,  600, 1.0f /  600 },
+	{  185,   62,   53,  300, 1.0f /  300 },
+	{   25,    8,    7,   40, 1.0f /   40 },
+	{   95,   29,   26,  150, 1.0f /  150 },
+	{  385,  112,  103,  600, 1.0f /  600 },
+	{   65,   18,   17,  100, 1.0f /  100 },
+	{  395,  104,  101,  600, 1.0f /  600 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+
+	// Symmetric: the entries below repeat the entries above in reverse order
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{  395,  104,  101,  600, 1.0f /  600 },
+	{   65,   18,   17,  100, 1.0f /  100 },
+	{  385,  112,  103,  600, 1.0f /  600 },
+	{   95,   29,   26,  150, 1.0f /  150 },
+	{   25,    8,    7,   40, 1.0f /   40 },
+	{  185,   62,   53,  300, 1.0f /  300 },
+	{  365,  128,  107,  600, 1.0f /  600 },
+	{   30,   11,    9,   50, 1.0f /   50 },
+	{  355,  136,  109,  600, 1.0f /  600 },
+	{   35,   14,   11,   60, 1.0f /   60 },
+	{  115,   48,   37,  200, 1.0f /  200 },
+	{   85,   37,   28,  150, 1.0f /  150 },
+	{  335,  152,  113,  600, 1.0f /  600 },
+	{   55,   26,   19,  100, 1.0f /  100 },
+	{   65,   32,   23,  120, 1.0f /  120 },
+	{   80,   41,   29,  150, 1.0f /  150 },
+	{  105,   56,   39,  200, 1.0f /  200 },
+	{  155,   86,   59,  300, 1.0f /  300 },
+	{  305,  176,  119,  600, 1.0f /  600 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{    5,    3,    2,   10, 1.0f /   10 },
+
+	{    5,    3,    2,   10, 1.0f /   10 },
+	{  155,   86,   59,  300, 1.0f /  300 },
+	{   80,   41,   29,  150, 1.0f /  150 },
+	{   55,   26,   19,  100, 1.0f /  100 },
+	{   85,   37,   28,  150, 1.0f /  150 },
+	{   35,   14,   11,   60, 1.0f /   60 },
+	{   30,   11,    9,   50, 1.0f /   50 },
+	{  185,   62,   53,  300, 1.0f /  300 },
+	{   95,   29,   26,  150, 1.0f /  150 },
+	{   65,   18,   17,  100, 1.0f /  100 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{    4,    1,    1,    6, 1.0f /    6 },
+	{  119,   47,   29,  195, 1.0f /  195 },
+	{   72,   41,   17,  130, 1.0f /  130 },
+	{   97,   76,   22,  195, 1.0f /  195 },
+	{  172,  181,   37,  390, 1.0f /  390 },
+	{    5,    7,    1,   13, 1.0f /   13 },
+	{  878, 1159,  147, 2184, 1.0f / 2184 },
+	{  458,  571,   63, 1092, 1.0f / 1092 },
+	{  318,  375,   35,  728, 1.0f /  728 },
+	{  248,  277,   21,  546, 1.0f /  546 },
+	{ 1030, 1091,   63, 2184, 1.0f / 2184 },
+	{  178,  179,    7,  364, 1.0f /  364 },
+	{  158,  151,    3,  312, 1.0f /  312 },
+	{   11,   10,    0,   21, 1.0f /   21 },
+
+	{ 2447, 2200,   57, 4704, 1.0f / 4704 },
+	{  405,  360,   19,  784, 1.0f /  784 },
+	{ 2413, 2120,  171, 4704, 1.0f / 4704 },
+	{  599,  520,   57, 1176, 1.0f / 1176 },
+	{  793,  680,   95, 1568, 1.0f / 1568 },
+	{ 1181, 1000,  171, 2352, 1.0f / 2352 },
+	{  335,  280,   57,  672, 1.0f /  672 },
+	{   97,   80,   19,  196, 1.0f /  196 },
+	{ 2311, 1880,  513, 4704, 1.0f / 4704 },
+	{ 1147,  920,  285, 2352, 1.0f / 2352 },
+	{  759,  600,  209, 1568, 1.0f / 1568 },
+	{  565,  440,  171, 1176, 1.0f / 1176 },
+	{ 2243, 1720,  741, 4704, 1.0f / 4704 },
+	{   53,   40,   19,  112, 1.0f /  112 },
+	{ 2209, 1640,  855, 4704, 1.0f / 4704 },
+	{  137,  100,   57,  294, 1.0f /  294 },
+	{  725,  520,  323, 1568, 1.0f / 1568 },
+	{ 1079,  760,  513, 2352, 1.0f / 2352 },
+	{ 2141, 1480, 1083, 4704, 1.0f / 4704 },
+	{  177,  120,   95,  392, 1.0f /  392 },
+	{  301,  200,  171,  672, 1.0f /  672 },
+	{ 1045,  680,  627, 2352, 1.0f / 2352 },
+	{  691,  440,  437, 1568, 1.0f / 1568 },
+	{  257,  160,  171,  588, 1.0f /  588 },
+	{ 2039, 1240, 1425, 4704, 1.0f / 4704 },
+	{  337,  200,  247,  784, 1.0f /  784 },
+	{ 2005, 1160, 1539, 4704, 1.0f / 4704 },
+	{   71,   40,   57,  168, 1.0f /  168 },
+	{  657,  360,  551, 1568, 1.0f / 1568 },
+	{  977,  520,  855, 2352, 1.0f / 2352 },
+	{ 1937, 1000, 1767, 4704, 1.0f / 4704 },
+	{   20,   10,   19,   49, 1.0f /   49 },
+
+	{ 1227,  638, 1075, 2940, 1.0f / 2940 },
+	{  627,  338,  505, 1470, 1.0f / 1470 },
+	{   61,   34,   45,  140, 1.0f /  140 },
+	{  327,  188,  220,  735, 1.0f /  735 },
+	{  267,  158,  163,  588, 1.0f /  588 },
+	{  227,  138,  125,  490, 1.0f /  490 },
+	{ 1389,  866,  685, 2940, 1.0f / 2940 },
+	{  354,  226,  155,  735, 1.0f /  735 },
+	{  481,  314,  185,  980, 1.0f /  980 },
+	{    3,    2,    1,    6, 1.0f /    6 },
+	{  471,  304,  161,  936, 1.0f /  936 },
+	{  237,  148,   83,  468, 1.0f /  468 },
+	{   53,   32,   19,  104, 1.0f /  104 },
+	{   60,   35,   22,  117, 1.0f /  117 },
+	{  483,  272,  181,  936, 1.0f /  936 },
+	{   81,   44,   31,  156, 1.0f /  156 },
+	{  489,  256,  191,  936, 1.0f /  936 },
+	{  123,   62,   49,  234, 1.0f /  234 },
+	{  165,   80,   67,  312, 1.0f /  312 },
+	{  249,  116,  103,  468, 1.0f /  468 },
+	{  501,  224,  211,  936, 1.0f /  936 },
+	{    7,    3,    3,   13, 1.0f /   13 },
+	{   43,   15,   20,   78, 1.0f /   78 },
+	{   22,    6,   11,   39, 1.0f /   39 },
+	{   15,    3,    8,   26, 1.0f /   26 },
+	{   23,    3,   13,   39, 1.0f /   39 },
+	{   47,    3,   28,   78, 1.0f /   78 },
+	{    8,    0,    5,   13, 1.0f /   13 },
+	{    7,    0,    4,   11, 1.0f /   11 },
+	{   21,    0,   10,   31, 1.0f /   31 },
+	{   13,    0,    5,   18, 1.0f /   18 },
+	{   13,    0,    5,   18, 1.0f /   18 }
+}};
+
+
+
+}  // namespace fmtcl
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/Dither.h b/src/fmtcl/Dither.h
new file mode 100644
index 0000000..edcc32a
--- /dev/null
+++ b/src/fmtcl/Dither.h
@@ -0,0 +1,448 @@
+/*****************************************************************************
+
+        Dither.h
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#pragma once
+#if ! defined (fmtcl_Dither_HEADER_INCLUDED)
+#define fmtcl_Dither_HEADER_INCLUDED
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "conc/ObjPool.h"
+#include "fmtcl/ColorFamily.h"
+#include "fmtcl/BitBltConv.h"
+#include "fmtcl/ErrDifBuf.h"
+#include "fmtcl/ErrDifBufFactory.h"
+#include "fmtcl/SplFmt.h"
+#include "fstb/def.h"
+#include "fstb/ArrayAlign.h"
+
+#include <array>
+#include <memory>
+#include <vector>
+
+
+
+namespace fmtcl
+{
+
+
+
+class Dither
+{
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+public:
+
+	static constexpr int _max_nbr_planes = 3;
+	static constexpr int _max_pat_width = 32;   // Number of pixels for halftone dithering
+
+	enum DMode // Dithering mode selector; the constructor receives it as dmode.
+	{
+		DMode_ROUND_ALIAS = -1, // Negative, so outside the [0 ; DMode_NBR_ELT) range
+		DMode_BAYER = 0,  // 0
+		DMode_ROUND,      // 1
+		DMode_FAST,       // 2
+		DMode_FILTERLITE, // 3
+		DMode_STUCKI,     // 4
+		DMode_ATKINSON,   // 5
+		DMode_FLOYD,      // 6
+		DMode_OSTRO,      // 7
+		DMode_VOIDCLUST,  // 8
+		DMode_QUASIRND,   // 9
+
+		DMode_NBR_ELT     // 10, one past the last mode listed above
+	};
+
+	explicit       Dither (
+		SplFmt src_fmt, int src_res, bool src_full_flag,
+		SplFmt dst_fmt, int dst_res, bool dst_full_flag,
+		ColorFamily color_fam, int nbr_planes, int w,
+		DMode dmode, int pat_size, double ampo, double ampn,
+		bool dyn_flag, bool static_noise_flag, bool correlated_planes_flag,
+		bool tpdfo_flag, bool tpdfn_flag,
+		bool sse2_flag, bool avx2_flag
+	);
+
+	void           process_plane (uint8_t *dst_ptr, int dst_stride, const uint8_t *src_ptr, int src_stride, int w, int h, int frame_index, int plane_index);
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+protected:
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+	static constexpr int _pat_period    =     4; // Must be a power of 2 (because cycled with & as modulo)
+	static constexpr int _amp_bits      =     5; // Bit depth of the amplitude fractional part. The whole thing is 7 bits, and we need a few bits for the integer part.
+	static constexpr int _err_res       =    24; // Resolution (bits) of the temporary data for error diffusion when source bitdepth is not high enough (relative to the destination bitdepth) to guarantee an accurate error diffusion.
+	static constexpr int _max_unk_width = 65536; // Maximum width (pixels) for variable formats
+
+	class SclInf // Element type of _scale_info_arr (one entry per plane)
+	{
+	public:
+		BitBltConv::ScaleInfo     // Storage for the scale data itself
+		               _info;
+		BitBltConv::ScaleInfo *   // nullptr if _info is not used.
+		               _ptr = nullptr;
+	};
+
+	typedef int16_t PatRow [_max_pat_width];  // Contains data in [-128; +127]
+	typedef PatRow  PatData [_max_pat_width]; // [y] [x]
+	typedef fstb::ArrayAlign <PatData, _pat_period, 16> PatDataArray;
+
+	class AmpInfo // Amplitude values, held both as scaled integers (_i) and as floats (_f)
+	{
+	public:
+		int            _o_i { 0 };    // [0 ;  127], 1.0 = 1 << _amp_bits
+		int            _n_i { 0 };    // [0 ;  127], 1.0 = 1 << _amp_bits
+		int            _e_i { 0 };    // [0 ; 2047], 1.0 = 256
+		float          _e_f { 0.f };  // Zero by default, like the integer fields
+		float          _n_f { 0.f };  // Zero by default, like the integer fields
+	};
+
+	class SegContext // State passed by reference (ctx) to the process_seg_* functions
+	{
+	public:
+		inline const PatRow &
+		               extract_pattern_row () const noexcept; // Only declared here; presumably selects the row for _y in *_pattern_ptr -- TODO confirm
+		const PatData* _pattern_ptr = nullptr; // Ordered dithering
+		uint32_t       _rnd_state   = 0;       // Anything except fast mode
+		const BitBltConv::ScaleInfo *          // Float processing
+		               _scale_info_ptr = nullptr;
+		ErrDifBuf *                            // Error diffusion
+		               _ed_buf_ptr  = nullptr;
+		int            _y           = -1;      // Ordered dithering and error diffusion
+		uint32_t       _qrs_seed    = 0;       // For the quasirandom sequences
+		AmpInfo        _amp;                   // Default-initialised: every amplitude starts at 0
+	};
+
+	void           build_dither_pat ();
+	void           build_dither_pat_round ();
+	void           build_dither_pat_bayer ();
+	void           build_dither_pat_void_and_cluster (int w);
+	void           build_next_dither_pat ();
+	void           copy_dither_pat_rotate (PatData &dst, const PatData &src, int angle) noexcept;
+	void           init_fnc_fast () noexcept;
+	void           init_fnc_ordered () noexcept;
+	void           init_fnc_quasirandom () noexcept;
+	void           init_fnc_errdiff () noexcept;
+
+	void           dither_plane (uint8_t *dst_ptr, int dst_stride, const uint8_t *src_ptr, int src_stride, int w, int h, const BitBltConv::ScaleInfo &scale_info, int frame_index, int plane_index);
+
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	static void    process_seg_fast_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &/*ctx*/) noexcept;
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
+	static void    process_seg_fast_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, int SRC_BITS>
+	static void    process_seg_fast_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &/*ctx*/) noexcept;
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT>
+	static void    process_seg_fast_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+#endif
+
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	static void    process_seg_ord_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
+	static void    process_seg_ord_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, int SRC_BITS>
+	static void    process_seg_ord_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT>
+	static void    process_seg_ord_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+#endif
+
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	static void    process_seg_qrs_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
+	static void    process_seg_qrs_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, int SRC_BITS>
+	static void    process_seg_qrs_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+	template <bool S_FLAG, bool TO_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT>
+	static void    process_seg_qrs_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+#endif
+
+	template <bool S_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS, typename DFNC>
+	static fstb_FORCEINLINE void
+	               process_seg_common_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept;
+	template <bool S_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, typename DFNC>
+	static fstb_FORCEINLINE void
+	               process_seg_common_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept;
+	template <bool T_FLAG>
+	static fstb_FORCEINLINE int
+	               generate_dith_n_scalar (uint32_t &rnd_state) noexcept;
+	static fstb_FORCEINLINE int
+	               remap_tpdf_scalar (int d) noexcept;
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	template <bool S_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, int SRC_BITS, typename DFNC>
+	static fstb_FORCEINLINE void
+	               process_seg_common_int_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept;
+	template <bool S_FLAG, bool TN_FLAG, SplFmt DST_FMT, int DST_BITS, SplFmt SRC_FMT, typename DFNC>
+	static fstb_FORCEINLINE void
+	               process_seg_common_flt_int_sse2 (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx, DFNC dither_fnc) noexcept;
+	template <bool T_FLAG>
+	static fstb_FORCEINLINE __m128i
+	               generate_dith_n_vec (uint32_t &rnd_state) noexcept;
+	static fstb_FORCEINLINE __m128i
+	               remap_tpdf_vec (__m128i d) noexcept;
+#endif
+
+	template <bool S_FLAG, bool TN_FLAG, class ERRDIF>
+	static void    process_seg_errdif_int_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+	template <bool S_FLAG, bool TN_FLAG, class ERRDIF>
+	static void    process_seg_errdif_flt_int_cpp (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) noexcept;
+
+	static inline void
+	               generate_rnd (uint32_t &state) noexcept;
+	static inline void
+	               generate_rnd_eol (uint32_t &state) noexcept;
+
+	template <bool S_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	static inline void
+	               quantize_pix_int (DST_TYPE * fstb_RESTRICT dst_ptr, const SRC_TYPE * fstb_RESTRICT src_ptr, SRC_TYPE &src_raw, int x, int & fstb_RESTRICT err, uint32_t &rnd_state, int ampe_i, int ampn_i) noexcept;
+	template <bool S_FLAG, bool TN_FLAG, class DST_TYPE, int DST_BITS, class SRC_TYPE>
+	static inline void
+	               quantize_pix_flt (DST_TYPE * fstb_RESTRICT dst_ptr, const SRC_TYPE * fstb_RESTRICT src_ptr, SRC_TYPE &src_raw, int x, float & fstb_RESTRICT err, uint32_t &rnd_state, float ampe_f, float ampn_f, float mul, float add) noexcept;
+
+	template <class DT, int DB, class ST, int SB, int EL>
+	class ErrDifAddParam // Republishes its template arguments as member types and constants
+	{
+	public:
+		using SrcType = ST;
+		using DstType = DT;
+		static constexpr int _src_bits      = SB;
+		static constexpr int _dst_bits      = DB;
+		static constexpr int _nbr_err_lines = EL; // The Diffuse* classes of this header pass 1 or 2
+	};
+
+	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	class DiffuseFloydSteinberg
+	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 1>
+	{
+	public:
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <typename EB>
+		static fstb_FORCEINLINE void
+		               prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept;
+	private:
+		template <int DIR, typename ET, typename EB>
+		static fstb_FORCEINLINE void
+		               spread_error (ET e1, ET e3, ET e5, ET e7, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept;
+	};
+
+	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	class DiffuseFilterLite
+	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 1>
+	{
+	public:
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <typename EB>
+		static fstb_FORCEINLINE void
+		               prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept;
+	private:
+		template <int DIR, typename ET, typename EB>
+		static fstb_FORCEINLINE void
+		               spread_error (ET e1, ET e2, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept;
+	};
+
+	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	class DiffuseStucki
+	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 2>
+	{
+	public:
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <typename EB>
+		static fstb_FORCEINLINE void
+		               prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept;
+	private:
+		template <int DIR, typename ET, typename EB>
+		static fstb_FORCEINLINE void
+		               spread_error (ET e1, ET e2, ET e4, ET e8, ET & fstb_RESTRICT err_nxt0, ET & fstb_RESTRICT err_nxt1, EB * fstb_RESTRICT err0_ptr, EB * fstb_RESTRICT err1_ptr) noexcept;
+	};
+
+	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	class DiffuseAtkinson
+	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 2>
+	{
+	public:
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <typename EB>
+		static fstb_FORCEINLINE void
+		               prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept;
+	private:
+		template <int DIR, typename ET, typename EB>
+		static fstb_FORCEINLINE void
+		               spread_error (ET e1, ET & fstb_RESTRICT err_nxt0, ET & fstb_RESTRICT err_nxt1, EB * fstb_RESTRICT err0_ptr, EB * fstb_RESTRICT err1_ptr) noexcept;
+	};
+
+	class DiffuseOstromoukhovBase // Non-template part: table layout and size, inherited by DiffuseOstromoukhovBase2
+	{
+	public:
+		struct TableEntry
+		{
+			int            _c0;
+			int            _c1;
+			int            _c2;        // Actually not used
+			int            _sum;       // Presumably _c0 + _c1 + _c2 -- TODO confirm against the table definition
+			float          _inv_sum;   // Possible optimization: store 1/_c0 and 1/_c1 instead of this field.
+		};
+
+		static constexpr int _t_bits = 8;             // Table index width, in bits
+		static constexpr int _t_len  = 1 << _t_bits;  // 256 entries
+		static constexpr int _t_mask = _t_len - 1;    // 0xFF
+
+		static const std::array <TableEntry, _t_len>
+		               _table;                        // Only declared here; the definition lives outside this header
+	};
+
+	template <int DST_BITS, int SRC_BITS>
+	class DiffuseOstromoukhovBase2
+	:	public DiffuseOstromoukhovBase
+	{
+	public:
+		template <class SRC_TYPE>
+		static inline int
+		               get_index (SRC_TYPE src_raw) noexcept;
+		static inline int
+		               get_index (float src_raw) noexcept;
+	};
+
+	template <class DST_TYPE, int DST_BITS, class SRC_TYPE, int SRC_BITS>
+	class DiffuseOstromoukhov
+	:	public ErrDifAddParam <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS, 1>
+	,	public DiffuseOstromoukhovBase2 <DST_BITS, SRC_BITS>
+	{
+	public:
+		typedef DiffuseOstromoukhov <DST_TYPE, DST_BITS, SRC_TYPE, SRC_BITS> ThisType;
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (int err, int & fstb_RESTRICT err_nxt0, int & fstb_RESTRICT err_nxt1, int16_t * fstb_RESTRICT err0_ptr, int16_t * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <int DIR>
+		static fstb_FORCEINLINE void
+		               diffuse (float err, float & fstb_RESTRICT err_nxt0, float & fstb_RESTRICT err_nxt1, float * fstb_RESTRICT err0_ptr, float * fstb_RESTRICT err1_ptr, SRC_TYPE src_raw) noexcept;
+		template <typename EB>
+		static fstb_FORCEINLINE void
+		               prepare_next_line (EB * fstb_RESTRICT err_ptr) noexcept;
+	private:
+		template <int DIR, typename ET, typename EB>
+		static fstb_FORCEINLINE void
+		               spread_error (ET e1, ET e2, ET e3, ET & fstb_RESTRICT err_nxt0, EB * fstb_RESTRICT err0_ptr) noexcept;
+	};
+
+	SplFmt         _splfmt_src = SplFmt_ILLEGAL;
+	SplFmt         _splfmt_dst = SplFmt_ILLEGAL;
+	int            _src_res    = 0;
+	int            _dst_res    = 0;
+	bool           _full_range_in_flag  = false;
+	bool           _full_range_out_flag = false;
+	ColorFamily    _color_fam  = ColorFamily_INVALID;
+	int            _nbr_planes = 0;
+
+	std::array <SclInf, _max_nbr_planes>
+	               _scale_info_arr;
+	bool           _upconv_flag = false;
+	bool           _sse2_flag   = false;
+	bool           _avx2_flag   = false;
+	bool           _range_def_flag = false;
+
+	int            _dmode    = DMode_FAST;
+	int            _pat_size = _max_pat_width;  // Must be a divisor of _max_pat_width
+	double         _ampo     = 1;
+	double         _ampn     = 0;
+	bool           _dyn_flag = false;
+	bool           _static_noise_flag      = false;
+	bool           _correlated_planes_flag = false;
+	bool           _tpdfo_flag = false;
+	bool           _tpdfn_flag = false;
+
+	bool           _errdif_flag = false;   // Indicates a dithering method using error diffusion.
+	bool           _simple_flag = false;   // Simplified implementation for ampo == 1 and ampn == 0
+	PatDataArray   _dither_pat_arr;        // Contains levels for ordered dithering
+
+	AmpInfo        _amp;
+
+	conc::ObjPool <ErrDifBuf>
+						_buf_pool;
+	std::unique_ptr <ErrDifBufFactory>
+	               _buf_factory_uptr;
+
+	void (*        _process_seg_int_int_ptr) (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) = nullptr;
+	void (*        _process_seg_flt_int_ptr) (uint8_t * fstb_RESTRICT dst_ptr, const uint8_t * fstb_RESTRICT src_ptr, int w, SegContext &ctx) = nullptr;
+
+
+
+/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+	               Dither ()                               = delete;
+	               Dither (const Dither &other)            = delete;
+	               Dither (Dither &&other)                 = delete;
+	Dither &       operator = (const Dither &other)        = delete;
+	Dither &       operator = (Dither &&other)             = delete;
+	bool           operator == (const Dither &other) const = delete;
+	bool           operator != (const Dither &other) const = delete;
+
+}; // class Dither
+
+
+
+}  // namespace fmtcl
+
+
+
+//#include "fmtcl/Dither.hpp"
+
+
+
+#endif   // fmtcl_Dither_HEADER_INCLUDED
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/InterlacingType.h b/src/fmtcl/InterlacingType.h
new file mode 100644
index 0000000..4403a9e
--- /dev/null
+++ b/src/fmtcl/InterlacingType.h
@@ -0,0 +1,70 @@
+/*****************************************************************************
+
+        InterlacingType.h
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#pragma once
+#if ! defined (fmtcl_InterlacingType_HEADER_INCLUDED)
+#define fmtcl_InterlacingType_HEADER_INCLUDED
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+namespace fmtcl
+{
+
+
+
+enum InterlacingType
+{
+	InterlacingType_INVALID = -1,
+
+	InterlacingType_FRAME   = 0, // 0, what InterlacingType_get() returns when itl_flag is false
+	InterlacingType_TOP,         // 1, itl_flag and top_flag both true
+	InterlacingType_BOT,         // 2, itl_flag true and top_flag false
+
+	InterlacingType_NBR_ELT      // 3
+
+}; // enum InterlacingType
+
+
+
+inline InterlacingType	InterlacingType_get (bool itl_flag, bool top_flag)
+{
+	// Without itl_flag the result is FRAME, whatever top_flag is.
+	// With itl_flag set, top_flag selects between TOP and BOT.
+	if (! itl_flag) { return InterlacingType_FRAME; }
+	return (top_flag) ? InterlacingType_TOP : InterlacingType_BOT;
+}
+
+
+
+}  // namespace fmtcl
+
+
+
+//#include "fmtcl/InterlacingType.hpp"
+
+
+
+#endif   // fmtcl_InterlacingType_HEADER_INCLUDED
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
diff --git a/src/fmtcl/KernelData.h b/src/fmtcl/KernelData.h
index 4628d68..31c6524 100644
--- a/src/fmtcl/KernelData.h
+++ b/src/fmtcl/KernelData.h
@@ -50,8 +50,10 @@ class KernelData
 
 public:
 
-	               KernelData () = default;
-	virtual        ~KernelData () {}
+	               KernelData ()                   = default;
+	               ~KernelData ()                  = default; // NOTE(review): no longer virtual (the removed line was) -- confirm nothing deletes a derived object through a KernelData pointer
+	               KernelData (KernelData &&other) = default; // NOTE(review): declaring move operations makes the implicit copy constructor / copy assignment deleted -- confirm no caller copies KernelData
+	KernelData &   operator = (KernelData &&other) = default;
 
    uint32_t       get_hash () const;
 
diff --git a/src/fmtcl/Matrix2020CLProc.cpp b/src/fmtcl/Matrix2020CLProc.cpp
index aa70589..d8b9bfd 100644
--- a/src/fmtcl/Matrix2020CLProc.cpp
+++ b/src/fmtcl/Matrix2020CLProc.cpp
@@ -30,6 +30,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 #include "fmtcl/fnc.h"
 #include "fmtcl/Matrix2020CLProc.h"
 #include "fmtcl/Matrix2020CLProc_macro.h"
+#include "fmtcl/PicFmt.h"
 #include "fmtcl/ProxyRwCpp.h"
 #include "fmtcl/TransOpLinPow.h"
 #include "fstb/fnc.h"
@@ -242,14 +243,14 @@ Matrix2020CLProc::Err	Matrix2020CLProc::setup_rgb_2_ycbcr ()
 		double         b_c;
 		compute_fmt_mac_cst (
 			a_y, b_y,
-			_dst_fmt, RGB_INT_BITS, ColorFamily_YUV, _full_range_flag,
-			_dst_fmt, RGB_INT_BITS, ColorFamily_YUV, true,
+			PicFmt { _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, _full_range_flag },
+			PicFmt { _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, true             },
 			0
 		);
 		compute_fmt_mac_cst (
 			a_c, b_c,
-			_dst_fmt, RGB_INT_BITS, ColorFamily_YUV, _full_range_flag,
-			_dst_fmt, RGB_INT_BITS, ColorFamily_YUV, true,
+			PicFmt { _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, _full_range_flag },
+			PicFmt { _dst_fmt, RGB_INT_BITS, ColorFamily_YUV, true             },
 			1
 		);
 		const int      dif_bits   = RGB_INT_BITS - _dst_bits;
@@ -358,14 +359,14 @@ Matrix2020CLProc::Err	Matrix2020CLProc::setup_ycbcr_2_rgb ()
 		double         b_c;
 		compute_fmt_mac_cst (
 			a_y, b_y,
-			_src_fmt, _src_bits, ColorFamily_YUV, true,
-			_src_fmt, _src_bits, ColorFamily_YUV, _full_range_flag,
+			PicFmt { _src_fmt, _src_bits, ColorFamily_YUV, true             },
+			PicFmt { _src_fmt, _src_bits, ColorFamily_YUV, _full_range_flag },
 			0
 		);
 		compute_fmt_mac_cst (
 			a_c, b_c,
-			_src_fmt, _src_bits, ColorFamily_YUV, true,
-			_src_fmt, _src_bits, ColorFamily_YUV, _full_range_flag,
+			PicFmt { _src_fmt, _src_bits, ColorFamily_YUV, true             },
+			PicFmt { _src_fmt, _src_bits, ColorFamily_YUV, _full_range_flag },
 			1
 		);
 		const int      dif_bits   = RGB_INT_BITS - _src_bits;
diff --git a/src/fmtcl/MatrixProc.cpp b/src/fmtcl/MatrixProc.cpp
index fe616b1..79cafe2 100644
--- a/src/fmtcl/MatrixProc.cpp
+++ b/src/fmtcl/MatrixProc.cpp
@@ -83,7 +83,7 @@ MatrixProc::Err	MatrixProc::configure (const Mat4 &m, bool int_proc_flag, SplFmt
 	assert (dst_fmt < SplFmt_NBR_ELT);
 	assert (dst_bits >= 8);
 	assert (dst_bits <= 32);
-	assert (plane_out <= NBR_PLANES);
+	assert (plane_out <= _nbr_planes);
 	assert (   (dst_fmt == SplFmt_FLOAT && src_fmt == SplFmt_FLOAT)
 	        || (dst_fmt != SplFmt_FLOAT && src_fmt != SplFmt_FLOAT));
 
@@ -196,7 +196,7 @@ MatrixProc::Err	MatrixProc::configure (const Mat4 &m, bool int_proc_flag, SplFmt
 
 
 
-void	MatrixProc::process (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (_proc_ptr != 0);
 
@@ -215,19 +215,19 @@ void	MatrixProc::process (uint8_t * const dst_ptr_arr [NBR_PLANES], const int ds
 
 void	MatrixProc::set_matrix_flt (const Mat4 &m, int plane_out)
 {
-	assert (plane_out <= NBR_PLANES);
+	assert (plane_out <= _nbr_planes); // NOTE(review): <= also accepts plane_out == _nbr_planes, in which case the loop below reads m [_nbr_planes] -- confirm this is intended
 
 	const int      plane_beg = (plane_out >= 0) ? plane_out     : 0;
-	const int      plane_end = (plane_out >= 0) ? plane_out + 1 : NBR_PLANES;
+	const int      plane_end = (plane_out >= 0) ? plane_out + 1 : _nbr_planes; // Negative plane_out: rows [0 ; _nbr_planes); otherwise only row plane_out
 
-	_coef_flt_arr.resize (NBR_PLANES * MAT_SIZE, 0);
+	_coef_flt_arr.resize (_nbr_planes * _mat_size, 0); // Room for _mat_size coefficients per plane
 	for (int y = plane_beg; y < plane_end; ++y)
 	{
 		const int      y_dest = (plane_out >= 0) ? 0 : y;
-		for (int x = 0; x < MAT_SIZE; ++x)
+		for (int x = 0; x < _mat_size; ++x) // Walks every column of matrix row y
 		{
 			const float    c = float (m [y] [x]);
-			_coef_flt_arr [y_dest * MAT_SIZE + x] = c;
+			_coef_flt_arr [y_dest * _mat_size + x] = c; // Single-plane mode (plane_out >= 0) always stores into row 0
 		}
 	}
 }
@@ -236,7 +236,7 @@ void	MatrixProc::set_matrix_flt (const Mat4 &m, int plane_out)
 
 MatrixProc::Err	MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int src_bits, int dst_bits)
 {
-	assert (plane_out <= NBR_PLANES);
+	assert (plane_out <= _nbr_planes);
 	assert (src_bits >= 8);
 	assert (src_bits <= 16);
 	assert (dst_bits >= 8);
@@ -245,9 +245,9 @@ MatrixProc::Err	MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int sr
 	Err            ret_val   = Err_OK;
 
 	const int      plane_beg = (plane_out >= 0) ? plane_out     : 0;
-	const int      plane_end = (plane_out >= 0) ? plane_out + 1 : NBR_PLANES;
+	const int      plane_end = (plane_out >= 0) ? plane_out + 1 : _nbr_planes;
 
-	_coef_int_arr.resize (NBR_PLANES * MAT_SIZE, 0);
+	_coef_int_arr.resize (_nbr_planes * _mat_size, 0);
 
 #if (fstb_ARCHI == fstb_ARCHI_X86)
 	if (_sse2_flag || _avx2_flag)
@@ -256,15 +256,15 @@ MatrixProc::Err	MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int sr
 		{
 			_coef_simd_arr.set_avx2_mode (true);
 		}
-		_coef_simd_arr.resize (NBR_PLANES * MAT_SIZE);
+		_coef_simd_arr.resize (_nbr_planes * _mat_size);
 	}
 #endif
 
 	// Coefficient scale
-	const double   cintsc    = double ((uint64_t (1)) << SHIFT_INT);
+	const double   cintsc    = double ((uint64_t (1)) << _shift_int);
 
 	// Rounding constant
-	const int      div_shift = SHIFT_INT + src_bits - dst_bits;
+	const int      div_shift = _shift_int + src_bits - dst_bits;
 	const int      rnd       = 1 << (div_shift - 1);
 
 	for (int y = plane_beg; y < plane_end; ++y)
@@ -282,11 +282,11 @@ MatrixProc::Err	MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int sr
 		double         bias_flt = (dst_bits == 16) ? -1 : 0;
 #endif   // fstb_ARCHI_X86
 
-		for (int x = 0; x < MAT_SIZE; ++x)
+		for (int x = 0; x < _mat_size; ++x)
 		{
-			const bool     add_flag = (x == NBR_PLANES);
+			const bool     add_flag = (x == _nbr_planes);
 			const int      y_dest   = (plane_out >= 0) ? 0 : y;
-			const int      index    = y_dest * MAT_SIZE + x;
+			const int      index    = y_dest * _mat_size + x;
 
 			const double   c        = m [y] [x];
 			double         scaled_c = c * cintsc;
@@ -338,7 +338,7 @@ MatrixProc::Err	MatrixProc::set_matrix_int (const Mat4 &m, int plane_out, int sr
 					if (dst_bits == 16 || src_bits == 16)
 					{
 						const double   scale = double (
-							(uint64_t (1)) << (src_bits + SHIFT_INT - 1)
+							(uint64_t (1)) << (src_bits + _shift_int - 1)
 						);
 						const int      bias = fstb::round_int (bias_flt * scale);
 
@@ -428,7 +428,7 @@ void	MatrixProc::setup_fnc_sse2 (bool int_proc_flag, SplFmt src_fmt, int src_bit
 
 
 template <typename DST, int DB, class SRC, int SB>
-void	MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -437,7 +437,7 @@ void	MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 
 	typedef typename SRC::PtrConst::Type SrcPtr;
 	typedef typename DST::Ptr::Type      DstPtr;
@@ -476,15 +476,15 @@ void	MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 			const int      d0 = (  s0 * _coef_int_arr [ 0]
 			                     + s1 * _coef_int_arr [ 1]
 			                     + s2 * _coef_int_arr [ 2]
-			                     +      _coef_int_arr [ 3]) >> (SHIFT_INT + SB - DB);
+			                     +      _coef_int_arr [ 3]) >> (_shift_int + SB - DB);
 			const int      d1 = (  s0 * _coef_int_arr [ 4]
 			                     + s1 * _coef_int_arr [ 5]
 			                     + s2 * _coef_int_arr [ 6]
-			                     +      _coef_int_arr [ 7]) >> (SHIFT_INT + SB - DB);
+			                     +      _coef_int_arr [ 7]) >> (_shift_int + SB - DB);
 			const int      d2 = (  s0 * _coef_int_arr [ 8]
 			                     + s1 * _coef_int_arr [ 9]
 			                     + s2 * _coef_int_arr [10]
-			                     +      _coef_int_arr [11]) >> (SHIFT_INT + SB - DB);
+			                     +      _coef_int_arr [11]) >> (_shift_int + SB - DB);
 
 			DST::template write_clip <DB> (dst_0_ptr, d0);
 			DST::template write_clip <DB> (dst_1_ptr, d1);
@@ -512,7 +512,7 @@ void	MatrixProc::process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 
 
 template <typename DST, int DB, class SRC, int SB>
-void	MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -521,7 +521,7 @@ void	MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 
 	typedef typename SRC::PtrConst::Type SrcPtr;
 	typedef typename DST::Ptr::Type      DstPtr;
@@ -554,7 +554,7 @@ void	MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 			const int      d0 = (  s0 * _coef_int_arr [ 0]
 			                     + s1 * _coef_int_arr [ 1]
 			                     + s2 * _coef_int_arr [ 2]
-			                     +      _coef_int_arr [ 3]) >> (SHIFT_INT + SB - DB);
+			                     +      _coef_int_arr [ 3]) >> (_shift_int + SB - DB);
 
 			DST::template write_clip <DB> (dst_0_ptr, d0);
 
@@ -575,7 +575,7 @@ void	MatrixProc::process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 
 
 
-void	MatrixProc::process_3_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_3_flt_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -584,7 +584,7 @@ void	MatrixProc::process_3_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 	const int      sizeof_xt = int (sizeof (float));
 	assert (src_str_arr [0] % sizeof_xt == 0);
 	assert (src_str_arr [1] % sizeof_xt == 0);
@@ -645,7 +645,7 @@ void	MatrixProc::process_3_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 
 
 
-void	MatrixProc::process_1_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_1_flt_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -654,7 +654,7 @@ void	MatrixProc::process_1_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 	const int      sizeof_xt = int (sizeof (float));
 	assert (src_str_arr [0] % sizeof_xt == 0);
 	assert (src_str_arr [1] % sizeof_xt == 0);
@@ -703,7 +703,7 @@ void	MatrixProc::process_1_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 
 // DST and SRC are ProxyRwSse2 classes
 template <class DST, int DB, class SRC, int SB, int NP>
-void	MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -712,7 +712,7 @@ void	MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 
 	enum { BPS_SRC = (SB + 7) >> 3 };
 	enum { BPS_DST = (DB + 7) >> 3 };
@@ -754,7 +754,7 @@ void	MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 				dst_str_arr [plane_index],
 				h
 			));
-			const int      cind    = plane_index * MAT_SIZE;
+			const int      cind    = plane_index * _mat_size;
 
 			for (int x = 0; x < w; x += packsize)
 			{
@@ -765,7 +765,7 @@ void	MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 				const __m128i  s1 = SrcS16R::read (src_1_ptr, zero, sign_bit);
 				const __m128i  s2 = SrcS16R::read (src_2_ptr, zero, sign_bit);
 
-				__m128i        d0 = _mm_load_si128 (coef_ptr + cind + NBR_PLANES);
+				__m128i        d0 = _mm_load_si128 (coef_ptr + cind + _nbr_planes);
 				__m128i        d1 = d0;
 
 				// src is variable, up to 16-bit signed (full range, +1 = 32767+1)
@@ -779,8 +779,8 @@ void	MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 				fstb::ToolsSse2::mac_s16_s16_s32 (
 					d0, d1, s2, _mm_load_si128 (coef_ptr + cind + 2));
 
-				d0 = _mm_srai_epi32 (d0, SHIFT_INT + SB - DB);
-				d1 = _mm_srai_epi32 (d1, SHIFT_INT + SB - DB);
+				d0 = _mm_srai_epi32 (d0, _shift_int + SB - DB);
+				d1 = _mm_srai_epi32 (d1, _shift_int + SB - DB);
 
 				__m128i			val = _mm_packs_epi32 (d0, d1);
 
@@ -806,7 +806,7 @@ void	MatrixProc::process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 
 
 
-void	MatrixProc::process_3_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_3_flt_sse (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -815,7 +815,7 @@ void	MatrixProc::process_3_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 	const int      sizeof_xt = int (sizeof (float));
 	assert (src_str_arr [0] % sizeof_xt == 0);
 	assert (src_str_arr [1] % sizeof_xt == 0);
@@ -892,7 +892,7 @@ void	MatrixProc::process_3_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 
 
 
-void	MatrixProc::process_1_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_1_flt_sse (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -901,7 +901,7 @@ void	MatrixProc::process_1_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 	const int      sizeof_xt = int (sizeof (float));
 	assert (src_str_arr [0] % sizeof_xt == 0);
 	assert (src_str_arr [1] % sizeof_xt == 0);
diff --git a/src/fmtcl/MatrixProc.h b/src/fmtcl/MatrixProc.h
index 6e634ba..09289c8 100644
--- a/src/fmtcl/MatrixProc.h
+++ b/src/fmtcl/MatrixProc.h
@@ -60,8 +60,8 @@ class MatrixProc
 		Err_INVALID_FORMAT_COMBINATION
 	};
 
-	static const int  NBR_PLANES = 3;
-	static const int  MAT_SIZE   = NBR_PLANES + 1;
+	static constexpr int _nbr_planes = 3;
+	static constexpr int _mat_size   = _nbr_planes + 1;
 
 	explicit       MatrixProc (bool sse_flag, bool sse2_flag, bool avx_flag, bool avx2_flag);
 	virtual        ~MatrixProc () {}
@@ -71,7 +71,7 @@ class MatrixProc
 	// All stride values are in bytes
 	// h must be the frame height too, not only the processed stripe height
 	// (required for Stack16 formats to compute the lsb offset)
-	void           process (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
+	void           process (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
 
 
 
@@ -85,7 +85,7 @@ class MatrixProc
 
 private:
 
-	static const int  SHIFT_INT = 12;   // Number of bits for the fractional part
+	static constexpr int _shift_int = 12;  // Number of bits for the fractional part
 
 	void           set_matrix_flt (const Mat4 &m, int plane_out);
 	Err            set_matrix_int (const Mat4 &m, int plane_out, int src_bits, int dst_bits);
@@ -98,23 +98,23 @@ class MatrixProc
 #endif   // fstb_ARCHI_X86
 
 	template <typename DST, int DB, class SRC, int SB>
-	void           process_3_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
+	void           process_3_int_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
 	template <typename DT, int DB, typename ST, int SB>
-	void           process_1_int_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
+	void           process_1_int_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
 
-	void           process_3_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
-	void           process_1_flt_cpp (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
+	void           process_3_flt_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
+	void           process_1_flt_cpp (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
 
 #if (fstb_ARCHI == fstb_ARCHI_X86)
 	template <class DST, int DB, class SRC, int SB, int NP>
-	void           process_n_int_sse2 (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
-	void           process_3_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
-	void           process_1_flt_sse (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
+	void           process_n_int_sse2 (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
+	void           process_3_flt_sse (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
+	void           process_1_flt_sse (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
 
 	template <class DST, int DB, class SRC, int SB, int NP>
-	void           process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
-	void           process_3_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
-	void           process_1_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
+	void           process_n_int_avx2 (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
+	void           process_3_flt_avx (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
+	void           process_1_flt_avx (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
 #endif   // fstb_ARCHI_X86
 
 	bool           _sse_flag;
@@ -123,12 +123,12 @@ class MatrixProc
 	bool           _avx2_flag;
 
 	void (ThisType::*                   // 0 = not set
-	               _proc_ptr) (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const;
+	               _proc_ptr) (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const;
 
 	std::vector <float>
 	               _coef_flt_arr;
 
-	// Integer coefficients are all scaled with SHIFT_INT.
+	// Integer coefficients are all scaled with _shift_int.
 	// The additive coefficient contains the rounding constant too.
 	std::vector <int>
 	               _coef_int_arr;
diff --git a/src/fmtcl/MatrixProc_avx.cpp b/src/fmtcl/MatrixProc_avx.cpp
index fb81745..9c90059 100644
--- a/src/fmtcl/MatrixProc_avx.cpp
+++ b/src/fmtcl/MatrixProc_avx.cpp
@@ -70,7 +70,7 @@ void	MatrixProc::setup_fnc_avx (bool int_proc_flag, SplFmt src_fmt, int src_bits
 
 
 
-void	MatrixProc::process_3_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_3_flt_avx (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -79,7 +79,7 @@ void	MatrixProc::process_3_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 	const int      sizeof_xt = int (sizeof (float));
 	assert (src_str_arr [0] % sizeof_xt == 0);
 	assert (src_str_arr [1] % sizeof_xt == 0);
@@ -158,7 +158,7 @@ void	MatrixProc::process_3_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 
 
 
-void	MatrixProc::process_1_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_1_flt_avx (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -167,7 +167,7 @@ void	MatrixProc::process_1_flt_avx (uint8_t * const dst_ptr_arr [NBR_PLANES], co
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 	const int      sizeof_xt = int (sizeof (float));
 	assert (src_str_arr [0] % sizeof_xt == 0);
 	assert (src_str_arr [1] % sizeof_xt == 0);
diff --git a/src/fmtcl/MatrixProc_avx2.cpp b/src/fmtcl/MatrixProc_avx2.cpp
index 40a604a..0f508b7 100644
--- a/src/fmtcl/MatrixProc_avx2.cpp
+++ b/src/fmtcl/MatrixProc_avx2.cpp
@@ -94,7 +94,7 @@ void	MatrixProc::setup_fnc_avx2 (bool int_proc_flag, SplFmt src_fmt, int src_bit
 
 // DST and SRC are ProxyRwAvx2 classes
 template <class DST, int DB, class SRC, int SB, int NP>
-void	MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], const int dst_str_arr [NBR_PLANES], const uint8_t * const src_ptr_arr [NBR_PLANES], const int src_str_arr [NBR_PLANES], int w, int h) const
+void	MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [_nbr_planes], const int dst_str_arr [_nbr_planes], const uint8_t * const src_ptr_arr [_nbr_planes], const int src_str_arr [_nbr_planes], int w, int h) const
 {
 	assert (dst_ptr_arr != 0);
 	assert (dst_str_arr != 0);
@@ -103,7 +103,7 @@ void	MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 	assert (w > 0);
 	assert (h > 0);
 
-	static_assert (NBR_PLANES == 3, "Code is hardcoded for 3 planes");
+	static_assert (_nbr_planes == 3, "Code is hardcoded for 3 planes");
 
 	enum { BPS_SRC = (SB + 7) >> 3 };
 	enum { BPS_DST = (DB + 7) >> 3 };
@@ -145,7 +145,7 @@ void	MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 				dst_str_arr [plane_index],
 				h
 			));
-			const int      cind    = plane_index * MAT_SIZE;
+			const int      cind = plane_index * _mat_size;
 
 			for (int x = 0; x < w; x += packsize)
 			{
@@ -156,7 +156,7 @@ void	MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 				const __m256i  s1 = SrcS16R::read (src_1_ptr, zero, sign_bit);
 				const __m256i  s2 = SrcS16R::read (src_2_ptr, zero, sign_bit);
 
-				__m256i        d0 = _mm256_load_si256 (coef_ptr + cind + NBR_PLANES);
+				__m256i        d0 = _mm256_load_si256 (coef_ptr + cind + _nbr_planes);
 				__m256i        d1 = d0;
 
 				// src is variable, up to 16-bit signed (full range, +1 = 32767+1)
@@ -170,8 +170,8 @@ void	MatrixProc::process_n_int_avx2 (uint8_t * const dst_ptr_arr [NBR_PLANES], c
 				fstb::ToolsAvx2::mac_s16_s16_s32 (
 					d0, d1, s2, _mm256_load_si256 (coef_ptr + cind + 2));
 
-				d0 = _mm256_srai_epi32 (d0, SHIFT_INT + SB - DB);
-				d1 = _mm256_srai_epi32 (d1, SHIFT_INT + SB - DB);
+				d0 = _mm256_srai_epi32 (d0, _shift_int + SB - DB);
+				d1 = _mm256_srai_epi32 (d1, _shift_int + SB - DB);
 
 				__m256i			val = _mm256_packs_epi32 (d0, d1);
 
diff --git a/src/fmtcl/MatrixUtil.cpp b/src/fmtcl/MatrixUtil.cpp
new file mode 100644
index 0000000..e76adac
--- /dev/null
+++ b/src/fmtcl/MatrixUtil.cpp
@@ -0,0 +1,361 @@
+/*****************************************************************************
+
+        MatrixUtil.cpp
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#if defined (_MSC_VER)
+	#pragma warning (1 : 4130 4223 4705 4706)
+	#pragma warning (4 : 4355 4786 4800)
+#endif
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fstb/fnc.h"
+#include "fmtcl/Mat3.h"
+#include "fmtcl/Mat4.h"
+#include "fmtcl/MatrixUtil.h"
+
+#include <cassert>
+
+
+
+namespace fmtcl
+{
+
+
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+// Maps a matrix name to its ColorSpaceH265 value. mat should be already converted to lower case; an empty string is handled like "rgb".
+// Returns ColorSpaceH265_UNDEF if mat is unknown (after an assert when assertions are enabled). "2020cl" is accepted only if allow_2020cl_flag is set.
+ColorSpaceH265	MatrixUtil::find_cs_from_mat_str (const std::string &mat, bool allow_2020cl_flag)
+{
+	ColorSpaceH265   cs = ColorSpaceH265_UNSPECIFIED;  // Placeholder: every branch below assigns cs
+
+	if (mat.empty () || mat == "rgb")
+	{
+		cs = ColorSpaceH265_RGB;
+	}
+	else if (mat == "601")
+	{
+		cs = ColorSpaceH265_SMPTE170M;
+	}
+	else if (mat == "709")
+	{
+		cs = ColorSpaceH265_BT709;
+	}
+	else if (mat == "240")
+	{
+		cs = ColorSpaceH265_SMPTE240M;
+	}
+	else if (mat == "fcc")
+	{
+		cs = ColorSpaceH265_FCC;
+	}
+	else if (mat == "ycgco" || mat == "ycocg")  // Both spellings are accepted
+	{
+		cs = ColorSpaceH265_YCGCO;
+	}
+	else if (mat == "2020")
+	{
+		cs = ColorSpaceH265_BT2020NCL;
+	}
+	else if (mat == "2020cl" && allow_2020cl_flag)  // With the flag cleared, "2020cl" ends up in the unknown branch
+	{
+		cs = ColorSpaceH265_BT2020CL;
+	}
+	else if (mat == "ydzdx")
+	{
+		cs = ColorSpaceH265_YDZDX;
+	}
+	else if (mat == "lms")
+	{
+		cs = ColorSpaceH265_LMS;
+	}
+	else if (mat == "ictcp_pq")
+	{
+		cs = ColorSpaceH265_ICTCP_PQ;
+	}
+	else if (mat == "ictcp_hlg")
+	{
+		cs = ColorSpaceH265_ICTCP_HLG;
+	}
+
+	// Unknown matrix identifier
+	else
+	{
+		assert (false);  // Stops here when assertions are enabled; otherwise the error value below is returned
+		cs = ColorSpaceH265_UNDEF;
+	}
+
+	return cs;
+}
+
+
+
+// Fills m with the matrix named by mat (an empty string is handled like "rgb"). Returns 0 on success, or -1 if mat is unknown (after an assert when assertions are enabled); m is left untouched in that case.
+int	MatrixUtil::make_mat_from_str (Mat4 &m, const std::string &mat, bool to_rgb_flag)
+{
+	int            ret_val = 0;  // Success unless the final else branch is reached
+
+	if (mat.empty () || mat == "rgb")  // to_rgb_flag is not used in this branch
+	{
+		m[0][0] = 1; m[0][1] = 0; m[0][2] = 0;  // Identity on the 3x3 part
+		m[1][0] = 0; m[1][1] = 1; m[1][2] = 0;
+		m[2][0] = 0; m[2][1] = 0; m[2][2] = 1;
+		m.clean3 (1);  // NOTE(review): presumably sets the remaining row/column of the 4x4 matrix - confirm in Mat4::clean3
+	}
+	else if (mat == "601")
+	{
+		make_mat_yuv (m, 0.299, 0.587, 0.114, to_rgb_flag);  // Arguments are kr, kg, kb
+	}
+	else if (mat == "709")
+	{
+		make_mat_yuv (m, 0.2126, 0.7152, 0.0722, to_rgb_flag);
+	}
+	else if (mat == "240")
+	{
+		make_mat_yuv (m, 0.212, 0.701, 0.087, to_rgb_flag);
+	}
+	else if (mat == "fcc")
+	{
+		make_mat_yuv (m, 0.30, 0.59, 0.11, to_rgb_flag);
+	}
+	else if (mat == "ycgco" || mat == "ycocg")  // Both spellings are accepted
+	{
+		make_mat_ycgco (m, to_rgb_flag);
+	}
+	else if (mat == "2020")
+	{
+		make_mat_yuv (m, 0.2627, 0.678, 0.0593, to_rgb_flag);
+	}
+	else if (mat == "ydzdx")
+	{
+		make_mat_ydzdx (m, to_rgb_flag);
+	}
+	else if (mat == "lms")
+	{
+		make_mat_lms (m, to_rgb_flag);
+	}
+	else if (mat == "ictcp_pq")
+	{
+		make_mat_ictcp (m, false, to_rgb_flag);  // hlg_flag = false; to_rgb_flag is passed as to_lms_flag
+	}
+	else if (mat == "ictcp_hlg")
+	{
+		make_mat_ictcp (m, true, to_rgb_flag);  // hlg_flag = true; to_rgb_flag is passed as to_lms_flag
+	}
+	else  // Unknown identifier. This includes "2020cl", which has no entry in this function
+	{
+		assert (false);  // Stops here when assertions are enabled; otherwise -1 is returned
+		ret_val = -1;
+	}
+
+	return ret_val;
+}
+
+
+
+/*
+kr/kg/kb matrix (Rec. ITU-T H.265 2019-06, p. 413):
+
+R = Y                  + V*(1-Kr)
+G = Y - U*(1-Kb)*Kb/Kg - V*(1-Kr)*Kr/Kg
+B = Y + U*(1-Kb)
+
+Y =                  R * Kr        + G * Kg        + B * Kb
+U = (B-Y)/(1-Kb) = - R * Kr/(1-Kb) - G * Kg/(1-Kb) + B
+V = (R-Y)/(1-Kr) =   R             - G * Kg/(1-Kr) - B * Kb/(1-Kr)
+
+The given equations work for R, G, B in range [0 ; 1] and U and V in range
+[-1 ; 1]. Scaling must be applied to match the required range for U and V.
+Ranges used by make_mat_yuv() (the U and V rows are scaled by r = 0.5):
+R, G, B, Y range : [0 ; 1]
+U, V range : [-0.5 ; 0.5]
+*/
+
+void	MatrixUtil::make_mat_yuv (Mat4 &m, double kr, double kg, double kb, bool to_rgb_flag)  // Writes the YUV -> RGB matrix (to_rgb_flag set) or the RGB -> YUV matrix into m
+{
+	assert (! fstb::is_null (kg));  // kg is a divisor in the YUV -> RGB matrix
+	assert (! fstb::is_eq (kb, 1.0));  // (kb-1) is a divisor in the RGB -> YUV matrix
+	assert (! fstb::is_eq (kr, 1.0));  // (kr-1) is a divisor in the RGB -> YUV matrix
+
+	constexpr double  r = 0.5;  // Scale bringing U and V from [-1 ; 1] to [-0.5 ; 0.5]
+	constexpr double  x = 1.0 / r;  // Inverse scale, applied in the YUV -> RGB direction
+	if (to_rgb_flag)
+	{
+		m[0][0] = 1; m[0][1] =              0; m[0][2] = x*(1-kr)      ;  // R row
+		m[1][0] = 1; m[1][1] = x*(kb-1)*kb/kg; m[1][2] = x*(kr-1)*kr/kg;  // G row
+		m[2][0] = 1; m[2][1] = x*(1-kb)      ; m[2][2] =              0;  // B row
+	}
+
+	else
+	{
+		m[0][0] =     kr     ; m[0][1] =   kg       ; m[0][2] =   kb       ;  // Y row
+		m[1][0] = r*kr/(kb-1); m[1][1] = r*kg/(kb-1); m[1][2] = r          ;  // U row
+		m[2][0] = r          ; m[2][1] = r*kg/(kr-1); m[2][2] = r*kb/(kr-1);  // V row
+	}
+
+	m.clean3 (1);  // NOTE(review): presumably sets the remaining row/column of the 4x4 matrix - confirm in Mat4::clean3
+}
+
+
+
+/*
+YCgCo matrix (Rec. ITU-T H.265 2019-06, p. 413), written to the 3x3 part of m:
+
+R  = Y - Cg + Co
+G  = Y + Cg
+B  = Y - Cg - Co
+
+Y  =  0.25 * R + 0.5  * G + 0.25 * B
+Cg = -0.25 * R + 0.5  * G - 0.25 * B
+Co =  0.5  * R            - 0.5  * B
+
+R, G, B, Y range : [0 ; 1]
+Cg, Co range : [-0.5 ; 0.5]
+
+Note: this implementation is not exactly the same as specified because the
+standard specifies specific steps to apply the RGB-to-YCgCo matrix, leading
+to different roundings.
+*/
+
+void	MatrixUtil::make_mat_ycgco (Mat4 &m, bool to_rgb_flag)
+{
+	if (to_rgb_flag)  // YCgCo -> RGB
+	{
+		m[0][0] = 1; m[0][1] = -1; m[0][2] =  1;  // R row
+		m[1][0] = 1; m[1][1] =  1; m[1][2] =  0;  // G row
+		m[2][0] = 1; m[2][1] = -1; m[2][2] = -1;  // B row
+	}
+	else  // RGB -> YCgCo
+	{
+		m[0][0] =  0.25; m[0][1] = 0.5; m[0][2] =  0.25;  // Y row
+		m[1][0] = -0.25; m[1][1] = 0.5; m[1][2] = -0.25;  // Cg row
+		m[2][0] =  0.5 ; m[2][1] = 0  ; m[2][2] = -0.5 ;  // Co row
+	}
+
+	m.clean3 (1);  // NOTE(review): presumably sets the remaining row/column of the 4x4 matrix - confirm in Mat4::clean3
+}
+
+
+
+/*
+YDzDx transform (Rec. ITU-T H.265 2019-06, p. 414)
+
+Y  = G
+Dz = 0.5 * (0.986566 * B - Y)
+Dx = 0.5 * (R - 0.991902 * Y)
+
+Expanded on R, G, B, which gives the matrix rows used in make_mat_ydzdx():
+Y  =                      G
+Dz =         - 0.5      * G + 0.493283 * B
+Dx = 0.5 * R - 0.495951 * G
+*/
+
+void	MatrixUtil::make_mat_ydzdx (Mat4 &m, bool to_rgb_flag)
+{
+	Mat3           m3;  // Holds the RGB -> YDzDx matrix
+	m3[0][0] = 0  ; m3[0][1] =  1       ; m3[0][2] = 0;  // Y row
+	m3[1][0] = 0  ; m3[1][1] = -0.5     ; m3[1][2] = 0.493283;  // Dz row
+	m3[2][0] = 0.5; m3[2][1] = -0.495951; m3[2][2] = 0;  // Dx row
+
+	if (to_rgb_flag)  // YDzDx -> RGB is obtained as the inverse of RGB -> YDzDx
+	{
+		m3.invert ();  // NOTE(review): relies on Mat3::invert() modifying m3 in place - confirm
+	}
+
+	m.insert3 (m3);  // NOTE(review): presumably copies m3 into the 3x3 part of m - confirm in Mat4::insert3
+	m.clean3 (1);  // NOTE(review): presumably sets the remaining row/column of the 4x4 matrix - confirm in Mat4::clean3
+}
+
+
+
+/*
+LMS transform (Rec. ITU-T H.265 2019-06, p. 411)
+
+LMS is an intermediate colorspace for ICtCp transforms.
+LMS data are conveyed on RGB planes.
+Here, to_rgb_flag indicates real RGB target.
+*/
+
+void	MatrixUtil::make_mat_lms (Mat4 &m, bool to_rgb_flag)
+{
+	Mat3           m3;  // Holds the RGB -> LMS matrix
+	m3[0][0] = 1688; m3[0][1] = 2146; m3[0][2] =  262;  // L row
+	m3[1][0] =  683; m3[1][1] = 2951; m3[1][2] =  462;  // M row
+	m3[2][0] =   99; m3[2][1] =  309; m3[2][2] = 3688;  // S row
+	m3 *= 1.0 / 4096;  // The coefficients above are integers scaled by 4096
+
+	if (to_rgb_flag)  // LMS -> RGB is obtained as the inverse of RGB -> LMS
+	{
+		m3.invert ();  // NOTE(review): relies on Mat3::invert() modifying m3 in place - confirm
+	}
+
+	m.insert3 (m3);  // NOTE(review): presumably copies m3 into the 3x3 part of m - confirm in Mat4::insert3
+	m.clean3 (1);  // NOTE(review): presumably sets the remaining row/column of the 4x4 matrix - confirm in Mat4::clean3
+}
+
+
+
+/*
+ICtCp transform from and to LMS (Rec. ITU-T H.265 2019-06, p. 414)
+
+LMS data are conveyed on RGB planes.
+*/
+
+void	MatrixUtil::make_mat_ictcp (Mat4 &m, bool hlg_flag, bool to_lms_flag)
+{
+	Mat3           m3;  // Holds the LMS -> ICtCp matrix
+	m3[0][0] =  2048; m3[0][1] =   2048; m3[0][2] =    0;  // I row, shared by both variants
+	if (hlg_flag)  // HLG coefficients for the Ct and Cp rows
+	{
+		m3[1][0] =  3625; m3[1][1] =  -7465; m3[1][2] = 3840;
+		m3[2][0] =  9500; m3[2][1] =  -9212; m3[2][2] = -288;
+	}
+	else  // PQ coefficients for the Ct and Cp rows
+	{
+		m3[1][0] =  6610; m3[1][1] = -13613; m3[1][2] = 7003;
+		m3[2][0] = 17933; m3[2][1] = -17390; m3[2][2] = -543;
+	}
+	m3 *= 1.0 / 4096;  // The coefficients above are integers scaled by 4096
+
+	if (to_lms_flag)  // ICtCp -> LMS is obtained as the inverse of LMS -> ICtCp
+	{
+		m3.invert ();  // NOTE(review): relies on Mat3::invert() modifying m3 in place - confirm
+	}
+
+	m.insert3 (m3);  // NOTE(review): presumably copies m3 into the 3x3 part of m - confirm in Mat4::insert3
+	m.clean3 (1);  // NOTE(review): presumably sets the remaining row/column of the 4x4 matrix - confirm in Mat4::clean3
+}
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+}  // namespace fmtcl
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/MatrixUtil.h b/src/fmtcl/MatrixUtil.h
new file mode 100644
index 0000000..f94a256
--- /dev/null
+++ b/src/fmtcl/MatrixUtil.h
@@ -0,0 +1,98 @@
+/*****************************************************************************
+
+        MatrixUtil.h
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#pragma once
+#if ! defined (fmtcl_MatrixUtil_HEADER_INCLUDED)
+#define fmtcl_MatrixUtil_HEADER_INCLUDED
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/ColorSpaceH265.h"
+
+#include <string>
+
+
+
+namespace fmtcl
+{
+
+
+
+class Mat4;
+
+class MatrixUtil  // Static-only helpers building colour matrices from names or coefficients; cannot be instantiated (constructors are deleted)
+{
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+public:
+
+	static ColorSpaceH265
+	               find_cs_from_mat_str (const std::string &mat, bool allow_2020cl_flag);  // mat: lower-case matrix name. Returns ColorSpaceH265_UNDEF if unknown
+
+	static int     make_mat_from_str (Mat4 &m, const std::string &mat, bool to_rgb_flag);  // Returns 0 on success, -1 if mat is unknown
+	static void    make_mat_yuv (Mat4 &m, double kr, double kg, double kb, bool to_rgb_flag);  // kr, kg, kb: luma coefficients for R, G and B
+	static void    make_mat_ycgco (Mat4 &m, bool to_rgb_flag);
+	static void    make_mat_ydzdx (Mat4 &m, bool to_rgb_flag);
+	static void    make_mat_lms (Mat4 &m, bool to_rgb_flag);
+	static void    make_mat_ictcp (Mat4 &m, bool hlg_flag, bool to_lms_flag);  // hlg_flag selects the HLG coefficients instead of the PQ ones
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+protected:
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+
+
+/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+	               MatrixUtil ()                               = delete;  // No instances: the class only groups static functions
+	               MatrixUtil (const MatrixUtil &other)        = delete;
+	               MatrixUtil (MatrixUtil &&other)             = delete;
+	MatrixUtil &   operator = (const MatrixUtil &other)        = delete;
+	MatrixUtil &   operator = (MatrixUtil &&other)             = delete;
+	bool           operator == (const MatrixUtil &other) const = delete;
+	bool           operator != (const MatrixUtil &other) const = delete;
+
+}; // class MatrixUtil
+
+
+
+}  // namespace fmtcl
+
+
+
+//#include "fmtcl/MatrixUtil.hpp"
+
+
+
+#endif   // fmtcl_MatrixUtil_HEADER_INCLUDED
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/PicFmt.h b/src/fmtcl/PicFmt.h
new file mode 100644
index 0000000..9ab63cd
--- /dev/null
+++ b/src/fmtcl/PicFmt.h
@@ -0,0 +1,91 @@
+/*****************************************************************************
+
+        PicFmt.h
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#pragma once
+#if ! defined (fmtcl_PicFmt_HEADER_INCLUDED)
+#define fmtcl_PicFmt_HEADER_INCLUDED
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/ColorFamily.h"
+#include "fmtcl/SplFmt.h"
+
+
+
+namespace fmtcl
+{
+
+
+
+class PicFmt  // Groups the fields describing a picture format; all data members are public
+{
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+public:
+
+	bool           is_valid () const noexcept  // True when _sf, _res and _col_fam all pass the checks below; _full_flag is not checked
+	{
+		return (
+			   _sf >= 0 && _sf < SplFmt_NBR_ELT
+			&& _res >= 8  // At least 8 bits per sample
+			&& _col_fam >= 0 && _col_fam < ColorFamily_NBR_ELT
+		);
+	}
+
+	SplFmt         _sf        = SplFmt_ILLEGAL;
+	int            _res       = 0;      // Number of bits per sample. The default 0 makes is_valid() return false
+	ColorFamily    _col_fam   = ColorFamily_INVALID;
+	bool           _full_flag = false;  // Full range
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+protected:
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+
+
+/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+}; // class PicFmt
+
+
+
+}  // namespace fmtcl
+
+
+
+//#include "fmtcl/PicFmt.hpp"
+
+
+
+#endif   // fmtcl_PicFmt_HEADER_INCLUDED
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/PrimUtil.cpp b/src/fmtcl/PrimUtil.cpp
new file mode 100644
index 0000000..8ea8ff7
--- /dev/null
+++ b/src/fmtcl/PrimUtil.cpp
@@ -0,0 +1,301 @@
+/*****************************************************************************
+
+        PrimUtil.cpp
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#if defined (_MSC_VER)
+	#pragma warning (1 : 4130 4223 4705 4706)
+	#pragma warning (4 : 4355 4786 4800)
+#endif
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/PrimUtil.h"
+#include "fstb/fnc.h"
+
+#include <cassert>
+
+
+
+namespace fmtcl
+{
+
+
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+// Out-of-class definition of the static constexpr member declared in PrimUtil.h
+constexpr int	PrimUtil::_nbr_planes;
+
+
+// Returns the product xyz2rgb (prim_d) * chromatic adaptation * rgb2xyz (prim_s).
+Mat3	PrimUtil::compute_conversion_matrix (const RgbSystem &prim_s, const RgbSystem &prim_d)
+{
+	assert (prim_s.is_ready ());
+	assert (prim_d.is_ready ());
+	// Both systems must be ready (checked above in debug builds only)
+	const Mat3     rgb2xyz = compute_rgb2xyz (prim_s);
+	const Mat3     xyz2rgb = compute_rgb2xyz (prim_d).invert ();
+	const Mat3     adapt   = compute_chroma_adapt (prim_s, prim_d);
+	// NOTE(review): invert () is used above while other functions here call compute_inverse () - confirm both are equivalent
+	return xyz2rgb * adapt * rgb2xyz;
+}
+
+
+// Computes the RGB to XYZ matrix from the primaries and the white point of prim.
+// http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
+Mat3	PrimUtil::compute_rgb2xyz (const RgbSystem &prim)
+{
+	assert (prim.is_ready ());
+
+	Mat3           m;
+	// The CIE XYZ preset does not use the primaries: diagonal matrix built from 1
+	if (prim._preset == PrimariesPreset_CIEXYZ)
+	{
+		m = Mat3 (1, Mat3::Preset_DIAGONAL);
+	}
+
+	else
+	{
+		const Vec3     white = conv_xy_to_xyz (prim._white);
+		// Column k of xyzrgb receives the XYZ coordinates of primary k
+		Mat3           xyzrgb;
+		for (int k = 0; k < _nbr_planes; ++k)
+		{
+			Vec3           comp_xyz = conv_xy_to_xyz (prim._rgb [k]);
+			xyzrgb.set_col (k, comp_xyz);
+		}
+		// s solves xyzrgb * s = white: one scale factor per primary
+		const Vec3     s = xyzrgb.compute_inverse () * white;
+		// Each column of the result is the primary scaled by its factor
+		for (int u = 0; u < _nbr_planes; ++u)
+		{
+			m.set_col (u, xyzrgb.get_col (u) * s [u]);
+		}
+	}
+
+	return m;
+}
+
+
+// Computes the matrix adapting XYZ values from the white of prim_s to the white of prim_d.
+// http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html
+Mat3	PrimUtil::compute_chroma_adapt (const RgbSystem &prim_s, const RgbSystem &prim_d)
+{
+	assert (prim_s.is_ready ());
+	assert (prim_d.is_ready ());
+	// White points of both systems, converted from xy to XYZ
+	const Vec3     white_s = conv_xy_to_xyz (prim_s._white);
+	const Vec3     white_d = conv_xy_to_xyz (prim_d._white);
+
+	// Bradford adaptation
+	const Mat3     ma ({
+		Vec3 {  0.8951,  0.2664, -0.1614 },
+		Vec3 { -0.7502,  1.7135,  0.0367 },
+		Vec3 {  0.0389, -0.0685,  1.0296 }
+	});
+	// Both whites transformed by the Bradford matrix; scale holds their per-component ratio
+	Vec3    crd_s = ma * white_s;
+	Vec3    crd_d = ma * white_d;
+	Mat3    scale (0.0);
+	for (int k = 0; k < _nbr_planes; ++k)
+	{
+		assert (crd_s [k] != 0);
+		scale [k] [k] = crd_d [k] / crd_s [k];
+	}
+	// Transform with ma, apply the diagonal scaling, then transform back
+	return ma.compute_inverse () * scale * ma;
+}
+
+
+
+// Obtains X, Y, Z from (x, y)
+// Y is assumed to be 1.0, except when y is null (all components are then 0)
+// X =      x      / y
+// Z = (1 - x - y) / y
+// http://www.brucelindbloom.com/index.html?Eqn_xyY_to_XYZ.html
+Vec3	PrimUtil::conv_xy_to_xyz (const RgbSystem::Vec2 &xy)
+{
+	Vec3           xyz;
+	// xy [0] is x and xy [1] is y
+	// When y is null, X = Y = Z = 0.
+	if (fstb::is_null (xy [1]))
+	{
+		xyz [0] = 0;
+		xyz [1] = 0;
+		xyz [2] = 0;
+	}
+	else
+	{
+		xyz [0] =      xy [0]           / xy [1];
+		xyz [1] = 1;
+		xyz [2] = (1 - xy [0] - xy [1]) / xy [1];
+	}
+
+	return xyz;
+}
+
+
+
+// Maps a primaries name to its preset. str should be already converted to lower case
+PrimariesPreset	PrimUtil::conv_string_to_primaries (const std::string &str)
+{
+	assert (! str.empty ());
+	// Value returned when the name matches none of the strings below
+	PrimariesPreset  preset = PrimariesPreset_UNDEF;
+	// Comparisons are exact; several names can select the same preset
+	if (        str == "709"
+	         || str == "1361"
+	         || str == "61966-2-1"
+	         || str == "61966-2-4"
+	         || str == "hdtv"
+	         || str == "srgb")
+	{
+		preset = PrimariesPreset_BT709;
+	}
+	else if (   str == "470m"
+	         || str == "ntsc")
+	{
+		preset = PrimariesPreset_FCC;
+	}
+	else if (   str == "470m93"
+	         || str == "ntscj")
+	{
+		preset = PrimariesPreset_NTSCJ;
+	}
+	else if (   str == "470bg"
+	         || str == "601-625"
+	         || str == "1358-625"
+	         || str == "1700-625"
+	         || str == "pal"
+	         || str == "secam")
+	{
+		preset = PrimariesPreset_BT470BG;
+	}
+	else if (   str == "170m"
+	         || str == "601-525"
+	         || str == "1358-525"
+	         || str == "1700-525")
+	{
+		preset = PrimariesPreset_SMPTE170M;
+	}
+	else if (   str == "240m")
+	{
+		preset = PrimariesPreset_SMPTE240M;
+	}
+	else if (   str == "filmc")
+	{
+		preset = PrimariesPreset_GENERIC_FILM;
+	}
+	else if (   str == "2020"
+	         || str == "2100"
+	         || str == "uhdtv")
+	{
+		preset = PrimariesPreset_BT2020;
+	}
+	else if (   str == "61966-2-2"
+	         || str == "scrgb")
+	{
+		preset = PrimariesPreset_SCRGB;
+	}
+	else if (   str == "adobe98")
+	{
+		preset = PrimariesPreset_ADOBE_RGB_98;
+	}
+	else if (   str == "adobewide")
+	{
+		preset = PrimariesPreset_ADOBE_RGB_WIDE;
+	}
+	else if (   str == "apple")
+	{
+		preset = PrimariesPreset_APPLE_RGB;
+	}
+	else if (   str == "photopro"
+	         || str == "romm")
+	{
+		preset = PrimariesPreset_ROMM;
+	}
+	else if (   str == "ciergb")
+	{
+		preset = PrimariesPreset_CIERGB;
+	}
+	else if (   str == "ciexyz")
+	{
+		preset = PrimariesPreset_CIEXYZ;
+	}
+	else if (   str == "p3d65"
+	         || str == "dcip3")   // NOTE(review): "dcip3" selects P3D65 while "p3dci" selects P3DCI - confirm intended
+	{
+		preset = PrimariesPreset_P3D65;
+	}
+	else if (   str == "aces")
+	{
+		preset = PrimariesPreset_ACES;
+	}
+	else if (   str == "ap1")
+	{
+		preset = PrimariesPreset_ACESAP1;
+	}
+	else if (   str == "sgamut"
+	         || str == "sgamut3")
+	{
+		preset = PrimariesPreset_SGAMUT;
+	}
+	else if (   str == "sgamut3cine")
+	{
+		preset = PrimariesPreset_SGAMUT3CINE;
+	}
+	else if (   str == "alexa")
+	{
+		preset = PrimariesPreset_ALEXA;
+	}
+	else if (   str == "vgamut")
+	{
+		preset = PrimariesPreset_VGAMUT;
+	}
+	else if (   str == "p3dci")
+	{
+		preset = PrimariesPreset_P3DCI;
+	}
+	else if (   str == "p3d60")
+	{
+		preset = PrimariesPreset_P3D60;
+	}
+	else if (   str == "3213")
+	{
+		preset = PrimariesPreset_EBU3213E;
+	}
+
+	return preset;
+}
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+}  // namespace fmtcl
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/PrimUtil.h b/src/fmtcl/PrimUtil.h
new file mode 100644
index 0000000..72cf11d
--- /dev/null
+++ b/src/fmtcl/PrimUtil.h
@@ -0,0 +1,96 @@
+/*****************************************************************************
+
+        PrimUtil.h
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#pragma once
+#if ! defined (fmtcl_PrimUtil_HEADER_INCLUDED)
+#define fmtcl_PrimUtil_HEADER_INCLUDED
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/Mat3.h"
+#include "fmtcl/RgbSystem.h"
+
+#include <string>
+
+
+
+namespace fmtcl
+{
+
+
+// Static helpers building conversion matrices between RGB systems and parsing primaries names.
+class PrimUtil
+{
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+public:
+	// Same plane count as RgbSystem
+	static constexpr int _nbr_planes = RgbSystem::_nbr_planes;
+	// All functions are static; construction, copy and comparison are deleted below
+	static Mat3    compute_conversion_matrix (const RgbSystem &prim_s, const RgbSystem &prim_d);
+	static Mat3    compute_rgb2xyz (const RgbSystem &prim);
+	static Mat3    compute_chroma_adapt (const RgbSystem &prim_s, const RgbSystem &prim_d);
+	static Vec3    conv_xy_to_xyz (const RgbSystem::Vec2 &xy);
+	static PrimariesPreset
+	               conv_string_to_primaries (const std::string &str);
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+protected:
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+
+
+/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+	               PrimUtil ()                               = delete;
+	               PrimUtil (const PrimUtil &other)          = delete;
+	               PrimUtil (PrimUtil &&other)               = delete;
+	PrimUtil &     operator = (const PrimUtil &other)        = delete;
+	PrimUtil &     operator = (PrimUtil &&other)             = delete;
+	bool           operator == (const PrimUtil &other) const = delete;
+	bool           operator != (const PrimUtil &other) const = delete;
+
+}; // class PrimUtil
+
+
+
+}  // namespace fmtcl
+
+
+
+//#include "fmtcl/PrimUtil.hpp"
+
+
+
+#endif   // fmtcl_PrimUtil_HEADER_INCLUDED
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/ResamplePlaneData.h b/src/fmtcl/ResamplePlaneData.h
new file mode 100644
index 0000000..e3c7503
--- /dev/null
+++ b/src/fmtcl/ResamplePlaneData.h
@@ -0,0 +1,121 @@
+/*****************************************************************************
+
+        ResamplePlaneData.h
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#pragma once
+#if ! defined (fmtcl_ResamplePlaneData_HEADER_INCLUDED)
+#define fmtcl_ResamplePlaneData_HEADER_INCLUDED
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/FilterResize.h"
+#include "fmtcl/InterlacingType.h"
+#include "fmtcl/KernelData.h"
+#include "fmtcl/ResampleSpecPlane.h"
+
+#include <array>
+
+
+
+namespace fmtcl
+{
+
+
+// Resampling parameters for one plane: source window, kernels, scaling and the derived specs.
+class ResamplePlaneData
+{
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+public:
+	// Movable but not copyable (the copy operations are deleted below)
+	               ResamplePlaneData ()                          = default;
+	               ~ResamplePlaneData ()                         = default;
+	               ResamplePlaneData (ResamplePlaneData &&other) = default;
+	ResamplePlaneData &
+	               operator = (ResamplePlaneData &&other)        = default;
+
+	// Array order: [dest] [src]
+	typedef std::array <ResampleSpecPlane, InterlacingType_NBR_ELT> SpecSrcArray;
+	typedef std::array <SpecSrcArray,      InterlacingType_NBR_ELT> SpecArray;
+	// Source window
+	class Win
+	{
+	public:
+		// Data is in full coordinates whatever the plane (never subsampled)
+		double         _x = 0;
+		double         _y = 0;
+		double         _w = 0;
+		double         _h = 0;
+	};
+	// One kernel per FilterResize direction
+	typedef std::array <
+		KernelData,
+		FilterResize::Dir_NBR_ELT
+	>  KernelArray;
+
+	Win            _win;
+	SpecArray      _spec_arr;        // Contains the spec (used as a key) for each plane/interlacing combination
+	KernelArray    _kernel_arr;
+	double         _kernel_scale_h = 1;  // Can be negative (forced scaling)
+	double         _kernel_scale_v = 1;  // Can be negative (forced scaling)
+	double         _gain           = 1;
+	double         _add_cst        = 0;
+	bool           _preserve_center_flag = true;
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+protected:
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+
+
+/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+	               ResamplePlaneData (const ResamplePlaneData &other) = delete;
+	ResamplePlaneData &
+	               operator = (const ResamplePlaneData &other)        = delete;
+	bool           operator == (const ResamplePlaneData &other) const = delete;
+	bool           operator != (const ResamplePlaneData &other) const = delete;
+
+}; // class ResamplePlaneData
+
+
+
+}  // namespace fmtcl
+
+
+
+//#include "fmtcl/ResamplePlaneData.hpp"
+
+
+
+#endif   // fmtcl_ResamplePlaneData_HEADER_INCLUDED
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/ResampleSpecPlane.cpp b/src/fmtcl/ResampleSpecPlane.cpp
index ed7555c..17b7738 100644
--- a/src/fmtcl/ResampleSpecPlane.cpp
+++ b/src/fmtcl/ResampleSpecPlane.cpp
@@ -26,6 +26,8 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 #include "fmtcl/ResampleSpecPlane.h"
 
+#include <tuple>
+
 #include <cassert>
 
 
@@ -41,57 +43,43 @@ namespace fmtcl
 
 bool	ResampleSpecPlane::operator < (const ResampleSpecPlane &other) const
 {
-	if (_src_width        < other._src_width       ) { return (true ); }
-	if (_src_width        > other._src_width       ) { return (false); }
-
-	if (_src_height       < other._src_height      ) { return (true ); }
-	if (_src_height       > other._src_height      ) { return (false); }
-
-	if (_dst_width        < other._dst_width       ) { return (true ); }
-	if (_dst_width        > other._dst_width       ) { return (false); }
-
-	if (_dst_height       < other._dst_height      ) { return (true ); }
-	if (_dst_height       > other._dst_height      ) { return (false); }
-
-	if (_win_x            < other._win_x           ) { return (true ); }
-	if (_win_x            > other._win_x           ) { return (false); }
-
-	if (_win_y            < other._win_y           ) { return (true ); }
-	if (_win_y            > other._win_y           ) { return (false); }
-
-	if (_win_w            < other._win_w           ) { return (true ); }
-	if (_win_w            > other._win_w           ) { return (false); }
-
-	if (_win_h            < other._win_h           ) { return (true ); }
-	if (_win_h            > other._win_h           ) { return (false); }
-
-	if (_center_pos_src_h < other._center_pos_src_h) { return (true ); }
-	if (_center_pos_src_h > other._center_pos_src_h) { return (false); }
-
-	if (_center_pos_src_v < other._center_pos_src_v) { return (true ); }
-	if (_center_pos_src_v > other._center_pos_src_v) { return (false); }
-
-	if (_center_pos_dst_h < other._center_pos_dst_h) { return (true ); }
-	if (_center_pos_dst_h > other._center_pos_dst_h) { return (false); }
-
-	if (_center_pos_dst_v < other._center_pos_dst_v) { return (true ); }
-	if (_center_pos_dst_v > other._center_pos_dst_v) { return (false); }
-
-	if (_kernel_scale_h   < other._kernel_scale_h  ) { return (true ); }
-	if (_kernel_scale_h   > other._kernel_scale_h  ) { return (false); }
-
-	if (_kernel_scale_v   < other._kernel_scale_v  ) { return (true ); }
-	if (_kernel_scale_v   > other._kernel_scale_v  ) { return (false); }
-
-	if (_add_cst          < other._add_cst         ) { return (true ); }
-
-	if (_kernel_hash_h    < other._kernel_hash_h   ) { return (true ); }
-	if (_kernel_hash_h    > other._kernel_hash_h   ) { return (false); }
-
-	if (_kernel_hash_v    < other._kernel_hash_v    ) { return (true ); }
-	if (_kernel_hash_v    > other._kernel_hash_v    ) { return (false); }
-
-	return (false);
+	return std::tie ( // Lexicographic comparison: members are compared in the listed order
+		_src_width,
+		_src_height,
+		_dst_width,
+		_dst_height,
+		_win_x,
+		_win_y,
+		_win_w,
+		_win_h,
+		_center_pos_src_h,
+		_center_pos_src_v,
+		_center_pos_dst_h,
+		_center_pos_dst_v,
+		_kernel_scale_h,
+		_kernel_scale_v,
+		_add_cst,
+		_kernel_hash_h,
+		_kernel_hash_v
+	) < std::tie ( // Same members, same order, taken from the other object
+		other._src_width,
+		other._src_height,
+		other._dst_width,
+		other._dst_height,
+		other._win_x,
+		other._win_y,
+		other._win_w,
+		other._win_h,
+		other._center_pos_src_h,
+		other._center_pos_src_v,
+		other._center_pos_dst_h,
+		other._center_pos_dst_v,
+		other._kernel_scale_h,
+		other._kernel_scale_v,
+		other._add_cst,
+		other._kernel_hash_h,
+		other._kernel_hash_v
+	);
 }
 
 
diff --git a/src/fmtcl/ResampleUtil.cpp b/src/fmtcl/ResampleUtil.cpp
new file mode 100644
index 0000000..2f36a5a
--- /dev/null
+++ b/src/fmtcl/ResampleUtil.cpp
@@ -0,0 +1,126 @@
+/*****************************************************************************
+
+        ResampleUtil.cpp
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#if defined (_MSC_VER)
+	#pragma warning (1 : 4130 4223 4705 4706)
+	#pragma warning (4 : 4355 4786 4800)
+#endif
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/ChromaPlacement.h"
+#include "fmtcl/FilterResize.h"
+#include "fmtcl/fnc.h"
+#include "fmtcl/InterlacingType.h"
+#include "fmtcl/ResamplePlaneData.h"
+#include "fmtcl/ResampleSpecPlane.h"
+#include "fmtcl/ResampleUtil.h"
+
+#include <cassert>
+
+
+
+namespace fmtcl
+{
+
+
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+// Fills plane_data._spec_arr with one spec per [destination] [source] interlacing combination.
+void	ResampleUtil::create_plane_specs (ResamplePlaneData &plane_data, int plane_index, ColorFamily src_cf, int src_w, int src_ss_h, int src_h, int src_ss_v, ChromaPlacement cplace_s, ColorFamily dst_cf, int dst_w, int dst_ss_h, int dst_h, int dst_ss_v, ChromaPlacement cplace_d)
+{
+	assert (plane_index >= 0);
+
+	ResampleSpecPlane spec;
+	// Dimensions of the processed plane, for the source and the destination
+	spec._src_width  =
+		compute_plane_width (src_cf, src_ss_h, src_w, plane_index);
+	spec._src_height =
+		compute_plane_height (src_cf, src_ss_v, src_h, plane_index);
+	spec._dst_width  =
+		compute_plane_width (dst_cf, dst_ss_h, dst_w, plane_index);
+	spec._dst_height =
+		compute_plane_height (dst_cf, dst_ss_v, dst_h, plane_index);
+	// The window is divided by the source subsampling factors (src_ss_h and src_ss_v are log2 values)
+	const double   subspl_h = double (1 << src_ss_h);
+	const double   subspl_v = double (1 << src_ss_v);
+	// NOTE(review): the division below does not depend on plane_index - confirm the callers pass per-plane subsampling
+	const ResamplePlaneData::Win &   s = plane_data._win;
+	spec._win_x = s._x / subspl_h;
+	spec._win_y = s._y / subspl_v;
+	spec._win_w = s._w / subspl_h;
+	spec._win_h = s._h / subspl_v;
+	// Parameters shared by all the interlacing combinations
+	spec._add_cst        = plane_data._add_cst;
+	spec._kernel_scale_h = plane_data._kernel_scale_h;
+	spec._kernel_scale_v = plane_data._kernel_scale_v;
+	spec._kernel_hash_h  = plane_data._kernel_arr [FilterResize::Dir_H].get_hash ();
+	spec._kernel_hash_v  = plane_data._kernel_arr [FilterResize::Dir_V].get_hash ();
+	// Only the center positions differ between the combinations
+	for (int itl_d = 0; itl_d < InterlacingType_NBR_ELT; ++itl_d)
+	{
+		for (int itl_s = 0; itl_s < InterlacingType_NBR_ELT; ++itl_s)
+		{
+			double         cp_s_h = 0;
+			double         cp_s_v = 0;
+			double         cp_d_h = 0;
+			double         cp_d_v = 0;
+			if (plane_data._preserve_center_flag)
+			{
+				ChromaPlacement_compute_cplace (
+					cp_s_h, cp_s_v, cplace_s, plane_index, src_ss_h, src_ss_v,
+					(src_cf == ColorFamily_RGB),
+					(itl_s  != InterlacingType_FRAME),
+					(itl_s  == InterlacingType_TOP)
+				);
+				ChromaPlacement_compute_cplace (
+					cp_d_h, cp_d_v, cplace_d, plane_index, dst_ss_h, dst_ss_v,
+					(dst_cf == ColorFamily_RGB),
+					(itl_d  != InterlacingType_FRAME),
+					(itl_d  == InterlacingType_TOP)
+				);
+			}
+			// All positions stay at 0 when _preserve_center_flag is not set
+			spec._center_pos_src_h = cp_s_h;
+			spec._center_pos_src_v = cp_s_v;
+			spec._center_pos_dst_h = cp_d_h;
+			spec._center_pos_dst_v = cp_d_v;
+
+			plane_data._spec_arr [itl_d] [itl_s] = spec;
+		}  // for itl_s
+	}  // for itl_d
+}
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+}  // namespace fmtcl
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/ResampleUtil.h b/src/fmtcl/ResampleUtil.h
new file mode 100644
index 0000000..f09dfbb
--- /dev/null
+++ b/src/fmtcl/ResampleUtil.h
@@ -0,0 +1,89 @@
+/*****************************************************************************
+
+        ResampleUtil.h
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#pragma once
+#if ! defined (fmtcl_ResampleUtil_HEADER_INCLUDED)
+#define fmtcl_ResampleUtil_HEADER_INCLUDED
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/ChromaPlacement.h"
+#include "fmtcl/ColorFamily.h"
+
+
+
+namespace fmtcl
+{
+
+
+
+class ResamplePlaneData;
+// Static helper building the resampling specs of a plane; cannot be instantiated.
+class ResampleUtil
+{
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+public:
+	// Takes plane_data by non-const reference; src_* and dst_* describe both clips, *_ss_* are subsampling shifts
+	static void    create_plane_specs (ResamplePlaneData &plane_data, int plane_index, ColorFamily src_cf, int src_w, int src_ss_h, int src_h, int src_ss_v, ChromaPlacement cplace_s, ColorFamily dst_cf, int dst_w, int dst_ss_h, int dst_h, int dst_ss_v, ChromaPlacement cplace_d);
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+protected:
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+
+
+/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+	               ResampleUtil ()                               = delete;
+	               ResampleUtil (const ResampleUtil &other)      = delete;
+	               ResampleUtil (ResampleUtil &&other)           = delete;
+	ResampleUtil & operator = (const ResampleUtil &other)        = delete;
+	ResampleUtil & operator = (ResampleUtil &&other)             = delete;
+	bool           operator == (const ResampleUtil &other) const = delete;
+	bool           operator != (const ResampleUtil &other) const = delete;
+
+}; // class ResampleUtil
+
+
+
+}  // namespace fmtcl
+
+
+
+//#include "fmtcl/ResampleUtil.hpp"
+
+
+
+#endif   // fmtcl_ResampleUtil_HEADER_INCLUDED
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/RgbSystem.cpp b/src/fmtcl/RgbSystem.cpp
index 12a7a93..25956fe 100644
--- a/src/fmtcl/RgbSystem.cpp
+++ b/src/fmtcl/RgbSystem.cpp
@@ -39,6 +39,10 @@ namespace fmtcl
 
 
 
+constexpr int	RgbSystem::_nbr_planes; // Out-of-class definition of the static constexpr member
+
+
+
 RgbSystem::Vec2::Vec2 (double c0, double c1)
 :	Inherited ({ { c0, c1 } })
 {
diff --git a/src/fmtcl/RgbSystem.h b/src/fmtcl/RgbSystem.h
index 1422e1d..1a1041f 100644
--- a/src/fmtcl/RgbSystem.h
+++ b/src/fmtcl/RgbSystem.h
@@ -45,29 +45,31 @@ class RgbSystem
 
 public:
 
-	static const int  NBR_PLANES = 3;
+	static constexpr int _nbr_planes = 3;
 
 	class Vec2
-	:	public std::array <double, NBR_PLANES - 1>
+	:	public std::array <double, _nbr_planes - 1> // Two components (_nbr_planes is 3)
 	{
-		typedef std::array <double, NBR_PLANES - 1> Inherited;
+		typedef std::array <double, _nbr_planes - 1> Inherited; // Shortcut for the base class
 	public:
 		               Vec2 () = default;
 		               Vec2 (double c0, double c1);
 	};
 
 	               RgbSystem ();
-	               RgbSystem (const RgbSystem &other)         = default;
-	virtual        ~RgbSystem ()                              = default;
-	RgbSystem &    operator = (const RgbSystem &other)        = default;
+	               RgbSystem (const RgbSystem &other)  = default;
+	               RgbSystem (RgbSystem &&other)       = default;
+	virtual        ~RgbSystem ()                       = default;
+	RgbSystem &    operator = (const RgbSystem &other) = default;
+	RgbSystem &    operator = (RgbSystem &&other)      = default;
 
 	void           set (PrimariesPreset preset);
 	bool           is_ready () const;
 
-	std::array <Vec2, NBR_PLANES>       // x,y coordinates for R, G and B
+	std::array <Vec2, _nbr_planes>      // x,y coordinates for R, G and B
 	               _rgb;
 	Vec2           _white;              // XYZ coordinates for the ref. white
-	std::array <bool, NBR_PLANES + 1>   // R, G, B, W
+	std::array <bool, _nbr_planes + 1>  // R, G, B, W
 	               _init_flag_arr;
 	fmtcl::PrimariesPreset              // If known
 	               _preset;
diff --git a/src/fmtcl/Scaler.cpp b/src/fmtcl/Scaler.cpp
index aa85390..9685313 100644
--- a/src/fmtcl/Scaler.cpp
+++ b/src/fmtcl/Scaler.cpp
@@ -932,7 +932,7 @@ void	Scaler::build_scale_data ()
 		const float    thr_1_flt = 1e-5f;
 		if (info._kernel_size == 1)
 		{
-			const float    d_flt = fabs (_coef_flt_arr [info._coef_index] - 1.0f);
+			const float    d_flt = fabsf (_coef_flt_arr [info._coef_index] - 1.0f);
 			info._copy_flt_flag = (d_flt <= thr_1_flt);
 
 			if (_can_int_flag)
diff --git a/src/fmtcl/SplFmt.h b/src/fmtcl/SplFmt.h
index 3b7421b..a02ac07 100644
--- a/src/fmtcl/SplFmt.h
+++ b/src/fmtcl/SplFmt.h
@@ -49,6 +49,8 @@ enum SplFmt
 
 
 
+inline bool SplFmt_is_float (SplFmt fmt);   // True for SplFmt_FLOAT
+inline bool SplFmt_is_int (SplFmt fmt);     // True for any legal format other than SplFmt_FLOAT
 inline int	SplFmt_get_unit_size (SplFmt fmt);
 inline int	SplFmt_get_data_size (SplFmt fmt);
 
diff --git a/src/fmtcl/SplFmt.hpp b/src/fmtcl/SplFmt.hpp
index d29be5b..cecea9e 100644
--- a/src/fmtcl/SplFmt.hpp
+++ b/src/fmtcl/SplFmt.hpp
@@ -35,6 +35,26 @@ namespace fmtcl
 
 
 
+bool	SplFmt_is_float (SplFmt fmt)
+{
+	assert (fmt >= 0);
+	assert (fmt < SplFmt_NBR_ELT);
+	// fmt must be a legal format; only SplFmt_FLOAT is reported as floating point
+	return (fmt == SplFmt_FLOAT);
+}
+
+
+// Reports any legal format other than SplFmt_FLOAT as integer.
+bool	SplFmt_is_int (SplFmt fmt)
+{
+	assert (fmt >= 0);
+	assert (fmt < SplFmt_NBR_ELT);
+
+	return (fmt != SplFmt_FLOAT);
+}
+
+
+
 int	SplFmt_get_unit_size (SplFmt fmt)
 {
 	assert (fmt >= 0);
diff --git a/src/fmtcl/TransCurve.h b/src/fmtcl/TransCurve.h
index 2ef520e..2783397 100644
--- a/src/fmtcl/TransCurve.h
+++ b/src/fmtcl/TransCurve.h
@@ -84,6 +84,18 @@ enum TransCurve
 
 
 
+inline bool TransCurve_is_valid (TransCurve curve) // Range check only, on the enum value
+{
+	return (
+		   curve >= 0
+		&& (   curve < TransCurve_NBR_ELT              // Rejects the values from TransCurve_NBR_ELT...
+		    || curve > TransCurve_ISO_RANGE_LAST)      // ...up to TransCurve_ISO_RANGE_LAST included
+		&& curve < TransCurve_NBR_ELT_CUSTOM           // Upper bound for the values above that gap
+	);
+}
+
+
+
 }	// namespace fmtcl
 
 
diff --git a/src/fmtcl/TransLut.cpp b/src/fmtcl/TransLut.cpp
index 56a9f60..718facb 100644
--- a/src/fmtcl/TransLut.cpp
+++ b/src/fmtcl/TransLut.cpp
@@ -284,7 +284,7 @@ void	TransLut::MapperLog::find_index (const FloatIntMix &val, int &index, float
 	static const uint32_t frac_mask = (1 << frac_size) - 1;
 
 	const uint32_t val_u = val._i & 0x7FFFFFFF;
-	const float    val_a = fabs (val._f);
+	const float    val_a = fabsf (val._f);
 
 	// index is set relatively to the x=0 index...
 	if (val_a < val_min)
@@ -300,7 +300,7 @@ void	TransLut::MapperLog::find_index (const FloatIntMix &val, int &index, float
 	else
 	{
 		index = ((val_u - base) >> frac_size) + 1;
-		frac  = (val_u & frac_mask) * (1.0f / (1 << frac_size));
+		frac  = float (val_u & frac_mask) * (1.0f / (1 << frac_size));
 	}
 
 	// ...and shifted or mirrored depending on the sign
@@ -328,8 +328,8 @@ double	TransLut::MapperLog::find_val (int index) const
 	assert (index >= 0);
 	assert (index < LOGLUT_SIZE);
 
-	static const float    val_min  = 1.0f / (int64_t (1) << -LOGLUT_MIN_L2);
-	static const int      seg_size = 1 << LOGLUT_RES_L2;
+	static constexpr float   val_min  = 1.0f / (int64_t (1) << -LOGLUT_MIN_L2);
+	static constexpr int     seg_size = 1 << LOGLUT_RES_L2;
 
 	// float is OK because the values are exactly represented in float.
 	float          val   = 0;
@@ -339,8 +339,8 @@ double	TransLut::MapperLog::find_val (int index) const
 		const int      ind_3     = std::abs (ind_2) - 1;
 		const int      log2_part = ind_3 >> LOGLUT_RES_L2;
 		const int      seg_part  = ind_3 & (seg_size - 1);
-		const float    lerp      = seg_part * (1.0f / seg_size);
-		const float    v0        = (int64_t (1) << log2_part) * val_min;
+		const float    lerp      = float (seg_part) * (1.0f / seg_size);
+		const float    v0        = float (int64_t (1) << log2_part) * val_min;
 		val = v0 * (1 + lerp);
 		if (ind_2 < 0)
 		{
diff --git a/src/fmtcl/TransOpLogC.cpp b/src/fmtcl/TransOpLogC.cpp
index fc778db..bf71302 100644
--- a/src/fmtcl/TransOpLogC.cpp
+++ b/src/fmtcl/TransOpLogC.cpp
@@ -38,27 +38,21 @@ namespace fmtcl
 
 
 
-const double		TransOpLogC::_noise_margin = -8.0 / 65536;
-
-
-
 /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 
 
-TransOpLogC::TransOpLogC (bool inv_flag, Type type)
+TransOpLogC::TransOpLogC (bool inv_flag, Type type, ExpIdx ei) // inv_flag selects the inverse curve; ei selects the row of the Log C tables
 :	_inv_flag (inv_flag)
-,	_cut ((type == Type_VLOG) ? 0.01     : (type == Type_LOGC_V2) ? 0.000000 : 0.010591)
-,	_a (  (type == Type_VLOG) ? 1.0      : (type == Type_LOGC_V2) ? 5.061087 : 5.555556)
-,	_b (  (type == Type_VLOG) ? 0.00873  : (type == Type_LOGC_V2) ? 0.089004 : 0.052272)
-,	_c (  (type == Type_VLOG) ? 0.241514 : (type == Type_LOGC_V2) ? 0.247189 : 0.247190)
-,	_d (  (type == Type_VLOG) ? 0.598206 : (type == Type_LOGC_V2) ? 0.391007 : 0.385537)
-,	_e (  (type == Type_VLOG) ? 5.6      : (type == Type_LOGC_V2) ? 4.950469 : 5.367655)
-,	_f (  (type == Type_VLOG) ? 0.125    : (type == Type_LOGC_V2) ? 0.131313 : 0.092809)
-,	_n (  (type == Type_VLOG) ? 0        : _noise_margin)
-,	_cut_i (_e * _cut + _f)
+,	_n ((type == Type_VLOG) ? 0 : _noise_margin)      // No noise margin for V-Log
+,	_curve (                                          // The initializers run before the asserts of the body,
+		  (type == Type_VLOG   ) ? _vlog                // so the tables are read with the bounds-checked at ():
+		: (type == Type_LOGC_V2) ? _v2_table.at (ei)    // an invalid ei (like ExpIdx_INVALID) throws std::out_of_range
+		:                          _v3_table.at (ei)    // instead of reading out of the array, release builds included.
+	)
 {
-	// Nothing
+	assert (ei >= 0);               // Still checked for V-Log, which does not use ei
+	assert (ei < ExpIdx_NBR_ELT);
 }
 
 
@@ -66,12 +60,39 @@ TransOpLogC::TransOpLogC (bool inv_flag, Type type)
 // 1 is log peak white.
 double	TransOpLogC::operator () (double x) const
 {
-	return ((_inv_flag) ? compute_inverse (x) : compute_direct (x));
+	return (_inv_flag) ? compute_inverse (x) : compute_direct (x); // Direction fixed at construction by _inv_flag
 }
 
 double	TransOpLogC::get_max () const
 {
-	return (compute_inverse (1.0));
+	return compute_inverse (1.0); // Value of the inverse curve for an input of 1
+}
+
+
+// Converts a raw exposure index (160 to 1600) to the matching ExpIdx value.
+TransOpLogC::ExpIdx	TransOpLogC::conv_logc_ei (int val_raw)
+{
+	ExpIdx         ei = ExpIdx_INVALID;
+	// Unlisted values assert in debug builds; ExpIdx_INVALID is returned when asserts are disabled
+	switch (val_raw)
+	{
+	case  160: ei = ExpIdx_160;  break;
+	case  200: ei = ExpIdx_200;  break;
+	case  250: ei = ExpIdx_250;  break;
+	case  320: ei = ExpIdx_320;  break;
+	case  400: ei = ExpIdx_400;  break;
+	case  500: ei = ExpIdx_500;  break;
+	case  640: ei = ExpIdx_640;  break;
+	case  800: ei = ExpIdx_800;  break;
+	case 1000: ei = ExpIdx_1000; break;
+	case 1280: ei = ExpIdx_1280; break;
+	case 1600: ei = ExpIdx_1600; break;
+	default:
+		assert (false);
+		break;
+	}
+
+	return ei;
 }
 
 
@@ -88,28 +109,69 @@ double	TransOpLogC::compute_direct (double x) const
 {
 	x = std::max (x, _n);
 	double         y =
-		  (x > _cut  )
-		? _c * log10 (_a * x + _b) + _d
-		: _e * x + _f;
+		  (x > _curve._cut  )
+		? _curve._c * log10 (_curve._a * x + _curve._b) + _curve._d
+		: _curve._e * x + _curve._f;
 	y = std::min (y, 1.0);
 
-	return (y);
+	return y;
 }
 
 double	TransOpLogC::compute_inverse (double x) const
 {
 	x = std::min (x, 1.0);
 	double         y =
-		  (x > _cut_i)
-		? (pow (10, (x - _d) / _c) - _b) / _a
-		: (x - _f) / _e;
+		  (x > _curve._cut_i)                                                // _cut_i is the threshold on the encoded side
+		? (pow (10, (x - _curve._d) / _curve._c) - _curve._b) / _curve._a    // Inverse of the logarithmic segment
+		: (x - _curve._f) / _curve._e;                                       // Inverse of the linear segment
 	y = std::max (y, _n);
 
-	return (y);
+	return y; // Input clipped to 1 at most, output to _n at least
 }
 
 
 
+const double	TransOpLogC::_noise_margin = -8.0 / 65536;
+// V-Log parameters, in CurveData order: cut, a, b, c, d, e, f, e*cut+f
+const TransOpLogC::CurveData	TransOpLogC::_vlog =
+{
+	0.01, 1.0, 0.00873, 0.241514, 0.598206, 5.6, 0.125, 5.6 * 0.01 + 0.125
+};
+// Parameters selected for Type_LOGC_V2: one row per ExpIdx value, from ExpIdx_160 to ExpIdx_1600
+const std::array <TransOpLogC::CurveData, TransOpLogC::ExpIdx_NBR_ELT>	TransOpLogC::_v2_table =
+{{
+	// cut, a,       b,        c,        d,        e,        f,        e*cut+f
+	{ 0.0, 5.061087, 0.089004, 0.269035, 0.391007, 6.332427, 0.108361, 0.108361 },
+	{ 0.0, 5.061087, 0.089004, 0.266007, 0.391007, 6.189953, 0.111543, 0.111543 },
+	{ 0.0, 5.061087, 0.089004, 0.262978, 0.391007, 6.034414, 0.114725, 0.114725 },
+	{ 0.0, 5.061087, 0.089004, 0.259627, 0.391007, 5.844973, 0.118246, 0.118246 },
+	{ 0.0, 5.061087, 0.089004, 0.256598, 0.391007, 5.656190, 0.121428, 0.121428 },
+	{ 0.0, 5.061087, 0.089004, 0.253569, 0.391007, 5.449261, 0.124610, 0.124610 },
+	{ 0.0, 5.061087, 0.089004, 0.250218, 0.391007, 5.198031, 0.128130, 0.128130 },
+	{ 0.0, 5.061087, 0.089004, 0.247189, 0.391007, 4.950469, 0.131313, 0.131313 },
+	{ 0.0, 5.061087, 0.089004, 0.244161, 0.391007, 4.684112, 0.134495, 0.134495 },
+	{ 0.0, 5.061087, 0.089004, 0.240810, 0.391007, 4.369609, 0.138015, 0.138015 },
+	{ 0.0, 5.061087, 0.089004, 0.237781, 0.391007, 4.070466, 0.141197, 0.141197 }
+}};
+// Parameters selected when the type is neither Type_VLOG nor Type_LOGC_V2: one row per ExpIdx value
+const std::array <TransOpLogC::CurveData, TransOpLogC::ExpIdx_NBR_ELT>	TransOpLogC::_v3_table =
+{{
+	// cut,     a,        b,        c,        d,        e,        f,        e*cut+f
+	{ 0.005561, 5.555556, 0.080216, 0.269036, 0.381991, 5.842037, 0.092778, 0.125266 },
+	{ 0.006208, 5.555556, 0.076621, 0.266007, 0.382478, 5.776265, 0.092782, 0.128643 },
+	{ 0.006871, 5.555556, 0.072941, 0.262978, 0.382966, 5.710494, 0.092786, 0.132021 },
+	{ 0.007622, 5.555556, 0.068768, 0.259627, 0.383508, 5.637732, 0.092791, 0.135761 },
+	{ 0.008318, 5.555556, 0.064901, 0.256598, 0.383999, 5.571960, 0.092795, 0.139142 },
+	{ 0.009031, 5.555556, 0.060939, 0.253569, 0.384493, 5.506188, 0.092800, 0.142526 },
+	{ 0.009840, 5.555556, 0.056443, 0.250219, 0.385040, 5.433426, 0.092805, 0.146271 },
+	{ 0.010591, 5.555556, 0.052272, 0.247190, 0.385537, 5.367655, 0.092809, 0.149658 },
+	{ 0.011361, 5.555556, 0.047996, 0.244161, 0.386036, 5.301883, 0.092814, 0.153047 },
+	{ 0.012235, 5.555556, 0.043137, 0.240810, 0.386590, 5.229121, 0.092819, 0.156799 },
+	{ 0.013047, 5.555556, 0.038625, 0.237781, 0.387093, 5.163350, 0.092824, 0.160192 }
+}};
+
+
+
 }	// namespace fmtcl
 
 
diff --git a/src/fmtcl/TransOpLogC.h b/src/fmtcl/TransOpLogC.h
index 9a5af2f..37294c9 100644
--- a/src/fmtcl/TransOpLogC.h
+++ b/src/fmtcl/TransOpLogC.h
@@ -3,6 +3,11 @@
         TransOpLogC.h
         Author: Laurent de Soras, 2015
 
+Source:
+Harald Brendel,
+ALEXA Log C Curve Usage in VFX,
+ARRI, 2011-10-05
+
 --- Legal stuff ---
 
 This program is free software. It comes without any warranty, to
@@ -29,6 +34,8 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 #include "fmtcl/TransOpInterface.h"
 
+#include <array>
+
 
 
 namespace fmtcl
@@ -53,13 +60,35 @@ class TransOpLogC
 		Type_NBR_ELT
 	};
 
-	explicit       TransOpLogC (bool inv_flag, Type type);
+	// Exposure Index (EI)
+	enum ExpIdx
+	{
+		ExpIdx_INVALID = -1,
+
+		ExpIdx_160 = 0,
+		ExpIdx_200,
+		ExpIdx_250,
+		ExpIdx_320,
+		ExpIdx_400,
+		ExpIdx_500,
+		ExpIdx_640,
+		ExpIdx_800,
+		ExpIdx_1000,
+		ExpIdx_1280,
+		ExpIdx_1600,
+
+		ExpIdx_NBR_ELT
+	};
+
+	explicit       TransOpLogC (bool inv_flag, Type type, ExpIdx ei = ExpIdx_800);
 	virtual        ~TransOpLogC () {}
 
 	// TransOpInterface
 	virtual double operator () (double x) const;
 	virtual double get_max () const;
 
+	static ExpIdx  conv_logc_ei (int val_raw);
+
 
 
 /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
@@ -72,22 +101,35 @@ class TransOpLogC
 
 private:
 
+	class CurveData
+	{
+	public:
+		double         _cut;
+		double         _a;
+		double         _b;
+		double         _c;
+		double         _d;
+		double         _e;
+		double         _f;
+		double         _cut_i; // _e * _cut + _f
+	};
+
 	double         compute_direct (double x) const;
 	double         compute_inverse (double x) const;
 
 	const bool     _inv_flag;
-	const double   _cut;
-	const double   _a;
-	const double   _b;
-	const double   _c;
-	const double   _d;
-	const double   _e;
-	const double   _f;
 	const double   _n;
-	const double   _cut_i;
+	const CurveData
+	               _curve;
 
 	static const double
 		            _noise_margin;
+	static const CurveData
+	               _vlog;
+	static const std::array <CurveData, ExpIdx_NBR_ELT>
+	               _v2_table;
+	static const std::array <CurveData, ExpIdx_NBR_ELT>
+	               _v3_table;
 
 
 
diff --git a/src/fmtcl/TransUtil.cpp b/src/fmtcl/TransUtil.cpp
new file mode 100644
index 0000000..8da0469
--- /dev/null
+++ b/src/fmtcl/TransUtil.cpp
@@ -0,0 +1,471 @@
+/*****************************************************************************
+
+        TransUtil.cpp
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#if defined (_MSC_VER)
+	#pragma warning (1 : 4130 4223 4705 4706)
+	#pragma warning (4 : 4355 4786 4800)
+#endif
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/TransOp2084.h"
+#include "fmtcl/TransOpAcesCc.h"
+#include "fmtcl/TransOpAffine.h"
+#include "fmtcl/TransOpBypass.h"
+#include "fmtcl/TransOpCanonLog.h"
+#include "fmtcl/TransOpCompose.h"
+#include "fmtcl/TransOpContrast.h"
+#include "fmtcl/TransOpErimm.h"
+#include "fmtcl/TransOpFilmStream.h"
+#include "fmtcl/TransOpHlg.h"
+#include "fmtcl/TransOpLinPow.h"
+#include "fmtcl/TransOpLogC.h"
+#include "fmtcl/TransOpLogTrunc.h"
+#include "fmtcl/TransOpPow.h"
+#include "fmtcl/TransOpSLog.h"
+#include "fmtcl/TransOpSLog3.h"
+#include "fmtcl/TransUtil.h"
+#include "fstb/fnc.h"
+
+#include <cassert>
+
+
+
+namespace fmtcl
+{
+
+
+
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+std::unique_ptr <TransLut>	TransUtil::build_lut (PicFmt dst_fmt, TransCurve curve_d, TransOpLogC::ExpIdx logc_ei_d, PicFmt src_fmt, TransCurve curve_s, TransOpLogC::ExpIdx logc_ei_s, double contrast, double gcor, double lvl_black, bool sse2_flag, bool avx2_flag)
+{
+	assert (dst_fmt.is_valid ());
+	assert (TransCurve_is_valid (curve_d));
+	assert (logc_ei_d >= 0);
+	assert (logc_ei_d < TransOpLogC::ExpIdx_NBR_ELT);
+	assert (src_fmt.is_valid ());
+	assert (TransCurve_is_valid (curve_s));
+	assert (logc_ei_s >= 0);
+	assert (logc_ei_s < TransOpLogC::ExpIdx_NBR_ELT);
+	assert (contrast > 0);
+	assert (gcor > 0);
+	assert (lvl_black >= 0);
+
+	// The source operator is requested with inv_flag set, the destination one
+	// without. TransOpCompose (a, b) presumably applies a first -- TODO confirm
+	OpSPtr         op_src = conv_curve_to_op (curve_s, true , logc_ei_s);
+	OpSPtr         op_dst = conv_curve_to_op (curve_d, false, logc_ei_d);
+
+	// Linear or log LUT?
+	bool           use_log_lut = false;
+	if (SplFmt_is_float (src_fmt._sf) && curve_s == TransCurve_LINEAR)
+	{
+		// Curves with extended range or with fast-evolving slope at 0.
+		// Actually we could just use the log LUT for all the curves...?
+		// 10 bits per stop + interpolation should be enough for all of them.
+		// What about the speed?
+		switch (curve_d)
+		{
+		case TransCurve_470BG:
+		case TransCurve_LINEAR:
+		case TransCurve_61966_2_4:
+		case TransCurve_2084:
+		case TransCurve_428:
+		case TransCurve_HLG:
+		case TransCurve_1886:
+		case TransCurve_1886A:
+		case TransCurve_SLOG:
+		case TransCurve_SLOG2:
+		case TransCurve_SLOG3:
+		case TransCurve_LOGC2:
+		case TransCurve_LOGC3:
+		case TransCurve_CANONLOG:
+		case TransCurve_ACESCC:
+		case TransCurve_ERIMM:
+			use_log_lut = true;
+			break;
+		default:
+			break;
+		}
+
+		// A low gamma correction or a contrast far from 1 selects the log LUT too
+		const double   contrast_abs = fabs (contrast);
+		if (gcor < 0.5 || contrast_abs >= 3.0/2 || contrast_abs <= 2.0/3)
+		{
+			use_log_lut = true;
+		}
+	}
+
+	// Black level
+	const double   lum_white = op_src->get_max ();
+	if (lvl_black > 0 && lvl_black < lum_white)
+	{
+		/*
+		Black level (brightness) and contrast settings as defined
+		in ITU-R BT.1886:
+			L = a' * fi (V + b')
+
+		With:
+			fi = EOTF (gamma to linear)
+			L  = Lb for V = 0
+			L  = Lw for V = Vmax
+
+		For power functions, could be rewritten as:
+			L = fi (a * V + b)
+
+		Substitution:
+			Lb = fi (           b)
+			Lw = fi (a * Vmax + b)
+
+		Then, given:
+			f = OETF (linear to gamma)
+
+		We get:
+			f (Lb) = b
+			f (Lw) = a * Vmax + b
+
+			b =           f (Lb)
+			a = (f (Lw) - f (Lb)) / Vmax
+		*/
+		auto           oetf      = conv_curve_to_op (curve_s, false, logc_ei_s);
+		const double   white_g   = (*oetf) (lum_white);
+		const double   black_g   = (*oetf) (lvl_black);
+		const double   gain      = (white_g - black_g) / white_g; // Vmax = f (Lw)
+		auto           op_affine = std::make_shared <fmtcl::TransOpAffine> (gain, black_g);
+		op_src = std::make_shared <fmtcl::TransOpCompose> (op_affine, op_src);
+	}
+
+	// Gamma correction
+	if (! fstb::is_eq (gcor, 1.0))
+	{
+		auto           op_gamma =
+			std::make_shared <fmtcl::TransOpPow> (true, gcor, 1, 1e6);
+		op_dst = std::make_shared <fmtcl::TransOpCompose> (op_gamma, op_dst);
+	}
+
+	// Contrast
+	if (! fstb::is_eq (contrast, 1.0))
+	{
+		auto           op_contrast =
+			std::make_shared <fmtcl::TransOpContrast> (contrast);
+		op_dst = std::make_shared <fmtcl::TransOpCompose> (op_contrast, op_dst);
+	}
+
+	// LUTify
+	auto           op_full = std::make_shared <fmtcl::TransOpCompose> (op_src, op_dst);
+
+	return std::make_unique <fmtcl::TransLut> (
+		*op_full, use_log_lut,
+		src_fmt._sf, src_fmt._res, src_fmt._full_flag,
+		dst_fmt._sf, dst_fmt._res, dst_fmt._full_flag,
+		sse2_flag, avx2_flag
+	);
+}
+
+
+
+// str should be already converted to lower case. Unknown names give TransCurve_UNDEF.
+TransCurve	TransUtil::conv_string_to_curve (const std::string &str)
+{
+	assert (! str.empty ());
+
+	// Each test below returns as soon as a known curve name is recognised.
+	if (str == "709")
+	{
+		return TransCurve_709;
+	}
+	if (str == "470m")
+	{
+		return TransCurve_470M;
+	}
+	if (str == "470bg")
+	{
+		return TransCurve_470BG;
+	}
+	if (str == "601")
+	{
+		return TransCurve_601;
+	}
+	if (str == "240")
+	{
+		return TransCurve_240;
+	}
+	if (str.empty () || str == "linear") // NOTE(review): empty is rejected by the assert above -- confirm intent
+	{
+		return TransCurve_LINEAR;
+	}
+	if (str == "log100")
+	{
+		return TransCurve_LOG100;
+	}
+	if (str == "log316")
+	{
+		return TransCurve_LOG316;
+	}
+	if (str == "61966-2-4")
+	{
+		return TransCurve_61966_2_4;
+	}
+	if (str == "1361")
+	{
+		return TransCurve_1361;
+	}
+	if (str == "61966-2-1" || str == "srgb" || str == "sycc")
+	{
+		return TransCurve_SRGB;
+	}
+	if (str == "2020_10")
+	{
+		return TransCurve_2020_10;
+	}
+	if (str == "2020_12" || str == "2020")
+	{
+		return TransCurve_2020_12;
+	}
+	if (str == "2084")
+	{
+		return TransCurve_2084;
+	}
+	if (str == "428-1" || str == "428")
+	{
+		return TransCurve_428;
+	}
+	if (str == "hlg")
+	{
+		return TransCurve_HLG;
+	}
+	if (str == "1886")
+	{
+		return TransCurve_1886;
+	}
+	if (str == "1886a")
+	{
+		return TransCurve_1886A;
+	}
+	if (str == "filmstream")
+	{
+		return TransCurve_FILMSTREAM;
+	}
+	if (str == "slog")
+	{
+		return TransCurve_SLOG;
+	}
+	if (str == "logc2")
+	{
+		return TransCurve_LOGC2;
+	}
+	if (str == "logc3")
+	{
+		return TransCurve_LOGC3;
+	}
+	if (str == "canonlog")
+	{
+		return TransCurve_CANONLOG;
+	}
+	if (str == "adobergb")
+	{
+		return TransCurve_ADOBE_RGB;
+	}
+	if (str == "romm")
+	{
+		return TransCurve_ROMM_RGB;
+	}
+	if (str == "acescc")
+	{
+		return TransCurve_ACESCC;
+	}
+	if (str == "erimm")
+	{
+		return TransCurve_ERIMM;
+	}
+	if (str == "slog2")
+	{
+		return TransCurve_SLOG2;
+	}
+	if (str == "slog3")
+	{
+		return TransCurve_SLOG3;
+	}
+	if (str == "vlog")
+	{
+		return TransCurve_VLOG;
+	}
+
+	// Unknown curve name: asserts when assertions are enabled, otherwise it is
+	// reported to the caller with TransCurve_UNDEF.
+	assert (false);
+
+	return TransCurve_UNDEF;
+}
+
+
+
+TransUtil::OpSPtr	TransUtil::conv_curve_to_op (TransCurve c, bool inv_flag, TransOpLogC::ExpIdx logc_ei)
+{
+	// Creates the operator object implementing transfer curve c. inv_flag is
+	// forwarded to the operator constructor. logc_ei is only used by the
+	// LogC v2 and v3 curves. An unhandled curve value asserts; when asserts
+	// are disabled, a TransOpBypass is returned instead of a null pointer.
+	assert (c >= 0);
+	assert (logc_ei >= 0);
+	assert (logc_ei < TransOpLogC::ExpIdx_NBR_ELT);
+
+	OpSPtr         ptr;
+
+	switch (c)
+	{
+	case TransCurve_709:
+	case TransCurve_601:
+	case TransCurve_2020_10:
+		ptr = std::make_shared <TransOpLinPow> (inv_flag, 1.099, 0.018, 0.45, 4.5);
+		break;
+	case TransCurve_470BG:
+		ptr = std::make_shared <TransOpPow> (inv_flag, 2.8);
+		break;
+	case TransCurve_240:
+		ptr = std::make_shared <TransOpLinPow> (inv_flag, 1.1115, 0.0228, 0.45, 4.0);
+		break;
+	case TransCurve_LINEAR:
+		ptr = std::make_shared <TransOpBypass> ();
+		break;
+	case TransCurve_LOG100:
+		ptr = std::make_shared <TransOpLogTrunc> (inv_flag, 0.5, 0.01);
+		break;
+	case TransCurve_LOG316:
+		ptr = std::make_shared <TransOpLogTrunc> (inv_flag, 0.4, sqrt (10) / 1000);
+		break;
+	case TransCurve_61966_2_4:
+		ptr = std::make_shared <TransOpLinPow> (inv_flag, 1.099, 0.018, 0.45, 4.5, -1e9, 1e9);
+		break;
+	case TransCurve_1361:
+		ptr = std::make_shared <TransOpLinPow> (inv_flag, 1.099, 0.018, 0.45, 4.5, -0.25, 1.33, 4);
+		break;
+	case TransCurve_470M:	// Assumed display gamma 2.2, almost like sRGB.
+	case TransCurve_SRGB:
+#if 1
+		{
+			// More exact formula giving C1 continuity
+			// https://en.wikipedia.org/wiki/SRGB#Theory_of_the_transformation
+			const double   gamma = 2.4;
+			const double   alpha = 1.055;
+			const double   k0    = (alpha - 1) / (gamma - 1);
+			const double   phi   =
+				  (pow (alpha, gamma) * pow (gamma - 1, gamma - 1))
+				/ (pow (alpha - 1, gamma - 1) * pow (gamma, gamma));
+			ptr = std::make_shared <TransOpLinPow> (inv_flag, alpha, k0 / phi, 1.0 / gamma, phi);
+		}
+#else
+		// Rounded constants used in IEC 61966-2-1
+		ptr = std::make_shared <TransOpLinPow> (inv_flag, 1.055, 0.04045 / 12.92, 1.0 / 2.4, 12.92);
+#endif
+		break;
+	case TransCurve_2020_12:
+		ptr = std::make_shared <TransOpLinPow> (inv_flag, 1.09929682680944, 0.018053968510807, 0.45, 4.5);
+		break;
+	case TransCurve_2084:
+		ptr = std::make_shared <TransOp2084> (inv_flag);
+		break;
+	case TransCurve_428:
+		ptr = std::make_shared <TransOpPow> (inv_flag, 2.6, 48.0 / 52.37);
+		break;
+	case TransCurve_HLG:
+		ptr = std::make_shared <TransOpHlg> (inv_flag);
+		break;
+	case TransCurve_1886:
+		ptr = std::make_shared <TransOpPow> (inv_flag, 2.4);
+		break;
+	case TransCurve_1886A:
+		{
+			const double   a1    = 2.6;
+			const double   a2    = 3.0;
+			const double   k0    = 0.35;
+			const double   slope = pow (k0, a2 - a1);
+			const double   beta  = pow (k0, a1);
+			ptr = std::make_shared <TransOpLinPow> (
+				inv_flag, 1, beta, 1.0 / a1, slope, 0, 1, 1, 1.0 / a2
+			);
+		}
+		break;
+	case TransCurve_FILMSTREAM:
+		ptr = std::make_shared <TransOpFilmStream> (inv_flag);
+		break;
+	case TransCurve_SLOG:
+		ptr = std::make_shared <TransOpSLog> (inv_flag, false);
+		break;
+	case TransCurve_LOGC2:
+		ptr = std::make_shared <TransOpLogC> (inv_flag, TransOpLogC::Type_LOGC_V2, logc_ei);
+		break;
+	case TransCurve_LOGC3:
+		ptr = std::make_shared <TransOpLogC> (inv_flag, TransOpLogC::Type_LOGC_V3, logc_ei);
+		break;
+	case TransCurve_CANONLOG:
+		ptr = std::make_shared <TransOpCanonLog> (inv_flag);
+		break;
+	case TransCurve_ADOBE_RGB:
+		ptr = std::make_shared <TransOpPow> (inv_flag, 563.0 / 256);
+		break;
+	case TransCurve_ROMM_RGB:
+		ptr = std::make_shared <TransOpLinPow> (inv_flag, 1, 0.001953, 1.0 / 1.8, 16);
+		break;
+	case TransCurve_ACESCC:
+		ptr = std::make_shared <TransOpAcesCc> (inv_flag);
+		break;
+	case TransCurve_ERIMM:
+		ptr = std::make_shared <TransOpErimm> (inv_flag);
+		break;
+	case TransCurve_SLOG2:
+		ptr = std::make_shared <TransOpSLog> (inv_flag, true);
+		break;
+	case TransCurve_SLOG3:
+		ptr = std::make_shared <TransOpSLog3> (inv_flag);
+		break;
+	case TransCurve_VLOG:
+		ptr = std::make_shared <TransOpLogC> (inv_flag, TransOpLogC::Type_VLOG);
+		break;
+	default:
+		assert (false);
+		break;
+	}
+
+	if (! ptr)
+	{
+		ptr = std::make_shared <TransOpBypass> ();
+	}
+
+	return ptr;
+}
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+}  // namespace fmtcl
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/TransUtil.h b/src/fmtcl/TransUtil.h
new file mode 100644
index 0000000..add44f1
--- /dev/null
+++ b/src/fmtcl/TransUtil.h
@@ -0,0 +1,100 @@
+/*****************************************************************************
+
+        TransUtil.h
+        Author: Laurent de Soras, 2021
+
+--- Legal stuff ---
+
+This program is free software. It comes without any warranty, to
+the extent permitted by applicable law. You can redistribute it
+and/or modify it under the terms of the Do What The Fuck You Want
+To Public License, Version 2, as published by Sam Hocevar. See
+http://www.wtfpl.net/ for more details.
+
+*Tab=3***********************************************************************/
+
+
+
+#pragma once
+#if ! defined (fmtcl_TransUtil_HEADER_INCLUDED)
+#define fmtcl_TransUtil_HEADER_INCLUDED
+
+
+
+/*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+#include "fmtcl/PicFmt.h"
+#include "fmtcl/TransCurve.h"
+#include "fmtcl/TransLut.h"
+#include "fmtcl/TransOpInterface.h"
+#include "fmtcl/TransOpLogC.h"
+
+#include <memory>
+#include <string>
+
+
+
+namespace fmtcl
+{
+
+
+
+class TransUtil
+{
+// Static-only helper class: every constructor and assignment is deleted.
+/*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+public:
+	// Shared pointer on a transfer operator
+	typedef  std::shared_ptr <TransOpInterface> OpSPtr;
+	// Builds the TransLut going from (src_fmt, curve_s) to (dst_fmt, curve_d)
+	static std::unique_ptr <TransLut>
+	               build_lut (PicFmt dst_fmt, TransCurve curve_d, TransOpLogC::ExpIdx logc_ei_d, PicFmt src_fmt, TransCurve curve_s, TransOpLogC::ExpIdx logc_ei_s, double contrast, double gcor, double lvl_black, bool sse2_flag, bool avx2_flag);
+	// Lower-case curve name to TransCurve, and TransCurve to operator object
+	static TransCurve
+	               conv_string_to_curve (const std::string &str);
+	static OpSPtr  conv_curve_to_op (TransCurve c, bool inv_flag, TransOpLogC::ExpIdx logc_ei);
+
+
+
+/*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+protected:
+
+
+
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+
+
+/*\\\ FORBIDDEN MEMBER FUNCTIONS \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+private:
+
+	               TransUtil ()                               = delete;
+	               TransUtil (const TransUtil &other)         = delete;
+	               TransUtil (TransUtil &&other)              = delete;
+	TransUtil &    operator = (const TransUtil &other)        = delete;
+	TransUtil &    operator = (TransUtil &&other)             = delete;
+	bool           operator == (const TransUtil &other) const = delete;
+	bool           operator != (const TransUtil &other) const = delete;
+
+}; // class TransUtil
+
+
+
+}  // namespace fmtcl
+
+
+
+//#include "fmtcl/TransUtil.hpp"
+
+
+
+#endif   // fmtcl_TransUtil_HEADER_INCLUDED
+
+
+
+/*\\\ EOF \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
diff --git a/src/fmtcl/VoidAndCluster.h b/src/fmtcl/VoidAndCluster.h
index 2f0305e..74be32d 100644
--- a/src/fmtcl/VoidAndCluster.h
+++ b/src/fmtcl/VoidAndCluster.h
@@ -3,6 +3,18 @@
         VoidAndCluster.h
         Author: Laurent de Soras, 2015
 
+Reference:
+Robert Ulichney,
+The Void-And-Cluster Method for Dither Array Generation
+Proc. SPIE, Human Vision, Visual Processing, and Digital Display IV,
+vol. 1913, pp. 332-343, Feb. 1-4, 1993
+
+*** TO DO: implement:
+Hakan Ancin, Anoop K. Bhattacharjya, Joseph Shou-Pyng Shu,
+New void-and-cluster method for improved halftone uniformity,
+Journal of Electronic Imaging 8(1), January 1999,
+https://doi.org/10.1117/1.482701 ***
+
 --- Legal stuff ---
 
 This program is free software. It comes without any warranty, to
diff --git a/src/fmtcl/fnc.cpp b/src/fmtcl/fnc.cpp
index 49f1a0c..5174035 100644
--- a/src/fmtcl/fnc.cpp
+++ b/src/fmtcl/fnc.cpp
@@ -25,6 +25,11 @@ To Public License, Version 2, as published by Sam Hocevar. See
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 #include "fmtcl/fnc.h"
+#include "fmtcl/Mat4.h"
+#include "fmtcl/MatrixProc.h"
+#include "fmtcl/PicFmt.h"
+
+#include <algorithm>
 
 #include <cassert>
 #include <cstdint>
@@ -36,10 +41,104 @@ namespace fmtcl
 
 
 
+/*\\\ PRIVATE \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
+
+
+
+static void	override_fmt_with_csp (PicFmt &fmt, ColorSpaceH265 csp_out, int plane_out)
+{
+	// A negative plane_out leaves the format untouched
+	if (plane_out < 0)
+	{
+		return;
+	}
+
+	// The colour family follows the output colorspace: RGB and YCgCo select
+	// their own family, any other colorspace is handled as YUV.
+	switch (csp_out)
+	{
+	case ColorSpaceH265_RGB:   fmt._col_fam = ColorFamily_RGB;   break;
+	case ColorSpaceH265_YCGCO: fmt._col_fam = ColorFamily_YCGCO; break;
+	default:                   fmt._col_fam = ColorFamily_YUV;   break;
+	}
+
+}
+
+
+
+// Fills m with the level conversion between fmt ("Int", may be float too)
+// and its float version; to_flt_flag selects the fmt -> float direction.
+// Float ranges: R, G, B, Y: [0 ; 1] and U, V, Cg, Co: [-0.5 ; 0.5]
+static void	make_mat_flt_int (Mat4 &m, bool to_flt_flag, const PicFmt &fmt)
+{
+	PicFmt         fmt2 (fmt);
+	fmt2._sf = SplFmt_FLOAT;
+	// Default direction is float (fmt2) -> fmt; swapped when to_flt_flag is set
+	const PicFmt * fmt_src_ptr = &fmt2;
+	const PicFmt * fmt_dst_ptr = &fmt;
+	if (to_flt_flag)
+	{
+		std::swap (fmt_src_ptr, fmt_dst_ptr);
+	}
+	// (ay, by): plane 0. (ac, bc): plane 1, or plane 0 again for gray formats
+	double         ay, by;
+	double         ac, bc;
+	const int      ch_plane = (fmt_dst_ptr->_col_fam != ColorFamily_GRAY) ? 1 : 0;
+	compute_fmt_mac_cst (ay, by, *fmt_dst_ptr, *fmt_src_ptr, 0       );
+	compute_fmt_mac_cst (ac, bc, *fmt_dst_ptr, *fmt_src_ptr, ch_plane);
+	// Rows 0-2: gain on the diagonal, additive constant in the last column
+	m[0][0] = ay; m[0][1] =  0; m[0][2] =  0; m[0][3] = by;
+	m[1][0] =  0; m[1][1] = ac; m[1][2] =  0; m[1][3] = bc;
+	m[2][0] =  0; m[2][1] =  0; m[2][2] = ac; m[2][3] = bc;
+	m[3][0] =  0; m[3][1] =  0; m[3][2] =  0; m[3][3] =  1;
+}
+
+
+
 /*\\\ PUBLIC \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 
 
+int	compute_plane_width (ColorFamily col_fam, int ss_h, int base_w, int plane_index)
+{
+	assert (col_fam >= 0);
+	assert (col_fam < ColorFamily_NBR_ELT);
+	assert (plane_index >= 0);
+	assert (ss_h >= 0);
+	assert (base_w >= 0);
+
+	// Only chroma planes are subsampled, the other planes keep the base width
+	if (! is_chroma_plane (col_fam, plane_index))
+	{
+		return base_w;
+	}
+
+	assert ((base_w & ((1 << ss_h) - 1)) == 0);	// Multiple of 1 << ss_h
+	return base_w >> ss_h;
+}
+
+
+
+int	compute_plane_height (ColorFamily col_fam, int ss_v, int base_h, int plane_index)
+{
+	assert (col_fam >= 0);
+	assert (col_fam < ColorFamily_NBR_ELT);
+	assert (plane_index >= 0);
+	assert (ss_v >= 0);
+	assert (base_h >= 0);
+
+	// Only chroma planes are subsampled, the other planes keep the base height
+	if (! is_chroma_plane (col_fam, plane_index))
+	{
+		return base_h;
+	}
+
+	assert ((base_h & ((1 << ss_v) - 1)) == 0);	// Multiple of 1 << ss_v
+	return base_h >> ss_v;
+}
+
+
+
 bool	has_chroma (ColorFamily col_fam)
 {
 	assert (col_fam >= 0);
@@ -73,25 +172,21 @@ bool	is_full_range_default (ColorFamily col_fam)
 
 
 
-double	compute_pix_scale (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane_index, bool full_flag)
+double	compute_pix_scale (const PicFmt &fmt, int plane_index)
 {
-	assert (spl_fmt >= 0);
-	assert (spl_fmt < SplFmt_NBR_ELT);
-	assert (nbr_bits > 0);
-	assert (col_fam >= 0);
-	assert (col_fam < ColorFamily_NBR_ELT);
+	assert (fmt.is_valid ());
 	assert (plane_index >= 0);
 
 	double         scale = 1.0;
 
-	if (spl_fmt != SplFmt_FLOAT)
+	if (fmt._sf != SplFmt_FLOAT)
 	{
-		const int      bps_m8 = nbr_bits - 8;
-		if (full_flag)
+		const int      bps_m8 = fmt._res - 8;
+		if (fmt._full_flag)
 		{
-			scale = double ((uint64_t (1) << nbr_bits) - 1);
+			scale = double ((uint64_t (1) << fmt._res) - 1);
 		}
-		else if (is_chroma_plane (col_fam, plane_index))
+		else if (is_chroma_plane (fmt._col_fam, plane_index))
 		{
 			scale = double ((uint64_t (224)) << bps_m8);
 		}
@@ -101,32 +196,28 @@ double	compute_pix_scale (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int
 		}
 	}
 
-	return (scale);
+	return scale;
 }
 
 
 
-double	get_pix_min (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane_index, bool full_flag)
+double	get_pix_min (const PicFmt &fmt, int plane_index)
 {
-	assert (spl_fmt >= 0);
-	assert (spl_fmt < SplFmt_NBR_ELT);
-	assert (nbr_bits > 0);
-	assert (col_fam >= 0);
-	assert (col_fam < ColorFamily_NBR_ELT);
+	assert (fmt.is_valid ());
 	assert (plane_index >= 0);
 
 	double         add_val = 0;
 
-	if (spl_fmt == SplFmt_FLOAT)
+	if (fmt._sf == SplFmt_FLOAT)
 	{
-		if (is_chroma_plane (col_fam, plane_index))
+		if (is_chroma_plane (fmt._col_fam, plane_index))
 		{
 			add_val = -0.5;
 		}
 	}
-	else if (full_flag)
+	else if (fmt._full_flag)
 	{
-		if (is_chroma_plane (col_fam, plane_index))
+		if (is_chroma_plane (fmt._col_fam, plane_index))
 		{
 			// So the neutral value (0) is exactly: 1 << (nbr_bits - 1)
 			add_val = 0.5;
@@ -134,38 +225,78 @@ double	get_pix_min (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane
 	}
 	else
 	{
-		add_val = double ((uint64_t (16)) << (nbr_bits - 8));
+		add_val = double ((uint64_t (16)) << (fmt._res - 8));
 	}
 
-	return (add_val);
+	return add_val;
 }
 
 
 
-void	compute_fmt_mac_cst (double &gain, double &add_cst, SplFmt dst_spl_fmt, int dst_nbr_bits, ColorFamily dst_col_fam, bool dst_full_flag, SplFmt src_spl_fmt, int src_nbr_bits, ColorFamily src_col_fam, bool src_full_flag, int plane_index)
+void	compute_fmt_mac_cst (double &gain, double &add_cst, const PicFmt &dst_fmt, const PicFmt &src_fmt, int plane_index)
 {
 	// (X_d - m_d) / S_d  =  (X_s - m_s) / S_s
 	// X_d = X_s * (S_d / S_s) + (m_d - m_s * S_d / S_s)
 	//                gain              add_cst
-	const double   scale_src = compute_pix_scale (
-		src_spl_fmt, src_nbr_bits, src_col_fam, plane_index, src_full_flag
-	);
-	const double   scale_dst = compute_pix_scale (
-		dst_spl_fmt, dst_nbr_bits, dst_col_fam, plane_index, dst_full_flag
-	);
+	const double   scale_src = compute_pix_scale (src_fmt, plane_index);
+	const double   scale_dst = compute_pix_scale (dst_fmt, plane_index);
 	gain = scale_dst / scale_src;
 
-	const double   cst_src = get_pix_min (
-		src_spl_fmt, src_nbr_bits, src_col_fam, plane_index, src_full_flag
-	);
-	const double   cst_dst = get_pix_min (
-		dst_spl_fmt, dst_nbr_bits, dst_col_fam, plane_index, dst_full_flag
-	);
+	const double   cst_src = get_pix_min (src_fmt, plane_index);
+	const double   cst_dst = get_pix_min (dst_fmt, plane_index);
 	add_cst = cst_dst - cst_src * gain;
 }
 
 
 
+int	prepare_matrix_coef (MatrixProc &mat_proc, const Mat4 &mat_main, const PicFmt &dst_fmt, const PicFmt &src_fmt, ColorSpaceH265 csp_out, int plane_out)
+{
+	const bool     int_proc_flag =
+		(SplFmt_is_int (src_fmt._sf) && SplFmt_is_int (dst_fmt._sf));
+
+	Mat4           m (1, Mat4::Preset_DIAGONAL);
+
+	PicFmt         dst_fmt2 = dst_fmt;
+	if (int_proc_flag)
+	{
+		// For the coefficient calculation, use the same output bitdepth
+		// as the input. The bitdepth change will be done separately with
+		// a simple bitshift.
+		dst_fmt2._res = src_fmt._res;
+	}
+
+	override_fmt_with_csp (dst_fmt2, csp_out, plane_out);
+	// m is multiplied, in this order, by m1d (float -> dst_fmt2), mat_main and m1s (src_fmt -> float)
+	Mat4           m1s;
+	Mat4           m1d;
+	make_mat_flt_int (m1s, true , src_fmt );
+	make_mat_flt_int (m1d, false, dst_fmt2);
+	m *= m1d;
+	if (! int_proc_flag)
+	{
+		if (plane_out > 0 && is_chroma_plane (dst_fmt2._col_fam, plane_out))
+		{
+			// When we extract a single plane, it's a conversion to R or
+			// to Y, so the output range is always [0; 1]. Therefore we
+			// need to offset the chroma planes.
+			m [plane_out] [MatrixProc::_nbr_planes] += 0.5;
+		}
+	}
+	m *= mat_main;
+	m *= m1s;
+
+	const MatrixProc::Err   ret_val = mat_proc.configure (
+		m, int_proc_flag,
+		src_fmt._sf, src_fmt._res,
+		dst_fmt._sf, dst_fmt._res,
+		plane_out
+	);
+	// The MatrixProc::Err code is handed back to the caller as an int
+	return ret_val;
+}
+
+
+
 /*\\\ PROTECTED \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 
diff --git a/src/fmtcl/fnc.h b/src/fmtcl/fnc.h
index d0c9659..6c908aa 100644
--- a/src/fmtcl/fnc.h
+++ b/src/fmtcl/fnc.h
@@ -28,6 +28,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 #include "fmtcl/ColorFamily.h"
+#include "fmtcl/ColorSpaceH265.h"
 #include "fmtcl/SplFmt.h"
 
 
@@ -35,14 +36,21 @@ To Public License, Version 2, as published by Sam Hocevar. See
 namespace fmtcl
 {
 
+class Mat4;
+class MatrixProc;
+class PicFmt;
 
 
+
+int    compute_plane_width (ColorFamily col_fam, int ss_h, int base_w, int plane_index);
+int    compute_plane_height (ColorFamily col_fam, int ss_v, int base_h, int plane_index);
 bool   has_chroma (ColorFamily col_fam);
 bool   is_chroma_plane (ColorFamily col_fam, int plane_index);
 bool   is_full_range_default (ColorFamily col_fam);
-double compute_pix_scale (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane_index, bool full_flag);
-double get_pix_min (SplFmt spl_fmt, int nbr_bits, ColorFamily col_fam, int plane_index, bool full_flag);
-void   compute_fmt_mac_cst (double &gain, double &add_cst, SplFmt dst_spl_fmt, int dst_nbr_bits, ColorFamily dst_col_fam, bool dst_full_flag, SplFmt src_spl_fmt, int src_nbr_bits, ColorFamily src_col_fam, bool src_full_flag, int plane_index);
+double compute_pix_scale (const PicFmt &fmt, int plane_index);
+double get_pix_min (const PicFmt &fmt, int plane_index);
+void   compute_fmt_mac_cst (double &gain, double &add_cst, const PicFmt &dst_fmt, const PicFmt &src_fmt, int plane_index);
+int    prepare_matrix_coef (MatrixProc &mat_proc, const Mat4 &mat_main, const PicFmt &dst_fmt, const PicFmt &src_fmt, ColorSpaceH265 csp_out, int plane_out);
 
 
 
diff --git a/src/fstb/AllocAlign.h b/src/fstb/AllocAlign.h
index ce54cfe..0f7cb75 100644
--- a/src/fstb/AllocAlign.h
+++ b/src/fstb/AllocAlign.h
@@ -30,6 +30,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 #include <memory>
 
 #include <cstddef>
+#include <cstdint>
 
 
 
@@ -46,7 +47,7 @@ class AllocAlign
 
 public:
 
-	static const long ALIGNMENT = ALIG;
+	static constexpr long ALIGNMENT = ALIG;
 
 	typedef	T	value_type;
 	typedef	value_type *	pointer;
@@ -56,31 +57,36 @@ class AllocAlign
 	typedef	size_t	size_type;
 	typedef	ptrdiff_t	difference_type;
 
-	               AllocAlign ()                                  = default;
-	               AllocAlign (AllocAlign <T, ALIG> const &other) = default;
+	               AllocAlign ()                                   = default;
+	               AllocAlign (AllocAlign <T, ALIG> const &other)  = default;
 	template <typename U>
 	               AllocAlign (AllocAlign <U, ALIG> const &/*other*/) {}
-	               ~AllocAlign ()                                 = default;
+	               ~AllocAlign ()                                  = default;
 
 	// Address
-	inline pointer address (reference r);
-	inline const_pointer
-	               address (const_reference r);
+	[[deprecated]] inline pointer
+	               address (reference r) noexcept;
+	[[deprecated]] inline const_pointer
+	               address (const_reference r) noexcept;
 
 	// Memory allocation
-	inline pointer allocate (size_type n, typename std::allocator <void>::const_pointer ptr = 0);
-	inline void    deallocate (pointer p, size_type n);
+	[[deprecated]] inline pointer
+	               allocate (size_type n, const void *ptr);
+	inline pointer allocate (size_type n);
+	inline void    deallocate (pointer p, size_type n) noexcept;
 
 	// Size
-	inline size_type
-	               max_size() const;
+	[[deprecated]] inline size_type
+	               max_size () const noexcept;
 
 	// Construction/destruction
-	inline void    construct (pointer ptr, const T & t);
-	inline void    destroy (pointer ptr);
+	[[deprecated]] inline void
+	               construct (pointer ptr, const T & t);
+	[[deprecated]] inline void
+	               destroy (pointer ptr);
 
-	inline bool    operator == (AllocAlign <T, ALIG> const &other);
-	inline bool    operator != (AllocAlign <T, ALIG> const &other);
+	inline bool    operator == (AllocAlign <T, ALIG> const &other) noexcept;
+	inline bool    operator != (AllocAlign <T, ALIG> const &other) noexcept;
 
 	template <typename U>
 	struct rebind
diff --git a/src/fstb/AllocAlign.hpp b/src/fstb/AllocAlign.hpp
index 67538e9..9aeedc3 100644
--- a/src/fstb/AllocAlign.hpp
+++ b/src/fstb/AllocAlign.hpp
@@ -41,39 +41,46 @@ namespace fstb
 
 
 template <class T, long ALIG>
-typename AllocAlign <T, ALIG>::pointer	AllocAlign <T, ALIG>::address (reference r)
+typename AllocAlign <T, ALIG>::pointer	AllocAlign <T, ALIG>::address (reference r) noexcept
 {
-	return (&r);
+	return &r;
 }
 
 
 
 template <class T, long ALIG>
-typename AllocAlign <T, ALIG>::const_pointer	AllocAlign <T, ALIG>::address (const_reference r)
+typename AllocAlign <T, ALIG>::const_pointer	AllocAlign <T, ALIG>::address (const_reference r) noexcept
 {
-	return (&r);
+	return &r;
 }
 
 
 
 template <class T, long ALIG>
-typename AllocAlign <T, ALIG>::pointer	AllocAlign <T, ALIG>::allocate (size_type n, typename std::allocator <void>::const_pointer /*ptr*/)
+typename AllocAlign <T, ALIG>::pointer	AllocAlign <T, ALIG>::allocate (size_type n, const void *ptr)
 {
-	static_assert ((sizeof (ptrdiff_t) >= sizeof (void *)), "");
+	fstb::unused (ptr);
+	return allocate (n);
+}
+
 
-	assert (n >= 0);
 
+template <class T, long ALIG>
+typename AllocAlign <T, ALIG>::pointer	AllocAlign <T, ALIG>::allocate (size_type n)
+{
 	const size_t   nbr_bytes = sizeof (T) * n;
 
 #if defined (_MSC_VER)
 
-	pointer        zone_ptr = reinterpret_cast <pointer> (
+	pointer        zone_ptr = static_cast <pointer> (
 		_aligned_malloc (nbr_bytes, ALIG)
 	);
 
-#elif ! defined (__MINGW32__) && ! defined (__MINGW64__) && ! defined (__CYGWIN__)
+//#elif ! defined (__MINGW32__) && ! defined (__MINGW64__) && ! defined (__CYGWIN__)
+#elif (defined (_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) \
+	&& ! defined (STM32H750xx)
 
-	pointer        zone_ptr = 0;
+	pointer        zone_ptr = nullptr;
 	void *         tmp_ptr;
 	if (posix_memalign (&tmp_ptr, ALIG, nbr_bytes) == 0)
 	{
@@ -82,16 +89,16 @@ typename AllocAlign <T, ALIG>::pointer	AllocAlign <T, ALIG>::allocate (size_type
 
 #else // Platform-independent implementation
 
-	const size_t   ptr_size = sizeof (void *);
-	const size_t   offset = ptr_size + ALIG - 1;
+	const size_t   ptr_size    = sizeof (void *);
+	const size_t   offset      = ptr_size + ALIG - 1;
 	const size_t   alloc_bytes = offset + nbr_bytes;
-	void *         alloc_ptr = new char [alloc_bytes];
-	pointer        zone_ptr = 0;
-	if (alloc_ptr != 0)
+	void *         alloc_ptr   = new char [alloc_bytes];
+	pointer        zone_ptr    = nullptr;
+	if (alloc_ptr != nullptr)
 	{
-		const ptrdiff_t   alloc_l = reinterpret_cast <ptrdiff_t> (alloc_ptr);
-		const ptrdiff_t   zone_l = (alloc_l + offset) & (-ALIG);
-		assert (zone_l >= ptrdiff_t (alloc_l + ptr_size));
+		const intptr_t    alloc_l = reinterpret_cast <intptr_t> (alloc_ptr);
+		const intptr_t    zone_l  = (alloc_l + offset) & (-ALIG);
+		assert (zone_l >= intptr_t (alloc_l + ptr_size));
 		void **        ptr_ptr = reinterpret_cast <void **> (zone_l - ptr_size);
 		*ptr_ptr = alloc_ptr;
 		zone_ptr = reinterpret_cast <pointer> (zone_l);
@@ -99,9 +106,11 @@ typename AllocAlign <T, ALIG>::pointer	AllocAlign <T, ALIG>::allocate (size_type
 
 #endif
 
-	if (zone_ptr == 0)
+	if (zone_ptr == nullptr)
 	{
+#if defined (__cpp_exceptions) || ! defined (__GNUC__)
 		throw std::bad_alloc ();
+#endif
 	}
 
 	return (zone_ptr);
@@ -110,27 +119,38 @@ typename AllocAlign <T, ALIG>::pointer	AllocAlign <T, ALIG>::allocate (size_type
 
 
 template <class T, long ALIG>
-void	AllocAlign <T, ALIG>::deallocate (pointer ptr, size_type /*n*/)
+void	AllocAlign <T, ALIG>::deallocate (pointer ptr, size_type n) noexcept
 {
-	if (ptr != 0)
+	fstb::unused (n);
+
+	if (ptr != nullptr)
 	{
 
 #if defined (_MSC_VER)
 
-		_aligned_free (ptr);
+		try
+		{
+			_aligned_free (ptr);
+		}
+		catch (...)
+		{
+			assert (false);
+		}
 
-#elif ! defined (__MINGW32__) && ! defined (__MINGW64__) && ! defined (__CYGWIN__)
+//#elif ! defined (__MINGW32__) && ! defined (__MINGW64__) && ! defined (__CYGWIN__)
+#elif (defined (_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112L) \
+	&& ! defined (STM32H750xx)
 
 		free (ptr);
 
 #else // Platform-independent implementation
 
-		const size_t   ptr_size = sizeof (void *);
-		const ptrdiff_t   zone_l = reinterpret_cast <ptrdiff_t> (ptr);
-		void **			ptr_ptr = reinterpret_cast <void **> (zone_l - ptr_size);
+		const size_t   ptr_size  = sizeof (void *);
+		const intptr_t zone_l    = reinterpret_cast <intptr_t> (ptr);
+		void **			ptr_ptr   = reinterpret_cast <void **> (zone_l - ptr_size);
 		void *			alloc_ptr = *ptr_ptr;
-		assert (alloc_ptr != 0);
-		assert (reinterpret_cast <ptrdiff_t> (alloc_ptr) < zone_l);
+		assert (alloc_ptr != nullptr);
+		assert (reinterpret_cast <intptr_t> (alloc_ptr) < zone_l);
 
 		delete [] reinterpret_cast <char *> (alloc_ptr);
 
@@ -141,7 +161,7 @@ void	AllocAlign <T, ALIG>::deallocate (pointer ptr, size_type /*n*/)
 
 
 template <class T, long ALIG>
-typename AllocAlign <T, ALIG>::size_type	AllocAlign <T, ALIG>::max_size () const
+typename AllocAlign <T, ALIG>::size_type	AllocAlign <T, ALIG>::max_size () const noexcept
 {
 	static_assert ((static_cast <size_type> (-1) > 0), "");
 
@@ -153,7 +173,7 @@ typename AllocAlign <T, ALIG>::size_type	AllocAlign <T, ALIG>::max_size () const
 template <class T, long ALIG>
 void	AllocAlign <T, ALIG>::construct (pointer ptr, const T &t)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 
 	new (ptr) T (t);
 }
@@ -163,7 +183,7 @@ void	AllocAlign <T, ALIG>::construct (pointer ptr, const T &t)
 template <class T, long ALIG>
 void	AllocAlign <T, ALIG>::destroy (pointer ptr)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 
 	ptr->~T ();
 }
@@ -171,17 +191,17 @@ void	AllocAlign <T, ALIG>::destroy (pointer ptr)
 
 
 template <class T, long ALIG>
-bool	AllocAlign <T, ALIG>::operator == (AllocAlign <T, ALIG> const &other)
+bool	AllocAlign <T, ALIG>::operator == (AllocAlign <T, ALIG> const &other) noexcept
 {
 	fstb::unused (other);
 
-	return (true);
+	return true;
 }
 
 
 
 template <class T, long ALIG>
-bool	AllocAlign <T, ALIG>::operator != (AllocAlign <T, ALIG> const &other)
+bool	AllocAlign <T, ALIG>::operator != (AllocAlign <T, ALIG> const &other) noexcept
 {
 	return (! operator == (other));
 }
diff --git a/src/fstb/ArrayAlign.h b/src/fstb/ArrayAlign.h
index 2039155..c892d39 100644
--- a/src/fstb/ArrayAlign.h
+++ b/src/fstb/ArrayAlign.h
@@ -63,16 +63,21 @@ class ArrayAlign
 	ArrayAlign &   operator = (const ArrayAlign &other);
 
 	inline const Element &
-	               operator [] (long pos) const;
+	               operator [] (long pos) const noexcept;
 	inline Element &
-	               operator [] (long pos);
+	               operator [] (long pos) noexcept;
+
+	inline const Element *
+	               data () const noexcept;
+	inline Element *
+	               data () noexcept;
 
 	static inline long
-	               size ();
+	               size () noexcept;
 	static inline long
-	               length ();
+	               length () noexcept;
 	static inline long
-	               get_alignment ();
+	               get_alignment () noexcept;
 
 
 
diff --git a/src/fstb/ArrayAlign.hpp b/src/fstb/ArrayAlign.hpp
index 350f455..f32d90a 100644
--- a/src/fstb/ArrayAlign.hpp
+++ b/src/fstb/ArrayAlign.hpp
@@ -22,8 +22,6 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
-#include "fstb/def.h"
-
 #include <new>
 
 #include <cassert>
@@ -54,10 +52,9 @@ class DestroyAux <true>
 public:
 	template <typename T>
 	static inline void
-	               destroy_elt (T *ptr)
+	               destroy_elt (T * /*ptr*/)
 	{
 		// Nothing
-		fstb::unused (ptr);
 	}
 };
 
@@ -89,8 +86,6 @@ ArrayAlign <T, LEN, AL>::ArrayAlign (const ArrayAlign <T, LEN, AL> &other)
 :/*	_data ()
 ,*/	_data_ptr (0)
 {
-	assert (&other != 0);
-
 	_data_ptr = reinterpret_cast <Element *> (
 		reinterpret_cast <ptrdiff_t> (&_data [ALIGNMENT - 1]) & -ALIGNMENT
 	);
@@ -122,8 +117,6 @@ ArrayAlign <T, LEN, AL>::~ArrayAlign ()
 template <typename T, long LEN, long AL>
 ArrayAlign <T, LEN, AL> &	ArrayAlign <T, LEN, AL>::operator = (const ArrayAlign <T, LEN, AL> &other)
 {
-	assert (&other != 0);
-
 	if (this != &other)
 	{
 		for (long pos = 0; pos < NBR_ELT; ++pos)
@@ -132,55 +125,75 @@ ArrayAlign <T, LEN, AL> &	ArrayAlign <T, LEN, AL>::operator = (const ArrayAlign
 		}
 	}
 
-	return (*this);
+	return *this;
 }
 
 
 
 template <typename T, long LEN, long AL>
-const typename ArrayAlign <T, LEN, AL>::Element &	ArrayAlign <T, LEN, AL>::operator [] (long pos) const
+const typename ArrayAlign <T, LEN, AL>::Element &	ArrayAlign <T, LEN, AL>::operator [] (long pos) const noexcept
 {
 	assert (_data_ptr != 0);
 	assert (pos >= 0);
 	assert (pos < NBR_ELT);
 
-	return (_data_ptr [pos]);
+	return _data_ptr [pos];
 }
 
 
 
 template <typename T, long LEN, long AL>
-typename ArrayAlign <T, LEN, AL>::Element &	ArrayAlign <T, LEN, AL>::operator [] (long pos)
+typename ArrayAlign <T, LEN, AL>::Element &	ArrayAlign <T, LEN, AL>::operator [] (long pos) noexcept
 {
 	assert (_data_ptr != 0);
 	assert (pos >= 0);
 	assert (pos < NBR_ELT);
 
-	return (_data_ptr [pos]);
+	return _data_ptr [pos];
+}
+
+
+
+template <typename T, long LEN, long AL>
+const typename ArrayAlign <T, LEN, AL>::Element *	ArrayAlign <T, LEN, AL>::data () const noexcept
+{
+	assert (_data_ptr != 0);
+
+	return _data_ptr;
+}
+
+
+
+template <typename T, long LEN, long AL>
+typename ArrayAlign <T, LEN, AL>::Element *	ArrayAlign <T, LEN, AL>::data () noexcept
+{
+	assert (_data_ptr != 0);
+
+	return _data_ptr;
 }
 
 
 
 template <typename T, long LEN, long AL>
-long	ArrayAlign <T, LEN, AL>::size ()
+long	ArrayAlign <T, LEN, AL>::size () noexcept
 {
-	return (NBR_ELT);
+	return NBR_ELT;
 }
 
 
 
 template <typename T, long LEN, long AL>
-long	ArrayAlign <T, LEN, AL>::length ()
+long	ArrayAlign <T, LEN, AL>::length () noexcept
 {
-	return (NBR_ELT);
+	return NBR_ELT;
 }
 
 
 
 template <typename T, long LEN, long AL>
-long	ArrayAlign <T, LEN, AL>::get_alignment ()
+long	ArrayAlign <T, LEN, AL>::get_alignment () noexcept
 {
-	return (ALIGNMENT);
+	return ALIGNMENT;
 }
 
 
diff --git a/src/fstb/CpuId.cpp b/src/fstb/CpuId.cpp
index 8e0de55..77d4634 100644
--- a/src/fstb/CpuId.cpp
+++ b/src/fstb/CpuId.cpp
@@ -25,9 +25,8 @@ To Public License, Version 2, as published by Sam Hocevar. See
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
 #include "fstb/CpuId.h"
-#include "fstb/def.h"
 
-#if (fstb_ARCHI == fstb_ARCHI_X86)
+#if fstb_ARCHI == fstb_ARCHI_X86
 	#if defined (__GNUC__)
 		#include <cpuid.h>
 	#elif defined (_MSC_VER)
@@ -48,10 +47,9 @@ namespace fstb
 
 
 
-// https://en.wikipedia.org/wiki/CPUID
 CpuId::CpuId ()
 {
-#if (fstb_ARCHI == fstb_ARCHI_X86)
+#if fstb_ARCHI == fstb_ARCHI_X86
 
 	unsigned int   eax;
 	unsigned int   ebx;
@@ -66,6 +64,7 @@ CpuId::CpuId ()
 	call_cpuid (0x00000001, 0, eax, ebx, ecx, edx);
 
 	_mmx_flag     = ((edx & (1L << 23)) != 0);
+	_fxsr_flag    = ((edx & (1L << 24)) != 0);
 	_sse_flag     = ((edx & (1L << 25)) != 0);
 	_sse2_flag    = ((edx & (1L << 26)) != 0);
 	_sse3_flag    = ((ecx & (1L <<  0)) != 0);
@@ -74,6 +73,7 @@ CpuId::CpuId ()
 	_fma3_flag    = ((ecx & (1L << 16)) != 0);
 	_sse41_flag   = ((ecx & (1L << 19)) != 0);
 	_sse42_flag   = ((ecx & (1L << 20)) != 0);
+	_abm_flag     = ((ecx & (1L << 23)) != 0);
 	_avx_flag     = ((ecx & (1L << 28)) != 0);
 	_f16c_flag    = ((ecx & (1L << 29)) != 0);
 
@@ -81,7 +81,9 @@ CpuId::CpuId ()
 	{
 		// Extended Features
 		call_cpuid (0x00000007, 0, eax, ebx, ecx, edx);
+		_bmi1_flag    = ((ebx & (1L <<  3)) != 0);
 		_avx2_flag    = ((ebx & (1L <<  5)) != 0);
+		_bmi2_flag    = ((ebx & (1L <<  8)) != 0);
 		_avx512f_flag = ((ebx & (1L << 16)) != 0);
 	}
 
@@ -94,6 +96,7 @@ CpuId::CpuId ()
 		_isse_flag    = ((edx & (1L << 22)) != 0) || _sse_flag;
 		_sse4a_flag   = ((ecx & (1L <<  6)) != 0);
 		_fma4_flag    = ((ecx & (1L << 16)) != 0);
+		_3dnow_flag   = ((edx & (1UL << 31)) != 0); // 3DNow! is EDX bit 31 (ECX bit 31 is reserved); UL avoids shifting into the sign bit
 	}
 
 #endif
@@ -101,7 +104,7 @@ CpuId::CpuId ()
 
 
 
-#if (fstb_ARCHI == fstb_ARCHI_X86)
+#if fstb_ARCHI == fstb_ARCHI_X86
 
 void	CpuId::call_cpuid (unsigned int fnc_nbr, unsigned int subfnc_nbr, unsigned int &v_eax, unsigned int &v_ebx, unsigned int &v_ecx, unsigned int &v_edx)
 {
@@ -110,11 +113,10 @@ void	CpuId::call_cpuid (unsigned int fnc_nbr, unsigned int subfnc_nbr, unsigned
 	#if defined (__x86_64__)
 	__cpuid_count (fnc_nbr, subfnc_nbr, v_eax, v_ebx, v_ecx, v_edx);
 	#else
-	fstb::unused (subfnc_nbr);
 	__cpuid (fnc_nbr, v_eax, v_ebx, v_ecx, v_edx);
 	#endif
 
-#elif (_MSC_VER)
+#elif defined (_MSC_VER)
 
 	int            cpu_info [4];
 	__cpuidex (cpu_info, fnc_nbr, subfnc_nbr);
diff --git a/src/fstb/CpuId.h b/src/fstb/CpuId.h
index 1150591..764115a 100644
--- a/src/fstb/CpuId.h
+++ b/src/fstb/CpuId.h
@@ -45,15 +45,16 @@ class CpuId
 
 	               CpuId ();
 	               CpuId (const CpuId &other)      = default;
-	virtual        ~CpuId ()                       = default;
 
 	CpuId &        operator = (const CpuId &other) = default;
 
-#if (fstb_ARCHI == fstb_ARCHI_X86)
+#if fstb_ARCHI == fstb_ARCHI_X86
 	static void		call_cpuid (unsigned int fnc_nbr, unsigned int subfnc_nbr, unsigned int &v_eax, unsigned int &v_ebx, unsigned int &v_ecx, unsigned int &v_edx);
 #endif
 
 	bool           _mmx_flag     = false;
+	bool           _fxsr_flag    = false;  // FXSAVE, FXRSTOR, CR4 bit 9
+	bool           _3dnow_flag   = false;
 	bool           _isse_flag    = false;
 	bool           _sse_flag     = false;
 	bool           _sse2_flag    = false;
@@ -69,6 +70,9 @@ class CpuId
 	bool           _avx512f_flag = false;
 	bool           _f16c_flag    = false;  // Half-precision FP
 	bool           _cx16_flag    = false;  // CMPXCHG16B
+	bool           _abm_flag     = false;  // POPCNT + LZCNT. NOTE(review): set from CPUID.1:ECX bit 23 (POPCNT) only; LZCNT is not checked
+	bool           _bmi1_flag    = false;  // Bit Manipulation Instruction Set
+	bool           _bmi2_flag    = false;
 
 
 
diff --git a/src/fstb/SingleObj.h b/src/fstb/SingleObj.h
index 893a1a4..b4bca90 100644
--- a/src/fstb/SingleObj.h
+++ b/src/fstb/SingleObj.h
@@ -47,8 +47,8 @@ class SingleObj
 	               SingleObj ();
 	virtual        ~SingleObj ();
 
-	T *            operator -> () const;
-	T &            operator * () const;
+	T *            operator -> () const noexcept;
+	T &            operator * () const noexcept;
 
 
 
@@ -72,8 +72,11 @@ class SingleObj
 private:
 
 	               SingleObj (const SingleObj <T, A> &other)         = delete;
+	               SingleObj (const SingleObj <T, A> &&other)        = delete;
 	SingleObj <T, A> &
 	               operator = (const SingleObj <T, A> &other)        = delete;
+	SingleObj <T, A> &
+	               operator = (const SingleObj <T, A> &&other)       = delete;
 	bool           operator == (const SingleObj <T, A> &other) const = delete;
 	bool           operator != (const SingleObj <T, A> &other) const = delete;
 
diff --git a/src/fstb/SingleObj.hpp b/src/fstb/SingleObj.hpp
index 5bdc086..97798ab 100644
--- a/src/fstb/SingleObj.hpp
+++ b/src/fstb/SingleObj.hpp
@@ -38,23 +38,28 @@ namespace fstb
 template <class T, class A>
 SingleObj <T, A>::SingleObj ()
 :	_allo ()
-,	_obj_ptr (0)
+,	_obj_ptr (_allo.allocate (1))
 {
-	_obj_ptr = _allo.allocate (1);
-	if (_obj_ptr == 0)
+	if (_obj_ptr == nullptr)
 	{
+#if defined (__cpp_exceptions) || ! defined (__GNUC__)
 		throw std::bad_alloc ();
+#endif
 	}
 
+#if defined (__cpp_exceptions) || ! defined (__GNUC__)
 	try
+#endif
 	{
 		new (_obj_ptr) T ();
 	}
+#if defined (__cpp_exceptions) || ! defined (__GNUC__)
 	catch (...)
 	{
 		_allo.deallocate (_obj_ptr, 1);
 		throw;
 	}
+#endif
 }
 
 
@@ -64,27 +69,27 @@ SingleObj <T, A>::~SingleObj ()
 {
 	_obj_ptr->~T ();
 	_allo.deallocate (_obj_ptr, 1);
-	_obj_ptr = 0;
+	_obj_ptr = nullptr;
 }
 
 
 
 template <class T, class A>
-T *	SingleObj <T, A>::operator -> () const
+T *	SingleObj <T, A>::operator -> () const noexcept
 {
-	assert (_obj_ptr != 0);
+	assert (_obj_ptr != nullptr);
 
-	return (_obj_ptr);
+	return _obj_ptr;
 }
 
 
 
 template <class T, class A>
-T &	SingleObj <T, A>::operator * () const
+T &	SingleObj <T, A>::operator * () const noexcept
 {
 	assert (_obj_ptr != 0);
 
-	return (*_obj_ptr);
+	return *_obj_ptr;
 }
 
 
diff --git a/src/fstb/ToolsAvx2.hpp b/src/fstb/ToolsAvx2.hpp
index 26d459d..2ccda50 100644
--- a/src/fstb/ToolsAvx2.hpp
+++ b/src/fstb/ToolsAvx2.hpp
@@ -42,63 +42,63 @@ namespace fstb
 template <class T>
 inline bool	ToolsAvx2::check_ptr_align (T *ptr)
 {
-	return (ptr != 0 && (reinterpret_cast <ptrdiff_t> (ptr) & 31) == 0);
+	return (ptr != nullptr && (reinterpret_cast <intptr_t> (ptr) & 31) == 0);
 }
 
 
 
 __m256i	ToolsAvx2::load_16_16ml (const void *msb_ptr, const void *lsb_ptr)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 
 	const __m256i  val_msb = _mm256_cvtepu8_epi16 (_mm_loadu_si128 (
-		reinterpret_cast <const __m128i *> (msb_ptr)
+		static_cast <const __m128i *> (msb_ptr)
 	));
 	const __m256i  val_lsb = _mm256_cvtepu8_epi16 (_mm_loadu_si128 (
-		reinterpret_cast <const __m128i *> (lsb_ptr)
+		static_cast <const __m128i *> (lsb_ptr)
 	));
 	const __m256i  val = _mm256_or_si256 (
 		_mm256_slli_epi16 (val_msb, 8),
 		val_lsb
 	);
 
-	return (val);
+	return val;
 }
 
 
 
 __m256i	ToolsAvx2::load_16_16m (const void *msb_ptr)
 {
-	assert (msb_ptr != 0);
+	assert (msb_ptr != nullptr);
 
 	const __m256i  val_msb = _mm256_cvtepu8_epi16 (_mm_loadu_si128 (
-		reinterpret_cast <const __m128i *> (msb_ptr)
+		static_cast <const __m128i *> (msb_ptr)
 	));
 	const __m256i  val = _mm256_slli_epi16 (val_msb, 8);
 
-	return (val);
+	return val;
 }
 
 
 
 __m256i	ToolsAvx2::load_16_16l (const void *lsb_ptr)
 {
-	assert (lsb_ptr != 0);
+	assert (lsb_ptr != nullptr);
 
 	const __m256i  val_lsb = _mm256_cvtepu8_epi16 (_mm_loadu_si128 (
-		reinterpret_cast <const __m128i *> (lsb_ptr)
+		static_cast <const __m128i *> (lsb_ptr)
 	));
 
-	return (val_lsb);
+	return val_lsb;
 }
 
 
 
 __m256i	ToolsAvx2::load_16_16ml_partial (const void *msb_ptr, const void *lsb_ptr, int len)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 16);
 
@@ -111,14 +111,14 @@ __m256i	ToolsAvx2::load_16_16ml_partial (const void *msb_ptr, const void *lsb_pt
 		val_lsb
 	);
 
-	return (val);
+	return val;
 }
 
 
 
 __m256i	ToolsAvx2::load_16_16m_partial (const void *msb_ptr, int len)
 {
-	assert (msb_ptr != 0);
+	assert (msb_ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 16);
 
@@ -126,21 +126,21 @@ __m256i	ToolsAvx2::load_16_16m_partial (const void *msb_ptr, int len)
 		_mm256_cvtepu8_epi16 (ToolsSse2::load_si128_partial (msb_ptr, len));
 	const __m256i  val = _mm256_slli_epi16 (val_msb, 8);
 
-	return (val);
+	return val;
 }
 
 
 
 __m256i	ToolsAvx2::load_16_16l_partial (const void *lsb_ptr, int len)
 {
-	assert (lsb_ptr != 0);
+	assert (lsb_ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 16);
 
 	const __m256i  val =
 		_mm256_cvtepu8_epi16 (ToolsSse2::load_si128_partial (lsb_ptr, len));
 
-	return (val);
+	return val;
 }
 
 
@@ -148,8 +148,8 @@ __m256i	ToolsAvx2::load_16_16l_partial (const void *lsb_ptr, int len)
 // mask_lsb = 0x00FF00FF00FF00...
 void	ToolsAvx2::store_16_16ml (void *msb_ptr, void *lsb_ptr, __m256i val, __m256i mask_lsb)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 	assert (lsb_ptr != msb_ptr);
 	
 	const __m256i	lsb = _mm256_and_si256 (mask_lsb, val);
@@ -160,12 +160,10 @@ void	ToolsAvx2::store_16_16ml (void *msb_ptr, void *lsb_ptr, __m256i val, __m256
 	lsbmsb = _mm256_permute4x64_epi64 (lsbmsb, (0<<0) + (2<<2) + (1<<4) + (3<<6));
 
 	_mm_storeu_si128 (
-		reinterpret_cast <__m128i *> (lsb_ptr),
-		_mm256_castsi256_si128 (lsbmsb)
+		static_cast <__m128i *> (lsb_ptr), _mm256_castsi256_si128 (lsbmsb)
 	);
 	_mm_storeu_si128 (
-		reinterpret_cast <__m128i *> (msb_ptr),
-		_mm256_extractf128_si256 (lsbmsb, 0x1)
+		static_cast <__m128i *> (msb_ptr), _mm256_extractf128_si256 (lsbmsb, 0x1)
 	);
 }
 
@@ -174,8 +172,8 @@ void	ToolsAvx2::store_16_16ml (void *msb_ptr, void *lsb_ptr, __m256i val, __m256
 // mask_lsb = 0x00FF00FF00FF00...
 void	ToolsAvx2::store_16_16ml_perm (void *msb_ptr, void *lsb_ptr, __m256i val, __m256i mask_lsb, __m256i permute)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 	assert (lsb_ptr != msb_ptr);
 	
 	const __m256i	lsb = _mm256_and_si256 (mask_lsb, val);
@@ -186,12 +184,10 @@ void	ToolsAvx2::store_16_16ml_perm (void *msb_ptr, void *lsb_ptr, __m256i val, _
 	lsbmsb = _mm256_permutevar8x32_epi32 (lsbmsb, permute);
 
 	_mm_storeu_si128 (
-		reinterpret_cast <__m128i *> (lsb_ptr),
-		_mm256_castsi256_si128 (lsbmsb)
+		static_cast <__m128i *> (lsb_ptr), _mm256_castsi256_si128 (lsbmsb)
 	);
 	_mm_storeu_si128 (
-		reinterpret_cast <__m128i *> (msb_ptr),
-		_mm256_extractf128_si256 (lsbmsb, 0x1)
+		static_cast <__m128i *> (msb_ptr), _mm256_extractf128_si256 (lsbmsb, 0x1)
 	);
 }
 
@@ -200,15 +196,14 @@ void	ToolsAvx2::store_16_16ml_perm (void *msb_ptr, void *lsb_ptr, __m256i val, _
 // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF
 void	ToolsAvx2::store_16_16m (void *msb_ptr, __m256i val, __m256i mask_lsb)
 {
-	assert (msb_ptr != 0);
+	assert (msb_ptr != nullptr);
 
 	__m256i        msb = _mm256_andnot_si256 (mask_lsb, val);
 	msb = _mm256_srli_si256 (msb, 1);
 	msb = _mm256_packus_epi16 (msb, msb);
 	msb = _mm256_permute4x64_epi64 (msb, (0<<0) + (2<<2));
 	_mm_storeu_si128 (
-		reinterpret_cast <__m128i *> (msb_ptr),
-		_mm256_castsi256_si128 (msb)
+		static_cast <__m128i *> (msb_ptr), _mm256_castsi256_si128 (msb)
 	);
 }
 
@@ -217,14 +212,13 @@ void	ToolsAvx2::store_16_16m (void *msb_ptr, __m256i val, __m256i mask_lsb)
 // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF
 void	ToolsAvx2::store_16_16l (void *lsb_ptr, __m256i val, __m256i mask_lsb)
 {
-	assert (lsb_ptr != 0);
+	assert (lsb_ptr != nullptr);
 
 	__m256i        lsb = _mm256_and_si256 (mask_lsb, val);
 	lsb = _mm256_packus_epi16 (lsb, lsb);
 	lsb = _mm256_permute4x64_epi64 (lsb, (0<<0) + (2<<2));
 	_mm_storeu_si128 (
-		reinterpret_cast <__m128i *> (lsb_ptr),
-		_mm256_castsi256_si128 (lsb)
+		static_cast <__m128i *> (lsb_ptr), _mm256_castsi256_si128 (lsb)
 	);
 }
 
@@ -232,8 +226,8 @@ void	ToolsAvx2::store_16_16l (void *lsb_ptr, __m256i val, __m256i mask_lsb)
 
 void	ToolsAvx2::store_16_16ml_partial (void *msb_ptr, void *lsb_ptr, __m256i val, __m256i mask_lsb, int len)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 	assert (lsb_ptr != msb_ptr);
 	assert (len >= 0);
 	assert (len < 16);
@@ -256,8 +250,8 @@ void	ToolsAvx2::store_16_16ml_partial (void *msb_ptr, void *lsb_ptr, __m256i val
 
 void	ToolsAvx2::store_16_16ml_perm_partial (void *msb_ptr, void *lsb_ptr, __m256i val, __m256i mask_lsb, __m256i permute, int len)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 	assert (lsb_ptr != msb_ptr);
 	assert (len >= 0);
 	assert (len < 16);
@@ -280,7 +274,7 @@ void	ToolsAvx2::store_16_16ml_perm_partial (void *msb_ptr, void *lsb_ptr, __m256
 
 void	ToolsAvx2::store_16_16m_partial (void *msb_ptr, __m256i val, __m256i mask_lsb, int len)
 {
-	assert (msb_ptr != 0);
+	assert (msb_ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 16);
 
@@ -296,7 +290,7 @@ void	ToolsAvx2::store_16_16m_partial (void *msb_ptr, __m256i val, __m256i mask_l
 
 void	ToolsAvx2::store_16_16l_partial (void *lsb_ptr, __m256i val, __m256i mask_lsb, int len)
 {
-	assert (lsb_ptr != 0);
+	assert (lsb_ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 16);
 
@@ -311,7 +305,7 @@ void	ToolsAvx2::store_16_16l_partial (void *lsb_ptr, __m256i val, __m256i mask_l
 
 __m256	ToolsAvx2::load_ps_partial (const void *ptr, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 8);
 
@@ -319,9 +313,9 @@ __m256	ToolsAvx2::load_ps_partial (const void *ptr, int len)
 	if (len >= 4)
 	{
 		const __m128   src_0 =
-			_mm_loadu_ps (reinterpret_cast <const float *> (ptr));
+			_mm_loadu_ps (static_cast <const float *> (ptr));
 		const __m128   src_1 = ToolsSse2::load_ps_partial (
-			reinterpret_cast <const char *> (ptr) + sizeof (src_0),
+			static_cast <const char *> (ptr) + sizeof (src_0),
 			len - 4
 		);
 		val = _mm256_insertf128_ps (
@@ -343,7 +337,7 @@ __m256	ToolsAvx2::load_ps_partial (const void *ptr, int len)
 
 __m256i	ToolsAvx2::load_si256_partial (const void *ptr, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 32);
 
@@ -351,9 +345,9 @@ __m256i	ToolsAvx2::load_si256_partial (const void *ptr, int len)
 	if (len >= 16)
 	{
 		const __m128i  src_0 =
-			_mm_loadu_si128 (reinterpret_cast <const __m128i *> (ptr));
+			_mm_loadu_si128 (static_cast <const __m128i *> (ptr));
 		const __m128i  src_1 = ToolsSse2::load_si128_partial (
-			reinterpret_cast <const char *> (ptr) + sizeof (src_0),
+			static_cast <const char *> (ptr) + sizeof (src_0),
 			len - 16
 		);
 		val = _mm256_insertf128_si256 (
@@ -375,24 +369,22 @@ __m256i	ToolsAvx2::load_si256_partial (const void *ptr, int len)
 
 void	ToolsAvx2::store_ps_partial (void *ptr, __m256 val, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 8);
 
 	const __m128   val_0 = _mm256_castps256_ps128 (val);
 	if (len >= 4)
 	{
-		_mm_storeu_ps (reinterpret_cast <float *> (ptr), val_0);
+		_mm_storeu_ps (static_cast <float *> (ptr), val_0);
 		const __m128   val_1 = _mm256_extractf128_ps (val, 1);
 		ToolsSse2::store_ps_partial (
-			reinterpret_cast <char *> (ptr) + sizeof (val_0), val_1, len - 4
+			static_cast <char *> (ptr) + sizeof (val_0), val_1, len - 4
 		);
 	}
 	else
 	{
-		ToolsSse2::store_ps_partial (
-			reinterpret_cast <char *> (ptr)                 , val_0, len
-		);
+		ToolsSse2::store_ps_partial (static_cast <char *> (ptr), val_0, len);
 	}
 }
 
@@ -400,22 +392,22 @@ void	ToolsAvx2::store_ps_partial (void *ptr, __m256 val, int len)
 
 void	ToolsAvx2::store_si256_partial (void *ptr, __m256i val, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 32);
 
 	const __m128i  val_0 = _mm256_castsi256_si128 (val);
 	if (len >= 16)
 	{
-		_mm_storeu_si128 (reinterpret_cast <__m128i *> (ptr), val_0);
+		_mm_storeu_si128 (static_cast <__m128i *> (ptr), val_0);
 		const __m128i  val_1 = _mm256_extractf128_si256 (val, 1);
 		ToolsSse2::store_si128_partial (
-			reinterpret_cast <char *> (ptr) + sizeof (val_0), val_1, len - 16
+			static_cast <char *> (ptr) + sizeof (val_0), val_1, len - 16
 		);
 	}
 	else
 	{
-		ToolsSse2::store_si128_partial (reinterpret_cast <char *> (ptr)                 , val_0, len     );
+		ToolsSse2::store_si128_partial (static_cast <char *> (ptr), val_0, len);
 	}
 }
 
diff --git a/src/fstb/ToolsSse2.hpp b/src/fstb/ToolsSse2.hpp
index b28392c..2fa868c 100644
--- a/src/fstb/ToolsSse2.hpp
+++ b/src/fstb/ToolsSse2.hpp
@@ -42,15 +42,15 @@ namespace fstb
 template <class T>
 inline bool	ToolsSse2::check_ptr_align (T *ptr)
 {
-	return (ptr != 0 && (reinterpret_cast <ptrdiff_t> (ptr) & 15) == 0);
+	return (ptr != nullptr && (reinterpret_cast <intptr_t> (ptr) & 15) == 0);
 }
 
 
 
 __m128i	ToolsSse2::load_8_16ml (const void *msb_ptr, const void *lsb_ptr)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 
 	const __m128i  val_msb = _mm_loadl_epi64 (
 		reinterpret_cast <const __m128i *> (msb_ptr)
@@ -60,43 +60,43 @@ __m128i	ToolsSse2::load_8_16ml (const void *msb_ptr, const void *lsb_ptr)
 	);
 	const __m128i  val = _mm_unpacklo_epi8 (val_lsb, val_msb);
 
-	return (val);
+	return val;
 }
 
 
 
 __m128i	ToolsSse2::load_8_16m (const void *msb_ptr, __m128i zero)
 {
-	assert (msb_ptr != 0);
+	assert (msb_ptr != nullptr);
 
 	const __m128i  val_msb = _mm_loadl_epi64 (
 		reinterpret_cast <const __m128i *> (msb_ptr)
 	);
 	const __m128i  val = _mm_unpacklo_epi8 (zero, val_msb);
 
-	return (val);
+	return val;
 }
 
 
 
 __m128i	ToolsSse2::load_8_16l (const void *lsb_ptr, __m128i zero)
 {
-	assert (lsb_ptr != 0);
+	assert (lsb_ptr != nullptr);
 
 	const __m128i  val_lsb = _mm_loadl_epi64 (
 		reinterpret_cast <const __m128i *> (lsb_ptr)
 	);
 	const __m128i  val = _mm_unpacklo_epi8 (val_lsb, zero);
 
-	return (val);
+	return val;
 }
 
 
 
 __m128i	ToolsSse2::load_8_16ml_partial (const void *msb_ptr, const void *lsb_ptr, int len)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 8);
 
@@ -104,35 +104,35 @@ __m128i	ToolsSse2::load_8_16ml_partial (const void *msb_ptr, const void *lsb_ptr
 	const __m128i  val_lsb = load_epi64_partial (lsb_ptr, len);
 	const __m128i  val = _mm_unpacklo_epi8 (val_lsb, val_msb);
 
-	return (val);
+	return val;
 }
 
 
 
 __m128i	ToolsSse2::load_8_16m_partial (const void *msb_ptr, __m128i zero, int len)
 {
-	assert (msb_ptr != 0);
+	assert (msb_ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 8);
 
 	const __m128i  val_msb = load_epi64_partial (msb_ptr, len);
 	const __m128i  val = _mm_unpacklo_epi8 (zero, val_msb);
 
-	return (val);
+	return val;
 }
 
 
 
 __m128i	ToolsSse2::load_8_16l_partial (const void *lsb_ptr, __m128i zero, int len)
 {
-	assert (lsb_ptr != 0);
+	assert (lsb_ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 8);
 
 	const __m128i  val_lsb = load_epi64_partial (lsb_ptr, len);
 	const __m128i  val = _mm_unpacklo_epi8 (val_lsb, zero);
 
-	return (val);
+	return val;
 }
 
 
@@ -140,8 +140,8 @@ __m128i	ToolsSse2::load_8_16l_partial (const void *lsb_ptr, __m128i zero, int le
 // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF
 void	ToolsSse2::store_8_16ml (void *msb_ptr, void *lsb_ptr, __m128i val, __m128i mask_lsb)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 	assert (lsb_ptr != msb_ptr);
 
 	const __m128i  lsb = _mm_and_si128 (mask_lsb, val);
@@ -149,10 +149,10 @@ void	ToolsSse2::store_8_16ml (void *msb_ptr, void *lsb_ptr, __m128i val, __m128i
 	msb = _mm_srli_si128 (msb, 1);
 
 	__m128i        tmp = _mm_packus_epi16 (lsb, msb);
-	_mm_storel_epi64 (reinterpret_cast <__m128i *> (lsb_ptr), tmp);
+	_mm_storel_epi64 (static_cast <__m128i *> (lsb_ptr), tmp);
 
 	tmp = _mm_unpackhi_epi64 (tmp, tmp);
-	_mm_storel_epi64 (reinterpret_cast <__m128i *> (msb_ptr), tmp);
+	_mm_storel_epi64 (static_cast <__m128i *> (msb_ptr), tmp);
 }
 
 
@@ -160,12 +160,12 @@ void	ToolsSse2::store_8_16ml (void *msb_ptr, void *lsb_ptr, __m128i val, __m128i
 // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF
 void	ToolsSse2::store_8_16m (void *msb_ptr, __m128i val, __m128i mask_lsb)
 {
-	assert (msb_ptr != 0);
+	assert (msb_ptr != nullptr);
 
 	__m128i        msb = _mm_andnot_si128 (mask_lsb, val);
 	msb = _mm_srli_si128 (msb, 1);
 	msb = _mm_packus_epi16 (msb, msb);
-	_mm_storel_epi64 (reinterpret_cast <__m128i *> (msb_ptr), msb);
+	_mm_storel_epi64 (static_cast <__m128i *> (msb_ptr), msb);
 }
 
 
@@ -173,19 +173,19 @@ void	ToolsSse2::store_8_16m (void *msb_ptr, __m128i val, __m128i mask_lsb)
 // mask_lsb = 0x00FF00FF00FF00FF00FF00FF00FF00FF
 void	ToolsSse2::store_8_16l (void *lsb_ptr, __m128i val, __m128i mask_lsb)
 {
-	assert (lsb_ptr != 0);
+	assert (lsb_ptr != nullptr);
 
 	__m128i        lsb = _mm_and_si128 (mask_lsb, val);
 	lsb = _mm_packus_epi16 (lsb, lsb);
-	_mm_storel_epi64 (reinterpret_cast <__m128i *> (lsb_ptr), lsb);
+	_mm_storel_epi64 (static_cast <__m128i *> (lsb_ptr), lsb);
 }
 
 
 
 void	ToolsSse2::store_8_16ml_partial (void *msb_ptr, void *lsb_ptr, __m128i val, __m128i mask_lsb, int len)
 {
-	assert (msb_ptr != 0);
-	assert (lsb_ptr != 0);
+	assert (msb_ptr != nullptr);
+	assert (lsb_ptr != nullptr);
 	assert (lsb_ptr != msb_ptr);
 
 	const __m128i  lsb = _mm_and_si128 (mask_lsb, val);
@@ -203,7 +203,7 @@ void	ToolsSse2::store_8_16ml_partial (void *msb_ptr, void *lsb_ptr, __m128i val,
 
 void	ToolsSse2::store_8_16m_partial (void *msb_ptr, __m128i val, __m128i mask_lsb, int len)
 {
-	assert (msb_ptr != 0);
+	assert (msb_ptr != nullptr);
 
 	__m128i        msb = _mm_andnot_si128 (mask_lsb, val);
 	msb = _mm_srli_si128 (msb, 1);
@@ -215,7 +215,7 @@ void	ToolsSse2::store_8_16m_partial (void *msb_ptr, __m128i val, __m128i mask_ls
 
 void	ToolsSse2::store_8_16l_partial (void *lsb_ptr, __m128i val, __m128i mask_lsb, int len)
 {
-	assert (lsb_ptr != 0);
+	assert (lsb_ptr != nullptr);
 
 	__m128i        lsb = _mm_and_si128 (mask_lsb, val);
 	lsb = _mm_packus_epi16 (lsb, lsb);
@@ -226,7 +226,7 @@ void	ToolsSse2::store_8_16l_partial (void *lsb_ptr, __m128i val, __m128i mask_ls
 
 __m128	ToolsSse2::load_ps_partial (const void *ptr, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 4);
 
@@ -234,18 +234,18 @@ __m128	ToolsSse2::load_ps_partial (const void *ptr, int len)
 	while (len > 0)
 	{
 		-- len;
-		tmp_arr [len] = reinterpret_cast <const float *> (ptr) [len];
+		tmp_arr [len] = static_cast <const float *> (ptr) [len];
 	}
 	const __m128   val = _mm_load_ps (tmp_arr);
 
-	return (val);
+	return val;
 }
 
 
 
 __m128i	ToolsSse2::load_si128_partial (const void *ptr, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 16);
 
@@ -253,27 +253,27 @@ __m128i	ToolsSse2::load_si128_partial (const void *ptr, int len)
 	if ((len & 1) != 0)
 	{
 		-- len;
-		tmp = *(reinterpret_cast <const uint8_t *> (ptr) + len);
+		tmp = *(static_cast <const uint8_t *> (ptr) + len);
 	}
 	if ((len & 2) != 0)
 	{
 		len -= 2;
 		tmp <<= 16;
 		const int      ofs = len >> 1;
-		tmp += *(reinterpret_cast <const uint16_t *> (ptr) + ofs);
+		tmp += *(static_cast <const uint16_t *> (ptr) + ofs);
 	}
 	__m128i        val;
 	if (len >= 8)
 	{
-		const int      tmp0 = *(reinterpret_cast <const int32_t *> (ptr)    );
-		const int      tmp1 = *(reinterpret_cast <const int32_t *> (ptr) + 1);
+		const int      tmp0 = *(static_cast <const int32_t *> (ptr)    );
+		const int      tmp1 = *(static_cast <const int32_t *> (ptr) + 1);
 		if (len == 8)
 		{
 			val = _mm_set_epi32 (0, tmp, tmp1, tmp0);
 		}
 		else
 		{
-			const int      tmp2 = *(reinterpret_cast <const int32_t *> (ptr) + 2);
+			const int      tmp2 = *(static_cast <const int32_t *> (ptr) + 2);
 			val = _mm_set_epi32 (tmp, tmp2, tmp1, tmp0);
 		}
 	}
@@ -285,19 +285,19 @@ __m128i	ToolsSse2::load_si128_partial (const void *ptr, int len)
 		}
 		else
 		{
-			const int      tmp0 = *reinterpret_cast <const int32_t *> (ptr);
+			const int      tmp0 = *static_cast <const int32_t *> (ptr);
 			val = _mm_set_epi32 (0, 0, tmp, tmp0);
 		}
 	}
 
-	return (val);
+	return val;
 }
 
 
 
 __m128i	ToolsSse2::load_epi64_partial (const void *ptr, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 8);
 
@@ -305,19 +305,19 @@ __m128i	ToolsSse2::load_epi64_partial (const void *ptr, int len)
 	if ((len & 1) != 0)
 	{
 		-- len;
-		tmp = *(reinterpret_cast <const uint8_t *> (ptr) + len);
+		tmp = *(static_cast <const uint8_t *> (ptr) + len);
 	}
 	if ((len & 2) != 0)
 	{
 		len -= 2;
 		tmp <<= 16;
 		const int      ofs = len >> 1;
-		tmp += *(reinterpret_cast <const uint16_t *> (ptr) + ofs);
+		tmp += *(static_cast <const uint16_t *> (ptr) + ofs);
 	}
 	__m128i        val;
 	if ((len & 4) != 0)
 	{
-		const int      tmp2 = *reinterpret_cast <const int32_t *> (ptr);
+		const int      tmp2 = *static_cast <const int32_t *> (ptr);
 		val = _mm_set_epi32 (0, 0, tmp, tmp2);
 	}
 	else
@@ -325,14 +325,14 @@ __m128i	ToolsSse2::load_epi64_partial (const void *ptr, int len)
 		val = _mm_set_epi32 (0, 0, 0, tmp);
 	}
 
-	return (val);
+	return val;
 }
 
 
 
 void	ToolsSse2::store_ps_partial (void *ptr, __m128 val, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 4);
 
@@ -341,7 +341,7 @@ void	ToolsSse2::store_ps_partial (void *ptr, __m128 val, int len)
 	while (len > 0)
 	{
 		-- len;
-		reinterpret_cast <float *> (ptr) [len] = tmp_arr [len];
+		static_cast <float *> (ptr) [len] = tmp_arr [len];
 	}
 }
 
@@ -349,7 +349,7 @@ void	ToolsSse2::store_ps_partial (void *ptr, __m128 val, int len)
 
 void	ToolsSse2::store_si128_partial (void *ptr, __m128i val, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 16);
 
@@ -365,22 +365,22 @@ void	ToolsSse2::store_si128_partial (void *ptr, __m128i val, int len)
 
 	if ((len & 1) != 0)
 	{
-		*(reinterpret_cast <uint8_t  *> (ptr) + len - 1) = tmp.v08 [len - 1];
+		*(static_cast <uint8_t  *> (ptr) + len - 1) = tmp.v08 [len - 1];
 	}
 	len >>= 1;
 	if ((len & 1) != 0)
 	{
-		*(reinterpret_cast <uint16_t *> (ptr) + len - 1) = tmp.v16 [len - 1];
+		*(static_cast <uint16_t *> (ptr) + len - 1) = tmp.v16 [len - 1];
 	}
 	len >>= 1;
 	if ((len & 1) != 0)
 	{
-		*(reinterpret_cast <uint32_t *> (ptr) + len - 1) = tmp.v32 [len - 1];
+		*(static_cast <uint32_t *> (ptr) + len - 1) = tmp.v32 [len - 1];
 	}
 	len >>= 1;
 	if (len != 0)
 	{
-		* reinterpret_cast <uint64_t *> (ptr)            = tmp.v64 [0      ];
+		* static_cast <uint64_t *> (ptr)            = tmp.v64 [0      ];
 	}
 }
 
@@ -388,7 +388,7 @@ void	ToolsSse2::store_si128_partial (void *ptr, __m128i val, int len)
 
 void	ToolsSse2::store_epi64_partial (void *ptr, __m128i val, int len)
 {
-	assert (ptr != 0);
+	assert (ptr != nullptr);
 	assert (len >= 0);
 	assert (len < 8);
 
@@ -397,19 +397,19 @@ void	ToolsSse2::store_epi64_partial (void *ptr, __m128i val, int len)
 	uint64_t       tmp = _mm_cvtsi128_si64 (val);
 	if ((len & 4) != 0)
 	{
-		*reinterpret_cast <uint32_t *> (ptr) = uint32_t (tmp);
-		ptr = reinterpret_cast <uint32_t *> (ptr) + 1;
+		*static_cast <uint32_t *> (ptr) = uint32_t (tmp);
+		ptr = static_cast <uint32_t *> (ptr) + 1;
 		tmp >>= 32;
 	}
 	if ((len & 2) != 0)
 	{
-		*reinterpret_cast <uint16_t *> (ptr) = uint16_t (tmp);
-		ptr = reinterpret_cast <uint16_t *> (ptr) + 1;
+		*static_cast <uint16_t *> (ptr) = uint16_t (tmp);
+		ptr = static_cast <uint16_t *> (ptr) + 1;
 		tmp >>= 16;
 	}
 	if ((len & 1) != 0)
 	{
-		*reinterpret_cast <uint8_t *> (ptr) = uint8_t (tmp);
+		*static_cast <uint8_t *> (ptr) = uint8_t (tmp);
 	}
 
 #else
@@ -425,17 +425,17 @@ void	ToolsSse2::store_epi64_partial (void *ptr, __m128i val, int len)
 
 	if ((len & 1) != 0)
 	{
-		*(reinterpret_cast <uint8_t  *> (ptr) + len - 1) = tmp.v08 [len - 1];
+		*(static_cast <uint8_t  *> (ptr) + len - 1) = tmp.v08 [len - 1];
 	}
 	len >>= 1;
 	if ((len & 1) != 0)
 	{
-		*(reinterpret_cast <uint16_t *> (ptr) + len - 1) = tmp.v16 [len - 1];
+		*(static_cast <uint16_t *> (ptr) + len - 1) = tmp.v16 [len - 1];
 	}
 	len >>= 1;
 	if (len != 0)
 	{
-		*(reinterpret_cast <uint32_t *> (ptr) + len - 1) = tmp.v32 [len - 1];
+		*(static_cast <uint32_t *> (ptr) + len - 1) = tmp.v32 [len - 1];
 	}
 
 #endif
@@ -490,7 +490,7 @@ __m128i	ToolsSse2::mul_s32_s15_s16 (__m128i src0, __m128i src1, __m128i coef)
 
 	const __m128i	res  = _mm_packs_epi32 (sum0, sum1);
 
-	return (res);
+	return res;
 }
 
 
@@ -522,7 +522,7 @@ __m128i	ToolsSse2::mullo_epi32 (const __m128i &a, const __m128i &b)
 	const __m128i  prod23 = _mm_unpackhi_epi32 (prod02, prod13); // (-,-,a3*b3,a2*b2)
 	const __m128i  res    = _mm_unpacklo_epi64 (prod01 ,prod23); // (ab3,ab2,ab1,ab0)
 
-	return (res);
+	return res;
 }
 
 
@@ -566,7 +566,7 @@ __m128i	ToolsSse2::pack_epi16 (__m128i a, __m128i b)
 
 #endif
 
-	return (p);
+	return p;
 }
 
 
@@ -577,7 +577,7 @@ __m128i	ToolsSse2::select (const __m128i &cond, const __m128i &v_t, const __m128
 	const __m128i  cond_0   = _mm_andnot_si128 (cond, v_f);
 	const __m128i  res      = _mm_or_si128 (cond_0, cond_1);
 
-	return (res);
+	return res;
 }
 
 
@@ -588,7 +588,7 @@ __m128	ToolsSse2::select (const __m128 &cond, const __m128 &v_t, const __m128 &v
 	const __m128   cond_0   = _mm_andnot_ps (cond, v_f);
 	const __m128   res      = _mm_or_ps (cond_0, cond_1);
 
-	return (res);
+	return res;
 }
 
 
@@ -597,14 +597,14 @@ __m128i	ToolsSse2::select_16_equ (const __m128i &lhs, const __m128i &rhs, const
 {
 	const __m128i  cond = _mm_cmpeq_epi16 (lhs, rhs);
 
-	return (ToolsSse2::select (cond, v_t, v_f));
+	return ToolsSse2::select (cond, v_t, v_f);
 }
 
 
 
 __m128i	ToolsSse2::limit_epi16 (const __m128i &x, const __m128i &mi, const __m128i &ma)
 {
-	return (_mm_max_epi16 (_mm_min_epi16 (x, ma), mi));
+	return _mm_max_epi16 (_mm_min_epi16 (x, ma), mi);
 }
 
 
@@ -614,7 +614,7 @@ __m128i	ToolsSse2::abs_dif_epu16 (const __m128i &a, const __m128i &b)
 	const __m128i  p  = _mm_subs_epu16 (a, b);
 	const __m128i  n  = _mm_subs_epu16 (b, a);
 
-	return (_mm_or_si128 (p, n));
+	return _mm_or_si128 (p, n);
 }
 
 
@@ -628,7 +628,7 @@ __m128i	ToolsSse2::abs_dif_epi16 (const __m128i &a, const __m128i &b)
 	const __m128i  au = _mm_xor_si128 (a, mask_s);
 	const __m128i  bu = _mm_xor_si128 (b, mask_s);
 
-	return (abs_dif_epu16 (au, bu));
+	return abs_dif_epu16 (au, bu);
 }
 
 
diff --git a/src/fstb/def.h b/src/fstb/def.h
index 61250e7..60c66cf 100644
--- a/src/fstb/def.h
+++ b/src/fstb/def.h
@@ -34,8 +34,6 @@ namespace fstb
 
 
 
-#define fstb_IS(prop, val) (defined (fstb_##prop##_##val) && (fstb_##prop) == (fstb_##prop##_##val))
-
 #define fstb_ARCHI_X86	(1)
 #define fstb_ARCHI_ARM	(2)
 
@@ -69,7 +67,7 @@ namespace fstb
 #if (fstb_ARCHI == fstb_ARCHI_X86)
 	#define fstb_ENDIAN fstb_ENDIAN_LITTLE
 #elif (fstb_ARCHI == fstb_ARCHI_ARM)
-	#if defined (__ARMEL__) || defined (__LITTLE_ENDIAN__)
+	#if ! defined (__ARM_BIG_ENDIAN) || defined (__ARMEL__) || defined (__LITTLE_ENDIAN__)
 		#define fstb_ENDIAN fstb_ENDIAN_LITTLE
 	#else
 		#define fstb_ENDIAN fstb_ENDIAN_BIG
@@ -117,10 +115,17 @@ namespace fstb
 #define fstb_COMPILER_GCC  (1)
 #define fstb_COMPILER_MSVC (2)
 
-#if defined (__GNUC__)
+#if defined (__GNUC__) || defined (__clang__)
 	#define fstb_COMPILER fstb_COMPILER_GCC
 #elif defined (_MSC_VER)
 	#define fstb_COMPILER fstb_COMPILER_MSVC
+	#if _MSC_VER >= 2000 && __cplusplus < 201402L
+		// The MS compiler keeps __cplusplus at 199711L, even if C++14 or above
+		// is enforced and standard compliance is activated. C++11 is not
+		// officially supported, but almost works with _MSC_VER >= 1900.
+		// /Zc:__cplusplus sets the macro to the right value for C++14 and later.
+		#error Please compile with /Zc:__cplusplus
+	#endif
 #else
 	#error
 #endif
@@ -138,7 +143,7 @@ namespace fstb
 
 
 
-// Alignment
+// Alignment. Prefer using alignas() directly where possible.
 #if defined (_MSC_VER)
 	#define	fstb_TYPEDEF_ALIGN( alignsize, srctype, dsttype)	\
 		typedef __declspec (align (alignsize)) srctype dsttype
@@ -162,39 +167,75 @@ namespace fstb
 #else
 	#define fstb_CDECL
 #endif
-#if fstb_IS (SYS, WIN)
+#if fstb_SYS == fstb_SYS_WIN
 	#if defined (__GNUC__)
-		#define fstb_EXPORT(f) extern "C" __attribute__((dllexport)) f
+		#define fstb_EXPORT(f) extern "C" __attribute__((dllexport)) f noexcept
 	#else
-		#define fstb_EXPORT(f) extern "C" __declspec(dllexport) f
+		#define fstb_EXPORT(f) extern "C" __declspec(dllexport) f noexcept
 	#endif
 #else
-	#define fstb_EXPORT(f) extern "C" __attribute__((visibility("default"))) f
+	#define fstb_EXPORT(f) extern "C" __attribute__((visibility("default"))) f noexcept
 #endif
 
 
 
-// Convenient helper to declare unused function parameters
-template <typename... T> inline void unused (T &&...) {}
+// constexpr for functions needing the relaxed C++14 rules (expands to nothing before C++14)
+#if (__cplusplus >= 201402L)
+	#define fstb_CONSTEXPR14 constexpr
+#else
+	#define fstb_CONSTEXPR14
+#endif
 
 
 
+// SIMD instruction set availability
+#undef fstb_HAS_SIMD
+#if fstb_ARCHI == fstb_ARCHI_ARM
+	#if defined (__ARM_NEON_FP)
+		#define fstb_HAS_SIMD (1)
+	#endif
+#elif fstb_ARCHI == fstb_ARCHI_X86
+	#if (fstb_WORD_SIZE == 64)
+		#define fstb_HAS_SIMD (1)
+	#elif fstb_COMPILER == fstb_COMPILER_MSVC
+		#if defined (_M_IX86_FP) && _M_IX86_FP >= 2
+			#define fstb_HAS_SIMD (1)
+		#endif
+	#else
+		#if defined (__SSE2__)
+			#define fstb_HAS_SIMD (1)
+		#endif
+	#endif
+#endif
 
-const double PI      = 3.1415926535897932384626433832795;
-const double LN2     = 0.69314718055994530941723212145818;
-const double LN10    = 2.3025850929940456840179914546844;
-const double LOG10_2 = 0.30102999566398119521373889472449;
-const double LOG2_E  = 1.0  / LN2;
-const double LOG2_10 = LN10 / LN2;
-const double EXP1    = 2.7182818284590452353602874713527;
-const double SQRT2   = 1.4142135623730950488016887242097;
-const double TWOP32  = 256.0 * 256 * 256 * 256;
-const double TWOPM32 = 1.0 / TWOP32;
 
-const float  ANTI_DENORMAL_F32     = 1e-20f;
-const double ANTI_DENORMAL_F64     = 1e-290;
-const float  ANTI_DENORMAL_F32_CUB = 1e-10f;  // Anti-denormal for float numbers aimed to be raised to the power of 2 or 3.
-const double ANTI_DENORMAL_F64_CUB = 1e-100;
+
+// Convenient helper to declare unused function parameters
+template <typename... T> inline void unused (T &&...) noexcept {}
+
+
+
+constexpr double PI      = 3.1415926535897932384626433832795;
+constexpr double LN2     = 0.69314718055994530941723212145818;
+constexpr double LN10    = 2.3025850929940456840179914546844;
+constexpr double LOG10_2 = 0.30102999566398119521373889472449;
+constexpr double LOG2_E  = 1.0  / LN2;
+constexpr double LOG2_10 = LN10 / LN2;
+constexpr double EXP1    = 2.7182818284590452353602874713527;
+constexpr double SQRT2   = 1.4142135623730950488016887242097;
+
+// Exact representation in 32-bit float
+constexpr float  TWOP16  = 65536.f;
+constexpr float  TWOP32  = TWOP16 * TWOP16;
+constexpr float  TWOP64  = TWOP32 * TWOP32;
+constexpr float  TWOPM16 = 1.f / TWOP16;
+constexpr float  TWOPM32 = 1.f / TWOP32;
+constexpr float  TWOPM64 = 1.f / TWOP64;
+
+constexpr float  ANTI_DENORMAL_F32     = 1e-20f;
+constexpr double ANTI_DENORMAL_F64     = 1e-290;
+constexpr float  ANTI_DENORMAL_F32_CUB = 1e-10f;  // Anti-denormal for float numbers aimed to be raised to the power of 2 or 3.
+constexpr double ANTI_DENORMAL_F64_CUB = 1e-100;
 
 
 
diff --git a/src/fstb/fnc.h b/src/fstb/fnc.h
index 3e21694..4b620ed 100644
--- a/src/fstb/fnc.h
+++ b/src/fstb/fnc.h
@@ -27,9 +27,14 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 /*\\\ INCLUDE FILES \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\*/
 
+#include "fstb/def.h"
+
+#include <array>
 #include <string>
 
+#include <cstddef>
 #include <cstdint>
+#include <cstdio>
 
 
 
@@ -39,31 +44,77 @@ namespace fstb
 
 
 template <class T>
-inline T       limit (T x, T mi, T ma);
+inline constexpr int     sgn (T x) noexcept;
+template <class T>
+inline constexpr T       limit (T x, T mi, T ma) noexcept;
+template <class T>
+inline constexpr void    sort_2_elt (T &mi,T &ma, T a, T b) noexcept;
+template <class T>
+inline constexpr bool    is_pow_2 (T x) noexcept;
+inline double  round (double x) noexcept;
+inline float   round (float x) noexcept;
+inline int     round_int (float x) noexcept;
+inline int     round_int (double x) noexcept;
+inline int     round_int_accurate (double x) noexcept;
+inline int64_t round_int64 (double x) noexcept;
+inline int     floor_int (float x) noexcept;
+inline int     floor_int (double x) noexcept;
+inline int     floor_int_accurate (double x) noexcept;
+inline int64_t floor_int64 (double x) noexcept;
+inline int     ceil_int (double x) noexcept;
 template <class T>
-inline bool    is_pow_2 (T x);
-inline double  round (double x);
-inline int     round_int (double x);
-inline int     floor_int (double x);
-inline int     ceil_int (double x);
+inline int     trunc_int (T x) noexcept;
 template <class T>
-inline int     conv_int_fast (T x);
+inline int     conv_int_fast (T x) noexcept;
 template <class T>
-inline bool    is_null (T val, T eps = T (1e-9));
+inline constexpr bool    is_null (T val, T eps = T (1e-9)) noexcept;
 template <class T>
-inline bool    is_eq (T v1, T v2, T eps = T (1e-9));
+inline constexpr bool    is_eq (T v1, T v2, T eps = T (1e-9)) noexcept;
 template <class T>
-inline bool    is_eq_rel (T v1, T v2, T tol = T (1e-6));
-inline int     get_prev_pow_2 (uint32_t x);
-inline double  sinc (double x);
+inline constexpr bool    is_eq_rel (T v1, T v2, T tol = T (1e-6)) noexcept;
+inline int     get_prev_pow_2 (uint32_t x) noexcept;
+inline int     get_next_pow_2 (uint32_t x) noexcept;
+inline constexpr double  sinc (double x) noexcept;
+inline double  pseudo_exp (double x, double c) noexcept;
+inline double  pseudo_log (double y, double c) noexcept;
 template <class T, int S>
-inline T       sshift_l (T x);
+inline constexpr T       sshift_l (T x) noexcept;
 template <class T, int S>
-inline T       sshift_r (T x);
+inline constexpr T       sshift_r (T x) noexcept;
+template <class T>
+inline constexpr T       sq (T x) noexcept;
+template <class T>
+inline constexpr T       cube (T x) noexcept;
+template <class T, class U>
+inline constexpr T       ipow (T x, U n) noexcept;
+template <class T, class U>
+inline constexpr T       ipowp (T x, U n) noexcept;
+template <int N, class T>
+inline constexpr T       ipowpc (T x) noexcept;
+template <class T>
+inline constexpr T       rcp_uint (int x) noexcept;
+template <class T>
+inline constexpr T       lerp (T v0, T v1, T p) noexcept;
+template <class T>
+inline constexpr T       find_extremum_pos_parabolic (T r1, T r2, T r3);
+
+template <std::size_t N, typename T>
+constexpr std::array <T, N> make_array (const T &init_val);
+
+template <class T>
+inline T       read_unalign (const void *ptr) noexcept;
+template <class T>
+inline void    write_unalign (void *ptr, T val) noexcept;
+template <typename T>
+inline void    copy_no_overlap (T * fstb_RESTRICT dst_ptr, const T * fstb_RESTRICT src_ptr, int nbr_elt) noexcept;
 
 void           conv_to_lower_case (std::string &str);
 
 int            snprintf4all (char *out_0, size_t size, const char *format_0, ...);
+//FILE *         fopen_utf8 (const char *filename_0, const char *mode_0);
+
+template <typename T>
+inline bool    is_ptr_align_nz (const T *ptr, int a = 16) noexcept;
 
 
 
diff --git a/src/fstb/fnc.hpp b/src/fstb/fnc.hpp
index 1ed1791..db6d6bd 100644
--- a/src/fstb/fnc.hpp
+++ b/src/fstb/fnc.hpp
@@ -24,8 +24,15 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 #include "fstb/def.h"
 
+#include <algorithm>
 #include <type_traits>
 
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+	#if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64
+		#include <emmintrin.h>
+	#endif
+#endif
+
 #if defined (_MSC_VER)
 	#if (fstb_ARCHI == fstb_ARCHI_X86)
 		#include <intrin.h>
@@ -39,6 +46,7 @@ To Public License, Version 2, as published by Sam Hocevar. See
 #include <climits>
 #include <cmath>
 #include <cstdint>
+#include <cstring>
 
 
 
@@ -52,51 +60,126 @@ namespace fstb
 
 
 template <class T>
-T	limit (T x, T mi, T ma)
+constexpr int	sgn (T x) noexcept
+{
+	return x < T (0) ? -1 : x > T (0) ? 1 : 0;
+}
+
+
+
+template <class T>
+constexpr T	limit (T x, T mi, T ma) noexcept
 {
-	return ((x < mi) ? mi : ((x > ma) ? ma : x));
+	return (x < mi) ? mi : ((x > ma) ? ma : x);
 }
 
 
 
 template <class T>
-bool	is_pow_2 (T x)
+constexpr void	sort_2_elt (T &mi, T &ma, T a, T b) noexcept
+{
+	if (a < b)
+	{
+		mi = a;
+		ma = b;
+	}
+	else
+	{
+		mi = b;
+		ma = a;
+	}
+}
+
+
+
+template <class T>
+constexpr bool	is_pow_2 (T x) noexcept
 {
 	return ((x & -x) == x);
 }
 
 
 
-double	round (double x)
+double	round (double x) noexcept
 {
-	return (floor (x + 0.5));
+	return floor (x + 0.5f);
 }
 
 
 
-int	round_int (double x)
+float	round (float x) noexcept
+{
+	return floorf (x + 0.5f);
+}
+
+
+
+int	round_int (float x) noexcept
 {
 	assert (x <= double (INT_MAX));
-	assert (x >= double (INT_MIN));
+	assert (x >= static_cast <double> (INT_MIN));
 
 #if (fstb_ARCHI == fstb_ARCHI_X86)
 
- #if defined (_MSC_VER)
+ #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64
 
-	assert (x <= double (INT_MAX/2));
-	assert (x >= double (INT_MIN/2));
+	return _mm_cvtss_si32 (_mm_set_ss (x));
 
-	static const float	round_to_nearest = 0.5f;
-	int				i;
+ #elif defined (_MSC_VER)
+
+	int            i;
+	__asm
+	{
+		fld            x
+		fistp          i
+	}
+	return i;
+
+ #else
+
+	return int (floorf (x + 0.5f));
 
-  #if defined (_WIN64) || defined (__64BIT__) || defined (__amd64__) || defined (__x86_64__)
+ #endif  // Compiler
+
+#else // fstb_ARCHI_X86
+
+	// Slow
+	return int (floorf (x + 0.5f));
 
-	const double	xx = x + x + round_to_nearest;
+#endif // fstb_ARCHI_X86
+}
+
+
+
+int	round_int (double x) noexcept
+{
+	return round_int (float (x));
+}
+
+
+
+int	round_int_accurate (double x) noexcept
+{
+	assert (x <= double (INT_MAX));
+	assert (x >= static_cast <double> (INT_MIN));
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+
+ #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64
+
+	constexpr float   round_to_nearest = 0.5f;
+	const double	xx     = x + x + round_to_nearest;
 	const __m128d	x_128d = _mm_set_sd (xx);
-	i = _mm_cvtsd_si32 (x_128d);
-	i >>= 1;
+	const int      i      = _mm_cvtsd_si32 (x_128d) >> 1;
+	return i;
 
-  #else
+ #elif defined (_MSC_VER)
+
+	assert (x <= double (INT_MAX/2));
+	assert (x >= double (INT_MIN/2));
+
+	static const float	round_to_nearest = 0.5f;
+	int				i;
 
 	__asm
 	{
@@ -107,11 +190,9 @@ int	round_int (double x)
 		sar            i, 1
 	}
 
-  #endif
-
 	assert (i == int (floor (x + 0.5)));
 
-	return (i);
+	return i;
 
  #elif defined (__GNUC__)
 
@@ -134,50 +215,124 @@ int	round_int (double x)
 
 	assert (i == int (floor (x + 0.5)));
 
-	return (i);
+	return i;
 
  #else
 
 	// Slow
-	return (int (floor (x + 0.5)));
+	return int (floor (x + 0.5));
 
  #endif  // Compiler
 
 #else // fstb_ARCHI_X86
 
 	// Slow
-	return (int (floor (x + 0.5)));
+	return int (floor (x + 0.5));
 
 #endif // fstb_ARCHI_X86
 }
 
 
 
+int64_t round_int64 (double x) noexcept
+{
+	return int64_t (round (x));
+}
+
+
+
 // May not give the right result for very small negative values.
-int	floor_int (double x)
+int	floor_int (float x) noexcept
 {
 	assert (x <= double (INT_MAX));
-	assert (x >= double (INT_MIN));
+	assert (x >= static_cast <double> (INT_MIN));
 
 #if (fstb_ARCHI == fstb_ARCHI_X86)
 
- #if defined (_MSC_VER)
+ #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64
+
+	constexpr float   round_toward_m_i = -0.5f;
+	const float    xx     = x + x + round_toward_m_i;
+	const __m128   x_128  = _mm_set_ss (xx);
+	const int      i      = _mm_cvt_ss2si (x_128) >> 1;
+	return i;
+
+ #elif defined (_MSC_VER)
 
 	assert (x <= double (INT_MAX/2));
 	assert (x >= double (INT_MIN/2));
 
 	int            i;
-	static const float   round_toward_m_i = -0.5f;
+	constexpr float   round_toward_m_i = -0.5f;
+	__asm
+	{
+		fld            x
+		fadd           round_toward_m_i
+		fistp          i
+	}
+	return i;
+
+ #elif defined (__GNUC__)
+
+	assert (x <= double (INT_MAX/2));
+	assert (x >= double (INT_MIN/2));
+	int				i;
+	static const float	round_toward_m_i = -0.5f;
+	asm (
+		"flds				%[x]				\n" // x is a 32-bit float here: single-precision load (fldl would read 64 bits)
+		"fadds			(%[rm])				\n"
+		"fistpl			%[i]				\n"
+	:	[i]	"=m"	(i)
+ 	:	[rm]	"r"	(&round_toward_m_i)
+	,	[x]	"m"	(x)
+	:	//"st"
+	);
+	return i; // NOTE(review): unlike the SSE branch there is no x + x / >> 1 step; if the FPU rounds to nearest-even, x - 0.5 for an odd integer x would give x - 1 -- confirm
+
+ #else
+
+	return int (floorf (x));
+
+ #endif // Compiler
+
+#else  // fstb_ARCHI_X86
+
+	return int (floorf (x));
+
+#endif // fstb_ARCHI_X86
+}
 
-  #if defined (_WIN64) || defined (__64BIT__) || defined (__amd64__) || defined (__x86_64__)
 
-	const double   xx = x + x + round_toward_m_i;
+
+int	floor_int (double x) noexcept
+{
+	return floor_int (float (x));
+}
+
+
+
+// May not give the right result for very small negative values.
+int	floor_int_accurate (double x) noexcept
+{
+	assert (x <= double (INT_MAX));
+	assert (x >= static_cast <double> (INT_MIN));
+
+#if (fstb_ARCHI == fstb_ARCHI_X86)
+
+ #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64
+
+	constexpr float   round_toward_m_i = -0.5f;
+	const double   xx     = x + x + round_toward_m_i;
 	const __m128d  x_128d = _mm_set_sd (xx);
-	i = _mm_cvtsd_si32 (x_128d);
-	i >>= 1;
+	const int      i      = _mm_cvtsd_si32 (x_128d) >> 1;
+	return i;
 
-  #else
+ #elif defined (_MSC_VER)
 
+	assert (x <= double (INT_MAX/2));
+	assert (x >= double (INT_MIN/2));
+	int            i;
+	static const float   round_toward_m_i = -0.5f;
 	__asm
 	{
 		fld            x
@@ -186,21 +341,14 @@ int	floor_int (double x)
 		fistp          i
 		sar            i, 1
 	}
-
-  #endif
-
-	using namespace std;
 	assert (i == int (floor (x)) || fabs (i - x) < 1e-10);
-
-	return (i);
+	return i;
 
  #elif defined (__GNUC__)
 
 	assert (x <= double (INT_MAX/2));
 	assert (x >= double (INT_MIN/2));
-
-	int				i;
-	
+	int				i;	
 	static const float	round_toward_m_i = -0.5f;
 	asm (
 		"fldl				%[x]				\n"
@@ -216,50 +364,55 @@ int	floor_int (double x)
 	
 	assert (i == int (floor (x)) || fabs (i - x) < 1e-10);
 
-	return (i);
+	return i;
 
  #else
 
 	// Slow
-	return (int (floor (x)));
+	return int (floor (x));
 
  #endif // Compiler
 
 #else  // fstb_ARCHI_X86
 
 	// Slow
-	return (int (floor (x)));
+	return int (floor (x));
 
 #endif // fstb_ARCHI_X86
 }
 
 
 
+int64_t	floor_int64 (double x) noexcept
+{
+	return int64_t (floor (x));
+}
+
+
+
 // May not give the right result for very small positive values.
-int	ceil_int (double x)
+int	ceil_int (double x) noexcept
 {
 	assert (x <= double (INT_MAX));
-	assert (x >= double (INT_MIN));
+	assert (x >= static_cast <double> (INT_MIN));
 
 #if (fstb_ARCHI == fstb_ARCHI_X86)
 
- #if (defined (_MSC_VER))
+ #if defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64
+
+	constexpr float   round_toward_p_i = -0.5f;
+	const double   xx     = round_toward_p_i - (x + x);
+	const __m128d  x_128d = _mm_set_sd (xx);
+	const int      i      = _mm_cvtsd_si32 (x_128d) >> 1;
+	return -i;
+
+ #elif (defined (_MSC_VER))
 
 	assert (x <= double (INT_MAX/2));
 	assert (x >= double (INT_MIN/2));
 
 	int            i;
 	static const float   round_toward_p_i = -0.5f;
-
-  #if defined (_WIN64) || defined (__64BIT__) || defined (__amd64__) || defined (__x86_64__)
-
-	const double   xx = round_toward_p_i - (x + x);
-	const __m128d  x_128d = _mm_set_sd (xx);
-	i = _mm_cvtsd_si32 (x_128d);
-	i >>= 1;
-
-  #else
-
 	__asm
 	{
 		fld            x
@@ -268,20 +421,14 @@ int	ceil_int (double x)
 		fistp          i
 		sar            i, 1
 	}
-
-  #endif
-
 	assert (-i == int (ceil (x)) || fabs (-i - x) < 1e-10);
-
-	return (-i);
+	return -i;
 
  #elif defined (__GNUC__)
 
 	assert (x <= double (INT_MAX/2));
 	assert (x >= double (INT_MIN/2));
-
 	int				i;
-
 	static const float	round_toward_p_i = -0.5f;
 	asm (
 		"fldl				%[x]				\n"
@@ -294,31 +441,40 @@ int	ceil_int (double x)
 	,	[x]	"m"	(x)
 	:	//"st"
 	);
-
-	using namespace std;
 	assert (-i == int (ceil (x)) || fabs (-i - x) < 1e-10);
-
-	return (-i);
+	return -i;
 
  #else
 
 	// Slow
-	return (int (ceil (x)));
+	return int (ceil (x));
 
  #endif
 
 #else
 
 	// Slow
-	return (int (ceil (x)));
+	return int (ceil (x));
+
+#endif
+}
 
+
+
+template <class T>
+int	trunc_int (T x) noexcept
+{
+#if (fstb_ARCHI == fstb_ARCHI_X86) && (defined (fstb_HAS_SIMD) || fstb_WORD_SIZE == 64)
+	return _mm_cvtt_ss2si (_mm_set1_ps (float (x)));
+#else
+	return int (x);
 #endif
 }
 
 
 
 template <class T>
-int	conv_int_fast (T x)
+int	conv_int_fast (T x) noexcept
 {
 	static_assert (std::is_floating_point <T>::value, "T must be floating point");
 
@@ -367,13 +523,13 @@ int	conv_int_fast (T x)
 
 #endif
 
-	return (p);
+	return p;
 }
 
 
 
 template <class T>
-bool	is_null (T val, T eps)
+constexpr bool	is_null (T val, T eps) noexcept
 {
 	static_assert (std::is_floating_point <T>::value, "T must be floating point");
 	assert (eps >= 0);
@@ -384,18 +540,18 @@ bool	is_null (T val, T eps)
 
 
 template <class T>
-bool	is_eq (T v1, T v2, T eps)
+constexpr bool	is_eq (T v1, T v2, T eps) noexcept
 {
 	static_assert (std::is_floating_point <T>::value, "T must be floating point");
 	assert (eps >= 0);
 
-	return (is_null (v2 - v1, eps));
+	return is_null (v2 - v1, eps);
 }
 
 
 
 template <class T>
-bool	is_eq_rel (T v1, T v2, T tol)
+constexpr bool	is_eq_rel (T v1, T v2, T tol) noexcept
 {
 	static_assert (std::is_floating_point <T>::value, "T must be floating point");
 	assert (tol >= 0);
@@ -404,7 +560,7 @@ bool	is_eq_rel (T v1, T v2, T tol)
 	const T        v2a = T (fabs (v2));
 	const T        eps = std::max (v1a, v2a) * tol;
 
-	return (is_eq (v1, v2, eps));
+	return is_eq (v1, v2, eps);
 }
 
 
@@ -422,7 +578,7 @@ Throws: Nothing
 ==============================================================================
 */
 
-int	get_prev_pow_2 (uint32_t x)
+int	get_prev_pow_2 (uint32_t x) noexcept
 {
 	assert (x > 0);
 
@@ -467,21 +623,127 @@ int	get_prev_pow_2 (uint32_t x)
 
 #endif
 
-	return (int (p));
+	return int (p);
 }
 
 
 
-double	sinc (double x)
+/*
+==============================================================================
+Name: get_next_pow_2
+Description:
+	Computes the exponent of the power of two equal to or immediately greater
+	than the parameter. It is the base-2 log rounded toward plus infinity.
+Input parameters:
+	- x: Number whose base-2 log we want to compute. Must be > 0.
+Returns: The exponent
+Throws: Nothing
+==============================================================================
+*/
+
+int	get_next_pow_2 (uint32_t x) noexcept
 {
+	assert (x > 0);
+
+#if (fstb_ARCHI == fstb_ARCHI_X86) && defined (_MSC_VER)
+
+  #if ((_MSC_VER / 100) < 14)
+
+	-- x;
+	int				p;
+
 	if (x == 0)
 	{
-		return (1);
+		p = 0;
+	}
+	else
+	{
+		__asm
+		{
+			xor				eax, eax
+			bsr				eax, x
+			inc				eax
+			mov				p, eax
+		}
+	}
+
+  #else
+
+	unsigned long	p;
+	if (_BitScanReverse (&p, x - 1) == 0)
+	{
+		p = 0;
+	}
+	else
+	{
+		++ p;
+	}
+
+  #endif
+
+#else
+
+	--x;
+	int				p = 0;
+
+	while ((x & ~(uint32_t (0xFFFFL))) != 0)
+	{
+		p += 16;
+		x >>= 16;
+	}
+	while ((x & ~(uint32_t (0xFL))) != 0)
+	{
+		p += 4;
+		x >>= 4;
+	}
+	while (x > 0)
+	{
+		++p;
+		x >>= 1;
+	}
+
+#endif
+
+	return int (p);
+}
+
+
+
+constexpr double	sinc (double x) noexcept
+{
+	if (x == 0)
+	{
+		return 1;
 	}
 
 	const double   xp = x * PI;
 
-	return (sin (xp) / xp);
+	return sin (xp) / xp;
+}
+
+
+
+double  pseudo_exp (double x, double c) noexcept
+{
+	assert (x >= 0);
+	assert (c > 0);
+
+	const double   num = exp (c * x) - 1;
+	const double   den = exp (c    ) - 1;
+
+	return num / den;
+}
+
+
+
+double  pseudo_log (double y, double c) noexcept
+{
+	assert (y >= 0);
+	assert (c > 0);
+
+	const double   num = log (y * (exp (c) - 1) + 1);
+
+	return num / c;
 }
 
 
@@ -491,28 +753,318 @@ class fnc_ShiftGeneric
 {
 public:
 	static_assert (S < int (sizeof (T) * CHAR_BIT), "Shift too large");
-	static T sh (T x) { return (x << S); }
+	static constexpr T sh (T x) noexcept { return x << S; }
 };
 template <class T, int S>
 class fnc_ShiftGeneric <T, S, false>
 {
 public:
 	static_assert (S < int (sizeof (T) * CHAR_BIT), "Shift too large");
-	static T sh (T x) { return (x >> S); }
+	static constexpr T sh (T x) noexcept { return x >> S; }
 };
 
 template <class T, int S>
-T	sshift_l (T x)
+constexpr T	sshift_l (T x) noexcept
 {
 	static_assert (std::is_integral <T>::value, "T must be integer");
-	return (fnc_ShiftGeneric <T, (S < 0) ? -S : S, (S > 0)>::sh (x));
+	return fnc_ShiftGeneric <T, (S < 0) ? -S : S, (S > 0)>::sh (x);
 }
 
 template <class T, int S>
-T	sshift_r (T x)
+constexpr T	sshift_r (T x) noexcept
 {
 	static_assert (std::is_integral <T>::value, "T must be integer");
-	return (fnc_ShiftGeneric <T, (S < 0) ? -S : S, (S < 0)>::sh (x));
+	return fnc_ShiftGeneric <T, (S < 0) ? -S : S, (S < 0)>::sh (x);
+}
+
+
+
+template <class T>
+constexpr T	sq (T x) noexcept
+{
+	return x * x;
+}
+
+
+
+template <class T>
+constexpr T	cube (T x) noexcept
+{
+	return x * x * x;
+}
+
+
+
+// U must be a signed integer type
+template <class T, class U>
+constexpr T	ipow (T x, U n) noexcept
+{
+	const U			abs_n = std::abs (n);
+	const T			z (ipowp (x, abs_n));
+
+	return (n < U (0)) ? T (1) / z : z;
+}
+
+
+
+// U must be an integer type (signed or not)
+template <class T, class U>
+constexpr T	ipowp (T x, U n) noexcept
+{
+	assert (! (n < U (0)));
+
+#if 1
+	T					z (1);
+
+	while (n != U (0))
+	{
+		if ((n & U (1)) != U (0))
+		{
+			z *= x;
+		}
+		n >>= 1;
+		x *= x;
+	}
+#else
+	T              z (((n & U (1)) != U (0)) ? x : 1);
+	n >>= 1;
+	if (n > U (0))
+	{
+		x *= x;
+		z *= ipowp (x, n);
+	}
+#endif
+
+	return z;
+}
+
+
+
+// Result looks optimal with all optimisations enabled
+template <int N, class T>
+constexpr T	ipowpc (T x) noexcept
+{
+	static_assert (N >= 0, "N must be positive or null.");
+
+#if (__cplusplus >= 201402L)
+	if (N == 0)
+	{
+		return T (1);
+	}
+	else if (N > 1)
+	{
+		T              y = ipowpc <N / 2> (x);
+		y *= y;
+		if ((N & 1) != 0)
+		{
+			y *= x;
+		}
+		return y;
+	}
+
+	return x;
+#else
+	return
+		  (N == 0) ? 1
+		: (N >  1) ? (sq (ipowpc <N / 2> (x)) * (((N & 1) != 0) ? x : 1))
+		: x;
+#endif
+}
+
+
+
+template <class T>
+constexpr T	rcp_uint (int x) noexcept
+{
+	static_assert (std::is_floating_point <T>::value, "T must be floating point");
+
+	constexpr int  table_len           = 256;
+	constexpr T    rcp_arr [table_len] =
+	{
+		T (0.00000000000000E+00), T (1.00000000000000E+00), T (5.00000000000000E-01), T (3.33333333333333E-01),
+		T (2.50000000000000E-01), T (2.00000000000000E-01), T (1.66666666666667E-01), T (1.42857142857143E-01),
+		T (1.25000000000000E-01), T (1.11111111111111E-01), T (1.00000000000000E-01), T (9.09090909090909E-02),
+		T (8.33333333333333E-02), T (7.69230769230769E-02), T (7.14285714285714E-02), T (6.66666666666667E-02),
+		T (6.25000000000000E-02), T (5.88235294117647E-02), T (5.55555555555556E-02), T (5.26315789473684E-02),
+		T (5.00000000000000E-02), T (4.76190476190476E-02), T (4.54545454545455E-02), T (4.34782608695652E-02),
+		T (4.16666666666667E-02), T (4.00000000000000E-02), T (3.84615384615385E-02), T (3.70370370370370E-02),
+		T (3.57142857142857E-02), T (3.44827586206897E-02), T (3.33333333333333E-02), T (3.22580645161290E-02),
+		T (3.12500000000000E-02), T (3.03030303030303E-02), T (2.94117647058823E-02), T (2.85714285714286E-02),
+		T (2.77777777777778E-02), T (2.70270270270270E-02), T (2.63157894736842E-02), T (2.56410256410256E-02),
+		T (2.50000000000000E-02), T (2.43902439024390E-02), T (2.38095238095238E-02), T (2.32558139534884E-02),
+		T (2.27272727272727E-02), T (2.22222222222222E-02), T (2.17391304347826E-02), T (2.12765957446808E-02),
+		T (2.08333333333333E-02), T (2.04081632653061E-02), T (2.00000000000000E-02), T (1.96078431372549E-02),
+		T (1.92307692307692E-02), T (1.88679245283019E-02), T (1.85185185185185E-02), T (1.81818181818182E-02),
+		T (1.78571428571429E-02), T (1.75438596491228E-02), T (1.72413793103448E-02), T (1.69491525423729E-02),
+		T (1.66666666666667E-02), T (1.63934426229508E-02), T (1.61290322580645E-02), T (1.58730158730159E-02),
+		T (1.56250000000000E-02), T (1.53846153846154E-02), T (1.51515151515152E-02), T (1.49253731343284E-02),
+		T (1.47058823529412E-02), T (1.44927536231884E-02), T (1.42857142857143E-02), T (1.40845070422535E-02),
+		T (1.38888888888889E-02), T (1.36986301369863E-02), T (1.35135135135135E-02), T (1.33333333333333E-02),
+		T (1.31578947368421E-02), T (1.29870129870130E-02), T (1.28205128205128E-02), T (1.26582278481013E-02),
+		T (1.25000000000000E-02), T (1.23456790123457E-02), T (1.21951219512195E-02), T (1.20481927710843E-02),
+		T (1.19047619047619E-02), T (1.17647058823529E-02), T (1.16279069767442E-02), T (1.14942528735632E-02),
+		T (1.13636363636364E-02), T (1.12359550561798E-02), T (1.11111111111111E-02), T (1.09890109890110E-02),
+		T (1.08695652173913E-02), T (1.07526881720430E-02), T (1.06382978723404E-02), T (1.05263157894737E-02),
+		T (1.04166666666667E-02), T (1.03092783505155E-02), T (1.02040816326531E-02), T (1.01010101010101E-02),
+		T (1.00000000000000E-02), T (9.90099009900990E-03), T (9.80392156862745E-03), T (9.70873786407767E-03),
+		T (9.61538461538462E-03), T (9.52380952380952E-03), T (9.43396226415094E-03), T (9.34579439252336E-03),
+		T (9.25925925925926E-03), T (9.17431192660550E-03), T (9.09090909090909E-03), T (9.00900900900901E-03),
+		T (8.92857142857143E-03), T (8.84955752212389E-03), T (8.77192982456140E-03), T (8.69565217391304E-03),
+		T (8.62068965517241E-03), T (8.54700854700855E-03), T (8.47457627118644E-03), T (8.40336134453782E-03),
+		T (8.33333333333333E-03), T (8.26446280991736E-03), T (8.19672131147541E-03), T (8.13008130081301E-03),
+		T (8.06451612903226E-03), T (8.00000000000000E-03), T (7.93650793650794E-03), T (7.87401574803150E-03),
+		T (7.81250000000000E-03), T (7.75193798449612E-03), T (7.69230769230769E-03), T (7.63358778625954E-03),
+		T (7.57575757575758E-03), T (7.51879699248120E-03), T (7.46268656716418E-03), T (7.40740740740741E-03),
+		T (7.35294117647059E-03), T (7.29927007299270E-03), T (7.24637681159420E-03), T (7.19424460431655E-03),
+		T (7.14285714285714E-03), T (7.09219858156028E-03), T (7.04225352112676E-03), T (6.99300699300699E-03),
+		T (6.94444444444444E-03), T (6.89655172413793E-03), T (6.84931506849315E-03), T (6.80272108843537E-03),
+		T (6.75675675675676E-03), T (6.71140939597315E-03), T (6.66666666666667E-03), T (6.62251655629139E-03),
+		T (6.57894736842105E-03), T (6.53594771241830E-03), T (6.49350649350649E-03), T (6.45161290322581E-03),
+		T (6.41025641025641E-03), T (6.36942675159236E-03), T (6.32911392405063E-03), T (6.28930817610063E-03),
+		T (6.25000000000000E-03), T (6.21118012422360E-03), T (6.17283950617284E-03), T (6.13496932515337E-03),
+		T (6.09756097560976E-03), T (6.06060606060606E-03), T (6.02409638554217E-03), T (5.98802395209581E-03),
+		T (5.95238095238095E-03), T (5.91715976331361E-03), T (5.88235294117647E-03), T (5.84795321637427E-03),
+		T (5.81395348837209E-03), T (5.78034682080925E-03), T (5.74712643678161E-03), T (5.71428571428571E-03),
+		T (5.68181818181818E-03), T (5.64971751412429E-03), T (5.61797752808989E-03), T (5.58659217877095E-03),
+		T (5.55555555555556E-03), T (5.52486187845304E-03), T (5.49450549450549E-03), T (5.46448087431694E-03),
+		T (5.43478260869565E-03), T (5.40540540540541E-03), T (5.37634408602151E-03), T (5.34759358288770E-03),
+		T (5.31914893617021E-03), T (5.29100529100529E-03), T (5.26315789473684E-03), T (5.23560209424084E-03),
+		T (5.20833333333333E-03), T (5.18134715025907E-03), T (5.15463917525773E-03), T (5.12820512820513E-03),
+		T (5.10204081632653E-03), T (5.07614213197969E-03), T (5.05050505050505E-03), T (5.02512562814070E-03),
+		T (5.00000000000000E-03), T (4.97512437810945E-03), T (4.95049504950495E-03), T (4.92610837438424E-03),
+		T (4.90196078431373E-03), T (4.87804878048781E-03), T (4.85436893203883E-03), T (4.83091787439613E-03),
+		T (4.80769230769231E-03), T (4.78468899521531E-03), T (4.76190476190476E-03), T (4.73933649289100E-03),
+		T (4.71698113207547E-03), T (4.69483568075117E-03), T (4.67289719626168E-03), T (4.65116279069768E-03),
+		T (4.62962962962963E-03), T (4.60829493087558E-03), T (4.58715596330275E-03), T (4.56621004566210E-03),
+		T (4.54545454545455E-03), T (4.52488687782805E-03), T (4.50450450450451E-03), T (4.48430493273543E-03),
+		T (4.46428571428571E-03), T (4.44444444444444E-03), T (4.42477876106195E-03), T (4.40528634361234E-03),
+		T (4.38596491228070E-03), T (4.36681222707424E-03), T (4.34782608695652E-03), T (4.32900432900433E-03),
+		T (4.31034482758621E-03), T (4.29184549356223E-03), T (4.27350427350427E-03), T (4.25531914893617E-03),
+		T (4.23728813559322E-03), T (4.21940928270042E-03), T (4.20168067226891E-03), T (4.18410041841004E-03),
+		T (4.16666666666667E-03), T (4.14937759336100E-03), T (4.13223140495868E-03), T (4.11522633744856E-03),
+		T (4.09836065573771E-03), T (4.08163265306122E-03), T (4.06504065040650E-03), T (4.04858299595142E-03),
+		T (4.03225806451613E-03), T (4.01606425702811E-03), T (4.00000000000000E-03), T (3.98406374501992E-03),
+		T (3.96825396825397E-03), T (3.95256916996047E-03), T (3.93700787401575E-03), T (3.92156862745098E-03),
+	};
+
+	assert (x > 0);
+
+	if (x < table_len)
+	{
+		return rcp_arr [x];
+	}
+
+	return T (1) / T (x);
+}
+
+
+
+template <class T>
+constexpr T	lerp (T v0, T v1, T p) noexcept
+{
+	return v0 + p * (v1 - v0);
+}
+
+
+
+// Finds the x position of the extremum (min or max) of the parabolic-
+// interpolated curve passing through (-1, r1), (0, r2) and (+1, r3).
+// The curve is implicitly defined by:
+// f(x) = ((r3 + r1) / 2 - r2) * x^2 + ((r3 - r1) / 2) * x + r2
+// The points must not be aligned so the extremum exists.
+// It is not necessarily located between -1 and 1.
+template <class T>
+inline constexpr T       find_extremum_pos_parabolic (T r1, T r2, T r3)
+{
+	const T        den = T (2) * r2 - (r3 + r1);
+	assert (den != T (0));
+
+	const T        pos = (r3 - r1) * T (0.5) / den;
+
+	return pos;
+}
+
+
+
+namespace detail
+{
+	template <typename T, std::size_t... IS>
+	constexpr std::array <T, sizeof... (IS)>
+	make_array (const T &init_val, std::index_sequence <IS...>)
+	{
+		return {{ (static_cast <void> (IS), init_val)... }};
+	}
+}
+
+// Initializes all the elements of an array with a specified value
+// Source: https://stackoverflow.com/a/41259045
+template <std::size_t N, typename T>
+constexpr std::array <T, N> make_array (const T &init_val)
+{
+	return detail::make_array (init_val, std::make_index_sequence <N> ());
+}
+
+
+
+template <class T>
+T	read_unalign (const void *ptr) noexcept
+{
+	static_assert (
+		std::is_trivially_copyable <T>::value, "T must be trivially copiable"
+	);
+	assert (ptr != nullptr);
+
+	T              val;
+	memcpy (&val, ptr, sizeof (val));
+	return val;
+}
+
+
+
+template <class T>
+void	write_unalign (void *ptr, T val) noexcept
+{
+	static_assert (
+		std::is_trivially_copyable <T>::value, "T must be trivially copiable"
+	);
+	assert (ptr != nullptr);
+
+	memcpy (ptr, &val, sizeof (val));
+}
+
+
+
+// std::copy is already optimized like this but uses memmove instead of
+// memcpy.
+template <typename T>
+void    copy_no_overlap (T * fstb_RESTRICT dst_ptr, const T * fstb_RESTRICT src_ptr, int nbr_elt) noexcept
+{
+	assert (dst_ptr != nullptr);
+	assert (src_ptr != nullptr);
+	assert (nbr_elt > 0);
+
+	if (std::is_trivially_copyable <T>::value)
+	{
+		memcpy (dst_ptr, src_ptr, nbr_elt * sizeof (*dst_ptr));
+	}
+	else
+	{
+		std::copy (src_ptr, src_ptr + nbr_elt, dst_ptr);
+	}
+}
+
+
+
+template <typename T>
+bool	is_ptr_align_nz (const T *ptr, int a) noexcept
+{
+	assert (a > 0);
+	assert (is_pow_2 (a));
+
+	return (
+		   ptr != nullptr
+		&& (reinterpret_cast <intptr_t> (ptr) & (a - 1)) == 0
+	);
 }
 
 
diff --git a/src/fstb/fnc.cpp b/src/fstb/fnc_fstb.cpp
similarity index 100%
rename from src/fstb/fnc.cpp
rename to src/fstb/fnc_fstb.cpp
diff --git a/src/main.cpp b/src/main.cpp
index 09179fc..cd9d8ab 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -164,7 +164,7 @@ class TmpHistLuma
 				const int      h = _vsapi.getFrameHeight (dst_ptr, plane);
 				if (_vi_out.format->bytesPerSample == 2)
 				{
-					const uint16_t    fill_cst = 1 << (bits - 1);
+					const uint16_t    fill_cst = uint16_t (1 << (bits - 1));
 					for (int y = 0; y < h; ++y)
 					{
 						for (int x = 0; x < w; ++x)
@@ -555,6 +555,9 @@ VS_EXTERNAL_API (void) VapourSynthPluginInit (::VSConfigPlugin config_fnc, ::VSR
 		"staticnoise:int:opt;"
 		"cpuopt:int:opt;"
 		"patsize:int:opt;"
+		"tpdfo:int:opt;"
+		"tpdfn:int:opt;"
+		"corplane:int:opt;"
 		, &vsutl::Redirect <fmtc::Bitdepth>::create, 0, plugin_ptr
 	);
 
diff --git a/src/vsutl/FrameRefSPtr.h b/src/vsutl/FrameRefSPtr.h
index f63dc04..6c13dcf 100644
--- a/src/vsutl/FrameRefSPtr.h
+++ b/src/vsutl/FrameRefSPtr.h
@@ -37,10 +37,22 @@ namespace vsutl
 
 
 
+class FrameRefSPtr_FncWrapper
+{
+public:
+	static inline const ::VSFrameRef * clone (const ::VSAPI &vsapi, const ::VSFrameRef *f) VS_NOEXCEPT
+	{
+		return (*vsapi.cloneFrameRef) (f);
+	}
+	static inline void free (const ::VSAPI &vsapi, const ::VSFrameRef *f) VS_NOEXCEPT
+	{
+		(*vsapi.freeFrame) (f);
+	}
+};
+
 typedef	ObjRefSPtr <
 	const ::VSFrameRef,
-	&VSAPI::cloneFrameRef,
-	&VSAPI::freeFrame
+	FrameRefSPtr_FncWrapper
 >	FrameRefSPtr;
 
 
diff --git a/src/vsutl/FuncRefSPtr.h b/src/vsutl/FuncRefSPtr.h
index d6d435f..3e581ae 100644
--- a/src/vsutl/FuncRefSPtr.h
+++ b/src/vsutl/FuncRefSPtr.h
@@ -37,10 +37,22 @@ namespace vsutl
 
 
 
+class FuncRefSPtr_FncWrapper
+{
+public:
+	static inline ::VSFuncRef * clone (const ::VSAPI &vsapi, ::VSFuncRef *func) VS_NOEXCEPT
+	{
+		return (*vsapi.cloneFuncRef) (func);
+	}
+	static inline void free (const ::VSAPI &vsapi, ::VSFuncRef *func) VS_NOEXCEPT
+	{
+		(*vsapi.freeFunc) (func);
+	}
+};
+
 typedef	ObjRefSPtr <
 	::VSFuncRef,
-	&VSAPI::cloneFuncRef,
-	&VSAPI::freeFunc
+	FuncRefSPtr_FncWrapper
 >	FuncRefSPtr;
 
 
diff --git a/src/vsutl/NodeRefSPtr.h b/src/vsutl/NodeRefSPtr.h
index d0ecb04..5536f8b 100644
--- a/src/vsutl/NodeRefSPtr.h
+++ b/src/vsutl/NodeRefSPtr.h
@@ -37,10 +37,22 @@ namespace vsutl
 
 
 
+class NodeRefSPtr_FncWrapper
+{
+public:
+	static inline ::VSNodeRef * clone (const ::VSAPI &vsapi, ::VSNodeRef *node) VS_NOEXCEPT
+	{
+		return (*vsapi.cloneNodeRef) (node);
+	}
+	static inline void free (const ::VSAPI &vsapi, ::VSNodeRef *node) VS_NOEXCEPT
+	{
+		(*vsapi.freeNode) (node);
+	}
+};
+
 typedef	ObjRefSPtr <
 	::VSNodeRef,
-	&VSAPI::cloneNodeRef,
-	&VSAPI::freeNode
+	NodeRefSPtr_FncWrapper
 >	NodeRefSPtr;
 
 
diff --git a/src/vsutl/ObjRefSPtr.h b/src/vsutl/ObjRefSPtr.h
index 01b1f45..bf9c18a 100644
--- a/src/vsutl/ObjRefSPtr.h
+++ b/src/vsutl/ObjRefSPtr.h
@@ -9,8 +9,10 @@ Template parameters:
 
 - T: The type of the object possibly with const, but without pointer
 	(currently ::VSNodeRef, const ::VSFrameRef or const ::VSFuncRef).
-- FC: VSAPI member pointer to the function for cloning const T *.
-- FF: VSAPI member pointer to the function for freeing const T *.
+
+- FW: Wrapper class for clone and free functions. Requires:
+	static inline T * FW::clone (const ::VSAPI &, T *) VS_NOEXCEPT;
+	static inline void FW::free (const ::VSAPI &, T *) VS_NOEXCEPT;
 
 --- Legal stuff ---
 
@@ -40,20 +42,12 @@ To Public License, Version 2, as published by Sam Hocevar. See
 
 
 
-#if (__cplusplus >= 201703L)
-	#define vsutl_ObjRefSPtr_VS_NOEXCEPT VS_NOEXCEPT
-#else
-	#define vsutl_ObjRefSPtr_VS_NOEXCEPT
-#endif
-
-
-
 namespace vsutl
 {
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
+template <typename T, typename FW>
 class ObjRefSPtr
 {
 
@@ -63,11 +57,14 @@ class ObjRefSPtr
 
 	               ObjRefSPtr () = default;
 	               ObjRefSPtr (T *ptr, const ::VSAPI &vsapi);
-	               ObjRefSPtr (const ObjRefSPtr <T, FC, FF> &other);
+	               ObjRefSPtr (const ObjRefSPtr <T, FW> &other);
+	               ObjRefSPtr (ObjRefSPtr <T, FW> &&other);
 	virtual        ~ObjRefSPtr ();
 
-	ObjRefSPtr <T, FC, FF> &
-	               operator = (const ObjRefSPtr <T, FC, FF> &other);
+	ObjRefSPtr <T, FW> &
+	               operator = (const ObjRefSPtr <T, FW> &other);
+	ObjRefSPtr <T, FW> &
+	               operator = (ObjRefSPtr <T, FW> &&other);
 
 	T *            operator -> () const;
 	T &            operator * () const;
@@ -89,8 +86,8 @@ class ObjRefSPtr
 
 	void           release_resource ();
 
-	T *            _obj_ptr   = 0;
-	const ::VSAPI* _vsapi_ptr = 0;      // Can be 0 only if _obj_ptr is 0 too.
+	T *            _obj_ptr   = nullptr;
+	const ::VSAPI* _vsapi_ptr = nullptr;   // Can be 0 only if _obj_ptr is 0 too.
 
 
 
@@ -98,8 +95,8 @@ class ObjRefSPtr
 
 private:
 
-	bool           operator == (const ObjRefSPtr <T, FC, FF> &other) const;
-	bool           operator != (const ObjRefSPtr <T, FC, FF> &other) const;
+	bool           operator == (const ObjRefSPtr <T, FW> &other) const;
+	bool           operator != (const ObjRefSPtr <T, FW> &other) const;
 
 };	// class ObjRefSPtr
 
diff --git a/src/vsutl/ObjRefSPtr.hpp b/src/vsutl/ObjRefSPtr.hpp
index cb81623..a6e46cf 100644
--- a/src/vsutl/ObjRefSPtr.hpp
+++ b/src/vsutl/ObjRefSPtr.hpp
@@ -40,25 +40,25 @@ namespace vsutl
 
 
 // Does not increase the reference count.
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-ObjRefSPtr <T, FC, FF>::ObjRefSPtr (T *ptr, const ::VSAPI &vsapi)
+template <typename T, typename FW>
+ObjRefSPtr <T, FW>::ObjRefSPtr (T *ptr, const ::VSAPI &vsapi)
 :	_obj_ptr (ptr)
 ,	_vsapi_ptr (&vsapi)
 {
-	assert (_obj_ptr == 0 || _vsapi_ptr != 0);
+	assert (_obj_ptr == nullptr || _vsapi_ptr != nullptr);
 }
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-ObjRefSPtr <T, FC, FF>::ObjRefSPtr (const ObjRefSPtr <T, FC, FF> &other)
-:	_obj_ptr (0)
+template <typename T, typename FW>
+ObjRefSPtr <T, FW>::ObjRefSPtr (const ObjRefSPtr <T, FW> &other)
+:	_obj_ptr (nullptr)
 ,	_vsapi_ptr (other._vsapi_ptr)
 {
-	if (other._obj_ptr != 0)
+	if (other._obj_ptr != nullptr)
 	{
-		_obj_ptr = (_vsapi_ptr->*FC) (other._obj_ptr);
-		if (_obj_ptr == 0)
+		_obj_ptr = FW::clone (*_vsapi_ptr, other._obj_ptr);
+		if (_obj_ptr == nullptr)
 		{
 			throw std::runtime_error ("Cannot clone VS object reference.");
 		}
@@ -67,31 +67,41 @@ ObjRefSPtr <T, FC, FF>::ObjRefSPtr (const ObjRefSPtr <T, FC, FF> &other)
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-ObjRefSPtr <T, FC, FF>::~ObjRefSPtr ()
+template <typename T, typename FW>
+ObjRefSPtr <T, FW>::ObjRefSPtr (ObjRefSPtr <T, FW> &&other)
+:	_obj_ptr (other._obj_ptr)
+,	_vsapi_ptr (other._vsapi_ptr)
+{
+	other._obj_ptr = nullptr;
+}
+
+
+
+template <typename T, typename FW>
+ObjRefSPtr <T, FW>::~ObjRefSPtr ()
 {
 	release_resource ();
 }
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-ObjRefSPtr <T, FC, FF> &	ObjRefSPtr <T, FC, FF>::operator = (const ObjRefSPtr <T, FC, FF> &other)
+template <typename T, typename FW>
+ObjRefSPtr <T, FW> &	ObjRefSPtr <T, FW>::operator = (const ObjRefSPtr <T, FW> &other)
 {
 	if (other._obj_ptr != _obj_ptr)
 	{
-		T *            tmp_ptr = 0;
+		T *            tmp_ptr = nullptr;
 
-		if (other._obj_ptr != 0)
+		if (other._obj_ptr != nullptr)
 		{
-			if (_vsapi_ptr == 0)
+			if (_vsapi_ptr == nullptr)
 			{
-				assert (other._vsapi_ptr != 0);
+				assert (other._vsapi_ptr != nullptr);
 				_vsapi_ptr = other._vsapi_ptr;
 			}
 
-			tmp_ptr = (_vsapi_ptr->*FC) (other._obj_ptr);
-			if (tmp_ptr == 0)
+			tmp_ptr = FW::clone (*_vsapi_ptr, other._obj_ptr);
+			if (tmp_ptr == nullptr)
 			{
 				throw std::runtime_error ("Cannot clone VS object reference.");
 			}
@@ -102,50 +112,65 @@ ObjRefSPtr <T, FC, FF> &	ObjRefSPtr <T, FC, FF>::operator = (const ObjRefSPtr <T
 		_obj_ptr = tmp_ptr;
 	}
 
-	return (*this);
+	return *this;
+}
+
+
+
+template <typename T, typename FW>
+ObjRefSPtr <T, FW> &	ObjRefSPtr <T, FW>::operator = (ObjRefSPtr <T, FW> &&other)
+{
+	if (other._obj_ptr != _obj_ptr)   // Same object on both sides: keep as is
+	{
+		release_resource ();           // Free our current reference first, else it leaks
+		_obj_ptr   = other._obj_ptr;   // Take over the reference without cloning it
+		_vsapi_ptr = other._vsapi_ptr;
+		other._obj_ptr = nullptr;      // other no longer owns anything
+	}
+	return *this;
 }
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-T *	ObjRefSPtr <T, FC, FF>::operator -> () const
+template <typename T, typename FW>
+T *	ObjRefSPtr <T, FW>::operator -> () const
 {
-	return (_obj_ptr);
+	return _obj_ptr;
 }
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-T &	ObjRefSPtr <T, FC, FF>::operator * () const
+template <typename T, typename FW>
+T &	ObjRefSPtr <T, FW>::operator * () const
 {
-	return (*_obj_ptr);
+	return *_obj_ptr;
 }
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-T *	ObjRefSPtr <T, FC, FF>::get () const
+template <typename T, typename FW>
+T *	ObjRefSPtr <T, FW>::get () const
 {
-	return (_obj_ptr);
+	return _obj_ptr;
 }
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-T *	ObjRefSPtr <T, FC, FF>::dup () const
+template <typename T, typename FW>
+T *	ObjRefSPtr <T, FW>::dup () const
 {
-	assert (_obj_ptr != 0);
-	assert (_vsapi_ptr != 0);
+	assert (_obj_ptr != nullptr);
+	assert (_vsapi_ptr != nullptr);
 
-	T *            tmp_ptr = (_vsapi_ptr->*FC) (_obj_ptr);
+	T *            tmp_ptr = FW::clone (*_vsapi_ptr, _obj_ptr);
 
-	return (tmp_ptr);
+	return tmp_ptr;
 }
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-void	ObjRefSPtr <T, FC, FF>::clear ()
+template <typename T, typename FW>
+void	ObjRefSPtr <T, FW>::clear ()
 {
 	release_resource ();
 }
@@ -160,14 +185,14 @@ void	ObjRefSPtr <T, FC, FF>::clear ()
 
 
 
-template <class T, T * (VS_CC *::VSAPI::*FC) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT, void (VS_CC *::VSAPI::*FF) (T *) vsutl_ObjRefSPtr_VS_NOEXCEPT>
-void	ObjRefSPtr <T, FC, FF>::release_resource ()
+template <typename T, typename FW>
+void	ObjRefSPtr <T, FW>::release_resource ()
 {
-	if (_obj_ptr != 0)
+	if (_obj_ptr != nullptr)
 	{
-		assert (_vsapi_ptr != 0);
-		(_vsapi_ptr->*FF) (_obj_ptr);
-		_obj_ptr = 0;
+		assert (_vsapi_ptr != nullptr);
+		FW::free (*_vsapi_ptr, _obj_ptr);
+		_obj_ptr = nullptr;
 	}
 }
 
diff --git a/src/vsutl/PlaneProcessor.cpp b/src/vsutl/PlaneProcessor.cpp
index cb027be..d8e3f68 100644
--- a/src/vsutl/PlaneProcessor.cpp
+++ b/src/vsutl/PlaneProcessor.cpp
@@ -174,7 +174,7 @@ void	PlaneProcessor::set_filter (const ::VSMap &in, ::VSMap &out, const ::VSVide
 						_vsapi.setError (&out, err_msg.c_str ());
 						ok_flag = false;
 					}
-					else if (   plane_content >= PlaneProcMode_NBR_ELT
+					else if (   plane_content >= double (PlaneProcMode_NBR_ELT)
 								|| (int_flag && -plane_content >= max_val))
 					{
 						const std::string err_msg =
@@ -229,7 +229,7 @@ const ::VSFrameRef *	PlaneProcessor::try_initial (::VSCore &core)
 			for (int plane_index = 0; plane_index < _nbr_planes; ++plane_index)
 			{
 				const double		val = _proc_mode_arr [plane_index];
-				if (val < PlaneProcMode_COPY1)
+				if (val < double (PlaneProcMode_COPY1))
 				{
 					fill_plane (
 						const_cast < ::VSFrameRef &> (*_blank_frame_sptr),
@@ -294,7 +294,7 @@ int	PlaneProcessor::process_frame (::VSFrameRef &dst, int n, void *frame_data_pt
 				copy_plane (dst, *src_sptr, plane_index);
 			}
 		}
-		else if (mode < PlaneProcMode_COPY1)
+		else if (mode_i < PlaneProcMode_COPY1)
 		{
 			fill_plane (dst, -mode, plane_index);
 		}
diff --git a/src/vsutl/fnc.cpp b/src/vsutl/fnc.cpp
index 5a5010b..30d6fa3 100644
--- a/src/vsutl/fnc.cpp
+++ b/src/vsutl/fnc.cpp
@@ -164,7 +164,7 @@ int	compute_plane_width (const ::VSFormat &fmt, int plane_index, int base_w)
 		base_w >>= fmt.subSamplingW;
 	}
 
-	return (base_w);
+	return base_w;
 }
 
 
@@ -181,7 +181,7 @@ int	compute_plane_height (const ::VSFormat &fmt, int plane_index, int base_h)
 		base_h >>= fmt.subSamplingH;
 	}
 
-	return (base_h);
+	return base_h;
 }
 
 
@@ -238,7 +238,7 @@ int	conv_str_to_chroma_subspl (int &ssh, int &ssv, std::string css)
 		ret_val = -1;
 	}
 
-	return (ret_val);
+	return ret_val;
 }
 
 
diff --git a/zip-release.bat b/zip-release.bat
index a826c35..991df48 100644
--- a/zip-release.bat
+++ b/zip-release.bat
@@ -13,6 +13,7 @@
 @mkdir "reltmp\win64"
 @xcopy /I "doc"                                "reltmp\doc"
 @xcopy /I "build\unix"                         "reltmp\build\unix"
+@xcopy /I "build\unix\m4"                      "reltmp\build\unix\m4"
 @xcopy /I "src\conc"                           "reltmp\src\conc"
 @xcopy /I "src\ffft"                           "reltmp\src\ffft"
 @xcopy /I "src\fmtc"                           "reltmp\src\fmtc"