From 3e53eee88aa96910bcccc6cbab09e6455a4ebc38 Mon Sep 17 00:00:00 2001
From: Martin Steinegger <themartinsteinegger@gmail.com>
Date: Fri, 18 Oct 2024 23:01:59 +0200
Subject: [PATCH] Update tantan

---
 CMakeLists.txt                       |   4 +
 lib/tantan/CMakeLists.txt            |   2 +
 lib/tantan/mcf_simd.h                | 538 ++++++++++++++++++++++++++
 lib/tantan/tantan.cpp                | 549 +++++++++++++++++++++++++++
 {src/commons => lib/tantan}/tantan.h |  56 ++-
 src/CMakeLists.txt                   |   2 +-
 src/alignment/PSSMMasker.h           |   6 +-
 src/commons/BaseMatrix.h             |   2 +-
 src/commons/CMakeLists.txt           |   2 -
 src/commons/tantan.cpp               | 506 ------------------------
 src/linclust/kmermatcher.cpp         |   4 +-
 src/prefiltering/IndexBuilder.cpp    |   5 +-
 src/util/masksequence.cpp            |   4 +-
 13 files changed, 1140 insertions(+), 540 deletions(-)
 create mode 100644 lib/tantan/CMakeLists.txt
 create mode 100644 lib/tantan/mcf_simd.h
 create mode 100644 lib/tantan/tantan.cpp
 rename {src/commons => lib/tantan}/tantan.h (68%)
 delete mode 100644 src/commons/tantan.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index adde42630..c0d920427 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -205,6 +205,10 @@ add_subdirectory(lib/tinyexpr EXCLUDE_FROM_ALL)
 include_directories(lib/microtar)
 add_subdirectory(lib/microtar)
 
+# tantan
+include_directories(lib/tantan)
+add_subdirectory(lib/tantan)
+
 # simde
 include_directories(lib/simde)
 
diff --git a/lib/tantan/CMakeLists.txt b/lib/tantan/CMakeLists.txt
new file mode 100644
index 000000000..b4746a56c
--- /dev/null
+++ b/lib/tantan/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_library(tantan tantan.cpp tantan.h mcf_simd.h)  # tantan sources compiled as their own library target
+set_target_properties(tantan PROPERTIES COMPILE_FLAGS "${MMSEQS_C_FLAGS}" LINK_FLAGS "${MMSEQS_C_FLAGS}")  # NOTE(review): tantan.cpp is C++ but the C flag set is applied -- confirm this is intended
diff --git a/lib/tantan/mcf_simd.h b/lib/tantan/mcf_simd.h
new file mode 100644
index 000000000..c5dd4687a
--- /dev/null
+++ b/lib/tantan/mcf_simd.h
@@ -0,0 +1,538 @@
+// Author: Martin C. Frith 2019
+// SPDX-License-Identifier: MPL-2.0
+
+#ifndef MCF_SIMD_HH
+#define MCF_SIMD_HH
+
+#if defined __SSE4_1__
+#include <immintrin.h>
+#elif defined __ARM_NEON
+#include <arm_neon.h>
+#endif
+
+#include <stddef.h>  // size_t
+
+namespace mcf {
+
+#if defined __AVX2__
+
+typedef __m256i SimdInt;  // one 256-bit register; the un-suffixed functions treat it as 8 x 32-bit ints
+typedef __m256i SimdUint1;  // same register type; the "1"-suffixed functions treat it as 32 x 8-bit lanes
+typedef __m256d SimdDbl;  // 4 x double
+
+const int simdBytes = 32;  // bytes per SIMD register
+
+static inline SimdInt simdZero() {
+  return _mm256_setzero_si256();
+}
+
+static inline SimdInt simdZero1() {
+  return _mm256_setzero_si256();
+}
+
+static inline SimdDbl simdZeroDbl() {
+  return _mm256_setzero_pd();
+}
+
+static inline SimdInt simdOnes1() {
+  return _mm256_set1_epi32(-1);  // every bit set, i.e. 0xFF in each byte
+}
+
+static inline SimdInt simdLoad(const void *p) {
+  return _mm256_loadu_si256((const SimdInt *)p);  // unaligned load
+}
+
+static inline SimdInt simdLoad1(const void *p) {
+  return _mm256_loadu_si256((const SimdInt *)p);  // unaligned load
+}
+
+static inline SimdDbl simdLoadDbl(const double *p) {
+  return _mm256_loadu_pd(p);  // unaligned load
+}
+
+static inline void simdStore(void *p, SimdInt x) {
+  _mm256_storeu_si256((SimdInt *)p, x);  // unaligned store
+}
+
+static inline void simdStore1(void *p, SimdInt x) {
+  _mm256_storeu_si256((SimdInt *)p, x);  // unaligned store
+}
+
+static inline void simdStoreDbl(double *p, SimdDbl x) {
+  _mm256_storeu_pd(p, x);  // unaligned store
+}
+
+static inline SimdInt simdOr1(SimdInt x, SimdInt y) {
+  return _mm256_or_si256(x, y);
+}
+
+static inline SimdInt simdBlend(SimdInt x, SimdInt y, SimdInt mask) {
+  return _mm256_blendv_epi8(x, y, mask);  // per byte: y where the mask byte's top bit is set, else x
+}
+
+const int simdLen = 8;  // 32-bit ints per register
+const int simdDblLen = 4;  // doubles per register
+
+static inline SimdInt simdSet(int i7, int i6, int i5, int i4,
+			      int i3, int i2, int i1, int i0) {
+  return _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0);  // arguments run from highest lane to lowest; i0 is lane 0
+}
+
+static inline SimdInt simdSet1(char jF, char jE, char jD, char jC,
+			       char jB, char jA, char j9, char j8,
+			       char j7, char j6, char j5, char j4,
+			       char j3, char j2, char j1, char j0,
+			       char iF, char iE, char iD, char iC,
+			       char iB, char iA, char i9, char i8,
+			       char i7, char i6, char i5, char i4,
+			       char i3, char i2, char i1, char i0) {
+  return _mm256_set_epi8(jF, jE, jD, jC, jB, jA, j9, j8,
+			 j7, j6, j5, j4, j3, j2, j1, j0,
+			 iF, iE, iD, iC, iB, iA, i9, i8,
+			 i7, i6, i5, i4, i3, i2, i1, i0);
+}
+
+static inline SimdDbl simdSetDbl(double i3, double i2, double i1, double i0) {
+  return _mm256_set_pd(i3, i2, i1, i0);  // arguments run from highest lane to lowest; i0 is lane 0
+}
+
+static inline SimdInt simdFill(int x) {
+  return _mm256_set1_epi32(x);  // broadcast x to every 32-bit lane
+}
+
+static inline SimdInt simdFill1(char x) {
+  return _mm256_set1_epi8(x);  // broadcast x to every byte
+}
+
+static inline SimdDbl simdFillDbl(double x) {
+  return _mm256_set1_pd(x);  // broadcast x to every double lane
+}
+
+static inline SimdInt simdGt(SimdInt x, SimdInt y) {
+  return _mm256_cmpgt_epi32(x, y);  // signed 32-bit compare; all-ones lane where x > y
+}
+
+static inline SimdInt simdGe1(SimdInt x, SimdInt y) {
+  return _mm256_cmpeq_epi8(_mm256_min_epu8(x, y), y);  // unsigned byte x >= y, expressed as min(x, y) == y
+}
+
+static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
+  return _mm256_add_epi32(x, y);
+}
+
+static inline SimdInt simdAdd1(SimdInt x, SimdInt y) {
+  return _mm256_add_epi8(x, y);  // wrapping byte add
+}
+
+static inline SimdInt simdAdds1(SimdInt x, SimdInt y) {
+  return _mm256_adds_epu8(x, y);  // saturating unsigned byte add
+}
+
+static inline SimdDbl simdAddDbl(SimdDbl x, SimdDbl y) {
+  return _mm256_add_pd(x, y);
+}
+
+static inline SimdInt simdSub(SimdInt x, SimdInt y) {
+  return _mm256_sub_epi32(x, y);
+}
+
+static inline SimdInt simdSub1(SimdInt x, SimdInt y) {
+  return _mm256_sub_epi8(x, y);  // wrapping byte subtract
+}
+
+static inline SimdDbl simdMulDbl(SimdDbl x, SimdDbl y) {
+  return _mm256_mul_pd(x, y);
+}
+
+static inline SimdInt simdQuadruple1(SimdInt x) {
+  return _mm256_slli_epi32(x, 2);  // shifts whole 32-bit lanes; NOTE(review): presumably callers keep each byte < 64 so no bits spill into the next byte -- confirm
+}
+
+static inline SimdInt simdMax(SimdInt x, SimdInt y) {
+  return _mm256_max_epi32(x, y);  // signed 32-bit max
+}
+
+static inline SimdInt simdMin1(SimdInt x, SimdInt y) {
+  return _mm256_min_epu8(x, y);  // unsigned byte min
+}
+
+static inline int simdHorizontalMax(SimdInt x) {
+  __m128i z = _mm256_castsi256_si128(x);  // low 128 bits
+  z = _mm_max_epi32(z, _mm256_extracti128_si256(x, 1));  // fold the high 128 bits onto the low ones
+  z = _mm_max_epi32(z, _mm_shuffle_epi32(z, 0x4E));  // 0x4E swaps the two 64-bit halves
+  z = _mm_max_epi32(z, _mm_shuffle_epi32(z, 0xB1));  // 0xB1 swaps adjacent 32-bit elements
+  return _mm_cvtsi128_si32(z);  // lane 0 now holds the overall maximum
+}
+
+static inline int simdHorizontalMin1(SimdInt x) {
+  __m128i z = _mm256_castsi256_si128(x);  // low 128 bits
+  z = _mm_min_epu8(z, _mm256_extracti128_si256(x, 1));  // fold the high 128 bits onto the low ones
+  z = _mm_min_epu8(z, _mm_srli_epi16(z, 8));  // low byte of each 16-bit lane = min of its byte pair; high byte becomes 0
+  z = _mm_minpos_epu16(z);  // minimum 16-bit lane is placed in the lowest 16 bits
+  return _mm_extract_epi16(z, 0);
+}
+
+static inline double simdHorizontalAddDbl(SimdDbl x) {
+  __m128d z = _mm256_castpd256_pd128(x);  // low two doubles
+  z = _mm_add_pd(z, _mm256_extractf128_pd(x, 1));  // add the high two doubles lane-wise
+  return _mm_cvtsd_f64(_mm_hadd_pd(z, z));  // sum the remaining pair
+}
+
+static inline SimdInt simdChoose1(SimdInt items, SimdInt choices) {
+  return _mm256_shuffle_epi8(items, choices);  // byte table lookup; this AVX2 instruction shuffles within each 128-bit half
+}
+
+#elif defined __SSE4_1__
+
+typedef __m128i SimdInt;  // one 128-bit register; the un-suffixed functions treat it as 4 x 32-bit ints
+typedef __m128i SimdUint1;  // same register type; the "1"-suffixed functions treat it as 16 x 8-bit lanes
+typedef __m128d SimdDbl;  // 2 x double
+
+const int simdBytes = 16;  // bytes per SIMD register
+
+static inline SimdInt simdZero() {
+  return _mm_setzero_si128();
+}
+
+static inline SimdInt simdZero1() {
+  return _mm_setzero_si128();
+}
+
+static inline SimdDbl simdZeroDbl() {
+  return _mm_setzero_pd();
+}
+
+static inline SimdInt simdOnes1() {
+  return _mm_set1_epi32(-1);  // every bit set, i.e. 0xFF in each byte
+}
+
+static inline SimdInt simdLoad(const void *p) {
+  return _mm_loadu_si128((const SimdInt *)p);  // unaligned load
+}
+
+static inline SimdInt simdLoad1(const void *p) {
+  return _mm_loadu_si128((const SimdInt *)p);  // unaligned load
+}
+
+static inline SimdDbl simdLoadDbl(const double *p) {
+  return _mm_loadu_pd(p);  // unaligned load
+}
+
+static inline void simdStore(void *p, SimdInt x) {
+  _mm_storeu_si128((SimdInt *)p, x);  // unaligned store
+}
+
+static inline void simdStore1(void *p, SimdInt x) {
+  _mm_storeu_si128((SimdInt *)p, x);  // unaligned store
+}
+
+static inline void simdStoreDbl(double *p, SimdDbl x) {
+  _mm_storeu_pd(p, x);  // unaligned store
+}
+
+static inline SimdInt simdOr1(SimdInt x, SimdInt y) {
+  return _mm_or_si128(x, y);
+}
+
+static inline SimdInt simdBlend(SimdInt x, SimdInt y, SimdInt mask) {
+  return _mm_blendv_epi8(x, y, mask);  // SSE4.1
+}
+
+const int simdLen = 4;  // 32-bit ints per register
+const int simdDblLen = 2;  // doubles per register
+
+static inline SimdInt simdSet(int i3, int i2, int i1, int i0) {
+  return _mm_set_epi32(i3, i2, i1, i0);  // arguments run from highest lane to lowest; i0 is lane 0
+}
+
+static inline SimdInt simdSet1(char iF, char iE, char iD, char iC,
+			       char iB, char iA, char i9, char i8,
+			       char i7, char i6, char i5, char i4,
+			       char i3, char i2, char i1, char i0) {
+  return _mm_set_epi8(iF, iE, iD, iC, iB, iA, i9, i8,
+		      i7, i6, i5, i4, i3, i2, i1, i0);
+}
+
+static inline SimdDbl simdSetDbl(double i1, double i0) {
+  return _mm_set_pd(i1, i0);  // i0 is lane 0, i1 is lane 1
+}
+
+static inline SimdInt simdFill(int x) {
+  return _mm_set1_epi32(x);  // broadcast x to every 32-bit lane
+}
+
+static inline SimdInt simdFill1(char x) {
+  return _mm_set1_epi8(x);  // broadcast x to every byte
+}
+
+static inline SimdDbl simdFillDbl(double x) {
+  return _mm_set1_pd(x);  // broadcast x to both double lanes
+}
+
+static inline SimdInt simdGt(SimdInt x, SimdInt y) {
+  return _mm_cmpgt_epi32(x, y);  // signed 32-bit compare; all-ones lane where x > y
+}
+
+static inline SimdInt simdGe1(SimdInt x, SimdInt y) {
+  return _mm_cmpeq_epi8(_mm_min_epu8(x, y), y);  // unsigned byte x >= y, expressed as min(x, y) == y
+}
+
+static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
+  return _mm_add_epi32(x, y);
+}
+
+static inline SimdInt simdAdd1(SimdInt x, SimdInt y) {
+  return _mm_add_epi8(x, y);  // wrapping byte add
+}
+
+static inline SimdInt simdAdds1(SimdInt x, SimdInt y) {
+  return _mm_adds_epu8(x, y);  // saturating unsigned byte add
+}
+
+static inline SimdDbl simdAddDbl(SimdDbl x, SimdDbl y) {
+  return _mm_add_pd(x, y);
+}
+
+static inline SimdInt simdSub(SimdInt x, SimdInt y) {
+  return _mm_sub_epi32(x, y);
+}
+
+static inline SimdInt simdSub1(SimdInt x, SimdInt y) {
+  return _mm_sub_epi8(x, y);  // wrapping byte subtract
+}
+
+static inline SimdDbl simdMulDbl(SimdDbl x, SimdDbl y) {
+  return _mm_mul_pd(x, y);
+}
+
+static inline SimdInt simdQuadruple1(SimdInt x) {
+  return _mm_slli_epi32(x, 2);  // shifts whole 32-bit lanes; NOTE(review): presumably callers keep each byte < 64 so no bits spill into the next byte -- confirm
+}
+
+static inline SimdInt simdMax(SimdInt x, SimdInt y) {
+  return _mm_max_epi32(x, y);  // SSE4.1
+}
+
+static inline SimdInt simdMin1(SimdInt x, SimdInt y) {
+  return _mm_min_epu8(x, y);  // unsigned byte min
+}
+
+static inline int simdHorizontalMax(SimdInt x) {
+  x = simdMax(x, _mm_shuffle_epi32(x, 0x4E));  // 0x4E swaps the two 64-bit halves
+  x = simdMax(x, _mm_shuffle_epi32(x, 0xB1));  // 0xB1 swaps adjacent 32-bit elements
+  return _mm_cvtsi128_si32(x);  // lane 0 now holds the overall maximum
+}
+
+static inline int simdHorizontalMin1(SimdInt x) {
+  x = _mm_min_epu8(x, _mm_srli_epi16(x, 8));  // low byte of each 16-bit lane = min of its byte pair; high byte becomes 0
+  x = _mm_minpos_epu16(x);  // SSE4.1
+  return _mm_extract_epi16(x, 0);  // minimum sits in the lowest 16 bits
+}
+
+static inline double simdHorizontalAddDbl(SimdDbl x) {
+  return _mm_cvtsd_f64(_mm_hadd_pd(x, x));  // lane 0 of the horizontal add is x[0] + x[1]
+}
+
+static inline SimdInt simdChoose1(SimdInt items, SimdInt choices) {
+  return _mm_shuffle_epi8(items, choices);  // SSSE3
+}
+
+#elif defined __ARM_NEON
+
+typedef int32x4_t SimdInt;  // 4 x signed 32-bit
+typedef uint32x4_t SimdUint;  // 4 x unsigned 32-bit; the type returned by simdGt and taken as the simdBlend mask
+typedef uint8x16_t SimdUint1;  // 16 x unsigned 8-bit, used by the "1"-suffixed functions
+typedef float64x2_t SimdDbl;  // 2 x double
+
+const int simdBytes = 16;  // bytes per SIMD register
+
+static inline SimdInt simdZero() {
+  return vdupq_n_s32(0);
+}
+
+static inline SimdUint1 simdZero1() {
+  return vdupq_n_u8(0);
+}
+
+static inline SimdDbl simdZeroDbl() {
+  return vdupq_n_f64(0);
+}
+
+static inline SimdUint1 simdOnes1() {
+  return vdupq_n_u8(-1);  // -1 converts to 0xFF in every byte
+}
+
+static inline SimdInt simdLoad(const int *p) {
+  return vld1q_s32(p);
+}
+
+static inline SimdUint1 simdLoad1(const unsigned char *p) {
+  return vld1q_u8(p);
+}
+
+static inline SimdDbl simdLoadDbl(const double *p) {
+  return vld1q_f64(p);
+}
+
+static inline void simdStore(int *p, SimdInt x) {
+  vst1q_s32(p, x);
+}
+
+static inline void simdStore1(unsigned char *p, SimdUint1 x) {
+  vst1q_u8(p, x);
+}
+
+static inline void simdStoreDbl(double *p, SimdDbl x) {
+  vst1q_f64(p, x);
+}
+
+static inline SimdUint1 simdOr1(SimdUint1 x, SimdUint1 y) {
+  return vorrq_u8(x, y);
+}
+
+static inline SimdInt simdBlend(SimdInt x, SimdInt y, SimdUint mask) {
+  return vbslq_s32(mask, y, x);  // bitwise select: y where mask bits are set, else x
+}
+
+const int simdLen = 4;  // 32-bit ints per register
+const int simdDblLen = 2;  // doubles per register
+
+static inline SimdInt simdSet(unsigned i3, unsigned i2,
+                              unsigned i1, unsigned i0) {
+  size_t lo = i1;  // NOTE(review): the << 32 below assumes size_t is 64 bits wide -- confirm for every NEON target
+  size_t hi = i3;
+  return
+    vcombine_s32(vcreate_s32((lo << 32) | i0), vcreate_s32((hi << 32) | i2));  // low half packs i1:i0, high half packs i3:i2
+}
+
+static inline SimdUint1 simdSet1(unsigned char iF, unsigned char iE,
+				 unsigned char iD, unsigned char iC,
+				 unsigned char iB, unsigned char iA,
+				 unsigned char i9, unsigned char i8,
+				 unsigned char i7, unsigned char i6,
+				 unsigned char i5, unsigned char i4,
+				 unsigned char i3, unsigned char i2,
+				 unsigned char i1, unsigned char i0) {
+  size_t lo =
+    (size_t)i0       | (size_t)i1 <<  8 | (size_t)i2 << 16 | (size_t)i3 << 24 |
+    (size_t)i4 << 32 | (size_t)i5 << 40 | (size_t)i6 << 48 | (size_t)i7 << 56;
+
+  size_t hi =
+    (size_t)i8       | (size_t)i9 <<  8 | (size_t)iA << 16 | (size_t)iB << 24 |
+    (size_t)iC << 32 | (size_t)iD << 40 | (size_t)iE << 48 | (size_t)iF << 56;
+
+  return vcombine_u8(vcreate_u8(lo), vcreate_u8(hi));  // bytes i0..i7 form the low half, i8..iF the high half
+}
+
+static inline SimdDbl simdSetDbl(double i1, double i0) {
+  return vcombine_f64(vdup_n_f64(i0), vdup_n_f64(i1));  // i0 is lane 0, i1 is lane 1
+}
+
+static inline SimdInt simdFill(int x) {
+  return vdupq_n_s32(x);  // broadcast x to every 32-bit lane
+}
+
+static inline SimdUint1 simdFill1(unsigned char x) {
+  return vdupq_n_u8(x);  // broadcast x to every byte
+}
+
+static inline SimdDbl simdFillDbl(double x) {
+  return vdupq_n_f64(x);  // broadcast x to both double lanes
+}
+
+static inline SimdUint simdGt(SimdInt x, SimdInt y) {
+  return vcgtq_s32(x, y);  // signed 32-bit compare; all-ones lane where x > y
+}
+
+static inline SimdUint1 simdGe1(SimdUint1 x, SimdUint1 y) {
+  return vcgeq_u8(x, y);  // unsigned byte x >= y
+}
+
+static inline SimdInt simdAdd(SimdInt x, SimdInt y) {
+  return vaddq_s32(x, y);
+}
+
+static inline SimdUint1 simdAdd1(SimdUint1 x, SimdUint1 y) {
+  return vaddq_u8(x, y);  // wrapping byte add
+}
+
+static inline SimdUint1 simdAdds1(SimdUint1 x, SimdUint1 y) {
+  return vqaddq_u8(x, y);  // saturating unsigned byte add
+}
+
+static inline SimdDbl simdAddDbl(SimdDbl x, SimdDbl y) {
+  return vaddq_f64(x, y);
+}
+
+static inline SimdInt simdSub(SimdInt x, SimdInt y) {
+  return vsubq_s32(x, y);
+}
+
+static inline SimdUint1 simdSub1(SimdUint1 x, SimdUint1 y) {
+  return vsubq_u8(x, y);  // wrapping byte subtract
+}
+
+static inline SimdDbl simdMulDbl(SimdDbl x, SimdDbl y) {
+  return vmulq_f64(x, y);
+}
+
+static inline SimdUint1 simdQuadruple1(SimdUint1 x) {
+  return vshlq_n_u8(x, 2);  // per-byte shift, unlike the x86 branches which shift 32-bit lanes
+}
+
+static inline SimdInt simdMax(SimdInt x, SimdInt y) {
+  return vmaxq_s32(x, y);  // signed 32-bit max
+}
+
+static inline SimdUint1 simdMin1(SimdUint1 x, SimdUint1 y) {
+  return vminq_u8(x, y);  // unsigned byte min
+}
+
+static inline int simdHorizontalMax(SimdInt x) {
+  return vmaxvq_s32(x);  // maximum across all four lanes
+}
+
+static inline int simdHorizontalMin1(SimdUint1 x) {
+  return vminvq_u8(x);  // minimum across all sixteen bytes
+}
+
+static inline double simdHorizontalAddDbl(SimdDbl x) {
+  return vaddvq_f64(x);  // sum of both lanes
+}
+
+static inline SimdUint1 simdChoose1(SimdUint1 items, SimdUint1 choices) {
+  return vqtbl1q_u8(items, choices);  // byte table lookup: result[k] = items[choices[k]]
+}
+
+#else
+
+typedef int SimdInt;  // scalar fallback: a "vector" is a single int
+typedef double SimdDbl;  // scalar fallback: a "vector" is a single double
+const int simdBytes = 1;  // NOTE(review): set to 1 although SimdInt is a whole int; the byte-lane ("1"-suffixed) API is not defined in this branch
+const int simdLen = 1;  // one int per "vector"
+const int simdDblLen = 1;  // one double per "vector"
+static inline int simdZero() { return 0; }
+static inline double simdZeroDbl() { return 0; }
+static inline int simdSet(int x) { return x; }  // single-lane counterpart of the multi-argument simdSet
+static inline double simdSetDbl(double x) { return x; }  // single-lane counterpart of the multi-argument simdSetDbl
+static inline int simdFill(int x) { return x; }
+static inline int simdLoad(const int *p) { return *p; }
+static inline double simdLoadDbl(const double *p) { return *p; }
+static inline void simdStore(int *p, int x) { *p = x; }
+static inline void simdStoreDbl(double *p, double x) { *p = x; }
+static inline double simdFillDbl(double x) { return x; }
+static inline int simdGt(int x, int y) { return x > y; }  // yields 1/0 rather than an all-ones mask; simdBlend below only tests for non-zero
+static inline int simdAdd(int x, int y) { return x + y; }
+static inline double simdAddDbl(double x, double y) { return x + y; }
+static inline int simdSub(int x, int y) { return x - y; }
+static inline double simdMulDbl(double x, double y) { return x * y; }
+static inline int simdMax(int x, int y) { return x > y ? x : y; }
+static inline int simdBlend(int x, int y, int mask) { return mask ? y : x; }  // y when mask is non-zero, else x
+static inline int simdHorizontalMax(int a) { return a; }  // one lane, so the value is its own maximum
+static inline double simdHorizontalAddDbl(double x) { return x; }  // one lane, so the value is its own sum
+
+#endif
+
+}
+
+#endif
diff --git a/lib/tantan/tantan.cpp b/lib/tantan/tantan.cpp
new file mode 100644
index 000000000..571539d91
--- /dev/null
+++ b/lib/tantan/tantan.cpp
@@ -0,0 +1,549 @@
+// Author: Martin C. Frith 2010
+// SPDX-License-Identifier: MPL-2.0
+
+#include "tantan.h"
+#include "mcf_simd.h"
+
+#include <algorithm>  // fill, max
+#include <cassert>
+#include <cmath>  // pow, abs
+#include <iostream>  // cerr
+#include <numeric>  // accumulate
+#include <vector>
+
+#define BEG(v) ((v).empty() ? 0 : &(v).front())
+#define END(v) ((v).empty() ? 0 : &(v).back() + 1)
+
+namespace tantan {
+
+using namespace mcf;
+
+void multiplyAll(std::vector<double> &v, double factor) {  // multiplies every element of v by factor, in place
+  for (std::vector<double>::iterator i = v.begin(); i < v.end(); ++i)
+    *i *= factor;
+}
+
+double firstRepeatOffsetProb(double probMult, int maxRepeatOffset) {  // first term p0 of a geometric series with ratio probMult whose maxRepeatOffset terms sum to 1
+  if (probMult < 1 || probMult > 1) {  // i.e. probMult != 1; a NaN fails both tests and takes the uniform branch below
+    return (1 - probMult) / (1 - std::pow(probMult, maxRepeatOffset));  // reciprocal of 1 + r + ... + r^(n-1)
+  }
+  return 1.0 / maxRepeatOffset;  // ratio exactly 1: all offsets equally likely
+}
+
+void checkForwardAndBackwardTotals(double fTot, double bTot) {  // prints a warning to stderr when the two totals disagree; never aborts
+  double x = std::abs(fTot);
+  double y = std::abs(bTot);
+
+  // ??? Is 1e6 suitable here ???
+  if (std::abs(fTot - bTot) > std::max(x, y) / 1e6)  // difference exceeds one millionth of the larger magnitude
+    std::cerr << "tantan: warning: possible numeric inaccuracy\n"
+              << "tantan:          forward algorithm total: " << fTot << "\n"
+              << "tantan:          backward algorithm total: " << bTot << "\n";
+}
+
+struct Tantan {  // forward-backward working state for one sequence; see calcRepeatProbs and countTransitions for the two passes
+  enum { scaleStepSize = 16 };  // probabilities are rescaled once per this many sequence positions (see rescaleForward)
+
+  const uchar *seqBeg;  // start of the sequence
+  const uchar *seqEnd;  // end of the sequence
+  const uchar *seqPtr;  // current position in the sequence
+
+  int maxRepeatOffset;
+
+  const const_double_ptr *likelihoodRatioMatrix;  // used as [letter at seqPtr][letter at an earlier position]
+
+  double b2b;  // transition probability from background to background
+  double f2b;  // transition probability from foreground to background
+  double g2g;  // transition probability from gap/indel to gap/indel
+  //double f2g;  // transition probability from foreground to gap/indel
+  //double g2f;  // transition probability from gap/indel to foreground
+  double oneGapProb;  // f2g * g2f
+  double endGapProb;  // f2g * 1
+  double f2f0;  // foreground to foreground, if there are 0 indel transitions
+  double f2f1;  // foreground to foreground, if there is 1 indel transition
+  double f2f2;  // foreground to foreground, if there are 2 indel transitions
+  double b2fDecay;
+  double b2fGrowth;
+  double b2fFirst;  // background state to first foreground state
+  double b2fLast;  // background state to last foreground state
+
+  double backgroundProb;
+  std::vector<double> b2fProbs;  // background state to each foreground state
+  std::vector<double> foregroundProbs;
+  std::vector<double> insertionProbs;
+
+  std::vector<double> scaleFactors;
+
+  Tantan(const uchar *seqBeg,
+         const uchar *seqEnd,
+         int maxRepeatOffset,
+         const const_double_ptr *likelihoodRatioMatrix,
+         double repeatProb,
+         double repeatEndProb,
+         double repeatOffsetProbDecay,
+         double firstGapProb,
+         double otherGapProb) {
+    assert(maxRepeatOffset > 0);  // parameter checks are asserts only, so they vanish when NDEBUG is defined
+    assert(repeatProb >= 0 && repeatProb < 1);
+    // (if repeatProb==1, then any sequence is impossible)
+    assert(repeatEndProb >= 0 && repeatEndProb <= 1);
+    assert(repeatOffsetProbDecay > 0 && repeatOffsetProbDecay <= 1);
+    assert(otherGapProb >= 0 && otherGapProb <= 1);
+    assert(firstGapProb >= 0);
+    assert(repeatEndProb + firstGapProb * 2 <= 1);  // keeps f2f2 below non-negative
+
+    this->seqBeg = seqBeg;
+    this->seqEnd = seqEnd;
+    this->seqPtr = seqBeg;
+    this->maxRepeatOffset = maxRepeatOffset;
+    this->likelihoodRatioMatrix = likelihoodRatioMatrix;
+
+    b2b = 1 - repeatProb;
+    f2b = repeatEndProb;
+    g2g = otherGapProb;
+    //f2g = firstGapProb;
+    //g2f = 1 - otherGapProb;
+    oneGapProb = firstGapProb * (1 - otherGapProb);
+    endGapProb = firstGapProb * (maxRepeatOffset > 1);  // forced to 0 when there is a single offset; endGapProb > 0 is what selects the *WithGaps paths
+    f2f0 = 1 - repeatEndProb;
+    f2f1 = 1 - repeatEndProb - firstGapProb;
+    f2f2 = 1 - repeatEndProb - firstGapProb * 2;
+
+    b2fDecay = repeatOffsetProbDecay;
+    b2fGrowth = 1 / repeatOffsetProbDecay;
+
+    b2fFirst = repeatProb * firstRepeatOffsetProb(b2fDecay, maxRepeatOffset);
+    b2fLast = repeatProb * firstRepeatOffsetProb(b2fGrowth, maxRepeatOffset);
+
+    b2fProbs.resize(maxRepeatOffset);
+    foregroundProbs.resize(maxRepeatOffset);  // one entry per repeat offset 1..maxRepeatOffset
+    insertionProbs.resize(maxRepeatOffset - 1);  // one fewer than foregroundProbs; empty when maxRepeatOffset == 1
+
+    double p = b2fFirst;
+    for (int i = 0; i < maxRepeatOffset; ++i) {  // b2fProbs[i] = b2fFirst * b2fDecay^i
+      b2fProbs[i] = p;
+      p *= b2fDecay;
+    }
+
+    scaleFactors.resize((seqEnd - seqBeg) / scaleStepSize);  // one factor per complete block of scaleStepSize letters
+  }
+
+  void initializeForwardAlgorithm() {  // start entirely in the background state
+    backgroundProb = 1.0;
+    std::fill(foregroundProbs.begin(), foregroundProbs.end(), 0.0);
+    std::fill(insertionProbs.begin(), insertionProbs.end(), 0.0);
+  }
+
+  double forwardTotal() {  // total after the forward pass: one final b2b or f2b step from the current state
+    double fromForeground = std::accumulate(foregroundProbs.begin(),
+                                            foregroundProbs.end(), 0.0);
+    double total = backgroundProb * b2b + fromForeground * f2b;
+    assert(total > 0);
+    return total;
+  }
+
+  void initializeBackwardAlgorithm() {  // seed with the same final b2b / f2b step that forwardTotal applies
+    backgroundProb = b2b;
+    std::fill(foregroundProbs.begin(), foregroundProbs.end(), f2b);
+    std::fill(insertionProbs.begin(), insertionProbs.end(), 0.0);
+  }
+
+  double backwardTotal() {  // after the backward pass the background value is the total
+    assert(backgroundProb > 0);
+    return backgroundProb;
+  }
+
+  void calcForwardTransitionProbsWithGaps() {  // only called when endGapProb > 0, which implies maxRepeatOffset > 1, so insertionProbs is non-empty
+    double fromBackground = backgroundProb * b2fLast;
+    double *foregroundPtr = &foregroundProbs.back();  // walk from the largest offset down to the smallest
+    double f = *foregroundPtr;
+    double fromForeground = f;
+
+    double *insertionPtr = &insertionProbs.back();
+    double i = *insertionPtr;
+    *foregroundPtr = fromBackground + f * f2f1 + i * endGapProb;  // largest offset: only one gap direction exists, hence f2f1
+    double d = f;
+    --foregroundPtr;
+    fromBackground *= b2fGrowth;
+
+    while (foregroundPtr > &foregroundProbs.front()) {  // interior offsets: both gap directions exist, hence f2f2
+      f = *foregroundPtr;
+      fromForeground += f;
+      i = *(insertionPtr - 1);
+      *foregroundPtr = fromBackground + f * f2f2 + (i + d) * oneGapProb;
+      *insertionPtr = f + i * g2g;
+      d = f + d * g2g;
+      --foregroundPtr;
+      --insertionPtr;
+      fromBackground *= b2fGrowth;
+    }
+
+    f = *foregroundPtr;  // smallest offset (front element)
+    fromForeground += f;
+    *foregroundPtr = fromBackground + f * f2f1 + d * endGapProb;
+    *insertionPtr = f;
+
+    backgroundProb = backgroundProb * b2b + fromForeground * f2b;  // fromForeground summed the pre-update foreground values
+  }
+
+  void calcBackwardTransitionProbsWithGaps() {  // only called when endGapProb > 0, which implies maxRepeatOffset > 1, so insertionProbs is non-empty
+    double toBackground = f2b * backgroundProb;
+    double *foregroundPtr = &foregroundProbs.front();  // walk from the smallest offset up to the largest
+    double f = *foregroundPtr;
+    double toForeground = f;
+
+    double *insertionPtr = &insertionProbs.front();
+    double i = *insertionPtr;
+    *foregroundPtr = toBackground + f2f1 * f + i;
+    double d = endGapProb * f;
+    ++foregroundPtr;
+    toForeground *= b2fGrowth;
+
+    while (foregroundPtr < &foregroundProbs.back()) {  // interior offsets
+      f = *foregroundPtr;
+      toForeground += f;
+      i = *(insertionPtr + 1);
+      *foregroundPtr = toBackground + f2f2 * f + (i + d);
+      double oneGapProb_f = oneGapProb * f;
+      *insertionPtr = oneGapProb_f + g2g * i;
+      d = oneGapProb_f + g2g * d;
+      ++foregroundPtr;
+      ++insertionPtr;
+      toForeground *= b2fGrowth;  // repeated scaling by b2fGrowth weights earlier (smaller) offsets more
+    }
+
+    f = *foregroundPtr;  // largest offset (back element)
+    toForeground += f;
+    *foregroundPtr = toBackground + f2f1 * f + d;
+    *insertionPtr = endGapProb * f;
+
+    backgroundProb = b2b * backgroundProb + b2fLast * toForeground;
+  }
+
+  void calcForwardTransitionProbs() {  // one forward transition step without emissions
+    if (endGapProb > 0) return calcForwardTransitionProbsWithGaps();
+
+    double b = backgroundProb;
+    double fromForeground = 0;
+    double *foregroundBeg = BEG(foregroundProbs);
+
+    for (int i = 0; i < maxRepeatOffset; ++i) {  // gap-free case: each offset only mixes with the background
+      double f = foregroundBeg[i];
+      fromForeground += f;
+      foregroundBeg[i] = b * b2fProbs[i] + f * f2f0;
+    }
+
+    backgroundProb = b * b2b + fromForeground * f2b;
+  }
+
+  void calcBackwardTransitionProbs() {  // one backward transition step without emissions
+    if (endGapProb > 0) return calcBackwardTransitionProbsWithGaps();
+
+    double toBackground = f2b * backgroundProb;
+    double toForeground = 0;
+    double *foregroundBeg = BEG(foregroundProbs);
+
+    for (int i = 0; i < maxRepeatOffset; ++i) {  // gap-free case: each offset only mixes with the background
+      double f = foregroundBeg[i];
+      toForeground += b2fProbs[i] * f;
+      foregroundBeg[i] = toBackground + f2f0 * f;
+    }
+
+    backgroundProb = b2b * backgroundProb + toForeground;
+  }
+
+  void addEndCounts(double forwardProb,
+                    double totalProb,
+                    double *transitionCounts) {  // adds the weight of the final background-to-end step to transitionCounts[0]
+    double toEnd = forwardProb * b2b / totalProb;
+    transitionCounts[0] += toEnd;
+  }
+
+  void addTransitionCounts(double forwardProb,
+                           double totalProb,
+                           double *transitionCounts) {  // writes transitionCounts[0..maxRepeatOffset], so the array needs maxRepeatOffset + 1 entries
+    double toBg = forwardProb * b2b / totalProb;
+    double toFg = forwardProb * b2fFirst / totalProb;
+
+    transitionCounts[0] += backgroundProb * toBg;  // slot 0: background to background
+
+    for (double *i = BEG(foregroundProbs); i < END(foregroundProbs); ++i) {  // slots 1..maxRepeatOffset: background to each offset
+      ++transitionCounts;
+      *transitionCounts += *i * toFg;
+      toFg *= b2fDecay;  // tracks b2fProbs[k] = b2fFirst * b2fDecay^k
+    }
+  }
+
+  bool isNearSeqBeg() {  // true while fewer than maxRepeatOffset letters precede seqPtr
+    return seqPtr - seqBeg < maxRepeatOffset;
+  }
+
+  int maxOffsetInTheSequence() {  // largest offset that still lands inside the sequence
+    return isNearSeqBeg() ? (seqPtr - seqBeg) : maxRepeatOffset;
+  }
+
+  const uchar *seqFurthestBack() {  // earliest position reachable from seqPtr by a valid offset
+    return isNearSeqBeg() ? seqBeg : seqPtr - maxRepeatOffset;
+  }
+
+  void calcEmissionProbs() {  // multiplies each foreground probability by the likelihood ratio for its offset
+    const double *lrRow = likelihoodRatioMatrix[*seqPtr];
+    const uchar *seqStop = seqFurthestBack();
+    double *foregroundPtr = BEG(foregroundProbs);
+    const uchar *offsetPtr = seqPtr;
+
+    while (offsetPtr > seqStop) {  // foregroundProbs[k] pairs the current letter with the letter k+1 positions back
+      --offsetPtr;
+      *foregroundPtr *= lrRow[*offsetPtr];
+      ++foregroundPtr;
+    }
+
+    while (foregroundPtr < END(foregroundProbs)) {  // offsets that would reach before seqBeg
+      *foregroundPtr *= 0;
+      ++foregroundPtr;
+    }
+  }
+
+  void calcForwardTransitionAndEmissionProbs() {  // fused forward step: transition, then emission
+    if (endGapProb > 0) {
+      calcForwardTransitionProbsWithGaps();
+      calcEmissionProbs();
+      return;
+    }
+
+    double b = backgroundProb;
+    const double *b2f = BEG(b2fProbs);
+    double *fp = BEG(foregroundProbs);
+    const double *lrRow = likelihoodRatioMatrix[*seqPtr];
+    int maxOffset = maxOffsetInTheSequence();  // entries at index >= maxOffset are neither read nor written below
+    const uchar *sp = seqPtr;
+
+    SimdDbl bV = simdFillDbl(b);
+    SimdDbl tV = simdFillDbl(f2f0);
+    SimdDbl sV = simdZeroDbl();  // running lane-wise sum of the pre-update foreground values
+
+    int i = 0;
+    for (; i <= maxOffset - simdDblLen; i += simdDblLen) {  // vector part: simdDblLen offsets per iteration
+      SimdDbl rV = simdSetDbl(  // the #if ladder passes exactly simdDblLen arguments, highest lane first
+#if defined __SSE4_1__ || defined __ARM_NEON
+#ifdef __AVX2__
+			      lrRow[sp[-i-4]],
+			      lrRow[sp[-i-3]],
+#endif
+			      lrRow[sp[-i-2]],
+#endif
+			      lrRow[sp[-i-1]]);
+      SimdDbl fV = simdLoadDbl(fp+i);
+      sV = simdAddDbl(sV, fV);
+      SimdDbl xV = simdMulDbl(bV, simdLoadDbl(b2f+i));
+      simdStoreDbl(fp+i, simdMulDbl(simdAddDbl(xV, simdMulDbl(fV, tV)), rV));  // (b * b2f + f * f2f0) * likelihood ratio
+    }
+    double fromForeground = simdHorizontalAddDbl(sV);
+    for (; i < maxOffset; ++i) {  // scalar tail for the leftover offsets
+      double f = fp[i];
+      fromForeground += f;
+      fp[i] = (b * b2f[i] + f * f2f0) * lrRow[sp[-i-1]];
+    }
+
+    backgroundProb = b * b2b + fromForeground * f2b;
+  }
+
+  void calcEmissionAndBackwardTransitionProbs() {  // fused backward step: emission, then transition
+    if (endGapProb > 0) {
+      calcEmissionProbs();
+      calcBackwardTransitionProbsWithGaps();
+      return;
+    }
+
+    double toBackground = f2b * backgroundProb;
+    const double *b2f = BEG(b2fProbs);
+    double *fp = BEG(foregroundProbs);
+    const double *lrRow = likelihoodRatioMatrix[*seqPtr];
+    int maxOffset = maxOffsetInTheSequence();  // entries at index >= maxOffset are neither read nor written below
+    const uchar *sp = seqPtr;
+
+    SimdDbl bV = simdFillDbl(toBackground);
+    SimdDbl tV = simdFillDbl(f2f0);
+    SimdDbl sV = simdZeroDbl();  // running lane-wise sum of b2f * (foreground * likelihood ratio)
+
+    int i = 0;
+    for (; i <= maxOffset - simdDblLen; i += simdDblLen) {  // vector part: simdDblLen offsets per iteration
+      SimdDbl rV = simdSetDbl(  // the #if ladder passes exactly simdDblLen arguments, highest lane first
+#if defined __SSE4_1__ || defined __ARM_NEON
+#ifdef __AVX2__
+			      lrRow[sp[-i-4]],
+			      lrRow[sp[-i-3]],
+#endif
+			      lrRow[sp[-i-2]],
+#endif
+			      lrRow[sp[-i-1]]);
+      SimdDbl fV = simdMulDbl(simdLoadDbl(fp+i), rV);
+      sV = simdAddDbl(sV, simdMulDbl(simdLoadDbl(b2f+i), fV));
+      simdStoreDbl(fp+i, simdAddDbl(bV, simdMulDbl(tV, fV)));
+    }
+    double toForeground = simdHorizontalAddDbl(sV);
+    for (; i < maxOffset; ++i) {  // scalar tail for the leftover offsets
+      double f = fp[i] * lrRow[sp[-i-1]];
+      toForeground += b2f[i] * f;
+      fp[i] = toBackground + f2f0 * f;
+    }
+
+    backgroundProb = b2b * backgroundProb + toForeground;
+  }
+
+  void rescale(double scale) {  // multiplies the background, foreground and insertion probabilities by scale
+    backgroundProb *= scale;
+    multiplyAll(foregroundProbs, scale);
+    multiplyAll(insertionProbs, scale);
+  }
+
+  void rescaleForward() {  // at the last position of each scaleStepSize block, normalise so backgroundProb becomes 1
+    if ((seqPtr - seqBeg) % scaleStepSize == scaleStepSize - 1) {
+      assert(backgroundProb > 0);
+      double scale = 1 / backgroundProb;
+      scaleFactors[(seqPtr - seqBeg) / scaleStepSize] = scale;  // remembered so the backward pass can apply the same factor
+      rescale(scale);
+    }
+  }
+
+  void rescaleBackward() {  // applies the factor that rescaleForward stored for this block
+    if ((seqPtr - seqBeg) % scaleStepSize == scaleStepSize - 1) {
+      double scale = scaleFactors[(seqPtr - seqBeg) / scaleStepSize];
+      rescale(scale);
+    }
+  }
+
+  void calcRepeatProbs(float *letterProbs) {  // writes one value per letter: 1 minus the computed non-repeat probability
+    initializeForwardAlgorithm();
+
+    while (seqPtr < seqEnd) {  // forward pass
+      calcForwardTransitionAndEmissionProbs();
+      rescaleForward();
+      *letterProbs = static_cast<float>(backgroundProb);  // output buffer temporarily holds the forward background value
+      ++letterProbs;
+      ++seqPtr;
+    }
+
+    double z = forwardTotal();
+
+    initializeBackwardAlgorithm();
+
+    while (seqPtr > seqBeg) {  // backward pass, overwriting the stored forward values in place
+      --seqPtr;
+      --letterProbs;
+      double nonRepeatProb = *letterProbs * backgroundProb / z;  // forward value * backward value / total
+      // Convert nonRepeatProb to a float, so that it is more likely
+      // to be exactly 1 when it should be, e.g. for the 1st letter of
+      // a sequence:
+      *letterProbs = 1 - static_cast<float>(nonRepeatProb);
+      rescaleBackward();
+      calcEmissionAndBackwardTransitionProbs();
+    }
+
+    double z2 = backwardTotal();
+    checkForwardAndBackwardTotals(z, z2);  // sanity check only; prints a warning on mismatch
+  }
+
+  void countTransitions(double *transitionCounts) {  // accumulates into transitionCounts[0..maxRepeatOffset] (see addTransitionCounts)
+    std::vector<float> p(seqEnd - seqBeg);  // forward background value recorded before each position's transition
+    float *letterProbs = BEG(p);
+
+    initializeForwardAlgorithm();
+
+    while (seqPtr < seqEnd) {  // forward pass
+      *letterProbs = static_cast<float>(backgroundProb);  // stored BEFORE the transition step, unlike calcRepeatProbs
+      calcForwardTransitionProbs();
+      calcEmissionProbs();
+      rescaleForward();
+      ++letterProbs;
+      ++seqPtr;
+    }
+
+    double z = forwardTotal();
+
+    addEndCounts(backgroundProb, z, transitionCounts);
+
+    initializeBackwardAlgorithm();
+
+    while (seqPtr > seqBeg) {  // backward pass
+      --seqPtr;
+      --letterProbs;
+      rescaleBackward();
+      calcEmissionProbs();
+      addTransitionCounts(*letterProbs, z, transitionCounts);
+      calcBackwardTransitionProbs();
+    }
+
+    double z2 = backwardTotal();
+    checkForwardAndBackwardTotals(z, z2);  // sanity check only; prints a warning on mismatch
+  }
+};
+
+void maskSequences(uchar *seqBeg,
+                   uchar *seqEnd,
+                   int maxRepeatOffset,
+                   const const_double_ptr *likelihoodRatioMatrix,
+                   double repeatProb,
+                   double repeatEndProb,
+                   double repeatOffsetProbDecay,
+                   double firstGapProb,
+                   double otherGapProb,
+                   double minMaskProb,
+                   const uchar *maskTable) {  // computes per-letter repeat probabilities, then masks [seqBeg, seqEnd) in place
+  std::vector<float> p(seqEnd - seqBeg);  // one probability per letter
+  float *probabilities = BEG(p);  // BEG yields a null pointer for an empty sequence
+
+  getProbabilities(seqBeg, seqEnd, maxRepeatOffset,
+                   likelihoodRatioMatrix, repeatProb, repeatEndProb,
+                   repeatOffsetProbDecay, firstGapProb, otherGapProb,
+                   probabilities);
+
+  maskProbableLetters(seqBeg, seqEnd, probabilities, minMaskProb, maskTable);
+}
+
+void getProbabilities(const uchar *seqBeg,
+                      const uchar *seqEnd,
+                      int maxRepeatOffset,
+                      const const_double_ptr *likelihoodRatioMatrix,
+                      double repeatProb,
+                      double repeatEndProb,
+                      double repeatOffsetProbDecay,
+                      double firstGapProb,
+                      double otherGapProb,
+                      float *probabilities) {  // fills probabilities with one value per letter of [seqBeg, seqEnd)
+  Tantan tantan(seqBeg, seqEnd, maxRepeatOffset, likelihoodRatioMatrix,
+                repeatProb, repeatEndProb, repeatOffsetProbDecay,
+                firstGapProb, otherGapProb);  // parameter ranges are checked by asserts in the constructor
+  tantan.calcRepeatProbs(probabilities);
+}
+
+void maskProbableLetters(uchar *seqBeg,
+                         uchar *seqEnd,
+                         const float *probabilities,
+                         double minMaskProb,
+                         const uchar *maskTable) {  // replaces, in place, each letter whose probability reaches minMaskProb
+  while (seqBeg < seqEnd) {
+    if (*probabilities >= minMaskProb)  // threshold is inclusive
+      *seqBeg = maskTable[*seqBeg];  // maskTable is indexed by the letter's own value
+    ++probabilities;
+    ++seqBeg;
+  }
+}
+
+void countTransitions(const uchar *seqBeg,
+                      const uchar *seqEnd,
+                      int maxRepeatOffset,
+                      const const_double_ptr *likelihoodRatioMatrix,
+                      double repeatProb,
+                      double repeatEndProb,
+                      double repeatOffsetProbDecay,
+                      double firstGapProb,
+                      double otherGapProb,
+                      double *transitionCounts) {  // counts are ADDED to transitionCounts[0..maxRepeatOffset]; nothing here zeroes the array first
+  Tantan tantan(seqBeg, seqEnd, maxRepeatOffset, likelihoodRatioMatrix,
+                repeatProb, repeatEndProb, repeatOffsetProbDecay,
+                firstGapProb, otherGapProb);  // parameter ranges are checked by asserts in the constructor
+  tantan.countTransitions(transitionCounts);
+}
+
+}
diff --git a/src/commons/tantan.h b/lib/tantan/tantan.h
similarity index 68%
rename from src/commons/tantan.h
rename to lib/tantan/tantan.h
index 88af9d7ef..08da49c3b 100644
--- a/src/commons/tantan.h
+++ b/lib/tantan/tantan.h
@@ -1,16 +1,6 @@
-// Copyright 2010 Martin C. Frith
-// tantan is distributed under the GNU General Public License, either
-//        version 3 of the License, or (at your option) any later version.  For
-//        details, see COPYING.txt.
-//
-// If you use tantan in your research, please cite:
-// "A new repeat-masking method enables specific detection of homologous
-// sequences", MC Frith, Nucleic Acids Research 2011 39(4):e23.
-//
-// tantan's website is: http://www.cbrc.jp/tantan/
-//
-// If you have any questions, comments, or problems concerning tantan,
-// please email: tantan (ATmark) cbrc (dot) jp.
+// Author: Martin C. Frith 2010
+// SPDX-License-Identifier: MPL-2.0
+
 // These are routines for masking simple regions (low-complexity and
 // short-period tandem repeats) in biological sequences.  To
 // understand them in detail, see the published article (in
@@ -64,10 +54,11 @@
 
 namespace tantan {
 
+typedef unsigned char uchar;
 typedef const double *const_double_ptr;
 
-int maskSequences(char *seqBeg,
-                   char *seqEnd,
+void maskSequences(uchar *seqBeg,
+                   uchar *seqEnd,
                    int maxRepeatOffset,
                    const const_double_ptr *likelihoodRatioMatrix,
                    double repeatProb,
@@ -76,14 +67,14 @@ int maskSequences(char *seqBeg,
                    double firstGapProb,
                    double otherGapProb,
                    double minMaskProb,
-                   const char *maskTable);
+                   const uchar *maskTable);
 
 // The following routine gets the posterior probability that each
 // letter is repetitive.  It stores the results in "probabilities",
 // which must point to enough pre-allocated space to fit the results.
 
-void getProbabilities(const char *seqBeg,
-                      const char *seqEnd,
+void getProbabilities(const uchar *seqBeg,
+                      const uchar *seqEnd,
                       int maxRepeatOffset,
                       const const_double_ptr *likelihoodRatioMatrix,
                       double repeatProb,
@@ -96,11 +87,34 @@ void getProbabilities(const char *seqBeg,
 // The following routine masks each letter whose corresponding entry
 // in "probabilities" is >= minMaskProb.
 
-int maskProbableLetters(char *seqBeg,
-                         char *seqEnd,
+void maskProbableLetters(uchar *seqBeg,
+                         uchar *seqEnd,
                          const float *probabilities,
                          double minMaskProb,
-                         const char *maskTable);
+                         const uchar *maskTable);
+
+// The following routine counts the expected number of transitions
+// from the background (non-repeat) state to other states.  It adds
+// the results to "transitionCounts", which must point to
+// pre-initialized space for (maxRepeatOffset+1) items.  The
+// background->background transition count is stored in
+// transitionCounts[0].  The background->(period-i repeat) transition
+// count is stored in transitionCounts[i].
+
+// (In this routine, the HMM begin and end states are counted as
+// background states.  Thus, begin->X is added to background->X, and
+// X->end is added to X->background.)
+
+void countTransitions(const uchar *seqBeg,
+                      const uchar *seqEnd,
+                      int maxRepeatOffset,
+                      const const_double_ptr *likelihoodRatioMatrix,
+                      double repeatProb,
+                      double repeatEndProb,
+                      double repeatOffsetProbDecay,
+                      double firstGapProb,
+                      double otherGapProb,
+                      double *transitionCounts);
 
 }
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index a84726da5..320cd3a38 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -130,7 +130,7 @@ else ()
     message("-- OMPTL sorting fallback")
 endif ()
 
-target_link_libraries(mmseqs-framework tinyexpr ${ZSTD_LIBRARIES} microtar)
+target_link_libraries(mmseqs-framework tinyexpr ${ZSTD_LIBRARIES} microtar tantan)
 # if (CYGWIN)
 #     target_link_libraries(mmseqs-framework nedmalloc)
 # endif ()
diff --git a/src/alignment/PSSMMasker.h b/src/alignment/PSSMMasker.h
index e00007c2c..e598165c8 100644
--- a/src/alignment/PSSMMasker.h
+++ b/src/alignment/PSSMMasker.h
@@ -9,7 +9,7 @@
 class PSSMMasker {
 public:
     PSSMMasker(size_t maxSeqLen, ProbabilityMatrix& probMatrix, BaseMatrix& subMat) : maxSeqLen(maxSeqLen), probMatrix(probMatrix), xAmioAcid(subMat.aa2num[static_cast<int>('X')]) {
-        charSequence = (char*)malloc(sizeof(char) * maxSeqLen);
+        charSequence = (unsigned char*)malloc(sizeof(char) * maxSeqLen);
     }
 
     ~PSSMMasker() {
@@ -19,7 +19,7 @@ class PSSMMasker {
     void mask(Sequence& centerSequence, float maskProb, PSSMCalculator::Profile& pssmRes) {
         if ((size_t)centerSequence.L > maxSeqLen) {
             maxSeqLen = sizeof(char) * centerSequence.L * 1.5;
-            charSequence = (char*)realloc(charSequence, maxSeqLen);
+            charSequence = (unsigned char*)realloc(charSequence, maxSeqLen);
         }
         memcpy(charSequence, centerSequence.numSequence, sizeof(unsigned char) * centerSequence.L);
         tantan::maskSequences(charSequence, charSequence + centerSequence.L,
@@ -43,7 +43,7 @@ class PSSMMasker {
         }
     }
 private:
-    char *charSequence;
+    unsigned char *charSequence;
     size_t maxSeqLen;
     ProbabilityMatrix& probMatrix;
     const int xAmioAcid;
diff --git a/src/commons/BaseMatrix.h b/src/commons/BaseMatrix.h
index 7d86bdf22..4e9783799 100644
--- a/src/commons/BaseMatrix.h
+++ b/src/commons/BaseMatrix.h
@@ -102,7 +102,7 @@ class ProbabilityMatrix {
         delete[] probMatrixPointers;
     }
 
-    char hardMaskTable[256];
+    unsigned char hardMaskTable[256];
     const double **probMatrixPointers;
 
 private:
diff --git a/src/commons/CMakeLists.txt b/src/commons/CMakeLists.txt
index dc7ab3e74..35f59b392 100644
--- a/src/commons/CMakeLists.txt
+++ b/src/commons/CMakeLists.txt
@@ -38,7 +38,6 @@ set(commons_header_files
         commons/StringBlock.h
         commons/SubstitutionMatrix.h
         commons/SubstitutionMatrixProfileStates.h
-        commons/tantan.h
         commons/TranslateNucl.h
         commons/Timer.h
         commons/UniprotKB.h
@@ -72,7 +71,6 @@ set(commons_source_files
         commons/Sequence.cpp
         commons/SequenceWeights.cpp
         commons/SubstitutionMatrix.cpp
-        commons/tantan.cpp
         commons/UniprotKB.cpp
         commons/Util.cpp
         PARENT_SCOPE
diff --git a/src/commons/tantan.cpp b/src/commons/tantan.cpp
deleted file mode 100644
index e870fa58b..000000000
--- a/src/commons/tantan.cpp
+++ /dev/null
@@ -1,506 +0,0 @@
-// Copyright 2010 Martin C. Frith
-
-#include "tantan.h"
-
-#include <algorithm>  // fill, max
-#include <cassert>
-#include <cmath>  // pow, abs
-#include <iostream>  // cerr
-#include <numeric>  // accumulate
-#include <vector>
-
-#define BEG(v) ((v).empty() ? 0 : &(v).front())
-#define END(v) ((v).empty() ? 0 : &(v).back() + 1)
-
-namespace tantan {
-
-    void multiplyAll(std::vector<double> &v, double factor) {
-        for (std::vector<double>::iterator i = v.begin(); i < v.end(); ++i)
-            *i *= factor;
-    }
-
-    double firstRepeatOffsetProb(double probMult, int maxRepeatOffset) {
-        if (probMult < 1 || probMult > 1) {
-            return (1 - probMult) / (1 - std::pow(probMult, maxRepeatOffset));
-        }
-        return 1.0 / maxRepeatOffset;
-    }
-
-    void checkForwardAndBackwardTotals(double fTot, double bTot) {
-        double x = std::abs(fTot);
-        double y = std::abs(bTot);
-
-        // ??? Is 1e6 suitable here ???
-        if (std::abs(fTot - bTot) > std::max(x, y) / 1e6)
-            std::cerr << "tantan: warning: possible numeric inaccuracy\n"
-                      << "tantan:          forward algorithm total: " << fTot << "\n"
-                      << "tantan:          backward algorithm total: " << bTot << "\n";
-    }
-
-    struct Tantan {
-        enum { scaleStepSize = 16 };
-
-        const char *seqBeg;  // start of the sequence
-        const char *seqEnd;  // end of the sequence
-        const char *seqPtr;  // current position in the sequence
-
-        int maxRepeatOffset;
-
-        const const_double_ptr *likelihoodRatioMatrix;
-
-        double b2b;  // transition probability from background to background
-        double f2b;  // transition probability from foreground to background
-        double g2g;  // transition probability from gap/indel to gap/indel
-        //double f2g;  // transition probability from foreground to gap/indel
-        //double g2f;  // transition probability from gap/indel to foreground
-        double oneGapProb;  // f2g * g2f
-        double endGapProb;  // f2g * 1
-        double f2f0;  // foreground to foreground, if there are 0 indel transitions
-        double f2f1;  // foreground to foreground, if there is 1 indel transition
-        double f2f2;  // foreground to foreground, if there are 2 indel transitions
-        double b2fDecay;
-        double b2fGrowth;
-        double b2fFirst;  // background state to first foreground state
-        double b2fLast;  // background state to last foreground state
-
-        double backgroundProb;
-        std::vector<double> b2fProbs;  // background state to each foreground state
-        std::vector<double> foregroundProbs;
-        std::vector<double> insertionProbs;
-
-        std::vector<double> scaleFactors;
-
-        Tantan(const char *seqBeg,
-               const char *seqEnd,
-               int maxRepeatOffset,
-               const const_double_ptr *likelihoodRatioMatrix,
-               double repeatProb,
-               double repeatEndProb,
-               double repeatOffsetProbDecay,
-               double firstGapProb,
-               double otherGapProb) {
-            assert(maxRepeatOffset > 0);
-            assert(repeatProb >= 0 && repeatProb < 1);
-            // (if repeatProb==1, then any sequence is impossible)
-            assert(repeatEndProb >= 0 && repeatEndProb <= 1);
-            assert(repeatOffsetProbDecay > 0 && repeatOffsetProbDecay <= 1);
-            assert(otherGapProb >= 0 && otherGapProb <= 1);
-            assert(firstGapProb >= 0);
-            assert(repeatEndProb + firstGapProb * 2 <= 1);
-
-            this->seqBeg = seqBeg;
-            this->seqEnd = seqEnd;
-            this->seqPtr = seqBeg;
-            this->maxRepeatOffset = maxRepeatOffset;
-            this->likelihoodRatioMatrix = likelihoodRatioMatrix;
-
-            b2b = 1 - repeatProb;
-            f2b = repeatEndProb;
-            g2g = otherGapProb;
-            //f2g = firstGapProb;
-            //g2f = 1 - otherGapProb;
-            oneGapProb = firstGapProb * (1 - otherGapProb);
-            endGapProb = firstGapProb * (maxRepeatOffset > 1);
-            f2f0 = 1 - repeatEndProb;
-            f2f1 = 1 - repeatEndProb - firstGapProb;
-            f2f2 = 1 - repeatEndProb - firstGapProb * 2;
-
-            b2fDecay = repeatOffsetProbDecay;
-            b2fGrowth = 1 / repeatOffsetProbDecay;
-
-            b2fFirst = repeatProb * firstRepeatOffsetProb(b2fDecay, maxRepeatOffset);
-            b2fLast = repeatProb * firstRepeatOffsetProb(b2fGrowth, maxRepeatOffset);
-
-            b2fProbs.resize(maxRepeatOffset);
-            foregroundProbs.resize(maxRepeatOffset);
-            insertionProbs.resize(maxRepeatOffset - 1);
-
-            double p = b2fFirst;
-            for (int i = 0; i < maxRepeatOffset; ++i) {
-                b2fProbs[i] = p;
-                p *= b2fDecay;
-            }
-
-            scaleFactors.resize((seqEnd - seqBeg) / scaleStepSize);
-        }
-
-        void initializeForwardAlgorithm() {
-            backgroundProb = 1.0;
-            std::fill(foregroundProbs.begin(), foregroundProbs.end(), 0.0);
-            std::fill(insertionProbs.begin(), insertionProbs.end(), 0.0);
-        }
-
-        double forwardTotal() {
-            double fromForeground = std::accumulate(foregroundProbs.begin(),
-                                                    foregroundProbs.end(), 0.0);
-            double total = backgroundProb * b2b + fromForeground * f2b;
-            assert(total > 0);
-            return total;
-        }
-
-        void initializeBackwardAlgorithm() {
-            backgroundProb = b2b;
-            std::fill(foregroundProbs.begin(), foregroundProbs.end(), f2b);
-            std::fill(insertionProbs.begin(), insertionProbs.end(), 0.0);
-        }
-
-        double backwardTotal() {
-            assert(backgroundProb > 0);
-            return backgroundProb;
-        }
-
-        void calcForwardTransitionProbsWithGaps() {
-            double fromBackground = backgroundProb * b2fLast;
-            double *foregroundPtr = &foregroundProbs.back();
-            double f = *foregroundPtr;
-            double fromForeground = f;
-
-            double *insertionPtr = &insertionProbs.back();
-            double i = *insertionPtr;
-            *foregroundPtr = fromBackground + f * f2f1 + i * endGapProb;
-            double d = f;
-            --foregroundPtr;
-            fromBackground *= b2fGrowth;
-
-            while (foregroundPtr > &foregroundProbs.front()) {
-                f = *foregroundPtr;
-                fromForeground += f;
-                i = *(insertionPtr - 1);
-                *foregroundPtr = fromBackground + f * f2f2 + (i + d) * oneGapProb;
-                *insertionPtr = f + i * g2g;
-                d = f + d * g2g;
-                --foregroundPtr;
-                --insertionPtr;
-                fromBackground *= b2fGrowth;
-            }
-
-            f = *foregroundPtr;
-            fromForeground += f;
-            *foregroundPtr = fromBackground + f * f2f1 + d * endGapProb;
-            *insertionPtr = f;
-
-            backgroundProb = backgroundProb * b2b + fromForeground * f2b;
-        }
-
-        void calcBackwardTransitionProbsWithGaps() {
-            double toBackground = f2b * backgroundProb;
-            double *foregroundPtr = &foregroundProbs.front();
-            double f = *foregroundPtr;
-            double toForeground = f;
-
-            double *insertionPtr = &insertionProbs.front();
-            double i = *insertionPtr;
-            *foregroundPtr = toBackground + f2f1 * f + i;
-            double d = endGapProb * f;
-            ++foregroundPtr;
-            toForeground *= b2fGrowth;
-
-            while (foregroundPtr < &foregroundProbs.back()) {
-                f = *foregroundPtr;
-                toForeground += f;
-                i = *(insertionPtr + 1);
-                *foregroundPtr = toBackground + f2f2 * f + (i + d);
-                double oneGapProb_f = oneGapProb * f;
-                *insertionPtr = oneGapProb_f + g2g * i;
-                d = oneGapProb_f + g2g * d;
-                ++foregroundPtr;
-                ++insertionPtr;
-                toForeground *= b2fGrowth;
-            }
-
-            f = *foregroundPtr;
-            toForeground += f;
-            *foregroundPtr = toBackground + f2f1 * f + d;
-            *insertionPtr = endGapProb * f;
-
-            backgroundProb = b2b * backgroundProb + b2fLast * toForeground;
-        }
-
-        void calcForwardTransitionProbs() {
-            if (endGapProb > 0) return calcForwardTransitionProbsWithGaps();
-
-            double b = backgroundProb;
-            double fromForeground = 0;
-            double *foregroundBeg = BEG(foregroundProbs);
-
-            for (int i = 0; i < maxRepeatOffset; ++i) {
-                double f = foregroundBeg[i];
-                fromForeground += f;
-                foregroundBeg[i] = b * b2fProbs[i] + f * f2f0;
-            }
-
-            backgroundProb = b * b2b + fromForeground * f2b;
-        }
-
-        void calcBackwardTransitionProbs() {
-            if (endGapProb > 0) return calcBackwardTransitionProbsWithGaps();
-
-            double toBackground = f2b * backgroundProb;
-            double toForeground = 0;
-            double *foregroundBeg = BEG(foregroundProbs);
-
-            for (int i = 0; i < maxRepeatOffset; ++i) {
-                double f = foregroundBeg[i];
-                toForeground += b2fProbs[i] * f;
-                foregroundBeg[i] = toBackground + f2f0 * f;
-            }
-
-            backgroundProb = b2b * backgroundProb + toForeground;
-        }
-
-        void addEndCounts(double forwardProb,
-                          double totalProb,
-                          double *transitionCounts) {
-            double toEnd = forwardProb * b2b / totalProb;
-            transitionCounts[0] += toEnd;
-        }
-
-        void addTransitionCounts(double forwardProb,
-                                 double totalProb,
-                                 double *transitionCounts) {
-            double toBg = forwardProb * b2b / totalProb;
-            double toFg = forwardProb * b2fFirst / totalProb;
-
-            transitionCounts[0] += backgroundProb * toBg;
-
-            for (double *i = BEG(foregroundProbs); i < END(foregroundProbs); ++i) {
-                ++transitionCounts;
-                *transitionCounts += *i * toFg;
-                toFg *= b2fDecay;
-            }
-        }
-
-        bool isNearSeqBeg() {
-            return seqPtr - seqBeg < maxRepeatOffset;
-        }
-
-        int maxOffsetInTheSequence() {
-            return isNearSeqBeg() ? (seqPtr - seqBeg) : maxRepeatOffset;
-        }
-
-        const char *seqFurthestBack() {
-            return isNearSeqBeg() ? seqBeg : seqPtr - maxRepeatOffset;
-        }
-
-        void calcEmissionProbs() {
-            const double *lrRow = likelihoodRatioMatrix[(int)*seqPtr];
-            const char *seqStop = seqFurthestBack();
-            double *foregroundPtr = BEG(foregroundProbs);
-            const char *offsetPtr = seqPtr;
-
-            while (offsetPtr > seqStop) {
-                --offsetPtr;
-                *foregroundPtr *= lrRow[(int)*offsetPtr];
-                ++foregroundPtr;
-            }
-
-            while (foregroundPtr < END(foregroundProbs)) {
-                *foregroundPtr *= 0;
-                ++foregroundPtr;
-            }
-        }
-
-        void calcForwardTransitionAndEmissionProbs() {
-            if (endGapProb > 0) {
-                calcForwardTransitionProbsWithGaps();
-                calcEmissionProbs();
-                return;
-            }
-
-            double b = backgroundProb;
-            double fromForeground = 0;
-            double *foregroundBeg = BEG(foregroundProbs);
-            const double *lrRow = likelihoodRatioMatrix[(int)*seqPtr];
-            int maxOffset = maxOffsetInTheSequence();
-
-            for (int i = 0; i < maxOffset; ++i) {
-                double f = foregroundBeg[i];
-                fromForeground += f;
-                foregroundBeg[i] = (b * b2fProbs[i] + f * f2f0) * lrRow[(int)seqPtr[-i-1]];
-            }
-
-            backgroundProb = b * b2b + fromForeground * f2b;
-        }
-
-        void calcEmissionAndBackwardTransitionProbs() {
-            if (endGapProb > 0) {
-                calcEmissionProbs();
-                calcBackwardTransitionProbsWithGaps();
-                return;
-            }
-
-            double toBackground = f2b * backgroundProb;
-            double toForeground = 0;
-            double *foregroundBeg = BEG(foregroundProbs);
-            const double *lrRow = likelihoodRatioMatrix[(int)*seqPtr];
-            int maxOffset = maxOffsetInTheSequence();
-
-            for (int i = 0; i < maxOffset; ++i) {
-                double f = foregroundBeg[i] * lrRow[(int)seqPtr[-i-1]];
-                toForeground += b2fProbs[i] * f;
-                foregroundBeg[i] = toBackground + f2f0 * f;
-            }
-
-            backgroundProb = b2b * backgroundProb + toForeground;
-        }
-
-        void rescale(double scale) {
-            backgroundProb *= scale;
-            multiplyAll(foregroundProbs, scale);
-            multiplyAll(insertionProbs, scale);
-        }
-
-        void rescaleForward() {
-            if ((seqPtr - seqBeg) % scaleStepSize == scaleStepSize - 1) {
-                assert(backgroundProb > 0);
-                double scale = 1 / backgroundProb;
-                scaleFactors[(seqPtr - seqBeg) / scaleStepSize] = scale;
-                rescale(scale);
-            }
-        }
-
-        void rescaleBackward() {
-            if ((seqPtr - seqBeg) % scaleStepSize == scaleStepSize - 1) {
-                double scale = scaleFactors[(seqPtr - seqBeg) / scaleStepSize];
-                rescale(scale);
-            }
-        }
-
-        void calcRepeatProbs(float *letterProbs) {
-            initializeForwardAlgorithm();
-
-            while (seqPtr < seqEnd) {
-                calcForwardTransitionAndEmissionProbs();
-                rescaleForward();
-                *letterProbs = static_cast<float>(backgroundProb);
-                ++letterProbs;
-                ++seqPtr;
-            }
-
-            double z = forwardTotal();
-
-            initializeBackwardAlgorithm();
-
-            while (seqPtr > seqBeg) {
-                --seqPtr;
-                --letterProbs;
-                double nonRepeatProb = *letterProbs * backgroundProb / z;
-                // Convert nonRepeatProb to a float, so that it is more likely
-                // to be exactly 1 when it should be, e.g. for the 1st letter of
-                // a sequence:
-                *letterProbs = 1 - static_cast<float>(nonRepeatProb);
-                rescaleBackward();
-                calcEmissionAndBackwardTransitionProbs();
-            }
-
-            double z2 = backwardTotal();
-            checkForwardAndBackwardTotals(z, z2);
-        }
-
-        void countTransitions(double *transitionCounts) {
-            std::vector<float> p(seqEnd - seqBeg);
-            float *letterProbs = BEG(p);
-
-            initializeForwardAlgorithm();
-
-            while (seqPtr < seqEnd) {
-                *letterProbs = static_cast<float>(backgroundProb);
-                calcForwardTransitionProbs();
-                calcEmissionProbs();
-                rescaleForward();
-                ++letterProbs;
-                ++seqPtr;
-            }
-
-            double z = forwardTotal();
-
-            addEndCounts(backgroundProb, z, transitionCounts);
-
-            initializeBackwardAlgorithm();
-
-            while (seqPtr > seqBeg) {
-                --seqPtr;
-                --letterProbs;
-                rescaleBackward();
-                calcEmissionProbs();
-                addTransitionCounts(*letterProbs, z, transitionCounts);
-                calcBackwardTransitionProbs();
-            }
-
-            double z2 = backwardTotal();
-            checkForwardAndBackwardTotals(z, z2);
-        }
-    };
-
-    int maskSequences(char *seqBeg,
-                       char *seqEnd,
-                       int maxRepeatOffset,
-                       const const_double_ptr *likelihoodRatioMatrix,
-                       double repeatProb,
-                       double repeatEndProb,
-                       double repeatOffsetProbDecay,
-                       double firstGapProb,
-                       double otherGapProb,
-                       double minMaskProb,
-                       const char *maskTable) {
-        std::vector<float> p(seqEnd - seqBeg);
-        float *probabilities = BEG(p);
-
-        getProbabilities(seqBeg, seqEnd, maxRepeatOffset,
-                         likelihoodRatioMatrix, repeatProb, repeatEndProb,
-                         repeatOffsetProbDecay, firstGapProb, otherGapProb,
-                         probabilities);
-
-        return maskProbableLetters(seqBeg, seqEnd, probabilities, minMaskProb, maskTable);
-    }
-
-    void getProbabilities(const char *seqBeg,
-                          const char *seqEnd,
-                          int maxRepeatOffset,
-                          const const_double_ptr *likelihoodRatioMatrix,
-                          double repeatProb,
-                          double repeatEndProb,
-                          double repeatOffsetProbDecay,
-                          double firstGapProb,
-                          double otherGapProb,
-                          float *probabilities) {
-        Tantan tantan(seqBeg, seqEnd, maxRepeatOffset, likelihoodRatioMatrix,
-                      repeatProb, repeatEndProb, repeatOffsetProbDecay,
-                      firstGapProb, otherGapProb);
-        tantan.calcRepeatProbs(probabilities);
-    }
-
-    int maskProbableLetters(char *seqBeg,
-                             char *seqEnd,
-                             const float *probabilities,
-                             double minMaskProb,
-                             const char *maskTable) {
-        int masked = 0;
-        while (seqBeg < seqEnd) {
-            if (*probabilities >= minMaskProb){
-                *seqBeg = maskTable[(int)*seqBeg];
-                masked++;
-            }
-            ++probabilities;
-            ++seqBeg;
-        }
-        return masked;
-    }
-
-    void countTransitions(const char *seqBeg,
-                          const char *seqEnd,
-                          int maxRepeatOffset,
-                          const const_double_ptr *likelihoodRatioMatrix,
-                          double repeatProb,
-                          double repeatEndProb,
-                          double repeatOffsetProbDecay,
-                          double firstGapProb,
-                          double otherGapProb,
-                          double *transitionCounts) {
-        Tantan tantan(seqBeg, seqEnd, maxRepeatOffset, likelihoodRatioMatrix,
-                      repeatProb, repeatEndProb, repeatOffsetProbDecay,
-                      firstGapProb, otherGapProb);
-        tantan.countTransitions(transitionCounts);
-    }
-
-}
diff --git a/src/linclust/kmermatcher.cpp b/src/linclust/kmermatcher.cpp
index 3e0740c59..63ecb4c33 100644
--- a/src/linclust/kmermatcher.cpp
+++ b/src/linclust/kmermatcher.cpp
@@ -56,8 +56,8 @@ KmerPosition<T> *initKmerPositionMemory(size_t size) {
 
 void maskSequence(int maskMode, int maskLowerCase, float maskProb, Sequence &seq, int maskLetter, ProbabilityMatrix * probMatrix){
     if (maskMode == 1) {
-        tantan::maskSequences((char*)seq.numSequence,
-                              (char*)(seq.numSequence + seq.L),
+        tantan::maskSequences((unsigned char*)seq.numSequence,
+                              (unsigned char*)(seq.numSequence + seq.L),
                               50 /*options.maxCycleLength*/,
                               probMatrix->probMatrixPointers,
                               0.005 /*options.repeatProb*/,
diff --git a/src/prefiltering/IndexBuilder.cpp b/src/prefiltering/IndexBuilder.cpp
index 120a173bb..e4e6eea9d 100644
--- a/src/prefiltering/IndexBuilder.cpp
+++ b/src/prefiltering/IndexBuilder.cpp
@@ -144,8 +144,9 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
                 }
                 if (mask == true) {
                     // s.print();
-                    maskedResidues += tantan::maskSequences((char*)s.numSequence,
-                                                            (char*)(s.numSequence + s.L),
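+                    // TODO: tantan::maskSequences() now returns void, so maskedResidues is no longer updated here; restore the masked-residue count.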
+                    tantan::maskSequences((unsigned char*)s.numSequence,
+                                                            (unsigned char*)(s.numSequence + s.L),
                                                             50 /*options.maxCycleLength*/,
                                                             probMatrix->probMatrixPointers,
                                                             0.005 /*options.repeatProb*/,
diff --git a/src/util/masksequence.cpp b/src/util/masksequence.cpp
index 82a468c76..8c09d2314 100644
--- a/src/util/masksequence.cpp
+++ b/src/util/masksequence.cpp
@@ -43,7 +43,7 @@ int masksequence(int argc, const char **argv, const Command& command) {
 #ifdef OPENMP
         thread_idx = (unsigned int) omp_get_thread_num();
 #endif
-        char *charSequence = new char[maxSeqLen + 1];
+        unsigned char *charSequence = new unsigned char[maxSeqLen + 1];
 
 #pragma omp for schedule(dynamic, 1)
         for (size_t id = 0; id < reader.getSize(); ++id) {
@@ -68,7 +68,7 @@ int masksequence(int argc, const char **argv, const Command& command) {
                 char aa = seqData[pos];
                 charSequence[pos] = (charSequence[pos] == probMatrix.hardMaskTable[0]) ? tolower(aa) : toupper(aa);
             }
-            writer.writeData(charSequence, seqLen, reader.getDbKey(id), thread_idx);
+            writer.writeData((char*)charSequence, seqLen, reader.getDbKey(id), thread_idx);
         }
         delete[] charSequence;
     }