From 13c2127c1ad02c9b335369e10cc59effcd7451d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sladk=C3=BD?= <127035311+OndrejSladky@users.noreply.github.com> Date: Thu, 22 Aug 2024 21:25:21 +0200 Subject: [PATCH] KmerCamel supports k up to 127. (#74) * KmerCamel supports k up to 127. * Presort: explicit conversion to int64 from large k-mers. * Presort: explicit conversion to size_t everywhere. * Presort: size_t's changed to uint64_t's. * Presort: additional macos fixes. --- README.md | 4 +- src/global.h | 14 +- src/khash_utils.h | 18 +- src/kmers.h | 15 +- src/main.cpp | 13 +- src/uint256_t/uint256_t.build | 17 + src/uint256_t/uint256_t.h | 495 +++++++++++++++++++++++ src/uint256_t/uint256_t.include | 537 +++++++++++++++++++++++++ src/uint256_t/uint256_t_config.include | 19 + verify.py | 2 +- 10 files changed, 1113 insertions(+), 21 deletions(-) create mode 100644 src/uint256_t/uint256_t.build create mode 100644 src/uint256_t/uint256_t.h create mode 100644 src/uint256_t/uint256_t.include create mode 100644 src/uint256_t/uint256_t_config.include diff --git a/README.md b/README.md index 3a09ca0..9ced772 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ They come in two different implementations (their results may differ due to the Note that at this point only the implementations with hash table are optimized and that the Aho-Corasick automaton based versions of the algorithms are only experimental. -The hashing based implementations of KmerCamel🐫 support $k$-mer with $k$ at most 63, +The hashing based implementations of KmerCamel🐫 support $k$-mer with $k$ at most 127, All algorithms can be used to either work in the unidirectional model or in the bidirectional model (i.e. treat $k$-mer and its reverse complement as the same; in this case either of them appears in the result). @@ -62,7 +62,7 @@ on macOS. The program has the following arguments: - `-p path_to_fasta` - the path to fasta file (can be `gzip`ed). This is a required argument. 
-- `-k value_of_k` - the size of one k-mer (up to 63). This is a required argument. +- `-k value_of_k` - the size of one k-mer (up to 127). This is a required argument. - `-a algorithm` - the algorithm which should be run. Either `global` or `globalAC` for Global Greedy, `local` or `localAC` for Local Greedy. The versions with AC use Aho-Corasick automaton. Default `global`. - `-o output_path` - the path to output file. If not specified, output is printed to stdout. diff --git a/src/global.h b/src/global.h index ae6ad0a..15d782a 100644 --- a/src/global.h +++ b/src/global.h @@ -31,17 +31,17 @@ typedef std::pair, std::vector> overlapPath; template void PartialPreSort(std::vector &vals, int k) { int SORT_FIRST_BITS = std::min(2 * k, SORT_FIRST_BITS_DEFAULT); - kmer_t DIFFERENT_PREFIXES_COUNT = kmer_t(1) << SORT_FIRST_BITS; + uint64_t DIFFERENT_PREFIXES_COUNT = 1ULL << SORT_FIRST_BITS; kmer_t PREFIX_MASK = DIFFERENT_PREFIXES_COUNT - kmer_t(1); std::vector counts(DIFFERENT_PREFIXES_COUNT, 0); int shift = (2 * k) - SORT_FIRST_BITS; kmer_t mask = PREFIX_MASK << shift; - for (auto &&kMer : vals) counts[(kMer & mask) >> shift]++; + for (auto &&kMer : vals) counts[(uint64_t)((kMer & mask) >> shift)]++; std::vector> distributed(DIFFERENT_PREFIXES_COUNT); - for (kmer_t i = 0; i < DIFFERENT_PREFIXES_COUNT; ++i) distributed[i] = std::vector (counts[i]); - for (kmer_t i = 0; i < DIFFERENT_PREFIXES_COUNT; ++i) counts[i] = 0; + for (uint64_t i = 0; i < DIFFERENT_PREFIXES_COUNT; ++i) distributed[i] = std::vector (counts[i]); + for (uint64_t i = 0; i < DIFFERENT_PREFIXES_COUNT; ++i) counts[i] = 0; for (auto &&kMer : vals) { - kmer_t index = (kMer & mask) >> shift; + uint64_t index = (kMer & mask) >> shift; distributed[index][counts[index]++] = kMer; } size_t index = 0; @@ -164,7 +164,7 @@ void SuperstringFromPath(const overlapPath &hamiltonianPath, const std::vector>65^(key)^(key)<<21)) #define kh_int128_hash_equal(a, b) ((a) == (b)) +#define kh_int256_hash_func(key) 
kh_int128_hash_func((__uint128_t)((key)>>129^(key)^(key)<<35)) +#define kh_int256_hash_equal(a, b) ((a) == (b)) #define KHASH_MAP_INIT_INT128(name, khval_t) \ KHASH_INIT(name, __uint128_t, khval_t, 1, kh_int128_hash_func, kh_int128_hash_equal) #define KHASH_SET_INIT_INT128(name) \ - KHASH_INIT(name, __uint128_t, char, 0, kh_int128_hash_func, kh_int128_hash_equal) + KHASH_INIT(name, __uint128_t, char, 0, kh_int128_hash_func, kh_int128_hash_equal) -// Use 128-bit integers for k-mers to allow for larger k. +#define KHASH_MAP_INIT_INT256(name, khval_t) \ + KHASH_INIT(name, uint256_t, khval_t, 1, kh_int256_hash_func, kh_int256_hash_equal) + +#define KHASH_SET_INIT_INT256(name) \ + KHASH_INIT(name, uint256_t, char, 0, kh_int256_hash_func, kh_int256_hash_equal) + +// Use 256-bit integers for extra large k-mers to allow for larger k. +KHASH_SET_INIT_INT256(S256) +KHASH_MAP_INIT_INT256(P256, size_t) +// Use 128-bit integers for large k-mers to allow for larger k. KHASH_SET_INIT_INT128(S128) KHASH_MAP_INIT_INT128(P128, size_t) -// Use 64-bits integers for k-mers for faster operations and less memory usage. +// Use 64-bit integers for small k-mers for faster operations and less memory usage. KHASH_SET_INIT_INT64(S64) KHASH_MAP_INIT_INT64(P64, size_t) @@ -62,6 +73,7 @@ KHASH_MAP_INIT_INT64(P64, size_t) INIT_KHASH_WRAPPER(64) INIT_KHASH_WRAPPER(128) +INIT_KHASH_WRAPPER(256) /// Determine whether the k-mer or its reverse complement is present. template diff --git a/src/kmers.h b/src/kmers.h index 451e03c..97b6191 100644 --- a/src/kmers.h +++ b/src/kmers.h @@ -4,10 +4,13 @@ #include #include +#include "uint256_t/uint256_t.h" + #include "ac/kmers_ac.h" typedef __uint128_t kmer128_t; typedef uint64_t kmer64_t; +typedef uint256_t kmer256_t; static const uint8_t nucleotideToInt[] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -65,7 +68,6 @@ inline kmer64_t word_reverse_complement(kmer64_t w) { return ((U)-1) - w; } - /// Compute the reverse complement of a word. 
/// Copyright: Jellyfish GPL-3.0 inline kmer128_t word_reverse_complement(kmer128_t w) { @@ -79,6 +81,13 @@ inline kmer128_t word_reverse_complement(kmer128_t w) { return ((U)-1) - w; } +/// Compute the reverse complement of a word. +inline kmer256_t word_reverse_complement(kmer256_t w) { + kmer128_t low = word_reverse_complement(w.lower()); + kmer128_t high = word_reverse_complement(w.upper()); + return kmer256_t(low, high); +} + /// Compute the reverse complement of the given k-mer. template kmer_t ReverseComplement(kmer_t kMer, int k) { @@ -90,7 +99,7 @@ const char letters[4] {'A', 'C', 'G', 'T'}; /// Return the index-th nucleotide from the encoded k-mer. template inline char NucleotideAtIndex(kmer_t encoded, int k, int index) { - return letters[(encoded >> ((k - index - kmer_t(1)) << kmer_t(1))) & kmer_t(3)]; + return letters[(uint64_t)((encoded >> ((k - index - kmer_t(1)) << kmer_t(1))) & kmer_t(3))]; } /// Convert the encoded KMer representation to string. @@ -99,7 +108,7 @@ std::string NumberToKMer(kmer_t encoded, int length) { std::string ret(length, 'N'); for (int i = 0; i < length; ++i) { // The last two bits correspond to one nucleotide. - ret[length - i -1] = letters[encoded & 3]; + ret[length - i -1] = letters[(uint64_t)(encoded & 3)]; // Move to the next letter. 
encoded >>= 2; } diff --git a/src/main.cpp b/src/main.cpp index 6c0432b..53f49f5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -17,7 +17,7 @@ int Help() { std::cerr << "KmerCamel version " << VERSION << std::endl; std::cerr << "Accepted arguments:" << std::endl; std::cerr << " -p path_to_fasta - required; valid path to fasta file (can be gziped)" << std::endl; - std::cerr << " -k k_value - required; integer value for k (up to 63)" << std::endl; + std::cerr << " -k k_value - required; integer value for k (up to 127)" << std::endl; std::cerr << " -a algorithm - the algorithm to be run [global (default), globalAC, local, localAC, streaming]" << std::endl; std::cerr << " -o output_path - if not specified, the output is printed to stdout" << std::endl; std::cerr << " -d d_value - integer value for d_max; default 5" << std::endl; @@ -31,7 +31,7 @@ int Help() { std::cerr << "For optimization of masks use `kmercamel optimize`." << std::endl; std::cerr << "Accepted arguments:" << std::endl; std::cerr << " -p path_to_fasta - required; valid path to fasta file (can be gziped)" << std::endl; - std::cerr << " -k k_value - required; integer value for k (up to 63)" << std::endl; + std::cerr << " -k k_value - required; integer value for k (up to 127)" << std::endl; std::cerr << " -a algorithm - the algorithm to be run [ones (default), runs, runsapprox, zeros]" << std::endl; std::cerr << " -o output_path - if not specified, the output is printed to stdout" << std::endl; std::cerr << " -c - treat k-mer and its reverse complement as equal" << std::endl; @@ -40,7 +40,7 @@ int Help() { return 1; } -constexpr int MAX_K = 63; +constexpr int MAX_K = 127; void Version() { std::cerr << VERSION << std::endl; @@ -50,7 +50,7 @@ void Version() { #define INIT_KMERCAMEL(type) \ int kmercamel##type(std::string path, int k, int d_max, std::ostream *of, bool complements, bool masks, std::string algorithm, bool optimize_memory) { \ kmer_dict##type##_t wrapper; \ - kmer##type##_t kmer_type; \ + 
kmer##type##_t kmer_type = 0; \ if (masks) { \ int ret = Optimize(wrapper, kmer_type, algorithm, path, *of, k, complements); \ if (ret) Help(); \ @@ -108,6 +108,7 @@ int kmercamel##type(std::string path, int k, int d_max, std::ostream *of, bool c INIT_KMERCAMEL(64) INIT_KMERCAMEL(128) +INIT_KMERCAMEL(256) int main(int argc, char **argv) { std::string path; @@ -198,7 +199,9 @@ int main(int argc, char **argv) { } if (k < 32) { return kmercamel64(path, k, d_max, of, complements, masks, algorithm, optimize_memory); - } else { + } else if (k < 64) { return kmercamel128(path, k, d_max, of, complements, masks, algorithm, optimize_memory); + } else { + return kmercamel256(path, k, d_max, of, complements, masks, algorithm, optimize_memory); } } diff --git a/src/uint256_t/uint256_t.build b/src/uint256_t/uint256_t.build new file mode 100644 index 0000000..114299a --- /dev/null +++ b/src/uint256_t/uint256_t.build @@ -0,0 +1,17 @@ +// IMPLEMENTATION BUILD HEADER + +// We need uint128_t symbols as plain "extern", neither import nor export +// because we're linking the 128 and 256 object files into a single library +// So we can only have one export for symbol in any translation unit +#define UINT256_T_EXTERN +typedef __uint128_t uint128_t; +#undef UINT256_T_EXTERN + +#ifndef _UNIT256_T_BUILD + #define _UINT256_T_BUILD + #include "uint256_t_config.include" + const uint128_t uint128_0(0); + const uint128_t uint128_1(1); + #define UINT256_T_EXTERN _UINT256_T_EXPORT +#endif +#include "uint256_t.include" diff --git a/src/uint256_t/uint256_t.h b/src/uint256_t/uint256_t.h new file mode 100644 index 0000000..1f3ddae --- /dev/null +++ b/src/uint256_t/uint256_t.h @@ -0,0 +1,495 @@ +#ifndef __LITTLE_ENDIAN__ +#ifndef __BIG_ENDIAN__ + #define __LITTLE_ENDIAN__ 1 +#endif +#endif +#include "uint256_t.build" +#include +#include + +const uint128_t uint128_64(64); +const uint128_t uint128_128(128); +const uint128_t uint128_256(256); +const uint256_t uint256_0(0); +const uint256_t uint256_1(1); 
+const uint256_t uint256_max(uint128_t(-1), uint128_t(-1)); + +uint256_t::uint256_t(const bool & b) + : uint256_t((uint8_t) b) +{} + +uint256_t & uint256_t::operator=(const bool & rhs) { + UPPER = 0; + LOWER = rhs; + return *this; +} + +uint256_t::operator bool() const{ + return (bool) (UPPER | LOWER); +} + +uint256_t::operator uint8_t() const{ + return (uint8_t) LOWER; +} + +uint256_t::operator uint16_t() const{ + return (uint16_t) LOWER; +} + +uint256_t::operator uint32_t() const{ + return (uint32_t) LOWER; +} + +uint256_t::operator uint64_t() const{ + return (uint64_t) LOWER; +} + +uint256_t::operator uint128_t() const{ + return LOWER; +} + +uint256_t uint256_t::operator&(const uint128_t & rhs) const{ + return uint256_t(uint128_0, LOWER & rhs); +} + +uint256_t uint256_t::operator&(const uint256_t & rhs) const{ + return uint256_t(UPPER & rhs.UPPER, LOWER & rhs.LOWER); +} + +uint256_t & uint256_t::operator&=(const uint128_t & rhs){ + UPPER = uint128_0; + LOWER &= rhs; + return *this; +} + +uint256_t & uint256_t::operator&=(const uint256_t & rhs){ + UPPER &= rhs.UPPER; + LOWER &= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator|(const uint128_t & rhs) const{ + return uint256_t(UPPER , LOWER | rhs); +} + +uint256_t uint256_t::operator|(const uint256_t & rhs) const{ + return uint256_t(UPPER | rhs.UPPER, LOWER | rhs.LOWER); +} + +uint256_t & uint256_t::operator|=(const uint128_t & rhs){ + LOWER |= rhs; + return *this; +} + +uint256_t & uint256_t::operator|=(const uint256_t & rhs){ + UPPER |= rhs.UPPER; + LOWER |= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator^(const uint128_t & rhs) const{ + return uint256_t(UPPER, LOWER ^ rhs); +} + +uint256_t uint256_t::operator^(const uint256_t & rhs) const{ + return uint256_t(UPPER ^ rhs.UPPER, LOWER ^ rhs.LOWER); +} + +uint256_t & uint256_t::operator^=(const uint128_t & rhs){ + LOWER ^= rhs; + return *this; +} + +uint256_t & uint256_t::operator^=(const uint256_t & rhs){ + UPPER ^= rhs.UPPER; + LOWER 
^= rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator~() const{ + return uint256_t(~UPPER, ~LOWER); +} + +uint256_t uint256_t::operator<<(const uint128_t & rhs) const{ + return *this << uint256_t(rhs); +} + +uint256_t uint256_t::operator<<(const uint256_t & rhs) const{ + const uint128_t shift = rhs.LOWER; + if (((bool) rhs.UPPER) || (shift >= uint128_256)){ + return uint256_0; + } + else if (shift == uint128_128){ + return uint256_t(LOWER, uint128_0); + } + else if (shift == uint128_0){ + return *this; + } + else if (shift < uint128_128){ + return uint256_t((UPPER << shift) + (LOWER >> (uint128_128 - shift)), LOWER << shift); + } + else if ((uint128_256 > shift) && (shift > uint128_128)){ + return uint256_t(LOWER << (shift - uint128_128), uint128_0); + } + else{ + return uint256_0; + } +} + +uint256_t & uint256_t::operator<<=(const uint128_t & shift){ + return *this <<= uint256_t(shift); +} + +uint256_t & uint256_t::operator<<=(const uint256_t & shift){ + *this = *this << shift; + return *this; +} + +uint256_t uint256_t::operator>>(const uint128_t & rhs) const{ + return *this >> uint256_t(rhs); +} + +uint256_t uint256_t::operator>>(const uint256_t & rhs) const{ + const uint128_t shift = rhs.LOWER; + if (((bool) rhs.UPPER) | (shift >= uint128_256)){ + return uint256_0; + } + else if (shift == uint128_128){ + return uint256_t(UPPER); + } + else if (shift == uint128_0){ + return *this; + } + else if (shift < uint128_128){ + return uint256_t(UPPER >> shift, (UPPER << (uint128_128 - shift)) + (LOWER >> shift)); + } + else if ((uint128_256 > shift) && (shift > uint128_128)){ + return uint256_t(UPPER >> (shift - uint128_128)); + } + else{ + return uint256_0; + } +} + +uint256_t & uint256_t::operator>>=(const uint128_t & shift){ + return *this >>= uint256_t(shift); +} + +uint256_t & uint256_t::operator>>=(const uint256_t & shift){ + *this = *this >> shift; + return *this; +} + +bool uint256_t::operator!() const{ + return ! 
(bool) *this; +} + +bool uint256_t::operator&&(const uint128_t & rhs) const{ + return (*this && uint256_t(rhs)); +} + +bool uint256_t::operator&&(const uint256_t & rhs) const{ + return ((bool) *this && (bool) rhs); +} + +bool uint256_t::operator||(const uint128_t & rhs) const{ + return (*this || uint256_t(rhs)); +} + +bool uint256_t::operator||(const uint256_t & rhs) const{ + return ((bool) *this || (bool) rhs); +} + +bool uint256_t::operator==(const uint128_t & rhs) const{ + return (*this == uint256_t(rhs)); +} + +bool uint256_t::operator==(const uint256_t & rhs) const{ + return ((UPPER == rhs.UPPER) && (LOWER == rhs.LOWER)); +} + +bool uint256_t::operator!=(const uint128_t & rhs) const{ + return (*this != uint256_t(rhs)); +} + +bool uint256_t::operator!=(const uint256_t & rhs) const{ + return ((UPPER != rhs.UPPER) | (LOWER != rhs.LOWER)); +} + +bool uint256_t::operator>(const uint128_t & rhs) const{ + return (*this > uint256_t(rhs)); +} + +bool uint256_t::operator>(const uint256_t & rhs) const{ + if (UPPER == rhs.UPPER){ + return (LOWER > rhs.LOWER); + } + if (UPPER > rhs.UPPER){ + return true; + } + return false; +} + +bool uint256_t::operator<(const uint128_t & rhs) const{ + return (*this < uint256_t(rhs)); +} + +bool uint256_t::operator<(const uint256_t & rhs) const{ + if (UPPER == rhs.UPPER){ + return (LOWER < rhs.LOWER); + } + if (UPPER < rhs.UPPER){ + return true; + } + return false; +} + +bool uint256_t::operator>=(const uint128_t & rhs) const{ + return (*this >= uint256_t(rhs)); +} + +bool uint256_t::operator>=(const uint256_t & rhs) const{ + return ((*this > rhs) | (*this == rhs)); +} + +bool uint256_t::operator<=(const uint128_t & rhs) const{ + return (*this <= uint256_t(rhs)); +} + +bool uint256_t::operator<=(const uint256_t & rhs) const{ + return ((*this < rhs) | (*this == rhs)); +} + +uint256_t uint256_t::operator+(const uint128_t & rhs) const{ + return *this + uint256_t(rhs); +} + +uint256_t uint256_t::operator+(const uint256_t & rhs) const{ + 
return uint256_t(UPPER + rhs.UPPER + (((LOWER + rhs.LOWER) < LOWER)?uint128_1:uint128_0), LOWER + rhs.LOWER); +} + +uint256_t & uint256_t::operator+=(const uint128_t & rhs){ + return *this += uint256_t(rhs); +} + +uint256_t & uint256_t::operator+=(const uint256_t & rhs){ + UPPER = rhs.UPPER + UPPER + ((LOWER + rhs.LOWER) < LOWER); + LOWER = LOWER + rhs.LOWER; + return *this; +} + +uint256_t uint256_t::operator-(const uint128_t & rhs) const{ + return *this - uint256_t(rhs); +} + +uint256_t uint256_t::operator-(const uint256_t & rhs) const{ + return uint256_t(UPPER - rhs.UPPER - ((LOWER - rhs.LOWER) > LOWER), LOWER - rhs.LOWER); +} + +uint256_t & uint256_t::operator-=(const uint128_t & rhs){ + return *this -= uint256_t(rhs); +} + +uint256_t & uint256_t::operator-=(const uint256_t & rhs){ + *this = *this - rhs; + return *this; +} + +uint256_t & uint256_t::operator++(){ + *this += uint256_1; + return *this; +} + +uint256_t uint256_t::operator++(int){ + uint256_t temp(*this); + ++*this; + return temp; +} + +uint256_t & uint256_t::operator--(){ + *this -= uint256_1; + return *this; +} + +uint256_t uint256_t::operator--(int){ + uint256_t temp(*this); + --*this; + return temp; +} + +uint256_t uint256_t::operator+() const{ + return *this; +} + +uint256_t uint256_t::operator-() const{ + return ~*this + uint256_1; +} + +const uint128_t & uint256_t::upper() const { + return UPPER; +} + +const uint128_t & uint256_t::lower() const { + return LOWER; +} + +uint256_t operator&(const uint128_t & lhs, const uint256_t & rhs){ + return rhs & lhs; +} + +uint128_t & operator&=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs & lhs).lower(); + return lhs; +} + +uint256_t operator|(const uint128_t & lhs, const uint256_t & rhs){ + return rhs | lhs; +} + +uint128_t & operator|=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs | lhs).lower(); + return lhs; +} + +uint256_t operator^(const uint128_t & lhs, const uint256_t & rhs){ + return rhs ^ lhs; +} + +uint128_t & 
operator^=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs ^ lhs).lower(); + return lhs; +} + +uint256_t operator<<(const bool & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const uint128_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint256_t operator<<(const int64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) << rhs; +} + +uint128_t & operator<<=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (uint256_t(lhs) << rhs).lower(); + return lhs; +} + +uint256_t operator>>(const bool & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const uint128_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int8_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + 
+uint256_t operator>>(const int16_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int32_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint256_t operator>>(const int64_t & lhs, const uint256_t & rhs){ + return uint256_t(lhs) >> rhs; +} + +uint128_t & operator>>=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (uint256_t(lhs) >> rhs).lower(); + return lhs; +} + +// Comparison Operators +bool operator==(const uint128_t & lhs, const uint256_t & rhs){ + return rhs == lhs; +} + +bool operator!=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs != lhs; +} + +bool operator>(const uint128_t & lhs, const uint256_t & rhs){ + return rhs < lhs; +} + +bool operator<(const uint128_t & lhs, const uint256_t & rhs){ + return rhs > lhs; +} + +bool operator>=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs <= lhs; +} + +bool operator<=(const uint128_t & lhs, const uint256_t & rhs){ + return rhs >= lhs; +} + +// Arithmetic Operators +uint256_t operator+(const uint128_t & lhs, const uint256_t & rhs){ + return rhs + lhs; +} + +uint128_t & operator+=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (rhs + lhs).lower(); + return lhs; +} + +uint256_t operator-(const uint128_t & lhs, const uint256_t & rhs){ + return -(rhs - lhs); +} + +uint128_t & operator-=(uint128_t & lhs, const uint256_t & rhs){ + lhs = (-(rhs - lhs)).lower(); + return lhs; +} diff --git a/src/uint256_t/uint256_t.include b/src/uint256_t/uint256_t.include new file mode 100644 index 0000000..1e719a6 --- /dev/null +++ b/src/uint256_t/uint256_t.include @@ -0,0 +1,537 @@ +/* +uint256_t.h +An unsigned 256 bit integer library for C++ + +Copyright (c) 2013 - 2017 Jason Lee @ calccrypto at gmail.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights 
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +With much help from Auston Sterling + +Thanks to François Dessenne for convincing me +to do a general rewrite of this class. + + +Ondřej Sladký: removed parts of this file as they were not needed and were +not compatible with __uint128_t. 
+*/ + +#ifndef __UINT256_T__ +#define __UINT256_T__ + +#include +#include +#include +#include +#include +#include + +class UINT256_T_EXTERN uint256_t; + +// Give uint256_t type traits +namespace std { // This is probably not a good idea + template <> struct is_arithmetic : std::true_type {}; + template <> struct is_integral : std::true_type {}; + template <> struct is_unsigned : std::true_type {}; +} + +class uint256_t{ + private: +#ifdef __BIG_ENDIAN__ + uint128_t UPPER, LOWER; +#endif +#ifdef __LITTLE_ENDIAN__ + uint128_t LOWER, UPPER; +#endif + + public: + // Constructors + uint256_t() = default; + uint256_t(const uint256_t & rhs) = default; + uint256_t(uint256_t && rhs) = default; + uint256_t(const std::string & s); + uint256_t(const char *val); + uint256_t(const std::string & s, uint8_t base); + uint256_t(const char *val, uint8_t base); + uint256_t(const bool & b); + + template ::value, T>::type > + uint256_t(const T & rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(uint128_0), LOWER(rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(rhs), UPPER(uint128_0) +#endif + { + if (std::is_signed::value) { + if (rhs < 0) { + UPPER = uint128_t(-1); + } + } + } + + template ::value && std::is_integral::value, void>::type> + uint256_t(const S & upper_rhs, const T & lower_rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(upper_rhs), LOWER(lower_rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(lower_rhs), UPPER(upper_rhs) +#endif + {} + + uint256_t(const uint128_t & upper_rhs, const uint128_t & lower_rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(upper_rhs), LOWER(lower_rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(lower_rhs), UPPER(upper_rhs) +#endif + {} + uint256_t(const uint128_t & lower_rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(uint128_0), LOWER(lower_rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(lower_rhs), UPPER(uint128_0) +#endif + {} + + template ::value && + std::is_integral::value && + std::is_integral::value && + std::is_integral::value, void>::type> + uint256_t(const R & upper_lhs, const S 
& lower_lhs, const T & upper_rhs, const U & lower_rhs) +#ifdef __BIG_ENDIAN__ + : UPPER(upper_lhs, lower_lhs), LOWER(upper_rhs, lower_rhs) +#endif +#ifdef __LITTLE_ENDIAN__ + : LOWER(upper_rhs, lower_rhs), UPPER(upper_lhs, lower_lhs) +#endif + {} + + // Assignment Operator + uint256_t & operator=(const uint256_t & rhs) = default; + uint256_t & operator=(uint256_t && rhs) = default; + + template ::value, T>::type> + uint256_t & operator=(const T & rhs){ + UPPER = uint128_0; + + if (std::is_signed::value) { + if (rhs < 0) { + UPPER = uint128_t(-1); + } + } + + LOWER = rhs; + return *this; + } + + uint256_t & operator=(const bool & rhs); + + // Typecast Operators + operator bool () const; + operator uint8_t () const; + operator uint16_t () const; + operator uint32_t () const; + operator uint64_t () const; + operator uint128_t () const; + + // Bitwise Operators + uint256_t operator&(const uint128_t & rhs) const; + uint256_t operator&(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator&(const T & rhs) const{ + return uint256_t(uint128_0, LOWER & (uint128_t) rhs); + } + + uint256_t & operator&=(const uint128_t & rhs); + uint256_t & operator&=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator&=(const T & rhs){ + UPPER = uint128_0; + LOWER &= rhs; + return *this; + } + + uint256_t operator|(const uint128_t & rhs) const; + uint256_t operator|(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator|(const T & rhs) const{ + return uint256_t(UPPER, LOWER | uint128_t(rhs)); + } + + uint256_t & operator|=(const uint128_t & rhs); + uint256_t & operator|=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator|=(const T & rhs){ + LOWER |= (uint128_t) rhs; + return *this; + } + + uint256_t operator^(const uint128_t & rhs) const; + uint256_t operator^(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator^(const T & rhs) const{ + return 
uint256_t(UPPER, LOWER ^ (uint128_t) rhs); + } + + uint256_t & operator^=(const uint128_t & rhs); + uint256_t & operator^=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator^=(const T & rhs){ + LOWER ^= (uint128_t) rhs; + return *this; + } + + uint256_t operator~() const; + + // Bit Shift Operators + uint256_t operator<<(const uint128_t & shift) const; + uint256_t operator<<(const uint256_t & shift) const; + + template ::value, T>::type > + uint256_t operator<<(const T & rhs) const{ + return *this << uint256_t(rhs); + } + + uint256_t & operator<<=(const uint128_t & shift); + uint256_t & operator<<=(const uint256_t & shift); + + template ::value, T>::type > + uint256_t & operator<<=(const T & rhs){ + *this = *this << uint256_t(rhs); + return *this; + } + + uint256_t operator>>(const uint128_t & shift) const; + uint256_t operator>>(const uint256_t & shift) const; + + template ::value, T>::type > + uint256_t operator>>(const T & rhs) const{ + return *this >> uint256_t(rhs); + } + + uint256_t & operator>>=(const uint128_t & shift); + uint256_t & operator>>=(const uint256_t & shift); + + template ::value, T>::type > + uint256_t & operator>>=(const T & rhs){ + *this = *this >> uint256_t(rhs); + return *this; + } + + // Logical Operators + bool operator!() const; + + bool operator&&(const uint128_t & rhs) const; + bool operator&&(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator&&(const T & rhs) const{ + return ((bool) *this && rhs); + } + + bool operator||(const uint128_t & rhs) const; + bool operator||(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator||(const T & rhs) const{ + return ((bool) *this || rhs); + } + + // Comparison Operators + bool operator==(const uint128_t & rhs) const; + bool operator==(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator==(const T & rhs) const{ + return (!UPPER && (LOWER == uint128_t(rhs))); + } + + bool operator!=(const 
uint128_t & rhs) const; + bool operator!=(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator!=(const T & rhs) const{ + return ((bool) UPPER | (LOWER != uint128_t(rhs))); + } + + bool operator>(const uint128_t & rhs) const; + bool operator>(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator>(const T & rhs) const{ + return ((bool) UPPER | (LOWER > uint128_t(rhs))); + } + + bool operator<(const uint128_t & rhs) const; + bool operator<(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator<(const T & rhs) const{ + return (!UPPER)?(LOWER < uint128_t(rhs)):false; + } + + bool operator>=(const uint128_t & rhs) const; + bool operator>=(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator>=(const T & rhs) const{ + return ((*this > rhs) | (*this == rhs)); + } + + bool operator<=(const uint128_t & rhs) const; + bool operator<=(const uint256_t & rhs) const; + + template ::value, T>::type > + bool operator<=(const T & rhs) const{ + return ((*this < rhs) | (*this == rhs)); + } + + // Arithmetic Operators + uint256_t operator+(const uint128_t & rhs) const; + uint256_t operator+(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator+(const T & rhs) const{ + return uint256_t(UPPER + ((LOWER + (uint128_t) rhs) < LOWER), LOWER + (uint128_t) rhs); + } + + uint256_t & operator+=(const uint128_t & rhs); + uint256_t & operator+=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & operator+=(const T & rhs){ + return *this += uint256_t(rhs); + } + + uint256_t operator-(const uint128_t & rhs) const; + uint256_t operator-(const uint256_t & rhs) const; + + template ::value, T>::type > + uint256_t operator-(const T & rhs) const{ + return uint256_t(UPPER - ((LOWER - rhs) > LOWER), LOWER - rhs); + } + + uint256_t & operator-=(const uint128_t & rhs); + uint256_t & operator-=(const uint256_t & rhs); + + template ::value, T>::type > + uint256_t & 
operator-=(const T & rhs){ + return *this = *this - uint256_t(rhs); + } + + // Increment Operators + uint256_t & operator++(); + uint256_t operator++(int); + + // Decrement Operators + uint256_t & operator--(); + uint256_t operator--(int); + + // Nothing done since promotion doesn't work here + uint256_t operator+() const; + + // two's complement + uint256_t operator-() const; + + // Get private values + const uint128_t & upper() const; + const uint128_t & lower() const; + + // Get bitsize of value + uint16_t bits() const; + + // Get string representation of value + std::string str(uint8_t base = 10, const unsigned int & len = 0) const; +}; + +// useful values +UINT256_T_EXTERN extern const uint128_t uint128_64; +UINT256_T_EXTERN extern const uint128_t uint128_128; +UINT256_T_EXTERN extern const uint128_t uint128_256; +UINT256_T_EXTERN extern const uint256_t uint256_0; +UINT256_T_EXTERN extern const uint256_t uint256_1; +UINT256_T_EXTERN extern const uint256_t uint256_max; + +// Bitwise Operators +UINT256_T_EXTERN uint256_t operator&(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > + uint256_t operator&(const T & lhs, const uint256_t & rhs){ + return rhs & lhs; +} + +UINT256_T_EXTERN uint128_t & operator&=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator&=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (rhs & lhs); +} + +UINT256_T_EXTERN uint256_t operator|(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator|(const T & lhs, const uint256_t & rhs){ + return rhs | lhs; +} + +UINT256_T_EXTERN uint128_t & operator|=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator|=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (rhs | lhs); +} + +UINT256_T_EXTERN uint256_t operator^(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator^(const T & lhs, const 
uint256_t & rhs){ + return rhs ^ lhs; +} + +uint128_t & operator^=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator^=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (rhs ^ lhs); +} + +// Bitshift operators +UINT256_T_EXTERN uint256_t operator<<(const bool & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint8_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint16_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint32_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint64_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const uint128_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const int8_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const int16_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const int32_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator<<(const int64_t & lhs, const uint256_t & rhs); + +UINT256_T_EXTERN uint128_t & operator<<=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator<<=(T & lhs, const uint256_t & rhs){ + lhs = static_cast (uint256_t(lhs) << rhs); + return lhs; +} + +UINT256_T_EXTERN uint256_t operator>>(const bool & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint8_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint16_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint32_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint64_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const uint128_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const int8_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const int16_t & lhs, const uint256_t & 
rhs); +UINT256_T_EXTERN uint256_t operator>>(const int32_t & lhs, const uint256_t & rhs); +UINT256_T_EXTERN uint256_t operator>>(const int64_t & lhs, const uint256_t & rhs); + +UINT256_T_EXTERN uint128_t & operator>>=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator>>=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (uint256_t(lhs) >> rhs); +} + +// Comparison Operators +UINT256_T_EXTERN bool operator==(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator==(const T & lhs, const uint256_t & rhs){ + return (!rhs.upper() && ((uint64_t) lhs == rhs.lower())); +} + +UINT256_T_EXTERN bool operator!=(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator!=(const T & lhs, const uint256_t & rhs){ + return (rhs.upper() | ((uint64_t) lhs != rhs.lower())); +} + +UINT256_T_EXTERN bool operator>(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator>(const T & lhs, const uint256_t & rhs){ + return rhs.upper()?false:((uint128_t) lhs > rhs.lower()); +} + +UINT256_T_EXTERN bool operator<(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator<(const T & lhs, const uint256_t & rhs){ + return rhs.upper()?true:((uint128_t) lhs < rhs.lower()); +} + +UINT256_T_EXTERN bool operator>=(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator>=(const T & lhs, const uint256_t & rhs){ + return rhs.upper()?false:((uint128_t) lhs >= rhs.lower()); +} + +UINT256_T_EXTERN bool operator<=(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +bool operator<=(const T & lhs, const uint256_t & rhs){ + return rhs.upper()?true:((uint128_t) lhs <= rhs.lower()); +} + +// Arithmetic Operators +UINT256_T_EXTERN uint256_t operator+(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator+(const T & lhs, 
const uint256_t & rhs){ + return rhs + lhs; +} + +UINT256_T_EXTERN uint128_t & operator+=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator+=(T & lhs, const uint256_t & rhs){ + lhs = static_cast (rhs + lhs); + return lhs; +} + +UINT256_T_EXTERN uint256_t operator-(const uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +uint256_t operator-(const T & lhs, const uint256_t & rhs){ + return -(rhs - lhs); +} + +UINT256_T_EXTERN uint128_t & operator-=(uint128_t & lhs, const uint256_t & rhs); + +template ::value, T>::type > +T & operator-=(T & lhs, const uint256_t & rhs){ + return lhs = static_cast (-(rhs - lhs)); +} + +#endif diff --git a/src/uint256_t/uint256_t_config.include b/src/uint256_t/uint256_t_config.include new file mode 100644 index 0000000..e83db51 --- /dev/null +++ b/src/uint256_t/uint256_t_config.include @@ -0,0 +1,19 @@ +#ifndef _UINT256_T_CONFIG_ + #define _UINT256_T_CONFIG_ + #if defined(_MSC_VER) + #if defined(_DLL) + #define _UINT256_T_EXPORT __declspec(dllexport) + #define _UINT256_T_IMPORT __declspec(dllimport) + #else + #define _UINT256_T_EXPORT + #define _UINT256_T_IMPORT + #endif + #else + // All modules on Unix are compiled with -fvisibility=hidden + // All API symbols get visibility default + // whether or not we're static linking or dynamic linking (with -fPIC) + #define _UINT256_T_EXPORT __attribute__((visibility("default"))) + #define _UINT256_T_IMPORT __attribute__((visibility("default"))) + #endif +#endif + diff --git a/verify.py b/verify.py index 14d9323..f336c41 100755 --- a/verify.py +++ b/verify.py @@ -76,7 +76,7 @@ def main(): for a in ["global", "local", "globalAC", "localAC", "streaming"]: print(f"Testing {a}:") for complements in [True, False]: - for k in ( ([5, 8, 12] if a not in ["local", "global"] else [5, 8, 12, 17, 31, 32, 51, 63]) if args.quick else range(2, 64)): + for k in ( ([5, 8, 12] if a not in ["local", "global"] else [5, 8, 12, 17, 31, 32, 51, 63, 127]) if 
args.quick else range(2, 128)): success &= verify_instance(args.path, k, a, complements, "") print("") else: