From 0941fed816755ab89f6f9ab6e8c0a5274daf5a2a Mon Sep 17 00:00:00 2001
From: Cheng Shao
Date: Fri, 15 Apr 2022 14:11:15 +0000
Subject: [PATCH] Update simdutf to 1.0.1

---
 simdutf/simdutf.cpp | 324 ++------------------------------------------
 simdutf/simdutf.h   | 114 ++++++++++++----
 2 files changed, 95 insertions(+), 343 deletions(-)

diff --git a/simdutf/simdutf.cpp b/simdutf/simdutf.cpp
index cf7d32ff..ffe82df6 100644
--- a/simdutf/simdutf.cpp
+++ b/simdutf/simdutf.cpp
@@ -1,4 +1,4 @@
-/* auto-generated on 2021-07-29 10:43:28 -0400. Do not edit! */
+/* auto-generated on 2022-03-21 23:28:26 -0400. Do not edit! */
 // dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf.cpp
 /* begin file src/simdutf.cpp */
 #include "simdutf.h"
@@ -115,93 +115,17 @@ namespace simdutf {
 namespace arm64 {
 namespace {
 
-// We sometimes call trailing_zero on inputs that are zero,
-// but the algorithms do not end up using the returned value.
-// Sadly, sanitizers are not smart enough to figure it out.
-NO_SANITIZE_UNDEFINED
-simdutf_really_inline int trailing_zeroes(uint64_t input_num) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  unsigned long ret;
-  // Search the mask data from least significant bit (LSB)
-  // to the most significant bit (MSB) for a set bit (1).
-  _BitScanForward64(&ret, input_num);
-  return (int)ret;
-#else // SIMDUTF_REGULAR_VISUAL_STUDIO
-  return __builtin_ctzll(input_num);
-#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
-}
-
-/* result might be undefined when input_num is zero */
-simdutf_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
-  return input_num & (input_num-1);
-}
-
-/* result might be undefined when input_num is zero */
-simdutf_really_inline int leading_zeroes(uint64_t input_num) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  unsigned long leading_zero = 0;
-  // Search the mask data from most significant bit (MSB)
-  // to least significant bit (LSB) for a set bit (1).
-  if (_BitScanReverse64(&leading_zero, input_num))
-    return (int)(63 - leading_zero);
-  else
-    return 64;
-#else
-  return __builtin_clzll(input_num);
-#endif// SIMDUTF_REGULAR_VISUAL_STUDIO
-}
-
 /* result might be undefined when input_num is zero */
 simdutf_really_inline int count_ones(uint64_t input_num) {
   return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
 }
 
-simdutf_really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  *result = value1 + value2;
-  return *result < value1;
-#else
-  return __builtin_uaddll_overflow(value1, value2,
-                                   reinterpret_cast<unsigned long long *>(result));
-#endif
-}
-
 } // unnamed namespace
 } // namespace arm64
 } // namespace simdutf
 
 #endif // SIMDUTF_ARM64_BITMANIPULATION_H
 /* end file src/simdutf/arm64/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/bitmask.h
-/* begin file src/simdutf/arm64/bitmask.h */
-#ifndef SIMDUTF_ARM64_BITMASK_H
-#define SIMDUTF_ARM64_BITMASK_H
-
-namespace simdutf {
-namespace arm64 {
-namespace {
-
-//
-// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
-//
-// For example, prefix_xor(00100100) == 00011100
-//
-simdutf_really_inline uint64_t prefix_xor(uint64_t bitmask) {
-  bitmask ^= bitmask << 1;
-  bitmask ^= bitmask << 2;
-  bitmask ^= bitmask << 4;
-  bitmask ^= bitmask << 8;
-  bitmask ^= bitmask << 16;
-  bitmask ^= bitmask << 32;
-  return bitmask;
-}
-
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-
-#endif
-/* end file src/simdutf/arm64/bitmask.h */
 // dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd.h
 /* begin file src/simdutf/arm64/simd.h */
 #ifndef SIMDUTF_ARM64_SIMD_H
@@ -1215,33 +1139,6 @@ namespace simdutf {
 namespace haswell {
 namespace {
 
-// We sometimes call trailing_zero on inputs that are zero,
-// but the algorithms do not end up using the returned value.
-// Sadly, sanitizers are not smart enough to figure it out.
-NO_SANITIZE_UNDEFINED
-simdutf_really_inline int trailing_zeroes(uint64_t input_num) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  return (int)_tzcnt_u64(input_num);
-#else // SIMDUTF_REGULAR_VISUAL_STUDIO
-  ////////
-  // You might expect the next line to be equivalent to
-  // return (int)_tzcnt_u64(input_num);
-  // but the generated code differs and might be less efficient?
-  ////////
-  return __builtin_ctzll(input_num);
-#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
-}
-
-/* result might be undefined when input_num is zero */
-simdutf_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
-  return _blsr_u64(input_num);
-}
-
-/* result might be undefined when input_num is zero */
-simdutf_really_inline int leading_zeroes(uint64_t input_num) {
-  return int(_lzcnt_u64(input_num));
-}
-
 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
 simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) {
   // note: we do not support legacy 32-bit Windows
@@ -1253,51 +1150,12 @@ simdutf_really_inline long long int count_ones(uint64_t input_num) {
 }
 #endif
 
-simdutf_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
-                                        uint64_t *result) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  return _addcarry_u64(0, value1, value2,
-                       reinterpret_cast<unsigned __int64 *>(result));
-#else
-  return __builtin_uaddll_overflow(value1, value2,
-                                   reinterpret_cast<unsigned long long *>(result));
-#endif
-}
-
 } // unnamed namespace
 } // namespace haswell
 } // namespace simdutf
 
 #endif // SIMDUTF_HASWELL_BITMANIPULATION_H
 /* end file src/simdutf/haswell/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/bitmask.h
-/* begin file src/simdutf/haswell/bitmask.h */
-#ifndef SIMDUTF_HASWELL_BITMASK_H
-#define SIMDUTF_HASWELL_BITMASK_H
-
-namespace simdutf {
-namespace haswell {
-namespace {
-
-//
-// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
-//
-// For example, prefix_xor(00100100) == 00011100
-//
-simdutf_really_inline uint64_t prefix_xor(const uint64_t bitmask) {
-  // There should be no such thing with a processor supporting avx2
-  // but not clmul.
-  __m128i all_ones = _mm_set1_epi8('\xFF');
-  __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0);
-  return _mm_cvtsi128_si64(result);
-}
-
-} // unnamed namespace
-} // namespace haswell
-} // namespace simdutf
-
-#endif // SIMDUTF_HASWELL_BITMASK_H
-/* end file src/simdutf/haswell/bitmask.h */
 // dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/haswell/simd.h
 /* begin file src/simdutf/haswell/simd.h */
 #ifndef SIMDUTF_HASWELL_SIMD_H
@@ -2063,42 +1921,6 @@ namespace simdutf {
 namespace westmere {
 namespace {
 
-// We sometimes call trailing_zero on inputs that are zero,
-// but the algorithms do not end up using the returned value.
-// Sadly, sanitizers are not smart enough to figure it out.
-NO_SANITIZE_UNDEFINED
-simdutf_really_inline int trailing_zeroes(uint64_t input_num) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  unsigned long ret;
-  // Search the mask data from least significant bit (LSB)
-  // to the most significant bit (MSB) for a set bit (1).
-  _BitScanForward64(&ret, input_num);
-  return (int)ret;
-#else // SIMDUTF_REGULAR_VISUAL_STUDIO
-  return __builtin_ctzll(input_num);
-#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
-}
-
-/* result might be undefined when input_num is zero */
-simdutf_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
-  return input_num & (input_num-1);
-}
-
-/* result might be undefined when input_num is zero */
-simdutf_really_inline int leading_zeroes(uint64_t input_num) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  unsigned long leading_zero = 0;
-  // Search the mask data from most significant bit (MSB)
-  // to least significant bit (LSB) for a set bit (1).
-  if (_BitScanReverse64(&leading_zero, input_num))
-    return (int)(63 - leading_zero);
-  else
-    return 64;
-#else
-  return __builtin_clzll(input_num);
-#endif// SIMDUTF_REGULAR_VISUAL_STUDIO
-}
-
 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
 simdutf_really_inline unsigned __int64 count_ones(uint64_t input_num) {
   // note: we do not support legacy 32-bit Windows
@@ -2110,51 +1932,12 @@ simdutf_really_inline long long int count_ones(uint64_t input_num) {
 }
 #endif
 
-simdutf_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
-                                        uint64_t *result) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  return _addcarry_u64(0, value1, value2,
-                       reinterpret_cast<unsigned __int64 *>(result));
-#else
-  return __builtin_uaddll_overflow(value1, value2,
-                                   reinterpret_cast<unsigned long long *>(result));
-#endif
-}
-
 } // unnamed namespace
 } // namespace westmere
 } // namespace simdutf
 
 #endif // SIMDUTF_WESTMERE_BITMANIPULATION_H
 /* end file src/simdutf/westmere/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/bitmask.h
-/* begin file src/simdutf/westmere/bitmask.h */
-#ifndef SIMDUTF_WESTMERE_BITMASK_H
-#define SIMDUTF_WESTMERE_BITMASK_H
-
-namespace simdutf {
-namespace westmere {
-namespace {
-
-//
-// Perform a "cumulative bitwise xor," flipping bits each time a 1 is encountered.
-//
-// For example, prefix_xor(00100100) == 00011100
-//
-simdutf_really_inline uint64_t prefix_xor(const uint64_t bitmask) {
-  // There should be no such thing with a processing supporting avx2
-  // but not clmul.
-  __m128i all_ones = _mm_set1_epi8('\xFF');
-  __m128i result = _mm_clmulepi64_si128(_mm_set_epi64x(0ULL, bitmask), all_ones, 0);
-  return _mm_cvtsi128_si64(result);
-}
-
-} // unnamed namespace
-} // namespace westmere
-} // namespace simdutf
-
-#endif // SIMDUTF_WESTMERE_BITMASK_H
-/* end file src/simdutf/westmere/bitmask.h */
 // dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/westmere/simd.h
 /* begin file src/simdutf/westmere/simd.h */
 #ifndef SIMDUTF_WESTMERE_SIMD_H
@@ -2941,42 +2724,6 @@ namespace simdutf {
 namespace ppc64 {
 namespace {
 
-// We sometimes call trailing_zero on inputs that are zero,
-// but the algorithms do not end up using the returned value.
-// Sadly, sanitizers are not smart enough to figure it out.
-NO_SANITIZE_UNDEFINED
-simdutf_really_inline int trailing_zeroes(uint64_t input_num) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  unsigned long ret;
-  // Search the mask data from least significant bit (LSB)
-  // to the most significant bit (MSB) for a set bit (1).
-  _BitScanForward64(&ret, input_num);
-  return (int)ret;
-#else // SIMDUTF_REGULAR_VISUAL_STUDIO
-  return __builtin_ctzll(input_num);
-#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
-}
-
-/* result might be undefined when input_num is zero */
-simdutf_really_inline uint64_t clear_lowest_bit(uint64_t input_num) {
-  return input_num & (input_num - 1);
-}
-
-/* result might be undefined when input_num is zero */
-simdutf_really_inline int leading_zeroes(uint64_t input_num) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  unsigned long leading_zero = 0;
-  // Search the mask data from most significant bit (MSB)
-  // to least significant bit (LSB) for a set bit (1).
-  if (_BitScanReverse64(&leading_zero, input_num))
-    return (int)(63 - leading_zero);
-  else
-    return 64;
-#else
-  return __builtin_clzll(input_num);
-#endif // SIMDUTF_REGULAR_VISUAL_STUDIO
-}
-
 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
 simdutf_really_inline int count_ones(uint64_t input_num) {
   // note: we do not support legacy 32-bit Windows
@@ -2988,68 +2735,12 @@ simdutf_really_inline int count_ones(uint64_t input_num) {
 }
 #endif
 
-simdutf_really_inline bool add_overflow(uint64_t value1, uint64_t value2,
-                                        uint64_t *result) {
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  *result = value1 + value2;
-  return *result < value1;
-#else
-  return __builtin_uaddll_overflow(value1, value2,
-                                   reinterpret_cast<unsigned long long *>(result));
-#endif
-}
-
 } // unnamed namespace
 } // namespace ppc64
 } // namespace simdutf
 
 #endif // SIMDUTF_PPC64_BITMANIPULATION_H
 /* end file src/simdutf/ppc64/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/bitmask.h
-/* begin file src/simdutf/ppc64/bitmask.h */
-#ifndef SIMDUTF_PPC64_BITMASK_H
-#define SIMDUTF_PPC64_BITMASK_H
-
-namespace simdutf {
-namespace ppc64 {
-namespace {
-
-//
-// Perform a "cumulative bitwise xor," flipping bits each time a 1 is
-// encountered.
-//
-// For example, prefix_xor(00100100) == 00011100
-//
-simdutf_really_inline uint64_t prefix_xor(uint64_t bitmask) {
-  // You can use the version below, however gcc sometimes miscompiles
-  // vec_pmsum_be, it happens somewhere around between 8 and 9th version.
-  // The performance boost was not noticeable, falling back to a usual
-  // implementation.
-  //   __vector unsigned long long all_ones = {~0ull, ~0ull};
-  //   __vector unsigned long long mask = {bitmask, 0};
-  //   // Clang and GCC return different values for pmsum for ull so cast it to one.
- // // Generally it is not specified by ALTIVEC ISA what is returned by - // // vec_pmsum_be. - // #if defined(__LITTLE_ENDIAN__) - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[0]); - // #else - // return (uint64_t)(((__vector unsigned long long)vec_pmsum_be(all_ones, mask))[1]); - // #endif - bitmask ^= bitmask << 1; - bitmask ^= bitmask << 2; - bitmask ^= bitmask << 4; - bitmask ^= bitmask << 8; - bitmask ^= bitmask << 16; - bitmask ^= bitmask << 32; - return bitmask; -} - -} // unnamed namespace -} // namespace ppc64 -} // namespace simdutf - -#endif -/* end file src/simdutf/ppc64/bitmask.h */ // dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/src, filename=simdutf/ppc64/simd.h /* begin file src/simdutf/ppc64/simd.h */ #ifndef SIMDUTF_PPC64_SIMD_H @@ -9204,10 +8895,13 @@ inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept { } } unsigned char byte = data[pos]; - if (byte < 0b10000000) { - pos++; - continue; - } else if ((byte & 0b11100000) == 0b11000000) { + + while (byte < 0b10000000) { + if (++pos == len) { return true; } + byte = data[pos]; + } + + if ((byte & 0b11100000) == 0b11000000) { next_pos = pos + 2; if (next_pos > len) { return false; } if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; } @@ -11077,7 +10771,7 @@ std::pair sse_convert_utf16_to_utf8(const char16_t* buf, const __m256i v_f800 = _mm256_set1_epi16((int16_t)0xf800); const __m256i v_d800 = _mm256_set1_epi16((int16_t)0xd800); const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080); - const size_t safety_margin = 8; // to avoid overruns + const size_t safety_margin = 11; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92 while (buf + 16 + safety_margin <= end) { __m256i in = _mm256_loadu_si256((__m256i*)buf); diff --git a/simdutf/simdutf.h b/simdutf/simdutf.h index 82e999fb..26082517 100644 --- a/simdutf/simdutf.h +++ b/simdutf/simdutf.h @@ -1,11 +1,10 @@ -/* auto-generated on 2021-07-29 10:43:28 -0400. Do not edit! */ +/* auto-generated on 2022-03-21 23:28:26 -0400. Do not edit! */ // dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf.h /* begin file include/simdutf.h */ #ifndef SIMDUTF_H #define SIMDUTF_H #include #include -#include #include // dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/compiler_check.h @@ -412,18 +411,18 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS #define SIMDUTF_SIMDUTF_VERSION_H /** The version of simdutf being used (major.minor.revision) */ -#define SIMDUTF_VERSION 0.1.0 +#define SIMDUTF_VERSION 1.0.1 namespace simdutf { enum { /** * The major version (MAJOR.minor.revision) of simdutf being used. */ - SIMDUTF_VERSION_MAJOR = 0, + SIMDUTF_VERSION_MAJOR = 1, /** * The minor version (major.MINOR.revision) of simdutf being used. */ - SIMDUTF_VERSION_MINOR = 1, + SIMDUTF_VERSION_MINOR = 0, /** * The revision (major.minor.REVISION) of simdutf being used. 
    */
@@ -438,7 +437,9 @@ enum {
 #ifndef SIMDUTF_IMPLEMENTATION_H
 #define SIMDUTF_IMPLEMENTATION_H
 #include
+#if !defined(SIMDUTF_NO_THREADS)
 #include <atomic>
+#endif
 #include
 // dofile: invoked with prepath=/Users/lemire/CVS/github/simdutf/include, filename=simdutf/internal/isadetection.h
 /* begin file include/simdutf/internal/isadetection.h */
@@ -510,7 +511,10 @@ enum instruction_set {
   PCLMULQDQ = 0x10,
   BMI1 = 0x20,
   BMI2 = 0x40,
-  ALTIVEC = 0x80
+  ALTIVEC = 0x80,
+  AVX512F = 0x100,
+  AVX512BW = 0x200,
+  AVX512DQ = 0x400
 };
 
 #if defined(__PPC64__)
@@ -539,12 +543,38 @@ static inline uint32_t detect_supported_architectures() {
 
 namespace {
 
-// Can be found on Intel ISA Reference for CPUID
-constexpr uint32_t cpuid_avx2_bit = 1 << 5;      ///< @private Bit 5 of EBX for EAX=0x7
-constexpr uint32_t cpuid_bmi1_bit = 1 << 3;      ///< @private bit 3 of EBX for EAX=0x7
-constexpr uint32_t cpuid_bmi2_bit = 1 << 8;      ///< @private bit 8 of EBX for EAX=0x7
-constexpr uint32_t cpuid_sse42_bit = 1 << 20;    ///< @private bit 20 of ECX for EAX=0x1
-constexpr uint32_t cpuid_pclmulqdq_bit = 1 << 1; ///< @private bit 1 of ECX for EAX=0x1
+namespace cpuid_bit {
+    // Can be found on Intel ISA Reference for CPUID
+
+    // EAX = 0x01
+    constexpr uint32_t pclmulqdq = uint32_t(1) << 1; ///< @private bit 1 of ECX for EAX=0x1
+    constexpr uint32_t sse42 = uint32_t(1) << 20; ///< @private bit 20 of ECX for EAX=0x1
+
+    // EAX = 0x07 (Structured Extended Feature Flags), ECX = 0x00 (Sub-leaf)
+    // See: "Table 3-8. Information Returned by CPUID Instruction"
+    namespace ebx {
+      constexpr uint32_t bmi1 = uint32_t(1) << 3;
+      constexpr uint32_t avx2 = uint32_t(1) << 5;
+      constexpr uint32_t bmi2 = uint32_t(1) << 8;
+      constexpr uint32_t avx512f = uint32_t(1) << 16;
+      constexpr uint32_t avx512dq = uint32_t(1) << 17;
+      constexpr uint32_t avx512ifma = uint32_t(1) << 21;
+      constexpr uint32_t avx512cd = uint32_t(1) << 28;
+      constexpr uint32_t avx512bw = uint32_t(1) << 30;
+      constexpr uint32_t avx512vl = uint32_t(1) << 31;
+    }
+
+    namespace ecx {
+      constexpr uint32_t avx512vbmi = uint32_t(1) << 1;
+      constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6;
+      constexpr uint32_t avx512vnni = uint32_t(1) << 11;
+      constexpr uint32_t avx512bitalg = uint32_t(1) << 12;
+      constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14;
+    }
+    namespace edx {
+      constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8;
+    }
+  }
 }
 
@@ -572,34 +602,45 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
 }
 
 static inline uint32_t detect_supported_architectures() {
-  uint32_t eax, ebx, ecx, edx;
+  uint32_t eax;
+  uint32_t ebx = 0;
+  uint32_t ecx = 0;
+  uint32_t edx = 0;
   uint32_t host_isa = 0x0;
 
+  // ECX for EAX=0x1
+  eax = 0x1;
+  cpuid(&eax, &ebx, &ecx, &edx);
+
+  if (ecx & cpuid_bit::sse42) {
+    host_isa |= instruction_set::SSE42;
+  }
+
+  if (ecx & cpuid_bit::pclmulqdq) {
+    host_isa |= instruction_set::PCLMULQDQ;
+  }
+
   // ECX for EAX=0x7
   eax = 0x7;
-  ecx = 0x0;
+  ecx = 0x0; // Sub-leaf = 0
   cpuid(&eax, &ebx, &ecx, &edx);
 
-  if (ebx & cpuid_avx2_bit) {
+  if (ebx & cpuid_bit::ebx::avx2) {
     host_isa |= instruction_set::AVX2;
   }
 
-  if (ebx & cpuid_bmi1_bit) {
+  if (ebx & cpuid_bit::ebx::bmi1) {
     host_isa |= instruction_set::BMI1;
   }
-
-  if (ebx & cpuid_bmi2_bit) {
+  if (ebx & cpuid_bit::ebx::bmi2) {
     host_isa |= instruction_set::BMI2;
  }
-
-  // EBX for EAX=0x1
-  eax = 0x1;
-  cpuid(&eax, &ebx, &ecx, &edx);
-
-  if (ecx & cpuid_sse42_bit) {
-    host_isa |= instruction_set::SSE42;
+  if (ebx & cpuid_bit::ebx::avx512f) {
+    host_isa |= instruction_set::AVX512F;
   }
-
-  if (ecx & cpuid_pclmulqdq_bit) {
-    host_isa |= instruction_set::PCLMULQDQ;
+  if (ebx & cpuid_bit::ebx::avx512bw) {
+    host_isa |= instruction_set::AVX512BW;
+  }
+  if (ebx & cpuid_bit::ebx::avx512dq) {
+    host_isa |= instruction_set::AVX512DQ;
   }
 
   return host_isa;
@@ -1039,6 +1080,17 @@ class atomic_ptr {
 public:
   atomic_ptr(T *_ptr) : ptr{_ptr} {}
 
+#if defined(SIMDUTF_NO_THREADS)
+  operator const T*() const { return ptr; }
+  const T& operator*() const { return *ptr; }
+  const T* operator->() const { return ptr; }
+
+  operator T*() { return ptr; }
+  T& operator*() { return *ptr; }
+  T* operator->() { return ptr; }
+  atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; }
+
+#else
   operator const T*() const { return ptr.load(); }
   const T& operator*() const { return *ptr; }
   const T* operator->() const { return ptr.load(); }
@@ -1048,8 +1100,14 @@ class atomic_ptr {
   T* operator->() { return ptr.load(); }
 
   atomic_ptr& operator=(T *_ptr) { ptr = _ptr; return *this; }
 
+#endif
+
 private:
+#if defined(SIMDUTF_NO_THREADS)
+  T* ptr;
+#else
   std::atomic<T*> ptr;
+#endif
 };
 
 } // namespace internal
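
Two reviewer notes on the less obvious changes above, each with a small illustrative sketch. The sketches are standalone C++ and are not part of the patch or of simdutf's API; helper names such as skip_ascii_run and has_avx512_trio are invented for illustration.

First, the scalar validate() hunk replaces the byte-at-a-time ASCII check with an inner loop that consumes an entire run of ASCII bytes before falling through to the multi-byte cases, returning early if the run reaches the end of the input. A minimal sketch of that idea:

  #include <cstddef>
  #include <cstdio>
  #include <cstring>

  // Advance past a run of ASCII bytes (< 0x80), mirroring the
  // `while (byte < 0b10000000)` loop added to validate() above.
  static size_t skip_ascii_run(const unsigned char *data, size_t pos, size_t len) {
    while (pos < len && data[pos] < 0b10000000) {
      pos++;
    }
    return pos; // index of the first non-ASCII byte, or len if none remains
  }

  int main() {
    const char buf[] = "abc\xC3\xA9def"; // "abc", then U+00E9 as two bytes, then "def"
    const unsigned char *data = reinterpret_cast<const unsigned char *>(buf);
    size_t pos = skip_ascii_run(data, 0, std::strlen(buf));
    std::printf("first non-ASCII byte at index %zu\n", pos); // prints 3
    return 0;
  }

Second, the isadetection.h changes add AVX512F, AVX512BW and AVX512DQ to instruction_set and populate them from CPUID leaf 7. A caller holding the bitmask returned by detect_supported_architectures() would presumably require all three bits before taking an AVX-512 code path; a sketch under that assumption, with the flag values copied from the diff:

  #include <cstdint>

  // Mirrors the three flags added to enum instruction_set in the patch.
  enum : uint32_t { AVX512F = 0x100, AVX512BW = 0x200, AVX512DQ = 0x400 };

  // True when every AVX-512 subset detected by the patch is present in host_isa.
  inline bool has_avx512_trio(uint32_t host_isa) {
    const uint32_t need = AVX512F | AVX512BW | AVX512DQ;
    return (host_isa & need) == need;
  }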