diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake
index d239d69a93d68..893ec360d3e55 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -22,6 +22,8 @@ include(CheckCXXCompilerFlag)
 CHECK_CXX_COMPILER_FLAG("-msse4.2" CXX_SUPPORTS_SSE4_2)
 # power compiler flags
 CHECK_CXX_COMPILER_FLAG("-maltivec" CXX_SUPPORTS_ALTIVEC)
+# Arm64 compiler flags
+CHECK_CXX_COMPILER_FLAG("-march=armv8-a+crc" CXX_SUPPORTS_ARMCRC)
 
 # This ensures that things like gnu++11 get passed correctly
 set(CMAKE_CXX_STANDARD 11)
@@ -220,6 +222,10 @@ if (CXX_SUPPORTS_ALTIVEC AND ARROW_ALTIVEC)
   set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -maltivec")
 endif()
 
+if (CXX_SUPPORTS_ARMCRC)
+  set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=armv8-a+crc")
+endif()
+
 if (ARROW_USE_SIMD)
   add_definitions(-DARROW_USE_SIMD)
 endif()
diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
index 122c551bf42e7..d785eeeaae99e 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -44,6 +44,7 @@ install(FILES
   logging.h
   macros.h
   memory.h
+  neon-util.h
   parallel.h
   rle-encoding.h
   sse-util.h
diff --git a/cpp/src/arrow/util/hash-util.h b/cpp/src/arrow/util/hash-util.h
index 3f7e4048bdf10..fd69cb9438c12 100644
--- a/cpp/src/arrow/util/hash-util.h
+++ b/cpp/src/arrow/util/hash-util.h
@@ -25,22 +25,55 @@
 
 #include "arrow/util/logging.h"
 #include "arrow/util/macros.h"
+#include "arrow/util/neon-util.h"
 #include "arrow/util/sse-util.h"
 
+// HW_crc32_* resolve to the CRC32 backend selected at compile time
+// (SSE4.2 first, then Arm CRC).
+#if defined(ARROW_HAVE_SSE4_2)
+#define HW_crc32_u8 SSE4_crc32_u8
+#define HW_crc32_u16 SSE4_crc32_u16
+#define HW_crc32_u32 SSE4_crc32_u32
+#define HW_crc32_u64 SSE4_crc32_u64
+#elif defined(ARROW_HAVE_ARM_CRC)
+#define HW_crc32_u8 ARMCE_crc32_u8
+#define HW_crc32_u16 ARMCE_crc32_u16
+#define HW_crc32_u32 ARMCE_crc32_u32
+#define HW_crc32_u64 ARMCE_crc32_u64
+#else
+// No backend compiled in: stubs that trip DCHECK(false) if ever called.
+static inline uint32_t HW_crc32_u8(uint32_t, uint8_t) {
+  DCHECK(false) << "Hardware CRC support is not enabled";
+  return 0;
+}
+static inline uint32_t HW_crc32_u16(uint32_t, uint16_t) {
+  DCHECK(false) << "Hardware CRC support is not enabled";
+  return 0;
+}
+static inline uint32_t HW_crc32_u32(uint32_t, uint32_t) {
+  DCHECK(false) << "Hardware CRC support is not enabled";
+  return 0;
+}
+static inline uint32_t HW_crc32_u64(uint32_t, uint64_t) {
+  DCHECK(false) << "Hardware CRC support is not enabled";
+  return 0;
+}
+#endif
+
 namespace arrow {
 
 /// Utility class to compute hash values.
 class HashUtil {
  public:
-#ifdef ARROW_HAVE_SSE4_2
+#if defined(ARROW_HAVE_SSE4_2) || defined(ARROW_HAVE_ARM_CRC)
   static constexpr bool have_hardware_crc32 = true;
 #else
   static constexpr bool have_hardware_crc32 = false;
 #endif
 
-  /// Compute the Crc32 hash for data using SSE4 instructions. The input hash
+  /// Compute the Crc32 hash for data using SSE4/ArmCRC instructions. The input hash
   /// parameter is the current hash/seed value.
-  /// This should only be called if SSE is supported.
+  /// This should only be called if SSE/ArmCRC is supported.
   /// This is ~4x faster than Fnv/Boost Hash.
   /// TODO: crc32 hashes with different seeds do not result in different hash functions.
   /// The resulting hashes are correlated.
@@ -49,15 +82,15 @@ class HashUtil {
     const uint8_t* end = p + nbytes;
 
     while (p <= end - 8) {
-      hash = SSE4_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(p));
+      hash = HW_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(p));
       p += 8;
     }
     while (p <= end - 4) {
-      hash = SSE4_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
+      hash = HW_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
       p += 4;
     }
     while (p < end) {
-      hash = SSE4_crc32_u8(hash, *p);
+      hash = HW_crc32_u8(hash, *p);
       ++p;
     }
 
@@ -81,30 +114,30 @@ class HashUtil {
     uint32_t h2 = static_cast<uint32_t>(hash);
 
     while (nbytes >= 16) {
-      h1 = SSE4_crc32_u64(h1, *reinterpret_cast<const uint64_t*>(p));
-      h2 = SSE4_crc32_u64(h2, *reinterpret_cast<const uint64_t*>(p + 8));
+      h1 = HW_crc32_u64(h1, *reinterpret_cast<const uint64_t*>(p));
+      h2 = HW_crc32_u64(h2, *reinterpret_cast<const uint64_t*>(p + 8));
       nbytes -= 16;
       p += 16;
     }
     if (nbytes >= 8) {
-      h1 = SSE4_crc32_u32(h1, *reinterpret_cast<const uint32_t*>(p));
-      h2 = SSE4_crc32_u32(h2, *reinterpret_cast<const uint32_t*>(p + 4));
+      h1 = HW_crc32_u32(h1, *reinterpret_cast<const uint32_t*>(p));
+      h2 = HW_crc32_u32(h2, *reinterpret_cast<const uint32_t*>(p + 4));
       nbytes -= 8;
       p += 8;
     }
     if (nbytes >= 4) {
-      h1 = SSE4_crc32_u16(h1, *reinterpret_cast<const uint16_t*>(p));
-      h2 = SSE4_crc32_u16(h2, *reinterpret_cast<const uint16_t*>(p + 2));
+      h1 = HW_crc32_u16(h1, *reinterpret_cast<const uint16_t*>(p));
+      h2 = HW_crc32_u16(h2, *reinterpret_cast<const uint16_t*>(p + 2));
       nbytes -= 4;
       p += 4;
     }
     switch (nbytes) {
       case 3:
-        h1 = SSE4_crc32_u8(h1, p[3]);
+        h1 = HW_crc32_u8(h1, p[2]);  // tail is p[0..nbytes-1]; falls through
       case 2:
-        h2 = SSE4_crc32_u8(h2, p[2]);
+        h2 = HW_crc32_u8(h2, p[1]);  // falls through
       case 1:
-        h1 = SSE4_crc32_u8(h1, p[1]);
+        h1 = HW_crc32_u8(h1, p[0]);
       case 0:
         break;
       default:
@@ -118,7 +151,7 @@ class HashUtil {
   /// CrcHash() specialized for 1-byte data
   static inline uint32_t CrcHash1(const void* v, uint32_t hash) {
     const uint8_t* s = reinterpret_cast<const uint8_t*>(v);
-    hash = SSE4_crc32_u8(hash, *s);
+    hash = HW_crc32_u8(hash, *s);
     hash = (hash << 16) | (hash >> 16);
     return hash;
   }
@@ -126,7 +159,7 @@ class HashUtil {
   /// CrcHash() specialized for 2-byte data
   static inline uint32_t CrcHash2(const void* v, uint32_t hash) {
     const uint16_t* s = reinterpret_cast<const uint16_t*>(v);
-    hash = SSE4_crc32_u16(hash, *s);
+    hash = HW_crc32_u16(hash, *s);
     hash = (hash << 16) | (hash >> 16);
     return hash;
   }
@@ -134,7 +167,7 @@ class HashUtil {
   /// CrcHash() specialized for 4-byte data
   static inline uint32_t CrcHash4(const void* v, uint32_t hash) {
     const uint32_t* p = reinterpret_cast<const uint32_t*>(v);
-    hash = SSE4_crc32_u32(hash, *p);
+    hash = HW_crc32_u32(hash, *p);
     hash = (hash << 16) | (hash >> 16);
     return hash;
   }
@@ -142,7 +175,7 @@ class HashUtil {
   /// CrcHash() specialized for 8-byte data
   static inline uint32_t CrcHash8(const void* v, uint32_t hash) {
     const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
-    hash = SSE4_crc32_u64(hash, *p);
+    hash = HW_crc32_u64(hash, *p);
     hash = (hash << 16) | (hash >> 16);
     return hash;
   }
@@ -150,9 +183,9 @@ class HashUtil {
   /// CrcHash() specialized for 12-byte data
   static inline uint32_t CrcHash12(const void* v, uint32_t hash) {
     const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
-    hash = SSE4_crc32_u64(hash, *p);
+    hash = HW_crc32_u64(hash, *p);
     ++p;
-    hash = SSE4_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
+    hash = HW_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
     hash = (hash << 16) | (hash >> 16);
     return hash;
   }
@@ -160,9 +193,9 @@ class HashUtil {
   /// CrcHash() specialized for 16-byte data
   static inline uint32_t CrcHash16(const void* v, uint32_t hash) {
     const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
-    hash = SSE4_crc32_u64(hash, *p);
+    hash = HW_crc32_u64(hash, *p);
     ++p;
-    hash = SSE4_crc32_u64(hash, *p);
+    hash = HW_crc32_u64(hash, *p);
     hash = (hash << 16) | (hash >> 16);
     return hash;
   }
@@ -251,8 +284,8 @@ class HashUtil {
     return static_cast<uint32_t>((hash_u64 >> 32) ^ (hash_u64 & 0xFFFFFFFF));
   }
 
-  // With sse4.2
-  template <bool use_sse42 = true>
+  // Hash template
+  template <bool use_hw_crc32 = true>
   static inline int Hash(const void* data, int32_t bytes, uint32_t seed);
 
   /// The magic number (used in hash_combine()) 0x9e3779b9 = 2^32 / (golden ratio).
@@ -288,13 +321,21 @@ class HashUtil {
   }
 };
 
-// With sse4.2
+// HW Hash
 template <>
 inline int HashUtil::Hash<true>(const void* data, int32_t bytes, uint32_t seed) {
-  return static_cast<int>(HashUtil::CrcHash(data, bytes, seed));
+#ifdef ARROW_HAVE_ARM_CRC
+  // Need run time check for Arm: when the CPU does not report CRC32
+  // support, fall back to Murmur.
+  if (!crc32c_runtime_check()) {
+    return static_cast<int>(HashUtil::MurmurHash2_64(data, bytes, seed));
+  }
+#endif
+  // Double CRC
+  return static_cast<int>(HashUtil::DoubleCrcHash(data, bytes, seed));
 }
 
-// Non-sse4 hash
+// Murmur Hash
 template <>
 inline int HashUtil::Hash<false>(const void* data, int32_t bytes, uint32_t seed) {
   return static_cast<int>(HashUtil::MurmurHash2_64(data, bytes, seed));
diff --git a/cpp/src/arrow/util/neon-util.h b/cpp/src/arrow/util/neon-util.h
new file mode 100644
index 0000000000000..c81bf1440c37c
--- /dev/null
+++ b/cpp/src/arrow/util/neon-util.h
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_UTIL_NEON_UTIL_H
+#define ARROW_UTIL_NEON_UTIL_H
+
+#include <cstdint>
+
+#include "arrow/util/logging.h"
+
+// System headers are pulled in at global scope, never inside namespace arrow.
+#if defined(__aarch64__) || defined(__AARCH64__)
+#ifdef __ARM_FEATURE_CRC32
+#define ARROW_HAVE_ARM_CRC
+#include <arm_acle.h>
+#endif
+#endif
+
+#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC)
+#include <asm/hwcap.h>
+#include <sys/auxv.h>
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1 << 7)
+#endif
+#endif
+
+namespace arrow {
+
+#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC)
+
+/// Reports whether AT_HWCAP advertises the CRC32 feature bit (non-zero if so).
+static inline uint32_t crc32c_runtime_check(void) {
+  uint64_t auxv = getauxval(AT_HWCAP);
+  return (auxv & HWCAP_CRC32) != 0;
+}
+
+/// Thin wrappers over the ACLE __crc32c* intrinsics, one per operand width.
+static inline uint32_t ARMCE_crc32_u8(uint32_t crc, uint8_t v) {
+  return __crc32cb(crc, v);
+}
+
+static inline uint32_t ARMCE_crc32_u16(uint32_t crc, uint16_t v) {
+  return __crc32ch(crc, v);
+}
+
+static inline uint32_t ARMCE_crc32_u32(uint32_t crc, uint32_t v) {
+  return __crc32cw(crc, v);
+}
+
+static inline uint32_t ARMCE_crc32_u64(uint32_t crc, uint64_t v) {
+  return __crc32cd(crc, v);
+}
+
+#else
+
+// No run-time probe on this target: report "unsupported" so callers take
+// their software fallback instead of tripping a DCHECK.
+static inline uint32_t crc32c_runtime_check(void) { return 0; }
+
+static inline uint32_t ARMCE_crc32_u8(uint32_t, uint8_t) {
+  DCHECK(false) << "Arm crc32 support is not enabled";
+  return 0;
+}
+
+static inline uint32_t ARMCE_crc32_u16(uint32_t, uint16_t) {
+  DCHECK(false) << "Arm crc32 is not enabled";
+  return 0;
+}
+
+static inline uint32_t ARMCE_crc32_u32(uint32_t, uint32_t) {
+  DCHECK(false) << "Arm crc32 support is not enabled";
+  return 0;
+}
+
+static inline uint32_t ARMCE_crc32_u64(uint32_t, uint64_t) {
+  DCHECK(false) << "Arm crc32 support is not enabled";
+  return 0;
+}
+
+#endif  // defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC)
+
+}  // namespace arrow
+
+#endif  // ARROW_UTIL_NEON_UTIL_H