diff --git a/cpp/build-support/cpplint.py b/cpp/build-support/cpplint.py index 1bceed9a67a05..dc3d47ba8b45d 100755 --- a/cpp/build-support/cpplint.py +++ b/cpp/build-support/cpplint.py @@ -699,7 +699,11 @@ # Hardware specific headers 'arm_neon.h', 'emmintrin.h', - 'xmmintin.h', + 'immintrin.h', + 'intrin.h', + 'nmmintrin.h', + 'x86intrin.h', + 'xmmintrin.h', ]) # Folders of C libraries so commonly used in C++, diff --git a/cpp/src/arrow/acero/bloom_filter.cc b/cpp/src/arrow/acero/bloom_filter.cc index db39ad1a83cab..47263387e5cae 100644 --- a/cpp/src/arrow/acero/bloom_filter.cc +++ b/cpp/src/arrow/acero/bloom_filter.cc @@ -16,11 +16,13 @@ // under the License. #include "arrow/acero/bloom_filter.h" + #include -#include "arrow/acero/util.h" // PREFETCH + #include "arrow/util/bit_util.h" // Log2 #include "arrow/util/bitmap_ops.h" // CountSetBits #include "arrow/util/config.h" +#include "arrow/util/prefetch.h" // PREFETCH namespace arrow { namespace acero { @@ -152,7 +154,7 @@ void BlockedBloomFilter::FindImp(int64_t num_rows, const T* hashes, if (enable_prefetch && UsePrefetch()) { constexpr int kPrefetchIterations = 16; for (int64_t i = 0; i < num_rows - kPrefetchIterations; ++i) { - PREFETCH(blocks_ + block_id(hashes[i + kPrefetchIterations])); + ARROW_PREFETCH(blocks_ + block_id(hashes[i + kPrefetchIterations])); uint64_t result = Find(hashes[i]) ? 1ULL : 0ULL; bits |= result << (i & 63); if ((i & 63) == 63) { diff --git a/cpp/src/arrow/acero/bloom_filter.h b/cpp/src/arrow/acero/bloom_filter.h index 530beaea64827..8f9fe171baeb3 100644 --- a/cpp/src/arrow/acero/bloom_filter.h +++ b/cpp/src/arrow/acero/bloom_filter.h @@ -17,10 +17,6 @@ #pragma once -#if defined(ARROW_HAVE_RUNTIME_AVX2) -# include -#endif - #include #include #include @@ -30,6 +26,7 @@ #include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/status.h" +#include "arrow/util/simd.h" namespace arrow { namespace acero { diff --git a/cpp/src/arrow/acero/bloom_filter_avx2.cc b/cpp/src/arrow/acero/bloom_filter_avx2.cc index 5816bb4fc0a32..448b80fdb6913 100644 --- a/cpp/src/arrow/acero/bloom_filter_avx2.cc +++ b/cpp/src/arrow/acero/bloom_filter_avx2.cc @@ -15,10 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include - #include "arrow/acero/bloom_filter.h" #include "arrow/util/bit_util.h" +#include "arrow/util/simd.h" namespace arrow { namespace acero { diff --git a/cpp/src/arrow/compute/key_hash_internal.h b/cpp/src/arrow/compute/key_hash_internal.h index 582cf28732352..769f3b2145e29 100644 --- a/cpp/src/arrow/compute/key_hash_internal.h +++ b/cpp/src/arrow/compute/key_hash_internal.h @@ -17,14 +17,11 @@ #pragma once -#if defined(ARROW_HAVE_RUNTIME_AVX2) -# include -#endif - #include #include "arrow/compute/light_array_internal.h" #include "arrow/compute/util.h" +#include "arrow/util/simd.h" namespace arrow { namespace compute { diff --git a/cpp/src/arrow/compute/key_hash_internal_avx2.cc b/cpp/src/arrow/compute/key_hash_internal_avx2.cc index 4def87bd7aa20..5a4366d6b266c 100644 --- a/cpp/src/arrow/compute/key_hash_internal_avx2.cc +++ b/cpp/src/arrow/compute/key_hash_internal_avx2.cc @@ -15,10 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include - #include "arrow/compute/key_hash_internal.h" #include "arrow/util/bit_util.h" +#include "arrow/util/simd.h" namespace arrow { namespace compute { diff --git a/cpp/src/arrow/compute/key_map_internal_avx2.cc b/cpp/src/arrow/compute/key_map_internal_avx2.cc index 8c98166f49269..1a16603a0faa0 100644 --- a/cpp/src/arrow/compute/key_map_internal_avx2.cc +++ b/cpp/src/arrow/compute/key_map_internal_avx2.cc @@ -15,10 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include - #include "arrow/compute/key_map_internal.h" #include "arrow/util/logging.h" +#include "arrow/util/simd.h" namespace arrow { namespace compute { diff --git a/cpp/src/arrow/compute/row/encode_internal_avx2.cc b/cpp/src/arrow/compute/row/encode_internal_avx2.cc index 26f8e3a63de0a..d2e317deb890c 100644 --- a/cpp/src/arrow/compute/row/encode_internal_avx2.cc +++ b/cpp/src/arrow/compute/row/encode_internal_avx2.cc @@ -15,9 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include - #include "arrow/compute/row/encode_internal.h" +#include "arrow/util/simd.h" namespace arrow { namespace compute { diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 9034849bbc36d..1aaff43e10e1f 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -28,24 +28,17 @@ #include "arrow/compute/type_fwd.h" #include "arrow/result.h" #include "arrow/util/cpu_info.h" +#include "arrow/util/simd.h" #if defined(__clang__) || defined(__GNUC__) # define BYTESWAP(x) __builtin_bswap64(x) # define ROTL(x, n) (((x) << (n)) | ((x) >> ((-n) & 31))) # define ROTL64(x, n) (((x) << (n)) | ((x) >> ((-n) & 63))) -# define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) #elif defined(_MSC_VER) # include # define BYTESWAP(x) _byteswap_uint64(x) # define ROTL(x, n) _rotl((x), (n)) # define ROTL64(x, n) _rotl64((x), (n)) -# if defined(_M_X64) || defined(_M_I86) -// https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx -# include -# define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# else -# define PREFETCH(ptr) (void)(ptr) /* disabled */ -# endif #endif namespace arrow { diff --git a/cpp/src/arrow/compute/util_avx2.cc b/cpp/src/arrow/compute/util_avx2.cc index 0191ab06f9532..f0ff4575bbe2e 100644 --- a/cpp/src/arrow/compute/util_avx2.cc +++ b/cpp/src/arrow/compute/util_avx2.cc @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include #include #include "arrow/util/bit_util.h" #include "arrow/util/logging.h" +#include "arrow/util/simd.h" namespace arrow::util::bit_util::avx2 { diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 17d1de406d514..e7eb3f833ea8a 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -20,7 +20,6 @@ #if defined(_MSC_VER) # if defined(_M_AMD64) || defined(_M_X64) # include // IWYU pragma: keep -# include # endif # pragma intrinsic(_BitScanReverse) diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index 5658874b42b6c..af29fd636b51a 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -78,7 +78,6 @@ # define ARROW_FORCE_INLINE __attribute__((always_inline)) # define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0)) # define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) -# define ARROW_PREFETCH(addr) __builtin_prefetch(addr) # define ARROW_RESTRICT __restrict # if defined(__clang__) // clang-specific # define ARROW_COMPILER_ASSUME(expr) __builtin_assume(expr) @@ -105,7 +104,6 @@ # define ARROW_FORCE_INLINE __forceinline # define ARROW_PREDICT_FALSE(x) (x) # define ARROW_PREDICT_TRUE(x) (x) -# define ARROW_PREFETCH(addr) # define ARROW_RESTRICT __restrict # define ARROW_COMPILER_ASSUME(expr) __assume(expr) #else @@ -114,7 +112,6 @@ # define ARROW_FORCE_INLINE # define ARROW_PREDICT_FALSE(x) (x) # define ARROW_PREDICT_TRUE(x) (x) -# define ARROW_PREFETCH(addr) # define ARROW_RESTRICT # define ARROW_COMPILER_ASSUME(expr) #endif diff --git a/cpp/src/arrow/util/prefetch.h b/cpp/src/arrow/util/prefetch.h new file mode 100644 index 0000000000000..1e9b5ae670ca1 --- /dev/null +++ b/cpp/src/arrow/util/prefetch.h @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#if defined(__GNUC__) // GCC and compatible compilers (clang, Intel ICC) +# define ARROW_PREFETCH(addr) __builtin_prefetch(addr) +#elif defined(_MSC_VER) // MSVC +# if defined(ARROW_HAVE_SSE4_2) || defined(ARROW_HAVE_RUNTIME_SSE4_2) +# include +# define ARROW_PREFETCH(addr) _mm_prefetch((const char*)(addr), _MM_HINT_T0) +# else +# define ARROW_PREFETCH(addr) +# endif +#else +# define ARROW_PREFETCH(addr) +#endif