diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index d239d69a93d68..893ec360d3e55 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -22,6 +22,8 @@ include(CheckCXXCompilerFlag) CHECK_CXX_COMPILER_FLAG("-msse4.2" CXX_SUPPORTS_SSE4_2) # power compiler flags CHECK_CXX_COMPILER_FLAG("-maltivec" CXX_SUPPORTS_ALTIVEC) +# Arm64 compiler flags +CHECK_CXX_COMPILER_FLAG("-march=armv8-a+crc" CXX_SUPPORTS_ARMCRC) # This ensures that things like gnu++11 get passed correctly set(CMAKE_CXX_STANDARD 11) @@ -220,6 +222,10 @@ if (CXX_SUPPORTS_ALTIVEC AND ARROW_ALTIVEC) set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -maltivec") endif() +if (CXX_SUPPORTS_ARMCRC) + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=armv8-a+crc") +endif() + if (ARROW_USE_SIMD) add_definitions(-DARROW_USE_SIMD) endif() diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index b7a0fb5513a4e..6aa415bbed2f3 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -33,18 +33,9 @@ #include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" -#include "arrow/util/hash-util.h" -#include "arrow/util/hash.h" +#include "arrow/util/hashing.h" #include "arrow/util/logging.h" -#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_USE_ARMCE) -#define HASH_MODE USE_ARMCRC -#elif defined(ARROW_USE_SSE) -#define HASH_MODE USE_SSE42 -#else -#define HASH_MODE USE_DEFAULT -#endif - namespace arrow { using internal::AdaptiveIntBuilderBase; @@ -738,7 +729,7 @@ Status BooleanBuilder::AppendValues(const std::vector& values, int64_t i = 0; internal::GenerateBitsUnrolled(raw_data_, length_, length, - [values, &i]() -> bool { return values[i++]; }); + [&values, &i]() -> bool { return values[i++]; }); // this updates length_ ArrayBuilder::UnsafeAppendToBitmap(is_valid); @@ -751,7 +742,7 @@ Status BooleanBuilder::AppendValues(const std::vector& values) { int64_t i = 0; internal::GenerateBitsUnrolled(raw_data_, length_, length, - [values, &i]() -> bool { return values[i++]; }); + [&values, &i]() -> bool { return values[i++]; }); // this updates length_ ArrayBuilder::UnsafeSetNotNull(length); @@ -761,152 +752,42 @@ Status BooleanBuilder::AppendValues(const std::vector& values) { // ---------------------------------------------------------------------- // DictionaryBuilder -using internal::DictionaryScalar; -using internal::WrappedBinary; - -namespace { - -// A helper class to manage a hash table embedded in a typed Builder. -template -struct DictionaryHashHelper {}; - -// DictionaryHashHelper implementation for primitive types template -struct DictionaryHashHelper> { - using Builder = typename TypeTraits::BuilderType; - using Scalar = typename DictionaryScalar::type; - - // Get the dictionary value at the given builder index - static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { - return builder.GetValue(index); - } - - // Compute the hash of a scalar value - static int64_t HashValue(const Scalar& value, int byte_width) { - return HashUtil::Hash(&value, sizeof(Scalar), 0); - } - - // Return whether the dictionary value at the given builder index is unequal to value - static bool SlotDifferent(const Builder& builder, int64_t index, const Scalar& value) { - return GetDictionaryValue(builder, index) != value; - } - - // Append a value to the builder - static Status AppendValue(Builder& builder, const Scalar& value) { - return builder.Append(value); - } - - // Append another builder's contents to the builder - static Status AppendArray(Builder& builder, const Array& in_array) { - const auto& array = checked_cast(in_array); - return builder.AppendValues(reinterpret_cast(array.values()->data()), - array.length(), nullptr); - } +class DictionaryBuilder::MemoTableImpl + : public internal::HashTraits::MemoTableType { + public: + using MemoTableType = typename internal::HashTraits::MemoTableType; + using MemoTableType::MemoTableType; }; -// DictionaryHashHelper implementation for StringType / BinaryType template -struct DictionaryHashHelper> { - using Builder = typename TypeTraits::BuilderType; - using Scalar = typename DictionaryScalar::type; - - static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { - int32_t v_length; - const uint8_t* v_ptr = builder.GetValue(index, &v_length); - return WrappedBinary(v_ptr, v_length); - } - - static int64_t HashValue(const Scalar& value, int byte_width) { - return HashUtil::Hash(value.ptr_, value.length_, 0); - } - - static bool SlotDifferent(const Builder& builder, int64_t index, const Scalar& value) { - int32_t other_length; - const uint8_t* other_ptr = builder.GetValue(index, &other_length); - return value.length_ != other_length || - memcmp(value.ptr_, other_ptr, other_length) != 0; - } - - static Status AppendValue(Builder& builder, const Scalar& value) { - return builder.Append(value.ptr_, value.length_); - } - - static Status AppendArray(Builder& builder, const Array& in_array) { - const auto& array = checked_cast(in_array); - for (uint64_t index = 0, limit = array.length(); index < limit; ++index) { - int32_t length; - const uint8_t* ptr = array.GetValue(index, &length); - RETURN_NOT_OK(builder.Append(ptr, length)); - } - return Status::OK(); - } -}; - -// DictionaryHashHelper implementation for FixedSizeBinaryType -template -struct DictionaryHashHelper> { - using Builder = typename TypeTraits::BuilderType; - using Scalar = typename DictionaryScalar::type; - - static Scalar GetDictionaryValue(const Builder& builder, int64_t index) { - return builder.GetValue(index); - } - - static int64_t HashValue(const Scalar& value, int byte_width) { - return HashUtil::Hash(value, byte_width, 0); - } - - static bool SlotDifferent(const Builder& builder, int64_t index, const uint8_t* value) { - const int32_t width = builder.byte_width(); - const uint8_t* other_value = builder.GetValue(index); - return memcmp(value, other_value, width) != 0; - } - - static Status AppendValue(Builder& builder, const Scalar& value) { - return builder.Append(value); - } - - static Status AppendArray(Builder& builder, const Array& in_array) { - const auto& array = checked_cast(in_array); - for (uint64_t index = 0, limit = array.length(); index < limit; ++index) { - const Scalar value = array.GetValue(index); - RETURN_NOT_OK(builder.Append(value)); - } - return Status::OK(); - } -}; - -} // namespace +DictionaryBuilder::~DictionaryBuilder() {} template DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), - hash_slots_(nullptr), - dict_builder_(type, pool), - overflow_dict_builder_(type, pool), - values_builder_(pool), - byte_width_(-1) {} + : ArrayBuilder(type, pool), byte_width_(-1), values_builder_(pool) { + DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder"; +} DictionaryBuilder::DictionaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), values_builder_(pool) {} + : ArrayBuilder(type, pool), values_builder_(pool) { + DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder"; +} template <> DictionaryBuilder::DictionaryBuilder( const std::shared_ptr& type, MemoryPool* pool) : ArrayBuilder(type, pool), - hash_slots_(nullptr), - dict_builder_(type, pool), - overflow_dict_builder_(type, pool), - values_builder_(pool), byte_width_(checked_cast(*type).byte_width()) {} template void DictionaryBuilder::Reset() { - dict_builder_.Reset(); - overflow_dict_builder_.Reset(); + ArrayBuilder::Reset(); values_builder_.Reset(); + memo_table_.reset(); + delta_offset_ = 0; } template @@ -916,14 +797,10 @@ Status DictionaryBuilder::Resize(int64_t capacity) { } if (capacity_ == 0) { - // Fill the initial hash table - RETURN_NOT_OK(internal::NewHashTable(kInitialHashTableSize, pool_, &hash_table_)); - hash_slots_ = reinterpret_cast(hash_table_->mutable_data()); - hash_table_size_ = kInitialHashTableSize; - entry_id_offset_ = 0; - mod_bitmask_ = kInitialHashTableSize - 1; - hash_table_load_threshold_ = - static_cast(static_cast(capacity) * kMaxHashTableLoad); + // Initialize hash table + // XXX should we let the user pass additional size heuristics? + memo_table_.reset(new MemoTableImpl(0)); + delta_offset_ = 0; } RETURN_NOT_OK(values_builder_.Resize(capacity)); return ArrayBuilder::Resize(capacity); @@ -937,67 +814,12 @@ Status DictionaryBuilder::Resize(int64_t capacity) { return ArrayBuilder::Resize(capacity); } -template -int64_t DictionaryBuilder::HashValue(const Scalar& value) { - return DictionaryHashHelper::HashValue(value, byte_width_); -} - -template -typename DictionaryBuilder::Scalar DictionaryBuilder::GetDictionaryValue( - typename TypeTraits::BuilderType& dictionary_builder, int64_t index) { - return DictionaryHashHelper::GetDictionaryValue(dictionary_builder, index); -} - -template -bool DictionaryBuilder::SlotDifferent(hash_slot_t index, const Scalar& value) { - DCHECK_GE(index, 0); - if (index >= entry_id_offset_) { - // Lookup delta dictionary - DCHECK_LT(index - entry_id_offset_, dict_builder_.length()); - return DictionaryHashHelper::SlotDifferent( - dict_builder_, static_cast(index - entry_id_offset_), value); - } else { - DCHECK_LT(index, overflow_dict_builder_.length()); - return DictionaryHashHelper::SlotDifferent(overflow_dict_builder_, - static_cast(index), value); - } -} - -template -Status DictionaryBuilder::AppendDictionary(const Scalar& value) { - return DictionaryHashHelper::AppendValue(dict_builder_, value); -} - template Status DictionaryBuilder::Append(const Scalar& value) { RETURN_NOT_OK(Reserve(1)); - // Based on DictEncoder::Put - int64_t j = HashValue(value) & mod_bitmask_; - hash_slot_t index = hash_slots_[j]; - - // Find an empty slot - while (kHashSlotEmpty != index && SlotDifferent(index, value)) { - // Linear probing - ++j; - if (j == hash_table_size_) { - j = 0; - } - index = hash_slots_[j]; - } - - if (index == kHashSlotEmpty) { - // Not in the hash table, so we insert it now - index = static_cast(dict_builder_.length() + entry_id_offset_); - hash_slots_[j] = index; - RETURN_NOT_OK(AppendDictionary(value)); - - if (ARROW_PREDICT_FALSE(static_cast(dict_builder_.length()) > - hash_table_load_threshold_)) { - RETURN_NOT_OK(DoubleTableSize()); - } - } - RETURN_NOT_OK(values_builder_.Append(index)); + auto memo_index = memo_table_->GetOrInsert(value); + RETURN_NOT_OK(values_builder_.Append(memo_index)); return Status::OK(); } @@ -1029,48 +851,24 @@ Status DictionaryBuilder::AppendArray(const Array& array) { return Status::OK(); } -template <> -Status DictionaryBuilder::AppendArray(const Array& array) { - if (!type_->Equals(*array.type())) { - return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); - } - - const auto& numeric_array = checked_cast(array); - for (int64_t i = 0; i < array.length(); i++) { - if (array.IsNull(i)) { - RETURN_NOT_OK(AppendNull()); - } else { - RETURN_NOT_OK(Append(numeric_array.Value(i))); - } - } - return Status::OK(); -} - -template -Status DictionaryBuilder::DoubleTableSize() { -#define INNER_LOOP \ - int64_t j = HashValue(GetDictionaryValue(dict_builder_, index)) & new_mod_bitmask - - DOUBLE_TABLE_SIZE(, INNER_LOOP); - - return Status::OK(); -} - template Status DictionaryBuilder::FinishInternal(std::shared_ptr* out) { + // Finalize indices array + RETURN_NOT_OK(values_builder_.FinishInternal(out)); + + // Generate dictionary array from hash table contents std::shared_ptr dictionary; - entry_id_offset_ += dict_builder_.length(); - RETURN_NOT_OK(dict_builder_.Finish(&dictionary)); + std::shared_ptr dictionary_data; - // Store current dict entries for further uses of this DictionaryBuilder - RETURN_NOT_OK( - DictionaryHashHelper::AppendArray(overflow_dict_builder_, *dictionary)); - DCHECK_EQ(entry_id_offset_, overflow_dict_builder_.length()); + RETURN_NOT_OK(internal::DictionaryTraits::GetDictionaryArrayData( + pool_, type_, *memo_table_, delta_offset_, &dictionary_data)); + dictionary = MakeArray(dictionary_data); - RETURN_NOT_OK(values_builder_.FinishInternal(out)); + // Set type of array data to the right dictionary type (*out)->type = std::make_shared((*out)->type, dictionary); - dict_builder_.Reset(); + // Update internals for further uses of this DictionaryBuilder + delta_offset_ = memo_table_->size(); values_builder_.Reset(); return Status::OK(); @@ -1089,26 +887,42 @@ Status DictionaryBuilder::FinishInternal(std::shared_ptr* o // StringType and BinaryType specializations // -#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ - \ - template <> \ - Status DictionaryBuilder::AppendArray(const Array& array) { \ - const BinaryArray& binary_array = checked_cast(array); \ - WrappedBinary value(nullptr, 0); \ - for (int64_t i = 0; i < array.length(); i++) { \ - if (array.IsNull(i)) { \ - RETURN_NOT_OK(AppendNull()); \ - } else { \ - value.ptr_ = binary_array.GetValue(i, &value.length_); \ - RETURN_NOT_OK(Append(value)); \ - } \ - } \ - return Status::OK(); \ +#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \ + \ + template <> \ + Status DictionaryBuilder::AppendArray(const Array& array) { \ + using ArrayType = typename TypeTraits::ArrayType; \ + const ArrayType& binary_array = checked_cast(array); \ + for (int64_t i = 0; i < array.length(); i++) { \ + if (array.IsNull(i)) { \ + RETURN_NOT_OK(AppendNull()); \ + } else { \ + RETURN_NOT_OK(Append(binary_array.GetView(i))); \ + } \ + } \ + return Status::OK(); \ } BINARY_DICTIONARY_SPECIALIZATIONS(StringType); BINARY_DICTIONARY_SPECIALIZATIONS(BinaryType); +template <> +Status DictionaryBuilder::AppendArray(const Array& array) { + if (!type_->Equals(*array.type())) { + return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type"); + } + + const auto& typed_array = checked_cast(array); + for (int64_t i = 0; i < array.length(); i++) { + if (array.IsNull(i)) { + RETURN_NOT_OK(AppendNull()); + } else { + RETURN_NOT_OK(Append(typed_array.GetValue(i))); + } + } + return Status::OK(); +} + template class DictionaryBuilder; template class DictionaryBuilder; template class DictionaryBuilder; @@ -1316,6 +1130,19 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const { return value_data_builder_.data() + offset; } +util::string_view BinaryBuilder::GetView(int64_t i) const { + const int32_t* offsets = offsets_builder_.data(); + int32_t offset = offsets[i]; + int32_t value_length; + if (i == (length_ - 1)) { + value_length = static_cast(value_data_builder_.length()) - offset; + } else { + value_length = offsets[i + 1] - offset; + } + return util::string_view( + reinterpret_cast(value_data_builder_.data() + offset), value_length); +} + StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {} Status StringBuilder::AppendValues(const std::vector& values, @@ -1414,6 +1241,12 @@ FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr& byte_width_(checked_cast(*type).byte_width()), byte_builder_(pool) {} +#ifndef NDEBUG +void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) { + DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder"; +} +#endif + Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, const uint8_t* valid_bytes) { RETURN_NOT_OK(Reserve(length)); @@ -1421,10 +1254,6 @@ Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length, return byte_builder_.Append(data, length * byte_width_); } -Status FixedSizeBinaryBuilder::Append(const std::string& value) { - return Append(reinterpret_cast(value.c_str())); -} - Status FixedSizeBinaryBuilder::AppendNull() { RETURN_NOT_OK(Reserve(1)); UnsafeAppendToBitmap(false); @@ -1457,6 +1286,12 @@ const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const { return data_ptr + i * byte_width_; } +util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const { + const uint8_t* data_ptr = byte_builder_.data(); + return util::string_view(reinterpret_cast(data_ptr + i * byte_width_), + byte_width_); +} + // ---------------------------------------------------------------------- // Struct diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc index 81801e2743b04..c057ea5736139 100644 --- a/cpp/src/arrow/compute/kernels/hash.cc +++ b/cpp/src/arrow/compute/kernels/hash.cc @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -39,32 +38,24 @@ #include "arrow/type_traits.h" #include "arrow/util/bit-util.h" #include "arrow/util/checked_cast.h" -#include "arrow/util/hash-util.h" -#include "arrow/util/hash.h" +#include "arrow/util/hashing.h" #include "arrow/util/logging.h" #include "arrow/util/macros.h" +#include "arrow/util/string_view.h" +#include "arrow/visitor_inline.h" namespace arrow { class MemoryPool; using internal::checked_cast; +using internal::DictionaryTraits; +using internal::HashTraits; namespace compute { -// TODO(wesm): Enable top-level dispatch to SSE4 hashing if it is enabled -#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_USE_ARMCE) -#define HASH_MODE USE_ARMCRC -#elif defined(ARROW_USE_SSE) -#define HASH_MODE USE_SSE42 -#else -#define HASH_MODE USE_DEFAULT -#endif - namespace { -enum class SIMDMode : char { NOSIMD, SSE4, AVX2 }; - #define CHECK_IMPLEMENTED(KERNEL, FUNCNAME, TYPE) \ if (!KERNEL) { \ std::stringstream ss; \ @@ -72,754 +63,213 @@ enum class SIMDMode : char { NOSIMD, SSE4, AVX2 }; return Status::NotImplemented(ss.str()); \ } -// This is a slight design concession -- some hash actions have the possibility -// of failure. Rather than introduce extra error checking into all actions, we -// will raise an internal exception so that only the actions where errors can -// occur will experience the extra overhead -class HashException : public std::exception { - public: - explicit HashException(const std::string& msg, StatusCode code = StatusCode::Invalid) - : msg_(msg), code_(code) {} - - ~HashException() throw() override {} - - const char* what() const throw() override; - - StatusCode code() const { return code_; } - - private: - std::string msg_; - StatusCode code_; -}; - -const char* HashException::what() const throw() { return msg_.c_str(); } +// ---------------------------------------------------------------------- +// Unique implementation -class HashTable { +class UniqueAction { public: - HashTable(const std::shared_ptr& type, MemoryPool* pool) - : type_(type), - pool_(pool), - initialized_(false), - hash_table_(nullptr), - hash_slots_(nullptr), - hash_table_size_(0), - mod_bitmask_(0) {} + UniqueAction(const std::shared_ptr& type, MemoryPool* pool) {} - virtual ~HashTable() {} - - virtual Status Append(const ArrayData& input) = 0; - virtual Status Flush(Datum* out) = 0; - virtual Status GetDictionary(std::shared_ptr* out) = 0; - - protected: - Status Init(int64_t elements); + Status Reset() { return Status::OK(); } - std::shared_ptr type_; - MemoryPool* pool_; - bool initialized_; + Status Reserve(const int64_t length) { return Status::OK(); } - // The hash table contains integer indices that reference the set of observed - // distinct values - std::shared_ptr hash_table_; - hash_slot_t* hash_slots_; + void ObserveNull() {} - /// Size of the table. Must be a power of 2. - int64_t hash_table_size_; + template + void ObserveFound(Index index) {} - /// Size at which we decide to resize - int64_t hash_table_load_threshold_; + template + void ObserveNotFound(Index index) {} - // Store hash_table_size_ - 1, so that j & mod_bitmask_ is equivalent to j % - // hash_table_size_, but uses far fewer CPU cycles - int64_t mod_bitmask_; + Status Flush(Datum* out) { return Status::OK(); } }; -Status HashTable::Init(int64_t elements) { - DCHECK_EQ(elements, BitUtil::NextPower2(elements)); - RETURN_NOT_OK(internal::NewHashTable(elements, pool_, &hash_table_)); - hash_slots_ = reinterpret_cast(hash_table_->mutable_data()); - hash_table_size_ = elements; - hash_table_load_threshold_ = - static_cast(static_cast(elements) * kMaxHashTableLoad); - mod_bitmask_ = elements - 1; - initialized_ = true; - return Status::OK(); -} - -template -class HashTableKernel : public HashTable {}; - -// Types of hash actions -// -// unique: append to dictionary when not found, no-op with slot -// dictionary-encode: append to dictionary when not found, append slot # -// match: raise or set null when not found, otherwise append slot # -// isin: set false when not found, otherwise true -// value counts: append to dictionary when not found, increment count for slot - -template -class HashDictionary {}; - // ---------------------------------------------------------------------- -// Hash table pass for nulls +// Dictionary encode implementation -template -class HashTableKernel> : public HashTable { +class DictEncodeAction { public: - using HashTable::HashTable; - - Status Init() { - // No-op, do not even need to initialize hash table - return Status::OK(); - } + DictEncodeAction(const std::shared_ptr& type, MemoryPool* pool) + : indices_builder_(pool) {} - Status Append(const ArrayData& arr) override { - if (!initialized_) { - RETURN_NOT_OK(Init()); - } - auto action = checked_cast(this); - RETURN_NOT_OK(action->Reserve(arr.length)); - for (int64_t i = 0; i < arr.length; ++i) { - action->ObserveNull(); - } + Status Reset() { + indices_builder_.Reset(); return Status::OK(); } - Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being a valid dictionary value - auto null_array = std::make_shared(0); - *out = null_array->data(); - return Status::OK(); - } -}; - -// ---------------------------------------------------------------------- -// Hash table pass for primitive types - -template -struct HashDictionary> { - using T = typename Type::c_type; + Status Reserve(const int64_t length) { return indices_builder_.Reserve(length); } - explicit HashDictionary(MemoryPool* pool) : pool(pool), size(0), capacity(0) {} + void ObserveNull() { indices_builder_.UnsafeAppendNull(); } - Status Init() { - this->size = 0; - RETURN_NOT_OK(AllocateResizableBuffer(this->pool, 0, &this->buffer)); - return Resize(kInitialHashTableSize); + template + void ObserveFound(Index index) { + indices_builder_.UnsafeAppend(index); } - Status DoubleSize() { return Resize(this->size * 2); } - - Status Resize(const int64_t elements) { - RETURN_NOT_OK(this->buffer->Resize(elements * sizeof(T))); + template + void ObserveNotFound(Index index) { + return ObserveFound(index); + } - this->capacity = elements; - this->values = reinterpret_cast(this->buffer->mutable_data()); + Status Flush(Datum* out) { + std::shared_ptr result; + RETURN_NOT_OK(indices_builder_.FinishInternal(&result)); + out->value = std::move(result); return Status::OK(); } - MemoryPool* pool; - std::shared_ptr buffer; - T* values; - int64_t size; - int64_t capacity; + private: + Int32Builder indices_builder_; }; -#define GENERIC_HASH_PASS(HASH_INNER_LOOP) \ - if (arr.null_count != 0) { \ - internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); \ - for (int64_t i = 0; i < arr.length; ++i) { \ - const bool is_null = valid_reader.IsNotSet(); \ - valid_reader.Next(); \ - \ - if (is_null) { \ - action->ObserveNull(); \ - continue; \ - } \ - \ - HASH_INNER_LOOP(); \ - } \ - } else { \ - for (int64_t i = 0; i < arr.length; ++i) { \ - HASH_INNER_LOOP(); \ - } \ - } +// ---------------------------------------------------------------------- +// Base class for all hash kernel implementations -template -class HashTableKernel< - Type, Action, - typename std::enable_if::value && !is_8bit_int::value>::type> - : public HashTable { +class HashKernelImpl : public HashKernel { public: - using T = typename Type::c_type; - - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool), dict_(pool) {} - - Status Init() { - RETURN_NOT_OK(dict_.Init()); - return HashTable::Init(kInitialHashTableSize); - } - - Status Append(const ArrayData& arr) override { - if (!initialized_) { - RETURN_NOT_OK(Init()); - } - - const T* values = GetValues(arr, 1); - auto action = checked_cast(this); - - RETURN_NOT_OK(action->Reserve(arr.length)); - -#define HASH_INNER_LOOP() \ - const T value = values[i]; \ - int64_t j = HashValue(value) & mod_bitmask_; \ - hash_slot_t slot = hash_slots_[j]; \ - \ - while (kHashSlotEmpty != slot && dict_.values[slot] != value) { \ - ++j; \ - if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ - j = 0; \ - } \ - slot = hash_slots_[j]; \ - } \ - \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - \ - slot = static_cast(dict_.size); \ - hash_slots_[j] = slot; \ - dict_.values[dict_.size++] = value; \ - \ - action->ObserveNotFound(slot); \ - \ - if (ARROW_PREDICT_FALSE(dict_.size > hash_table_load_threshold_)) { \ - RETURN_NOT_OK(action->DoubleSize()); \ - } \ - } else { \ - action->ObserveFound(slot); \ + Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { + DCHECK_EQ(Datum::ARRAY, input.kind()); + RETURN_NOT_OK(Append(ctx, *input.array())); + return Flush(out); } - GENERIC_HASH_PASS(HASH_INNER_LOOP); - -#undef HASH_INNER_LOOP - - return Status::OK(); + Status Append(FunctionContext* ctx, const ArrayData& input) override { + std::lock_guard guard(lock_); + return Append(input); } - Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being in the dictionary - auto dict_data = dict_.buffer; - RETURN_NOT_OK(dict_data->Resize(dict_.size * sizeof(T), false)); - dict_data->ZeroPadding(); - - *out = ArrayData::Make(type_, dict_.size, {nullptr, dict_data}, 0); - return Status::OK(); - } + virtual Status Append(const ArrayData& arr) = 0; protected: - int64_t HashValue(const T& value) const { - // TODO(wesm): Use faster hash function for C types - return HashUtil::Hash(&value, sizeof(T), 0); - } - - Status DoubleTableSize() { -#define PRIMITIVE_INNER_LOOP \ - const T value = dict_.values[index]; \ - int64_t j = HashValue(value) & new_mod_bitmask; - - DOUBLE_TABLE_SIZE(, PRIMITIVE_INNER_LOOP); - -#undef PRIMITIVE_INNER_LOOP - - return dict_.Resize(hash_table_size_); - } - - HashDictionary dict_; + std::mutex lock_; }; // ---------------------------------------------------------------------- -// Hash table for boolean types +// Base class for all "regular" hash kernel implementations +// (NullType has a separate implementation) -template -class HashTableKernel> : public HashTable { +template +class RegularHashKernelImpl : public HashKernelImpl { public: - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool) { - std::fill(table_, table_ + 2, kHashSlotEmpty); + RegularHashKernelImpl(const std::shared_ptr& type, MemoryPool* pool) + : pool_(pool), type_(type), action_(type, pool) {} + + Status Reset() override { + memo_table_.reset(new MemoTable(0)); + return action_.Reset(); } Status Append(const ArrayData& arr) override { - auto action = checked_cast(this); - - RETURN_NOT_OK(action->Reserve(arr.length)); - - internal::BitmapReader value_reader(arr.buffers[1]->data(), arr.offset, arr.length); - -#define HASH_INNER_LOOP() \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - table_[j] = slot = static_cast(dict_.size()); \ - dict_.push_back(value); \ - action->ObserveNotFound(slot); \ - } else { \ - action->ObserveFound(slot); \ + RETURN_NOT_OK(action_.Reserve(arr.length)); + return ArrayDataVisitor::Visit(arr, this); } - if (arr.null_count != 0) { - internal::BitmapReader valid_reader(arr.buffers[0]->data(), arr.offset, arr.length); - for (int64_t i = 0; i < arr.length; ++i) { - const bool is_null = valid_reader.IsNotSet(); - valid_reader.Next(); - if (is_null) { - value_reader.Next(); - action->ObserveNull(); - continue; - } - const bool value = value_reader.IsSet(); - value_reader.Next(); - const int j = value ? 1 : 0; - hash_slot_t slot = table_[j]; - HASH_INNER_LOOP(); - } - } else { - for (int64_t i = 0; i < arr.length; ++i) { - const bool value = value_reader.IsSet(); - value_reader.Next(); - const int j = value ? 1 : 0; - hash_slot_t slot = table_[j]; - HASH_INNER_LOOP(); - } - } - -#undef HASH_INNER_LOOP - - return Status::OK(); - } + Status Flush(Datum* out) override { return action_.Flush(out); } Status GetDictionary(std::shared_ptr* out) override { - BooleanBuilder builder(pool_); - for (const bool value : dict_) { - RETURN_NOT_OK(builder.Append(value)); - } - return builder.FinishInternal(out); + return DictionaryTraits::GetDictionaryArrayData(pool_, type_, *memo_table_, + 0 /* start_offset */, out); } - private: - hash_slot_t table_[2]; - std::vector dict_; -}; - -// ---------------------------------------------------------------------- -// Hash table pass for variable-length binary types - -template -class HashTableKernel> : public HashTable { - public: - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool), dict_offsets_(pool), dict_data_(pool), dict_size_(0) {} - - Status Init() { - RETURN_NOT_OK(dict_offsets_.Resize(kInitialHashTableSize)); - - // We append the end offset after each append to the dictionary, so this - // sets the initial condition for the length-0 case - // - // initial offsets (dict size == 0): 0 - // after 1st dict entry of length 3: 0 3 - // after 2nd dict entry of length 4: 0 3 7 - RETURN_NOT_OK(dict_offsets_.Append(0)); - return HashTable::Init(kInitialHashTableSize); - } - - Status Append(const ArrayData& arr) override { - constexpr uint8_t empty_value = 0; - if (!initialized_) { - RETURN_NOT_OK(Init()); - } - - const int32_t* offsets = GetValues(arr, 1); - const uint8_t* data; - if (arr.buffers[2].get() == nullptr) { - data = &empty_value; - } else { - data = GetValues(arr, 2); - } - - auto action = checked_cast(this); - RETURN_NOT_OK(action->Reserve(arr.length)); - -#define HASH_INNER_LOOP() \ - const int32_t position = offsets[i]; \ - const int32_t length = offsets[i + 1] - position; \ - const uint8_t* value = data + position; \ - \ - int64_t j = HashValue(value, length) & mod_bitmask_; \ - hash_slot_t slot = hash_slots_[j]; \ - \ - const int32_t* dict_offsets = dict_offsets_.data(); \ - const uint8_t* dict_data = dict_data_.data(); \ - while (kHashSlotEmpty != slot && \ - !((dict_offsets[slot + 1] - dict_offsets[slot]) == length && \ - 0 == memcmp(value, dict_data + dict_offsets[slot], length))) { \ - ++j; \ - if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ - j = 0; \ - } \ - slot = hash_slots_[j]; \ - } \ - \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - \ - slot = dict_size_++; \ - hash_slots_[j] = slot; \ - \ - RETURN_NOT_OK(dict_data_.Append(value, length)); \ - RETURN_NOT_OK(dict_offsets_.Append(static_cast(dict_data_.length()))); \ - \ - action->ObserveNotFound(slot); \ - \ - if (ARROW_PREDICT_FALSE(dict_size_ > hash_table_load_threshold_)) { \ - RETURN_NOT_OK(action->DoubleSize()); \ - } \ - } else { \ - action->ObserveFound(slot); \ - } - - GENERIC_HASH_PASS(HASH_INNER_LOOP); - -#undef HASH_INNER_LOOP - + Status VisitNull() { + action_.ObserveNull(); return Status::OK(); } - Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being in the dictionary - BufferVector buffers = {nullptr, nullptr, nullptr}; - - RETURN_NOT_OK(dict_offsets_.Finish(&buffers[1])); - RETURN_NOT_OK(dict_data_.Finish(&buffers[2])); - - *out = ArrayData::Make(type_, dict_size_, std::move(buffers), 0); + Status VisitValue(const Scalar& value) { + auto on_found = [this](int32_t memo_index) { action_.ObserveFound(memo_index); }; + auto on_not_found = [this](int32_t memo_index) { + action_.ObserveNotFound(memo_index); + }; + memo_table_->GetOrInsert(value, on_found, on_not_found); return Status::OK(); } protected: - int64_t HashValue(const uint8_t* data, int32_t length) const { - return HashUtil::Hash(data, length, 0); - } - - Status DoubleTableSize() { -#define VARBYTES_SETUP \ - const int32_t* dict_offsets = dict_offsets_.data(); \ - const uint8_t* dict_data = dict_data_.data() - -#define VARBYTES_COMPUTE_HASH \ - const int32_t length = dict_offsets[index + 1] - dict_offsets[index]; \ - const uint8_t* value = dict_data + dict_offsets[index]; \ - int64_t j = HashValue(value, length) & new_mod_bitmask - - DOUBLE_TABLE_SIZE(VARBYTES_SETUP, VARBYTES_COMPUTE_HASH); + using MemoTable = typename HashTraits::MemoTableType; -#undef VARBYTES_SETUP -#undef VARBYTES_COMPUTE_HASH - - return Status::OK(); - } - - TypedBufferBuilder dict_offsets_; - TypedBufferBuilder dict_data_; - int32_t dict_size_; + MemoryPool* pool_; + std::shared_ptr type_; + Action action_; + std::unique_ptr memo_table_; }; // ---------------------------------------------------------------------- -// Hash table pass for fixed size binary types +// Hash kernel implementation for nulls -template -class HashTableKernel> - : public HashTable { +template +class NullHashKernelImpl : public HashKernelImpl { public: - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool), dict_data_(pool), dict_size_(0) { - const auto& fw_type = checked_cast(*type); - byte_width_ = fw_type.bit_width() / 8; - } + NullHashKernelImpl(const std::shared_ptr& type, MemoryPool* pool) + : pool_(pool), type_(type), action_(type, pool) {} - Status Init() { - RETURN_NOT_OK(dict_data_.Resize(kInitialHashTableSize * byte_width_)); - return HashTable::Init(kInitialHashTableSize); - } + Status Reset() override { return action_.Reset(); } Status Append(const ArrayData& arr) override { - if (!initialized_) { - RETURN_NOT_OK(Init()); + RETURN_NOT_OK(action_.Reserve(arr.length)); + for (int64_t i = 0; i < arr.length; ++i) { + action_.ObserveNull(); } - - const uint8_t* data = GetValues(arr, 1); - - auto action = checked_cast(this); - RETURN_NOT_OK(action->Reserve(arr.length)); - -#define HASH_INNER_LOOP() \ - const uint8_t* value = data + i * byte_width_; \ - int64_t j = HashValue(value) & mod_bitmask_; \ - hash_slot_t slot = hash_slots_[j]; \ - \ - const uint8_t* dict_data = dict_data_.data(); \ - while (kHashSlotEmpty != slot && \ - !(0 == memcmp(value, dict_data + slot * byte_width_, byte_width_))) { \ - ++j; \ - if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \ - j = 0; \ - } \ - slot = hash_slots_[j]; \ - } \ - \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - \ - slot = dict_size_++; \ - hash_slots_[j] = slot; \ - \ - RETURN_NOT_OK(dict_data_.Append(value, byte_width_)); \ - \ - action->ObserveNotFound(slot); \ - \ - if (ARROW_PREDICT_FALSE(dict_size_ > hash_table_load_threshold_)) { \ - RETURN_NOT_OK(action->DoubleSize()); \ - } \ - } else { \ - action->ObserveFound(slot); \ - } - - GENERIC_HASH_PASS(HASH_INNER_LOOP); - -#undef HASH_INNER_LOOP - return Status::OK(); } - Status GetDictionary(std::shared_ptr* out) override { - // TODO(wesm): handle null being in the dictionary - BufferVector buffers = {nullptr, nullptr}; - RETURN_NOT_OK(dict_data_.Finish(&buffers[1])); + Status Flush(Datum* out) override { return action_.Flush(out); } - *out = ArrayData::Make(type_, dict_size_, std::move(buffers), 0); + Status GetDictionary(std::shared_ptr* out) override { + // TODO(wesm): handle null being a valid dictionary value + auto null_array = std::make_shared(0); + *out = null_array->data(); return Status::OK(); } protected: - int64_t HashValue(const uint8_t* data) const { - return HashUtil::Hash(data, byte_width_, 0); - } - - Status DoubleTableSize() { -#define FIXED_BYTES_SETUP const uint8_t* dict_data = dict_data_.data() - -#define FIXED_BYTES_COMPUTE_HASH \ - int64_t j = HashValue(dict_data + index * byte_width_) & new_mod_bitmask - - DOUBLE_TABLE_SIZE(FIXED_BYTES_SETUP, FIXED_BYTES_COMPUTE_HASH); - -#undef FIXED_BYTES_SETUP -#undef FIXED_BYTES_COMPUTE_HASH - - return Status::OK(); - } - - int32_t byte_width_; - TypedBufferBuilder dict_data_; - int32_t dict_size_; + MemoryPool* pool_; + std::shared_ptr type_; + Action action_; }; // ---------------------------------------------------------------------- -// Hash table pass for uint8 and int8 - -template -inline int Hash8Bit(const T val) { - return 0; -} - -template <> -inline int Hash8Bit(const uint8_t val) { - return val; -} +// Kernel wrapper for generic hash table kernels -template <> -inline int Hash8Bit(const int8_t val) { - return val + 128; -} +template +struct HashKernelTraits {}; template -class HashTableKernel> : public HashTable { - public: - using T = typename Type::c_type; - - HashTableKernel(const std::shared_ptr& type, MemoryPool* pool) - : HashTable(type, pool) { - std::fill(table_, table_ + 256, kHashSlotEmpty); - } - - Status Append(const ArrayData& arr) override { - const T* values = GetValues(arr, 1); - auto action = checked_cast(this); - RETURN_NOT_OK(action->Reserve(arr.length)); - -#define HASH_INNER_LOOP() \ - const T value = values[i]; \ - const int hash = Hash8Bit(value); \ - hash_slot_t slot = table_[hash]; \ - \ - if (slot == kHashSlotEmpty) { \ - if (!Action::allow_expand) { \ - throw HashException("Encountered new dictionary value"); \ - } \ - \ - slot = static_cast(dict_.size()); \ - table_[hash] = slot; \ - dict_.push_back(value); \ - action->ObserveNotFound(slot); \ - } else { \ - action->ObserveFound(slot); \ - } - - GENERIC_HASH_PASS(HASH_INNER_LOOP); - -#undef HASH_INNER_LOOP - - return Status::OK(); - } - - Status GetDictionary(std::shared_ptr* out) override { - using BuilderType = typename TypeTraits::BuilderType; - BuilderType builder(pool_); - - for (const T value : dict_) { - RETURN_NOT_OK(builder.Append(value)); - } - - return builder.FinishInternal(out); - } - - private: - hash_slot_t table_[256]; - std::vector dict_; +struct HashKernelTraits> { + using HashKernelImpl = NullHashKernelImpl; }; -// ---------------------------------------------------------------------- -// Unique implementation - -template -class UniqueImpl : public HashTableKernel> { - public: - static constexpr bool allow_expand = true; - using Base = HashTableKernel>; - using Base::Base; - - Status Reserve(const int64_t length) { return Status::OK(); } - - void ObserveFound(const hash_slot_t slot) {} - void ObserveNull() {} - void ObserveNotFound(const hash_slot_t slot) {} - - Status DoubleSize() { return Base::DoubleTableSize(); } - - Status Append(const ArrayData& input) override { return Base::Append(input); } - - Status Flush(Datum* out) override { - // No-op - return Status::OK(); - } +template +struct HashKernelTraits> { + using HashKernelImpl = RegularHashKernelImpl; }; -// ---------------------------------------------------------------------- -// Dictionary encode implementation - -template -class DictEncodeImpl : public HashTableKernel> { - public: - static constexpr bool allow_expand = true; - using Base = HashTableKernel; - - DictEncodeImpl(const std::shared_ptr& type, MemoryPool* pool) - : Base(type, pool), indices_builder_(pool) {} - - Status Reserve(const int64_t length) { return indices_builder_.Reserve(length); } - - void ObserveNull() { indices_builder_.UnsafeAppendToBitmap(false); } - - void ObserveFound(const hash_slot_t slot) { indices_builder_.UnsafeAppend(slot); } - - void ObserveNotFound(const hash_slot_t slot) { return ObserveFound(slot); } - - Status DoubleSize() { return Base::DoubleTableSize(); } - - Status Flush(Datum* out) override { - std::shared_ptr result; - RETURN_NOT_OK(indices_builder_.FinishInternal(&result)); - out->value = std::move(result); - return Status::OK(); - } - - using Base::Append; - - private: - Int32Builder indices_builder_; +template +struct HashKernelTraits> { + using HashKernelImpl = RegularHashKernelImpl; }; -// ---------------------------------------------------------------------- -// Kernel wrapper for generic hash table kernels - -class HashKernelImpl : public HashKernel { - public: - explicit HashKernelImpl(std::unique_ptr hasher) - : hasher_(std::move(hasher)) {} - - Status Call(FunctionContext* ctx, const Datum& input, Datum* out) override { - DCHECK_EQ(Datum::ARRAY, input.kind()); - RETURN_NOT_OK(Append(ctx, *input.array())); - return Flush(out); - } - - Status Append(FunctionContext* ctx, const ArrayData& input) override { - std::lock_guard guard(lock_); - try { - RETURN_NOT_OK(hasher_->Append(input)); - } catch (const HashException& e) { - return Status(e.code(), e.what()); - } - return Status::OK(); - } - - Status Flush(Datum* out) override { return hasher_->Flush(out); } - - Status GetDictionary(std::shared_ptr* out) override { - return hasher_->GetDictionary(out); - } +template +struct HashKernelTraits> { + using HashKernelImpl = RegularHashKernelImpl; +}; - private: - std::mutex lock_; - std::unique_ptr hasher_; +template +struct HashKernelTraits> { + using HashKernelImpl = RegularHashKernelImpl; }; } // namespace Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr& type, std::unique_ptr* out) { - std::unique_ptr hasher; + std::unique_ptr kernel; -#define UNIQUE_CASE(InType) \ - case InType::type_id: \ - hasher.reset(new UniqueImpl(type, ctx->memory_pool())); \ +#define UNIQUE_CASE(InType) \ + case InType::type_id: \ + kernel.reset(new typename HashKernelTraits::HashKernelImpl( \ + type, ctx->memory_pool())); \ break switch (type->id()) { @@ -850,19 +300,22 @@ Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr& ty #undef UNIQUE_CASE - CHECK_IMPLEMENTED(hasher, "unique", type); - out->reset(new HashKernelImpl(std::move(hasher))); + CHECK_IMPLEMENTED(kernel, "unique", type); + RETURN_NOT_OK(kernel->Reset()); + *out = std::move(kernel); return Status::OK(); } Status GetDictionaryEncodeKernel(FunctionContext* ctx, const std::shared_ptr& type, std::unique_ptr* out) { - std::unique_ptr hasher; + std::unique_ptr kernel; -#define DICTIONARY_ENCODE_CASE(InType) \ - case InType::type_id: \ - hasher.reset(new DictEncodeImpl(type, ctx->memory_pool())); \ +#define DICTIONARY_ENCODE_CASE(InType) \ + case InType::type_id: \ + kernel.reset(new \ + typename HashKernelTraits::HashKernelImpl( \ + type, ctx->memory_pool())); \ break switch (type->id()) { @@ -893,8 +346,9 @@ Status GetDictionaryEncodeKernel(FunctionContext* ctx, #undef DICTIONARY_ENCODE_CASE - CHECK_IMPLEMENTED(hasher, "dictionary-encode", type); - out->reset(new HashKernelImpl(std::move(hasher))); + CHECK_IMPLEMENTED(kernel, "dictionary-encode", type); + RETURN_NOT_OK(kernel->Reset()); + *out = std::move(kernel); return Status::OK(); } diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 89229169ecdb7..c43298d011efe 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -60,9 +60,6 @@ install(FILES windows_compatibility.h DESTINATION include/arrow/util) -# Armv8 CRC support -configure_file(config.in.cmake ${CMAKE_CURRENT_SOURCE_DIR}/my_config.h) - ####################################### # arrow_test_main ####################################### diff --git a/cpp/src/arrow/util/armce-util.h b/cpp/src/arrow/util/armce-util.h new file mode 100644 index 0000000000000..8c26c63197bfd --- /dev/null +++ b/cpp/src/arrow/util/armce-util.h @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_UTIL_ARMCE_UTIL_H +#define ARROW_UTIL_ARMCE_UTIL_H + +namespace arrow { + +#if defined(__aarch64__) || defined(__AARCH64__) +#ifdef __ARM_FEATURE_CRC32 +#define ARROW_HAVE_ARM_CRC +#include +#endif +#endif + +#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC) + +#include +#include +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1 << 7) +#endif +static inline uint32_t crc32c_runtime_check(void) +{ + unsigned long auxv = getauxval(AT_HWCAP); + return (auxv & HWCAP_CRC32) != 0; +} + +static inline uint32_t ARMCE_crc32_u8(uint32_t crc, uint8_t v) { + return __crc32cb(crc, v); +} + +static inline uint32_t ARMCE_crc32_u16(uint32_t crc, uint16_t v) { + return __crc32ch(crc, v); +} + +static inline uint32_t ARMCE_crc32_u32(uint32_t crc, uint32_t v) { + return __crc32cw(crc, v); +} + +static inline uint32_t ARMCE_crc32_u64(uint32_t crc, uint64_t v) { + return __crc32cd(crc, v); +} + +#else + +static inline uint32_t crc32c_runtime_check(void) { + DCHECK(false) << "Arm crc32 support is not enabled"; + return 0; +} + +static inline uint32_t ARMCE_crc32_u8(uint32_t, uint8_t) { + DCHECK(false) << "Arm crc32 support is not enabled"; + return 0; +} + +static inline uint32_t ARMCE_crc32_u16(uint32_t, uint16_t) { + DCHECK(false) << "Arm crc32 is not enabled"; + return 0; +} + +static inline uint32_t ARMCE_crc32_u32(uint32_t, uint32_t) { + DCHECK(false) << "Arm crc32 support is not enabled"; + return 0; +} + +static inline uint32_t ARMCE_crc32_u64(uint32_t, uint64_t) { + DCHECK(false) << "Arm crc32 support is not enabled"; + return 0; +} + +#endif // defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC) + +} // namespace arrow + +#endif // ARROW_UTIL_ARMCE_UTIL_H diff --git a/cpp/src/arrow/util/hash-util.h b/cpp/src/arrow/util/hash-util.h index 3f7e4048bdf10..fccb8ca733ab0 100644 --- a/cpp/src/arrow/util/hash-util.h +++ b/cpp/src/arrow/util/hash-util.h @@ -26,21 +26,54 @@ #include "arrow/util/logging.h" #include "arrow/util/macros.h" #include "arrow/util/sse-util.h" +#include "arrow/util/armce-util.h" + +static inline uint32_t HW_crc32_u8(uint32_t crc, uint8_t v) { + DCHECK(false) << "Hardware CRC support is not enabled"; + return 0; +} + +static inline uint32_t HW_crc32_u16(uint32_t crc, uint16_t v) { + DCHECK(false) << "Hardware CRC support is not enabled"; + return 0; +} + +static inline uint32_t HW_crc32_u32(uint32_t crc, uint32_t v) { + DCHECK(false) << "Hardware CRC support is not enabled"; + return 0; +} + +static inline uint32_t HW_crc32_u64(uint32_t crc, uint64_t v) { + DCHECK(false) << "Hardware CRC support is not enabled"; + return 0; +} + +#ifdef ARROW_HAVE_SSE4_2 +#define HW_crc32_u8 SSE4_crc32_u8 +#define HW_crc32_u16 SSE4_crc32_u16 +#define HW_crc32_u32 SSE4_crc32_u32 +#define HW_crc32_u64 SSE4_crc32_u64 +#elif defined(ARROW_HAVE_ARM_CRC) +#define HW_crc32_u8 ARMCE_crc32_u8 +#define HW_crc32_u16 ARMCE_crc32_u16 +#define HW_crc32_u32 ARMCE_crc32_u32 +#define HW_crc32_u64 ARMCE_crc32_u64 +#endif namespace arrow { /// Utility class to compute hash values. class HashUtil { public: -#ifdef ARROW_HAVE_SSE4_2 +#if defined(ARROW_HAVE_SSE4_2) || defined(ARROW_HAVE_ARM_CRC) static constexpr bool have_hardware_crc32 = true; #else static constexpr bool have_hardware_crc32 = false; #endif - /// Compute the Crc32 hash for data using SSE4 instructions. The input hash + /// Compute the Crc32 hash for data using SSE4/ArmCRC instructions. The input hash /// parameter is the current hash/seed value. - /// This should only be called if SSE is supported. + /// This should only be called if SSE/ArmCRC is supported. /// This is ~4x faster than Fnv/Boost Hash. /// TODO: crc32 hashes with different seeds do not result in different hash functions. /// The resulting hashes are correlated. @@ -49,15 +82,15 @@ class HashUtil { const uint8_t* end = p + nbytes; while (p <= end - 8) { - hash = SSE4_crc32_u64(hash, *reinterpret_cast(p)); + hash = HW_crc32_u64(hash, *reinterpret_cast(p)); p += 8; } while (p <= end - 4) { - hash = SSE4_crc32_u32(hash, *reinterpret_cast(p)); + hash = HW_crc32_u32(hash, *reinterpret_cast(p)); p += 4; } while (p < end) { - hash = SSE4_crc32_u8(hash, *p); + hash = HW_crc32_u8(hash, *p); ++p; } @@ -81,30 +114,30 @@ class HashUtil { uint32_t h2 = static_cast(hash); while (nbytes >= 16) { - h1 = SSE4_crc32_u64(h1, *reinterpret_cast(p)); - h2 = SSE4_crc32_u64(h2, *reinterpret_cast(p + 8)); + h1 = HW_crc32_u64(h1, *reinterpret_cast(p)); + h2 = HW_crc32_u64(h2, *reinterpret_cast(p + 8)); nbytes -= 16; p += 16; } if (nbytes >= 8) { - h1 = SSE4_crc32_u32(h1, *reinterpret_cast(p)); - h2 = SSE4_crc32_u32(h2, *reinterpret_cast(p + 4)); + h1 = HW_crc32_u32(h1, *reinterpret_cast(p)); + h2 = HW_crc32_u32(h2, *reinterpret_cast(p + 4)); nbytes -= 8; p += 8; } if (nbytes >= 4) { - h1 = SSE4_crc32_u16(h1, *reinterpret_cast(p)); - h2 = SSE4_crc32_u16(h2, *reinterpret_cast(p + 2)); + h1 = HW_crc32_u16(h1, *reinterpret_cast(p)); + h2 = HW_crc32_u16(h2, *reinterpret_cast(p + 2)); nbytes -= 4; p += 4; } switch (nbytes) { case 3: - h1 = SSE4_crc32_u8(h1, p[3]); + h1 = HW_crc32_u8(h1, p[3]); case 2: - h2 = SSE4_crc32_u8(h2, p[2]); + h2 = HW_crc32_u8(h2, p[2]); case 1: - h1 = SSE4_crc32_u8(h1, p[1]); + h1 = HW_crc32_u8(h1, p[1]); case 0: break; default: @@ -118,7 +151,7 @@ class HashUtil { /// CrcHash() specialized for 1-byte data static inline uint32_t CrcHash1(const void* v, uint32_t hash) { const uint8_t* s = reinterpret_cast(v); - hash = SSE4_crc32_u8(hash, *s); + hash = HW_crc32_u8(hash, *s); hash = (hash << 16) | (hash >> 16); return hash; } @@ -126,7 +159,7 @@ class HashUtil { /// CrcHash() specialized for 2-byte data static inline uint32_t CrcHash2(const void* v, uint32_t hash) { const uint16_t* s = reinterpret_cast(v); - hash = SSE4_crc32_u16(hash, *s); + hash = HW_crc32_u16(hash, *s); hash = (hash << 16) | (hash >> 16); return hash; } @@ -134,7 +167,7 @@ class HashUtil { /// CrcHash() specialized for 4-byte data static inline uint32_t CrcHash4(const void* v, uint32_t hash) { const uint32_t* p = reinterpret_cast(v); - hash = SSE4_crc32_u32(hash, *p); + hash = HW_crc32_u32(hash, *p); hash = (hash << 16) | (hash >> 16); return hash; } @@ -142,7 +175,7 @@ class HashUtil { /// CrcHash() specialized for 8-byte data static inline uint32_t CrcHash8(const void* v, uint32_t hash) { const uint64_t* p = reinterpret_cast(v); - hash = SSE4_crc32_u64(hash, *p); + hash = HW_crc32_u64(hash, *p); hash = (hash << 16) | (hash >> 16); return hash; } @@ -150,9 +183,9 @@ class HashUtil { /// CrcHash() specialized for 12-byte data static inline uint32_t CrcHash12(const void* v, uint32_t hash) { const uint64_t* p = reinterpret_cast(v); - hash = SSE4_crc32_u64(hash, *p); + hash = HW_crc32_u64(hash, *p); ++p; - hash = SSE4_crc32_u32(hash, *reinterpret_cast(p)); + hash = HW_crc32_u32(hash, *reinterpret_cast(p)); hash = (hash << 16) | (hash >> 16); return hash; } @@ -160,9 +193,9 @@ class HashUtil { /// CrcHash() specialized for 16-byte data static inline uint32_t CrcHash16(const void* v, uint32_t hash) { const uint64_t* p = reinterpret_cast(v); - hash = SSE4_crc32_u64(hash, *p); + hash = HW_crc32_u64(hash, *p); ++p; - hash = SSE4_crc32_u64(hash, *p); + hash = HW_crc32_u64(hash, *p); hash = (hash << 16) | (hash >> 16); return hash; } @@ -251,8 +284,8 @@ class HashUtil { return static_cast((hash_u64 >> 32) ^ (hash_u64 & 0xFFFFFFFF)); } - // With sse4.2 - template + // Hash template + template static inline int Hash(const void* data, int32_t bytes, uint32_t seed); /// The magic number (used in hash_combine()) 0x9e3779b9 = 2^32 / (golden ratio). @@ -288,13 +321,21 @@ class HashUtil { } }; -// With sse4.2 +// HW Hash template <> inline int HashUtil::Hash(const void* data, int32_t bytes, uint32_t seed) { - return static_cast(HashUtil::CrcHash(data, bytes, seed)); +#ifdef ARROW_HAVE_ARM_CRC + // Need run time check for Arm + // if not support, fall back to Murmur + if (!crc32c_runtime_check()) + return static_cast(HashUtil::MurmurHash2_64(data, bytes, seed)); + else +#endif + // Double CRC + return static_cast(HashUtil::DoubleCrcHash(data, bytes, seed)); } -// Non-sse4 hash +// Murmur Hash template <> inline int HashUtil::Hash(const void* data, int32_t bytes, uint32_t seed) { return static_cast(HashUtil::MurmurHash2_64(data, bytes, seed));