Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rebase the patch to master
Browse files Browse the repository at this point in the history
Change-Id: I3044a89bae619968e340636996f014a0134f1030
Signed-off-by: Yuqi Gu <yuqi.gu@arm.com>
guyuqi committed Nov 28, 2018
1 parent e9c1317 commit b13ee1f
Showing 6 changed files with 386 additions and 963 deletions.
6 changes: 6 additions & 0 deletions cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
@@ -22,6 +22,8 @@ include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-msse4.2" CXX_SUPPORTS_SSE4_2)
# power compiler flags
CHECK_CXX_COMPILER_FLAG("-maltivec" CXX_SUPPORTS_ALTIVEC)
# Arm64 compiler flags
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+crc" CXX_SUPPORTS_ARMCRC)

# This ensures that things like gnu++11 get passed correctly
set(CMAKE_CXX_STANDARD 11)
@@ -220,6 +222,10 @@ if (CXX_SUPPORTS_ALTIVEC AND ARROW_ALTIVEC)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -maltivec")
endif()

if (CXX_SUPPORTS_ARMCRC)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=armv8-a+crc")
endif()

if (ARROW_USE_SSE)
add_definitions(-DARROW_USE_SSE)
endif()
347 changes: 91 additions & 256 deletions cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
@@ -33,18 +33,9 @@
#include "arrow/util/bit-util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/hash-util.h"
#include "arrow/util/hash.h"
#include "arrow/util/hashing.h"
#include "arrow/util/logging.h"

#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_USE_ARMCE)
#define HASH_MODE USE_ARMCRC
#elif defined(ARROW_USE_SSE)
#define HASH_MODE USE_SSE42
#else
#define HASH_MODE USE_DEFAULT
#endif

namespace arrow {

using internal::AdaptiveIntBuilderBase;
@@ -738,7 +729,7 @@ Status BooleanBuilder::AppendValues(const std::vector<bool>& values,

int64_t i = 0;
internal::GenerateBitsUnrolled(raw_data_, length_, length,
[values, &i]() -> bool { return values[i++]; });
[&values, &i]() -> bool { return values[i++]; });

// this updates length_
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
@@ -751,7 +742,7 @@ Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {

int64_t i = 0;
internal::GenerateBitsUnrolled(raw_data_, length_, length,
[values, &i]() -> bool { return values[i++]; });
[&values, &i]() -> bool { return values[i++]; });

// this updates length_
ArrayBuilder::UnsafeSetNotNull(length);
@@ -761,152 +752,42 @@ Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
// ----------------------------------------------------------------------
// DictionaryBuilder

using internal::DictionaryScalar;
using internal::WrappedBinary;

namespace {

// A helper class to manage a hash table embedded in a typed Builder.
template <typename T, typename Enable = void>
struct DictionaryHashHelper {};

// DictionaryHashHelper implementation for primitive types
template <typename T>
struct DictionaryHashHelper<T, enable_if_has_c_type<T>> {
using Builder = typename TypeTraits<T>::BuilderType;
using Scalar = typename DictionaryScalar<T>::type;

// Get the dictionary value at the given builder index
static Scalar GetDictionaryValue(const Builder& builder, int64_t index) {
return builder.GetValue(index);
}

// Compute the hash of a scalar value
static int64_t HashValue(const Scalar& value, int byte_width) {
return HashUtil::Hash<HASH_MODE>(&value, sizeof(Scalar), 0);
}

// Return whether the dictionary value at the given builder index is unequal to value
static bool SlotDifferent(const Builder& builder, int64_t index, const Scalar& value) {
return GetDictionaryValue(builder, index) != value;
}

// Append a value to the builder
static Status AppendValue(Builder& builder, const Scalar& value) {
return builder.Append(value);
}

// Append another builder's contents to the builder
static Status AppendArray(Builder& builder, const Array& in_array) {
const auto& array = checked_cast<const PrimitiveArray&>(in_array);
return builder.AppendValues(reinterpret_cast<const Scalar*>(array.values()->data()),
array.length(), nullptr);
}
class DictionaryBuilder<T>::MemoTableImpl
: public internal::HashTraits<T>::MemoTableType {
public:
using MemoTableType = typename internal::HashTraits<T>::MemoTableType;
using MemoTableType::MemoTableType;
};

// DictionaryHashHelper implementation for StringType / BinaryType.
// Values are handled as (pointer, length) pairs: Scalar exposes `ptr_` and
// `length_`, and the builder is queried through GetValue(index, &length).
template <typename T>
struct DictionaryHashHelper<T, enable_if_binary<T>> {
using Builder = typename TypeTraits<T>::BuilderType;
using Scalar = typename DictionaryScalar<T>::type;

// Return the builder entry at `index` as a WrappedBinary built from the
// pointer and length reported by the builder.
static Scalar GetDictionaryValue(const Builder& builder, int64_t index) {
int32_t v_length;
const uint8_t* v_ptr = builder.GetValue(index, &v_length);
return WrappedBinary(v_ptr, v_length);
}

// Hash the value's bytes with seed 0. `byte_width` is not used by this
// specialization.
static int64_t HashValue(const Scalar& value, int byte_width) {
return HashUtil::Hash<HASH_MODE>(value.ptr_, value.length_, 0);
}

// Return true when the builder entry at `index` differs from `value`,
// either in length or in byte content.
static bool SlotDifferent(const Builder& builder, int64_t index, const Scalar& value) {
int32_t other_length;
const uint8_t* other_ptr = builder.GetValue(index, &other_length);
// Lengths are compared first, so memcmp only runs on equal-length values.
return value.length_ != other_length ||
memcmp(value.ptr_, other_ptr, other_length) != 0;
}

// Append the value's bytes to the builder.
static Status AppendValue(Builder& builder, const Scalar& value) {
return builder.Append(value.ptr_, value.length_);
}

// Append every element of `in_array` (cast to BinaryArray) to the builder,
// returning early on the first Append that fails. Slots are not checked for
// null here.
static Status AppendArray(Builder& builder, const Array& in_array) {
const auto& array = checked_cast<const BinaryArray&>(in_array);
for (uint64_t index = 0, limit = array.length(); index < limit; ++index) {
int32_t length;
const uint8_t* ptr = array.GetValue(index, &length);
RETURN_NOT_OK(builder.Append(ptr, length));
}
return Status::OK();
}
};

// DictionaryHashHelper implementation for FixedSizeBinaryType.
// Builder and Scalar are always taken from FixedSizeBinaryType, regardless
// of T. Every value is assumed to span the builder's byte width.
template <typename T>
struct DictionaryHashHelper<T, enable_if_fixed_size_binary<T>> {
using Builder = typename TypeTraits<FixedSizeBinaryType>::BuilderType;
using Scalar = typename DictionaryScalar<FixedSizeBinaryType>::type;

// Return the builder entry at `index`.
static Scalar GetDictionaryValue(const Builder& builder, int64_t index) {
return builder.GetValue(index);
}

// Hash `byte_width` bytes starting at `value`, with seed 0.
static int64_t HashValue(const Scalar& value, int byte_width) {
return HashUtil::Hash<HASH_MODE>(value, byte_width, 0);
}

// Return true when the builder entry at `index` differs from the bytes at
// `value`; exactly builder.byte_width() bytes are compared.
static bool SlotDifferent(const Builder& builder, int64_t index, const uint8_t* value) {
const int32_t width = builder.byte_width();
const uint8_t* other_value = builder.GetValue(index);
return memcmp(value, other_value, width) != 0;
}

// Append a single value to the builder.
static Status AppendValue(Builder& builder, const Scalar& value) {
return builder.Append(value);
}

// Append every element of `in_array` (cast to FixedSizeBinaryArray) to the
// builder, returning early on the first Append that fails. Slots are not
// checked for null here.
static Status AppendArray(Builder& builder, const Array& in_array) {
const auto& array = checked_cast<const FixedSizeBinaryArray&>(in_array);
for (uint64_t index = 0, limit = array.length(); index < limit; ++index) {
const Scalar value = array.GetValue(index);
RETURN_NOT_OK(builder.Append(value));
}
return Status::OK();
}
};

} // namespace
DictionaryBuilder<T>::~DictionaryBuilder() {}

template <typename T>
DictionaryBuilder<T>::DictionaryBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool)
: ArrayBuilder(type, pool),
hash_slots_(nullptr),
dict_builder_(type, pool),
overflow_dict_builder_(type, pool),
values_builder_(pool),
byte_width_(-1) {}
: ArrayBuilder(type, pool), byte_width_(-1), values_builder_(pool) {
DCHECK_EQ(T::type_id, type->id()) << "inconsistent type passed to DictionaryBuilder";
}

DictionaryBuilder<NullType>::DictionaryBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool)
: ArrayBuilder(type, pool), values_builder_(pool) {}
: ArrayBuilder(type, pool), values_builder_(pool) {
DCHECK_EQ(Type::NA, type->id()) << "inconsistent type passed to DictionaryBuilder";
}

// Constructor specialization for FixedSizeBinaryType: byte_width_ is read
// from the concrete FixedSizeBinaryType carried by `type`, and both the
// dictionary builder and the overflow dictionary builder are created with
// that same type and pool.
template <>
DictionaryBuilder<FixedSizeBinaryType>::DictionaryBuilder(
const std::shared_ptr<DataType>& type, MemoryPool* pool)
: ArrayBuilder(type, pool),
hash_slots_(nullptr),
dict_builder_(type, pool),
overflow_dict_builder_(type, pool),
values_builder_(pool),
byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()) {}

template <typename T>
void DictionaryBuilder<T>::Reset() {
dict_builder_.Reset();
overflow_dict_builder_.Reset();
ArrayBuilder::Reset();
values_builder_.Reset();
memo_table_.reset();
delta_offset_ = 0;
}

template <typename T>
@@ -916,14 +797,10 @@ Status DictionaryBuilder<T>::Resize(int64_t capacity) {
}

if (capacity_ == 0) {
// Fill the initial hash table
RETURN_NOT_OK(internal::NewHashTable(kInitialHashTableSize, pool_, &hash_table_));
hash_slots_ = reinterpret_cast<int32_t*>(hash_table_->mutable_data());
hash_table_size_ = kInitialHashTableSize;
entry_id_offset_ = 0;
mod_bitmask_ = kInitialHashTableSize - 1;
hash_table_load_threshold_ =
static_cast<int64_t>(static_cast<double>(capacity) * kMaxHashTableLoad);
// Initialize hash table
// XXX should we let the user pass additional size heuristics?
memo_table_.reset(new MemoTableImpl(0));
delta_offset_ = 0;
}
RETURN_NOT_OK(values_builder_.Resize(capacity));
return ArrayBuilder::Resize(capacity);
@@ -937,67 +814,12 @@ Status DictionaryBuilder<NullType>::Resize(int64_t capacity) {
return ArrayBuilder::Resize(capacity);
}

// Hash `value` by delegating to the type-specific DictionaryHashHelper,
// passing this builder's byte_width_ along.
template <typename T>
int64_t DictionaryBuilder<T>::HashValue(const Scalar& value) {
return DictionaryHashHelper<T>::HashValue(value, byte_width_);
}

// Fetch the entry at `index` from the given dictionary builder by
// delegating to the type-specific DictionaryHashHelper.
template <typename T>
typename DictionaryBuilder<T>::Scalar DictionaryBuilder<T>::GetDictionaryValue(
typename TypeTraits<T>::BuilderType& dictionary_builder, int64_t index) {
return DictionaryHashHelper<T>::GetDictionaryValue(dictionary_builder, index);
}

// Return true when the dictionary entry identified by `index` is not equal to
// `value`. Indices below entry_id_offset_ are looked up in the overflow
// dictionary builder; all others are looked up in the delta dictionary
// builder after subtracting entry_id_offset_.
template <typename T>
bool DictionaryBuilder<T>::SlotDifferent(hash_slot_t index, const Scalar& value) {
  DCHECK_GE(index, 0);

  if (index < entry_id_offset_) {
    // Entry lives in the overflow dictionary; the slot index is used as-is.
    DCHECK_LT(index, overflow_dict_builder_.length());
    return DictionaryHashHelper<T>::SlotDifferent(overflow_dict_builder_,
                                                  static_cast<int64_t>(index), value);
  }

  // Lookup delta dictionary
  DCHECK_LT(index - entry_id_offset_, dict_builder_.length());
  return DictionaryHashHelper<T>::SlotDifferent(
      dict_builder_, static_cast<int64_t>(index - entry_id_offset_), value);
}

// Append `value` to dict_builder_ by delegating to the type-specific
// DictionaryHashHelper.
template <typename T>
Status DictionaryBuilder<T>::AppendDictionary(const Scalar& value) {
return DictionaryHashHelper<T>::AppendValue(dict_builder_, value);
}

template <typename T>
Status DictionaryBuilder<T>::Append(const Scalar& value) {
RETURN_NOT_OK(Reserve(1));
// Based on DictEncoder<DType>::Put
int64_t j = HashValue(value) & mod_bitmask_;
hash_slot_t index = hash_slots_[j];

// Find an empty slot
while (kHashSlotEmpty != index && SlotDifferent(index, value)) {
// Linear probing
++j;
if (j == hash_table_size_) {
j = 0;
}
index = hash_slots_[j];
}

if (index == kHashSlotEmpty) {
// Not in the hash table, so we insert it now
index = static_cast<hash_slot_t>(dict_builder_.length() + entry_id_offset_);
hash_slots_[j] = index;
RETURN_NOT_OK(AppendDictionary(value));

if (ARROW_PREDICT_FALSE(static_cast<int32_t>(dict_builder_.length()) >
hash_table_load_threshold_)) {
RETURN_NOT_OK(DoubleTableSize());
}
}

RETURN_NOT_OK(values_builder_.Append(index));
auto memo_index = memo_table_->GetOrInsert(value);
RETURN_NOT_OK(values_builder_.Append(memo_index));

return Status::OK();
}
@@ -1029,48 +851,24 @@ Status DictionaryBuilder<NullType>::AppendArray(const Array& array) {
return Status::OK();
}

// Append all slots of a FixedSizeBinary array to this dictionary builder.
// Fails with Status::Invalid when the array's type does not equal the
// builder's type; null slots are forwarded to AppendNull().
template <>
Status DictionaryBuilder<FixedSizeBinaryType>::AppendArray(const Array& array) {
  const bool types_match = type_->Equals(*array.type());
  if (!types_match) {
    return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type");
  }

  const auto& fixed_array = checked_cast<const FixedSizeBinaryArray&>(array);
  for (int64_t pos = 0; pos < array.length(); ++pos) {
    if (!array.IsNull(pos)) {
      RETURN_NOT_OK(Append(fixed_array.Value(pos)));
    } else {
      RETURN_NOT_OK(AppendNull());
    }
  }
  return Status::OK();
}

// Grow the hash table. All of the work is done by the DOUBLE_TABLE_SIZE
// macro, which is defined outside this view; INNER_LOOP supplies the
// statement that computes the slot `j` for a dictionary entry, using
// `index` and `new_mod_bitmask` (names presumably introduced by the macro
// expansion -- confirm against its definition).
template <typename T>
Status DictionaryBuilder<T>::DoubleTableSize() {
#define INNER_LOOP \
int64_t j = HashValue(GetDictionaryValue(dict_builder_, index)) & new_mod_bitmask

DOUBLE_TABLE_SIZE(, INNER_LOOP);

return Status::OK();
}

template <typename T>
Status DictionaryBuilder<T>::FinishInternal(std::shared_ptr<ArrayData>* out) {
// Finalize indices array
RETURN_NOT_OK(values_builder_.FinishInternal(out));

// Generate dictionary array from hash table contents
std::shared_ptr<Array> dictionary;
entry_id_offset_ += dict_builder_.length();
RETURN_NOT_OK(dict_builder_.Finish(&dictionary));
std::shared_ptr<ArrayData> dictionary_data;

// Store current dict entries for further uses of this DictionaryBuilder
RETURN_NOT_OK(
DictionaryHashHelper<T>::AppendArray(overflow_dict_builder_, *dictionary));
DCHECK_EQ(entry_id_offset_, overflow_dict_builder_.length());
RETURN_NOT_OK(internal::DictionaryTraits<T>::GetDictionaryArrayData(
pool_, type_, *memo_table_, delta_offset_, &dictionary_data));
dictionary = MakeArray(dictionary_data);

RETURN_NOT_OK(values_builder_.FinishInternal(out));
// Set type of array data to the right dictionary type
(*out)->type = std::make_shared<DictionaryType>((*out)->type, dictionary);

dict_builder_.Reset();
// Update internals for further uses of this DictionaryBuilder
delta_offset_ = memo_table_->size();
values_builder_.Reset();

return Status::OK();
@@ -1089,26 +887,42 @@ Status DictionaryBuilder<NullType>::FinishInternal(std::shared_ptr<ArrayData>* o
// StringType and BinaryType specializations
//

#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \
\
template <> \
Status DictionaryBuilder<Type>::AppendArray(const Array& array) { \
const BinaryArray& binary_array = checked_cast<const BinaryArray&>(array); \
WrappedBinary value(nullptr, 0); \
for (int64_t i = 0; i < array.length(); i++) { \
if (array.IsNull(i)) { \
RETURN_NOT_OK(AppendNull()); \
} else { \
value.ptr_ = binary_array.GetValue(i, &value.length_); \
RETURN_NOT_OK(Append(value)); \
} \
} \
return Status::OK(); \
#define BINARY_DICTIONARY_SPECIALIZATIONS(Type) \
\
template <> \
Status DictionaryBuilder<Type>::AppendArray(const Array& array) { \
using ArrayType = typename TypeTraits<Type>::ArrayType; \
const ArrayType& binary_array = checked_cast<const ArrayType&>(array); \
for (int64_t i = 0; i < array.length(); i++) { \
if (array.IsNull(i)) { \
RETURN_NOT_OK(AppendNull()); \
} else { \
RETURN_NOT_OK(Append(binary_array.GetView(i))); \
} \
} \
return Status::OK(); \
}

BINARY_DICTIONARY_SPECIALIZATIONS(StringType);
BINARY_DICTIONARY_SPECIALIZATIONS(BinaryType);

// Append all slots of a FixedSizeBinary array to this dictionary builder.
// Fails with Status::Invalid when the array's type does not equal the
// builder's type; null slots are forwarded to AppendNull().
template <>
Status DictionaryBuilder<FixedSizeBinaryType>::AppendArray(const Array& array) {
  const bool types_match = type_->Equals(*array.type());
  if (!types_match) {
    return Status::Invalid("Cannot append FixedSizeBinary array with non-matching type");
  }

  const auto& fixed_array = checked_cast<const FixedSizeBinaryArray&>(array);
  for (int64_t pos = 0; pos < array.length(); ++pos) {
    if (!array.IsNull(pos)) {
      RETURN_NOT_OK(Append(fixed_array.GetValue(pos)));
    } else {
      RETURN_NOT_OK(AppendNull());
    }
  }
  return Status::OK();
}

template class DictionaryBuilder<UInt8Type>;
template class DictionaryBuilder<UInt16Type>;
template class DictionaryBuilder<UInt32Type>;
@@ -1316,6 +1130,19 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
return value_data_builder_.data() + offset;
}

// Return a string_view over the bytes of the i-th value held by the builder.
// The view points into value_data_builder_'s buffer.
util::string_view BinaryBuilder::GetView(int64_t i) const {
  const int32_t* value_offsets = offsets_builder_.data();
  const int32_t begin = value_offsets[i];

  // For the last value the end position is taken from the current length of
  // the data buffer instead of from the offsets buffer.
  const bool is_last = (i == (length_ - 1));
  const int32_t end = is_last ? static_cast<int32_t>(value_data_builder_.length())
                              : value_offsets[i + 1];

  const char* first = reinterpret_cast<const char*>(value_data_builder_.data() + begin);
  return util::string_view(first, end - begin);
}

StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}

Status StringBuilder::AppendValues(const std::vector<std::string>& values,
@@ -1414,17 +1241,19 @@ FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr<DataType>&
byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()),
byte_builder_(pool) {}

// Only compiled when NDEBUG is not defined: checks via DCHECK_EQ that `size`
// equals this builder's byte_width_.
#ifndef NDEBUG
void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) {
DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder";
}
#endif

// Append `length` fixed-width values stored back to back at `data`.
// `valid_bytes` is handed to UnsafeAppendToBitmap together with `length`;
// length * byte_width_ bytes are then copied into the byte builder.
Status FixedSizeBinaryBuilder::AppendValues(const uint8_t* data, int64_t length,
                                            const uint8_t* valid_bytes) {
  RETURN_NOT_OK(Reserve(length));
  UnsafeAppendToBitmap(valid_bytes, length);

  const auto total_bytes = length * byte_width_;
  return byte_builder_.Append(data, total_bytes);
}

// Append the contents of `value` by forwarding its character buffer to the
// const uint8_t* overload of Append.
// NOTE(review): value.size() is not checked here; the pointer overload
// presumably reads byte_width_ bytes, so a shorter string would be
// over-read -- confirm against that overload.
Status FixedSizeBinaryBuilder::Append(const std::string& value) {
return Append(reinterpret_cast<const uint8_t*>(value.c_str()));
}

Status FixedSizeBinaryBuilder::AppendNull() {
RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(false);
@@ -1457,6 +1286,12 @@ const uint8_t* FixedSizeBinaryBuilder::GetValue(int64_t i) const {
return data_ptr + i * byte_width_;
}

// Return a string_view of byte_width_ bytes over the i-th value. The view
// points into byte_builder_'s buffer at byte offset i * byte_width_.
util::string_view FixedSizeBinaryBuilder::GetView(int64_t i) const {
  const char* base = reinterpret_cast<const char*>(byte_builder_.data());
  const char* first = base + i * byte_width_;
  return util::string_view(first, byte_width_);
}

// ----------------------------------------------------------------------
// Struct

806 changes: 130 additions & 676 deletions cpp/src/arrow/compute/kernels/hash.cc

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions cpp/src/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -60,9 +60,6 @@ install(FILES
windows_compatibility.h
DESTINATION include/arrow/util)

# Armv8 CRC support
configure_file(config.in.cmake ${CMAKE_CURRENT_SOURCE_DIR}/my_config.h)

#######################################
# arrow_test_main
#######################################
90 changes: 90 additions & 0 deletions cpp/src/arrow/util/armce-util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef ARROW_UTIL_ARMCE_UTIL_H
#define ARROW_UTIL_ARMCE_UTIL_H

// Wrappers around the Armv8 CRC32C intrinsics, plus a runtime probe that
// reports whether the running CPU advertises CRC32 support.
//
// ARROW_HAVE_ARM_CRC is defined when building for AArch64 with
// __ARM_FEATURE_CRC32 set. The real wrappers are only compiled for GCC-style
// compilers on Linux; in every other configuration the functions below are
// stubs that hit DCHECK(false) and return 0.
//
// System headers are included at file scope, before `namespace arrow` is
// opened, so that their declarations are not placed inside arrow::.
//
// NOTE(review): DCHECK is used by the stubs but is not defined in this header;
// presumably it comes from arrow/util/logging.h, which the including file is
// expected to pull in first -- confirm.
// NOTE(review): when ARROW_HAVE_ARM_CRC is defined on a non-Linux target the
// stubs are selected even though the macro advertises CRC support -- confirm
// this is intended.

#include <cstdint>

#if defined(__aarch64__) || defined(__AARCH64__)
#ifdef __ARM_FEATURE_CRC32
#define ARROW_HAVE_ARM_CRC
#include <arm_acle.h>
#endif
#endif

#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC)
#include <sys/auxv.h>
#include <asm/hwcap.h>
// Fallback for headers that do not provide the CRC32 capability bit.
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif
#endif

namespace arrow {

#if defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC)

// Return 1 when the AT_HWCAP auxiliary vector entry has the CRC32 bit set,
// 0 otherwise.
static inline uint32_t crc32c_runtime_check(void) {
  unsigned long auxv = getauxval(AT_HWCAP);
  return (auxv & HWCAP_CRC32) != 0;
}

// Fold one byte into `crc` using the CRC32C byte intrinsic.
static inline uint32_t ARMCE_crc32_u8(uint32_t crc, uint8_t v) {
  return __crc32cb(crc, v);
}

// Fold a 16-bit value into `crc` using the CRC32C half-word intrinsic.
static inline uint32_t ARMCE_crc32_u16(uint32_t crc, uint16_t v) {
  return __crc32ch(crc, v);
}

// Fold a 32-bit value into `crc` using the CRC32C word intrinsic.
static inline uint32_t ARMCE_crc32_u32(uint32_t crc, uint32_t v) {
  return __crc32cw(crc, v);
}

// Fold a 64-bit value into `crc` using the CRC32C double-word intrinsic.
static inline uint32_t ARMCE_crc32_u64(uint32_t crc, uint64_t v) {
  return __crc32cd(crc, v);
}

#else

// Stubs for builds without Arm CRC32 support: each one hits DCHECK(false)
// and returns 0.

static inline uint32_t crc32c_runtime_check(void) {
  DCHECK(false) << "Arm crc32 support is not enabled";
  return 0;
}

static inline uint32_t ARMCE_crc32_u8(uint32_t, uint8_t) {
  DCHECK(false) << "Arm crc32 support is not enabled";
  return 0;
}

static inline uint32_t ARMCE_crc32_u16(uint32_t, uint16_t) {
  DCHECK(false) << "Arm crc32 is not enabled";
  return 0;
}

static inline uint32_t ARMCE_crc32_u32(uint32_t, uint32_t) {
  DCHECK(false) << "Arm crc32 support is not enabled";
  return 0;
}

static inline uint32_t ARMCE_crc32_u64(uint32_t, uint64_t) {
  DCHECK(false) << "Arm crc32 support is not enabled";
  return 0;
}

#endif  // defined(__GNUC__) && defined(__linux__) && defined(ARROW_HAVE_ARM_CRC)

}  // namespace arrow

#endif  // ARROW_UTIL_ARMCE_UTIL_H
97 changes: 69 additions & 28 deletions cpp/src/arrow/util/hash-util.h
Original file line number Diff line number Diff line change
@@ -26,21 +26,54 @@
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/sse-util.h"
#include "arrow/util/armce-util.h"

// Fallback for the 8-bit hardware CRC step: hits DCHECK(false) and returns 0.
// Both arguments are ignored.
static inline uint32_t HW_crc32_u8(uint32_t crc, uint8_t v) {
  static_cast<void>(crc);
  static_cast<void>(v);
  DCHECK(false) << "Hardware CRC support is not enabled";
  return 0;
}

// Fallback for the 16-bit hardware CRC step: hits DCHECK(false) and returns 0.
// Both arguments are ignored.
static inline uint32_t HW_crc32_u16(uint32_t crc, uint16_t v) {
  static_cast<void>(crc);
  static_cast<void>(v);
  DCHECK(false) << "Hardware CRC support is not enabled";
  return 0;
}

// Fallback for the 32-bit hardware CRC step: hits DCHECK(false) and returns 0.
// Both arguments are ignored.
static inline uint32_t HW_crc32_u32(uint32_t crc, uint32_t v) {
  static_cast<void>(crc);
  static_cast<void>(v);
  DCHECK(false) << "Hardware CRC support is not enabled";
  return 0;
}

// Fallback for the 64-bit hardware CRC step: hits DCHECK(false) and returns 0.
// Both arguments are ignored.
static inline uint32_t HW_crc32_u64(uint32_t crc, uint64_t v) {
  static_cast<void>(crc);
  static_cast<void>(v);
  DCHECK(false) << "Hardware CRC support is not enabled";
  return 0;
}

#ifdef ARROW_HAVE_SSE4_2
#define HW_crc32_u8 SSE4_crc32_u8
#define HW_crc32_u16 SSE4_crc32_u16
#define HW_crc32_u32 SSE4_crc32_u32
#define HW_crc32_u64 SSE4_crc32_u64
#elif defined(ARROW_HAVE_ARM_CRC)
#define HW_crc32_u8 ARMCE_crc32_u8
#define HW_crc32_u16 ARMCE_crc32_u16
#define HW_crc32_u32 ARMCE_crc32_u32
#define HW_crc32_u64 ARMCE_crc32_u64
#endif

namespace arrow {

/// Utility class to compute hash values.
class HashUtil {
public:
#ifdef ARROW_HAVE_SSE4_2
#if defined(ARROW_HAVE_SSE4_2) || defined(ARROW_HAVE_ARM_CRC)
static constexpr bool have_hardware_crc32 = true;
#else
static constexpr bool have_hardware_crc32 = false;
#endif

/// Compute the Crc32 hash for data using SSE4 instructions. The input hash
/// Compute the Crc32 hash for data using SSE4/ArmCRC instructions. The input hash
/// parameter is the current hash/seed value.
/// This should only be called if SSE is supported.
/// This should only be called if SSE/ArmCRC is supported.
/// This is ~4x faster than Fnv/Boost Hash.
/// TODO: crc32 hashes with different seeds do not result in different hash functions.
/// The resulting hashes are correlated.
@@ -49,15 +82,15 @@ class HashUtil {
const uint8_t* end = p + nbytes;

while (p <= end - 8) {
hash = SSE4_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(p));
hash = HW_crc32_u64(hash, *reinterpret_cast<const uint64_t*>(p));
p += 8;
}
while (p <= end - 4) {
hash = SSE4_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
hash = HW_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
p += 4;
}
while (p < end) {
hash = SSE4_crc32_u8(hash, *p);
hash = HW_crc32_u8(hash, *p);
++p;
}

@@ -81,30 +114,30 @@ class HashUtil {
uint32_t h2 = static_cast<uint32_t>(hash);

while (nbytes >= 16) {
h1 = SSE4_crc32_u64(h1, *reinterpret_cast<const uint64_t*>(p));
h2 = SSE4_crc32_u64(h2, *reinterpret_cast<const uint64_t*>(p + 8));
h1 = HW_crc32_u64(h1, *reinterpret_cast<const uint64_t*>(p));
h2 = HW_crc32_u64(h2, *reinterpret_cast<const uint64_t*>(p + 8));
nbytes -= 16;
p += 16;
}
if (nbytes >= 8) {
h1 = SSE4_crc32_u32(h1, *reinterpret_cast<const uint32_t*>(p));
h2 = SSE4_crc32_u32(h2, *reinterpret_cast<const uint32_t*>(p + 4));
h1 = HW_crc32_u32(h1, *reinterpret_cast<const uint32_t*>(p));
h2 = HW_crc32_u32(h2, *reinterpret_cast<const uint32_t*>(p + 4));
nbytes -= 8;
p += 8;
}
if (nbytes >= 4) {
h1 = SSE4_crc32_u16(h1, *reinterpret_cast<const uint16_t*>(p));
h2 = SSE4_crc32_u16(h2, *reinterpret_cast<const uint16_t*>(p + 2));
h1 = HW_crc32_u16(h1, *reinterpret_cast<const uint16_t*>(p));
h2 = HW_crc32_u16(h2, *reinterpret_cast<const uint16_t*>(p + 2));
nbytes -= 4;
p += 4;
}
switch (nbytes) {
case 3:
h1 = SSE4_crc32_u8(h1, p[3]);
h1 = HW_crc32_u8(h1, p[3]);
case 2:
h2 = SSE4_crc32_u8(h2, p[2]);
h2 = HW_crc32_u8(h2, p[2]);
case 1:
h1 = SSE4_crc32_u8(h1, p[1]);
h1 = HW_crc32_u8(h1, p[1]);
case 0:
break;
default:
@@ -118,51 +151,51 @@ class HashUtil {
/// CrcHash() specialized for 1-byte data
static inline uint32_t CrcHash1(const void* v, uint32_t hash) {
const uint8_t* s = reinterpret_cast<const uint8_t*>(v);
hash = SSE4_crc32_u8(hash, *s);
hash = HW_crc32_u8(hash, *s);
hash = (hash << 16) | (hash >> 16);
return hash;
}

/// CrcHash() specialized for 2-byte data
static inline uint32_t CrcHash2(const void* v, uint32_t hash) {
const uint16_t* s = reinterpret_cast<const uint16_t*>(v);
hash = SSE4_crc32_u16(hash, *s);
hash = HW_crc32_u16(hash, *s);
hash = (hash << 16) | (hash >> 16);
return hash;
}

/// CrcHash() specialized for 4-byte data
static inline uint32_t CrcHash4(const void* v, uint32_t hash) {
const uint32_t* p = reinterpret_cast<const uint32_t*>(v);
hash = SSE4_crc32_u32(hash, *p);
hash = HW_crc32_u32(hash, *p);
hash = (hash << 16) | (hash >> 16);
return hash;
}

/// CrcHash() specialized for 8-byte data
static inline uint32_t CrcHash8(const void* v, uint32_t hash) {
const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
hash = SSE4_crc32_u64(hash, *p);
hash = HW_crc32_u64(hash, *p);
hash = (hash << 16) | (hash >> 16);
return hash;
}

/// CrcHash() specialized for 12-byte data
static inline uint32_t CrcHash12(const void* v, uint32_t hash) {
const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
hash = SSE4_crc32_u64(hash, *p);
hash = HW_crc32_u64(hash, *p);
++p;
hash = SSE4_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
hash = HW_crc32_u32(hash, *reinterpret_cast<const uint32_t*>(p));
hash = (hash << 16) | (hash >> 16);
return hash;
}

/// CrcHash() specialized for 16-byte data
static inline uint32_t CrcHash16(const void* v, uint32_t hash) {
const uint64_t* p = reinterpret_cast<const uint64_t*>(v);
hash = SSE4_crc32_u64(hash, *p);
hash = HW_crc32_u64(hash, *p);
++p;
hash = SSE4_crc32_u64(hash, *p);
hash = HW_crc32_u64(hash, *p);
hash = (hash << 16) | (hash >> 16);
return hash;
}
@@ -251,8 +284,8 @@ class HashUtil {
return static_cast<uint32_t>((hash_u64 >> 32) ^ (hash_u64 & 0xFFFFFFFF));
}

// With sse4.2
template <bool use_sse42 = true>
// Hash template
template <bool hw>
static inline int Hash(const void* data, int32_t bytes, uint32_t seed);

/// The magic number (used in hash_combine()) 0x9e3779b9 = 2^32 / (golden ratio).
@@ -288,13 +321,21 @@ class HashUtil {
}
};

// With sse4.2
// HW Hash
template <>
inline int HashUtil::Hash<true>(const void* data, int32_t bytes, uint32_t seed) {
return static_cast<int>(HashUtil::CrcHash(data, bytes, seed));
#ifdef ARROW_HAVE_ARM_CRC
// A runtime check is needed on Arm:
// if CRC32 is not supported, fall back to Murmur
if (!crc32c_runtime_check())
return static_cast<int>(HashUtil::MurmurHash2_64(data, bytes, seed));
else
#endif
// Double CRC
return static_cast<int>(HashUtil::DoubleCrcHash(data, bytes, seed));
}

// Non-sse4 hash
// Murmur Hash
template <>
inline int HashUtil::Hash<false>(const void* data, int32_t bytes, uint32_t seed) {
return static_cast<int>(HashUtil::MurmurHash2_64(data, bytes, seed));

0 comments on commit b13ee1f

Please sign in to comment.