Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-927] Add macro __AVX512BW__ check for different CPU architecture (
Browse files Browse the repository at this point in the history
…#975)

* Add __AVX512BW__ check

* Fix cFormat
  • Loading branch information
zhixingheyi-tian authored Jun 20, 2022
1 parent e2cfb29 commit 261345c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 5 deletions.
2 changes: 1 addition & 1 deletion native-sql-engine/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ file(COPY codegen/common/hash_relation_number.h DESTINATION ${root_directory}/re

add_definitions(-DNATIVESQL_SRC_PATH="${root_directory}/releases")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes")
set(NATIVE_AVX512_FLAG "-march=icelake-server")
set(NATIVE_AVX512_FLAG "-march=native")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${NATIVE_AVX512_FLAG}")
set(SPARK_COLUMNAR_PLUGIN_SRCS
jni/jni_wrapper.cc
Expand Down
19 changes: 16 additions & 3 deletions native-sql-engine/cpp/src/operators/columnar_to_row_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ arrow::Status ColumnarToRowConverter::Init(
int32_t j = 0;
int32_t* length_data = lengths_.data();

#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512_)) {
__m256i x7_8x = _mm256_load_si256((__m256i*)x_7);
__m256i x8_8x = _mm256_load_si256((__m256i*)x_8);
Expand Down Expand Up @@ -172,6 +173,7 @@ arrow::Status ColumnarToRowConverter::Init(
_mm_prefetch(&offsetarray[j + (128 + 128) / sizeof(offset_type)], _MM_HINT_T0);
}
}
#endif

for (j; j < num_rows_; j++) {
offset_type length = offsetarray[j + 1] - offsetarray[j];
Expand All @@ -192,10 +194,13 @@ arrow::Status ColumnarToRowConverter::Init(
// allocate one more cache line to ease avx operations
if (buffer_ == nullptr || buffer_->capacity() < total_memory_size + 64) {
ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(total_memory_size + 64, memory_pool_));
#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512_)) {
memset(buffer_->mutable_data() + total_memory_size, 0,
buffer_->capacity() - total_memory_size);
} else {
} else
#endif
{
memset(buffer_->mutable_data(), 0, buffer_->capacity());
}
}
Expand Down Expand Up @@ -384,6 +389,7 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
std::vector<uint8_t>& typewidth,
std::vector<std::shared_ptr<arrow::Array>>& arrays,
bool support_avx512) {
#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512)) {
__m256i fill_0_8x;
fill_0_8x = _mm256_xor_si256(fill_0_8x, fill_0_8x);
Expand All @@ -395,6 +401,7 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
}
}
}
#endif

for (auto col_index = 0; col_index < num_cols; col_index++) {
auto& array = arrays[col_index];
Expand Down Expand Up @@ -427,6 +434,7 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
offset_type length = BinaryOffsets[j + 1] - BinaryOffsets[j];
auto value = &dataptrs[col_index][2][BinaryOffsets[j]];

#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512)) {
// write the variable value
offset_type k;
Expand All @@ -440,7 +448,9 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
__m256i v = _mm256_maskz_loadu_epi8(mask, value + k);
_mm256_mask_storeu_epi8(buffer_address + offsets[j] + buffer_cursor[j] + k,
mask, v);
} else {
} else
#endif
{
// write the variable value
memcpy(buffer_address + offsets[j] + buffer_cursor[j], value, length);
}
Expand Down Expand Up @@ -508,11 +518,14 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
for (auto j = row_start; j < row_start + batch_rows; j++) {
if (nullvec[col_index] || (!array->IsNull(j))) {
const uint8_t* srcptr = dataptr + (j << shift);
#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512)) {
__m256i v = _mm256_maskz_loadu_epi8(mask, srcptr);
_mm256_mask_storeu_epi8(buffer_address_tmp + offsets[j], mask, v);
_mm_prefetch(srcptr + 64, _MM_HINT_T0);
} else {
} else
#endif
{
memcpy(buffer_address_tmp + offsets[j], srcptr, typewidth[col_index]);
}
} else {
Expand Down
5 changes: 4 additions & 1 deletion native-sql-engine/cpp/src/shuffle/splitter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,7 @@ arrow::Status Splitter::SplitBinaryType(const uint8_t* src_addr, const T* src_of
<< " strlen = " << strlength << std::endl;
}
auto value_src_ptr = src_addr + src_offset_addr[src_offset];
#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512_)) {
// write the variable value
T k;
Expand All @@ -1260,7 +1261,9 @@ arrow::Status Splitter::SplitBinaryType(const uint8_t* src_addr, const T* src_of
auto mask = (1L << (strlength - k)) - 1;
__m256i v = _mm256_maskz_loadu_epi8(mask, value_src_ptr + k);
_mm256_mask_storeu_epi8(dst_value_base + k, mask, v);
} else {
} else
#endif
{
memcpy(dst_value_base, value_src_ptr, strlength);
}
dst_value_base += strlength;
Expand Down

0 comments on commit 261345c

Please sign in to comment.