From 795948ba6a3a74c877cff438fac8993186ff9945 Mon Sep 17 00:00:00 2001 From: Yuan Date: Wed, 13 Jul 2022 22:51:26 +0800 Subject: [PATCH] [NSE-999] s/string/string_view in sort (#1009) * s/string/string_view in sort Signed-off-by: Yuan Zhou * improve timsort Signed-off-by: Yuan Zhou --- .../codegen/arrow_compute/ext/sort_kernel.cc | 20 +++++++++---------- .../third_party/row_wise_memory/unsafe_row.h | 2 +- .../cpp/src/third_party/timsort.hpp | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc index 96120b8f0..3adbf5036 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc @@ -1580,8 +1580,8 @@ class SortOnekeyKernel : public SortArraysToIndicesKernel::Impl { -> typename std::enable_if_t::value> { if (asc_) { auto comp = [this](const ArrayItemIndexS& x, const ArrayItemIndexS& y) { - return cached_key_[x.array_id]->GetString(x.id) < - cached_key_[y.array_id]->GetString(y.id); + return cached_key_[x.array_id]->GetView(x.id) < + cached_key_[y.array_id]->GetView(y.id); }; if (nulls_first_) { std::sort(indices_begin + nulls_total_, indices_begin + items_total_, comp); @@ -1590,8 +1590,8 @@ class SortOnekeyKernel : public SortArraysToIndicesKernel::Impl { } } else { auto comp = [this](const ArrayItemIndexS& x, const ArrayItemIndexS& y) { - return cached_key_[x.array_id]->GetString(x.id) > - cached_key_[y.array_id]->GetString(y.id); + return cached_key_[x.array_id]->GetView(x.id) > + cached_key_[y.array_id]->GetView(y.id); }; if (nulls_first_) { std::sort(indices_begin + nulls_total_, indices_begin + items_total_, comp); @@ -2145,11 +2145,11 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx, auto x_num_value = array + std::to_string(cur_key_idx) + "_[x.array_id]->GetView(x.id)"; auto x_str_value = - array + 
std::to_string(cur_key_idx) + "_[x.array_id]->GetString(x.id)"; + array + std::to_string(cur_key_idx) + "_[x.array_id]->GetView(x.id)"; auto y_num_value = array + std::to_string(cur_key_idx) + "_[y.array_id]->GetView(y.id)"; auto y_str_value = - array + std::to_string(cur_key_idx) + "_[y.array_id]->GetString(y.id)"; + array + std::to_string(cur_key_idx) + "_[y.array_id]->GetView(y.id)"; auto is_x_null = array + std::to_string(cur_key_idx) + "_[x.array_id]->IsNull(x.id)"; auto is_y_null = array + std::to_string(cur_key_idx) + "_[y.array_id]->IsNull(y.id)"; auto x_null_count = @@ -2201,7 +2201,7 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx, ss << " else {\n"; // Multiple keys sorting w/ different ordering is supported. - // For string type of data, GetString should be used instead of GetView. + // For string type of data, GetView (string_view) is used instead of GetString. if (asc) { if (data_type->id() == arrow::Type::STRING) { ss << "return " << x_str_value << " < " << y_str_value << ";\n}\n"; @@ -2261,11 +2261,11 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx, auto x_num_value = array + std::to_string(cur_key_idx) + "_[x.array_id]->GetView(x.id)"; auto x_str_value = - array + std::to_string(cur_key_idx) + "_[x.array_id]->GetString(x.id)"; + array + std::to_string(cur_key_idx) + "_[x.array_id]->GetView(x.id)"; auto y_num_value = array + std::to_string(cur_key_idx) + "_[y.array_id]->GetView(y.id)"; auto y_str_value = - array + std::to_string(cur_key_idx) + "_[y.array_id]->GetString(y.id)"; + array + std::to_string(cur_key_idx) + "_[y.array_id]->GetView(y.id)"; auto is_x_nan = "std::isnan(" + x_num_value + ")"; auto is_y_nan = "std::isnan(" + y_num_value + ")"; @@ -2292,7 +2292,7 @@ extern "C" void MakeCodeGen(arrow::compute::ExecContext* ctx, } // Multiple keys sorting w/ different ordering is supported. - // For string type of data, GetString should be used instead of GetView.
+ // For string type of data, GetView (string_view) is used instead of GetString. if (asc) { if (data_type->id() == arrow::Type::STRING) { ss << "return " << x_str_value << " < " << y_str_value << ";\n"; diff --git a/native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h b/native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h index 33f9ce473..6c8cf4809 100644 --- a/native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h +++ b/native-sql-engine/cpp/src/third_party/row_wise_memory/unsafe_row.h @@ -26,7 +26,7 @@ #include "third_party/row_wise_memory/native_memory.h" -#define TEMP_UNSAFEROW_BUFFER_SIZE 1024 +#define TEMP_UNSAFEROW_BUFFER_SIZE 8192 static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; /* Unsafe Row Layout (This unsafe row only used to append all fields data as diff --git a/native-sql-engine/cpp/src/third_party/timsort.hpp b/native-sql-engine/cpp/src/third_party/timsort.hpp index df8b94133..35bcbeb7c 100644 --- a/native-sql-engine/cpp/src/third_party/timsort.hpp +++ b/native-sql-engine/cpp/src/third_party/timsort.hpp @@ -182,7 +182,7 @@ class TimSort { ~TimSort() {} void pushRun(iter_t const runBase, diff_t const runLen) { - pending_.push_back(run(runBase, runLen)); + pending_.emplace_back(runBase, runLen); } void mergeCollapse(Compare compare) {