Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
Use an optimized replacement for std::function (func::function from third_party/function.h)
Browse files: browse the repository at this point in the history
  • Loading branch information
rui-mo committed Feb 4, 2021
1 parent bae3252 commit d39f680
Show file tree
Hide file tree
Showing 4 changed files with 675 additions and 31 deletions.
7 changes: 4 additions & 3 deletions cpp/src/codegen/arrow_compute/ext/cmp_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <arrow/type.h>
#include <arrow/array.h>
#include "third_party/function.h"

namespace sparkcolumnarplugin {
namespace codegen {
Expand All @@ -32,7 +33,7 @@ class TypedComparator {

~TypedComparator() {}

std::function<void(int, int, int64_t, int64_t, int&)> GetCompareFunc(
func::function<void(int, int, int64_t, int64_t, int&)> GetCompareFunc(
const arrow::ArrayVector& arrays, bool asc, bool nulls_first) {
uint64_t null_total = 0;
std::vector<std::shared_ptr<ArrayType>> typed_arrays;
Expand Down Expand Up @@ -161,7 +162,7 @@ class StringComparator {

~StringComparator() {}

std::function<void(int, int, int64_t, int64_t, int&)> GetCompareFunc(
func::function<void(int, int, int64_t, int64_t, int&)> GetCompareFunc(
const arrow::ArrayVector& arrays, bool asc, bool nulls_first) {
uint64_t null_total = 0;
std::vector<std::shared_ptr<ArrayType>> typed_arrays;
Expand Down Expand Up @@ -303,7 +304,7 @@ static arrow::Status MakeCmpFunction(
std::vector<int> key_index_list,
std::vector<bool> sort_directions,
std::vector<bool> nulls_order,
std::vector<std::function<void(int, int, int64_t, int64_t, int&)>>& cmp_functions) {
std::vector<func::function<void(int, int, int64_t, int64_t, int&)>>& cmp_functions) {
for (int i = 0; i < key_field_list.size(); i++) {
auto type = key_field_list[i]->type();
int key_col_id = key_index_list[i];
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/codegen/arrow_compute/ext/sort_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1760,7 +1760,7 @@ class SortMultiplekeyKernel : public SortArraysToIndicesKernel::Impl {
uint64_t num_batches_ = 0;
uint64_t items_total_ = 0;
int col_num_;
std::vector<std::function<void(int, int, int64_t, int64_t, int&)>> cmp_functions_;
std::vector<func::function<void(int, int, int64_t, int64_t, int&)>> cmp_functions_;

class SorterResultIterator : public ResultIterator<arrow::RecordBatch> {
public:
Expand Down
67 changes: 40 additions & 27 deletions cpp/src/tests/arrow_compute_test_sort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,8 @@ TEST(TestArrowComputeSort, SortTestInPlaceNullsFirstAsc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
<<<<<<< HEAD
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
=======
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types,
&sort_expr, true));
>>>>>>> 930de59a... support mul-key sort without projection
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -156,7 +152,8 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsLastAsc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -243,7 +240,8 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsFirstDesc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -330,7 +328,8 @@ TEST(TestArrowComputeSort, SortTestInplaceNullsLastDesc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -417,7 +416,8 @@ TEST(TestArrowComputeSort, SortTestInplaceAsc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -504,7 +504,8 @@ TEST(TestArrowComputeSort, SortTestInplaceDesc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -590,7 +591,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstAsc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
Expand Down Expand Up @@ -683,7 +685,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastAsc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -764,7 +767,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstDesc) {
auto do_codegen = TreeExprBuilder::MakeFunction(
"codegen", {TreeExprBuilder::MakeLiteral(false)}, uint32());
auto n_sort_to_indices = TreeExprBuilder::MakeFunction(
"sortArraysToIndices", {n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
"sortArraysToIndices",
{n_key_func, n_key_field, n_dir, n_nulls_order, NaN_check, do_codegen}, uint32());
auto n_sort = TreeExprBuilder::MakeFunction(
"standalone", {n_sort_to_indices}, uint32());
auto sortArrays_expr = TreeExprBuilder::MakeExpression(n_sort, f_res);
Expand All @@ -774,7 +778,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsFirstDesc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -866,7 +871,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyNullsLastDesc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -961,7 +967,8 @@ TEST(TestArrowComputeSort, SortTestOnekeyBooleanDesc) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -1058,7 +1065,8 @@ TEST(TestArrowComputeSort, SortTestOneKeyStr) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
Expand Down Expand Up @@ -1148,7 +1156,8 @@ TEST(TestArrowComputeSort, SortTestOneKeyWithProjection) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
std::vector<std::shared_ptr<arrow::RecordBatch>> dummy_result_batches;
Expand Down Expand Up @@ -1238,7 +1247,8 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysNaN) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -1351,7 +1361,8 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithProjection) {
"isnull", {arg_2}, arrow::boolean());

auto n_key_func = TreeExprBuilder::MakeFunction(
"key_function", {coalesce_0, isnull_0, coalesce_1, isnull_1, coalesce_2, isnull_2}, uint32());
"key_function",
{coalesce_0, isnull_0, coalesce_1, isnull_1, coalesce_2, isnull_2}, uint32());
auto n_key_field = TreeExprBuilder::MakeFunction(
"key_field", {arg_0, arg_0, arg_1, arg_1, arg_2, arg_2}, uint32());
auto n_dir = TreeExprBuilder::MakeFunction(
Expand All @@ -1377,7 +1388,8 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithProjection) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -1488,8 +1500,8 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithoutCodegen) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(
CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down Expand Up @@ -1602,7 +1614,8 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithoutCodegenWithProjection) {
"isnull", {arg_2}, arrow::boolean());

auto n_key_func = TreeExprBuilder::MakeFunction(
"key_function", {coalesce_0, isnull_0, coalesce_1, isnull_1, coalesce_2, isnull_2}, uint32());
"key_function",
{coalesce_0, isnull_0, coalesce_1, isnull_1, coalesce_2, isnull_2}, uint32());
auto n_key_field = TreeExprBuilder::MakeFunction(
"key_field", {arg_0, arg_0, arg_1, arg_1, arg_2, arg_2}, uint32());
auto n_dir = TreeExprBuilder::MakeFunction(
Expand All @@ -1628,8 +1641,8 @@ TEST(TestArrowComputeSort, SortTestMultipleKeysWithoutCodegenWithProjection) {
///////////////////// Calculation //////////////////
std::shared_ptr<CodeGenerator> sort_expr;
arrow::compute::FunctionContext ctx;
ASSERT_NOT_OK(
CreateCodeGenerator(ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));
ASSERT_NOT_OK(CreateCodeGenerator(
ctx.memory_pool(), sch, {sortArrays_expr}, ret_types, &sort_expr, true));

std::shared_ptr<arrow::RecordBatch> input_batch;
std::vector<std::shared_ptr<arrow::RecordBatch>> input_batch_list;
Expand Down
Loading

0 comments on commit d39f680

Please sign in to comment.