From e0b21d4951b36b51153326ba3aee9b628a15208c Mon Sep 17 00:00:00 2001 From: Yuan Zhou Date: Thu, 16 Dec 2021 21:21:06 +0800 Subject: [PATCH] using pch Signed-off-by: Yuan Zhou --- .../arrow_compute/ext/codegen_common.cc | 54 +++++++++++-------- .../ext/conditioned_merge_join_kernel.cc | 5 +- .../ext/conditioned_probe_kernel.cc | 5 +- .../ext/hash_aggregate_kernel.cc | 16 +++--- .../codegen/arrow_compute/ext/sort_kernel.cc | 11 +--- .../ext/whole_stage_codegen_kernel.cc | 3 +- .../cpp/src/precompile/wscgapi.hpp | 33 ++++++++++++ 7 files changed, 77 insertions(+), 50 deletions(-) create mode 100644 native-sql-engine/cpp/src/precompile/wscgapi.hpp diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc index da64be0e7..0263724cd 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/codegen_common.cc @@ -39,12 +39,7 @@ namespace extra { std::string BaseCodes() { return R"( -#include -#include -#include "codegen/arrow_compute/ext/code_generator_base.h" -#include "precompile/array.h" -using namespace sparkcolumnarplugin::codegen::arrowcompute::extra; )"; } @@ -601,6 +596,7 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { mkdir(outpath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); std::string prefix = "/spark-columnar-plugin-codegen-"; std::string cppfile = outpath + prefix + signature + ".cc"; + std::string objfile = outpath + prefix + signature + ".o"; std::string libfile = outpath + prefix + signature + ".so"; std::string jarfile = outpath + prefix + signature + ".jar"; std::string logfile = outpath + prefix + signature + ".log"; @@ -626,13 +622,15 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { } std::string env_gcc = std::string(env_gcc_); + std::string env_codegen_option = " -O3 -march=native "; char* env_codegen_option_ = std::getenv("CODEGEN_OPTION"); - if (env_codegen_option_ == nullptr) { - env_codegen_option_ = " -O3 -march=native "; + if (env_codegen_option_ != nullptr) { + env_codegen_option = std::string(env_codegen_option_); } - std::string env_codegen_option = std::string(env_codegen_option_); + std::string libwscgfile = GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp"; + std::string libwscg_pch = GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp.gch"; const char* env_arrow_dir = std::getenv("LIBARROW_DIR"); std::string arrow_header; std::string arrow_lib, arrow_lib2; @@ -646,14 +644,35 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { arrow_lib2 = " -L" + std::string(env_arrow_dir) + "/lib "; } // compile the code - std::string cmd = env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header + - arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 + - nativesql_lib + cppfile + " -o " + libfile + env_codegen_option + - " -shared -fPIC -lspark_columnar_jni 2> " + logfile; + std::string base_dir = GetTempPath(); + chdir(base_dir.c_str()); + std::string cmd = ""; + struct stat pch_stat; + auto ret = stat(libwscg_pch.c_str(), &pch_stat); + if (ret == -1) { + cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header + + arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 + + " -c " + + libwscgfile + env_codegen_option + " -fPIC && "; + + } + + cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header + + nativesql_header + nativesql_header_2 + " -c " + + cppfile + " -o "+ objfile + env_codegen_option + "-fPIC && "; + // linking + cmd += env_gcc + arrow_lib + arrow_lib2 + nativesql_lib + + objfile + " -o " + libfile + " -lspark_columnar_jni -shared && "; + + // package + cmd += "cd " + outpath + " && jar -cf spark-columnar-plugin-codegen-precompile-" + + signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so 2>" + logfile; + + #ifdef DEBUG std::cout << cmd << std::endl; #endif - int ret; + int elapse_time = 0; TIME_MICRO(elapse_time, ret, system(cmd.c_str())); #ifdef DEBUG @@ -664,15 +683,6 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { std::cout << cmd << std::endl; return arrow::Status::Invalid("compilation failed, see ", logfile); } - cmd = "cd " + outpath + "; jar -cf spark-columnar-plugin-codegen-precompile-" + - signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so"; -#ifdef DEBUG - std::cout << cmd << std::endl; -#endif - ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != EXIT_SUCCESS) { - return arrow::Status::Invalid("package jar failed"); - } #ifdef DEBUG struct stat tstat; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc index 273ce6313..1a1d92c6c 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_merge_join_kernel.cc @@ -108,8 +108,7 @@ class ConditionedMergeJoinKernel::Impl { auto codegen_ctx = std::make_shared(); bool use_relation_for_stream = input.empty(); - codegen_ctx->header_codes.push_back( - R"(#include "codegen/arrow_compute/ext/array_item_index.h")"); + std::vector prepare_list; bool cond_check = false; @@ -119,7 +118,7 @@ class ConditionedMergeJoinKernel::Impl { std::stringstream sort_define_ss; std::vector field_list = {left_field_list_, right_field_list_}; - codegen_ctx->header_codes.push_back(R"(#include "codegen/common/sort_relation.h")"); + int idx = 0; for (auto relation_id : relation_id_) { auto relation_list_name = "sort_relation_" + std::to_string(relation_id) + "_"; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc index a09eb30ef..131fe471b 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/conditioned_probe_kernel.cc @@ -173,8 +173,7 @@ class ConditionedProbeKernel::Impl { std::shared_ptr* codegen_ctx_out, int* var_id) { auto codegen_ctx = std::make_shared(); - codegen_ctx->header_codes.push_back( - R"(#include "codegen/arrow_compute/ext/array_item_index.h")"); + std::vector prepare_list; bool cond_check = false; @@ -192,7 +191,7 @@ class ConditionedProbeKernel::Impl { hash_prepare_ss << "RETURN_NOT_OK(typed_dependent_iter_list_" << hash_relation_id_ << "->Next(" << "&" << relation_list_name << "));" << std::endl; - codegen_ctx->header_codes.push_back(R"(#include "codegen/common/hash_relation.h")"); + hash_define_ss << "std::shared_ptr " << relation_list_name << ";" << std::endl; diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc index 182cf16a2..53063c083 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/hash_aggregate_kernel.cc @@ -215,10 +215,7 @@ class HashAggregateKernel::Impl { std::shared_ptr* codegen_ctx_out, int* var_id) { auto codegen_ctx = std::make_shared(); - codegen_ctx->header_codes.push_back( - R"(#include "codegen/arrow_compute/ext/array_item_index.h")"); - codegen_ctx->header_codes.push_back( - R"(#include "codegen/arrow_compute/ext/actions_impl.h")"); + std::vector prepare_list; // 1.0 prepare aggregate input expressions @@ -243,7 +240,7 @@ class HashAggregateKernel::Impl { // 1. Get action list and action_prepare_project_list if (key_node_list.size() > 0 && key_node_list[0]->return_type()->id() == arrow::Type::DECIMAL128) { - codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")"); + aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<" << GetTypeString(key_node_list[0]->return_type(), "") << "HashMap>(ctx_->memory_pool());" << std::endl; @@ -254,7 +251,7 @@ class HashAggregateKernel::Impl { } else if (key_node_list.size() > 1 || (key_node_list.size() > 0 && key_node_list[0]->return_type()->id() == arrow::Type::STRING)) { - codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")"); + aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<" << GetTypeString(arrow::utf8(), "") << "HashMap>(ctx_->memory_pool());" << std::endl; @@ -263,7 +260,7 @@ class HashAggregateKernel::Impl { } else if (key_node_list.size() > 0) { auto type = key_node_list[0]->return_type(); - codegen_ctx->header_codes.push_back(R"(#include "precompile/sparse_hash_map.h")"); + aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<" << "SparseHashMap<" << GetCTypeString(type) << ">>(ctx_->memory_pool());" << std::endl; @@ -308,8 +305,7 @@ class HashAggregateKernel::Impl { prepare_ss << "auto " << unsafe_row_name_validity << " = " << project_output_list[i].first.first << "_validity;" << std::endl; } else { - codegen_ctx->header_codes.push_back( - R"(#include "third_party/row_wise_memory/unsafe_row.h")"); + std::stringstream unsafe_row_define_ss; unsafe_row_define_ss << "std::shared_ptr " << unsafe_row_name << "_unsafe_row = std::make_shared(" @@ -562,7 +558,7 @@ class HashAggregateKernel::Impl { if (!result_expr_list_.empty()) { codegen_ctx->gandiva_projector = std::make_shared( ctx_, arrow::schema(result_field_list_), GetGandivaKernel(result_expr_list_)); - codegen_ctx->header_codes.push_back(R"(#include "precompile/gandiva_projector.h")"); + finish_ss << "RETURN_NOT_OK(gandiva_projector_list_[gp_idx++]->Evaluate(&" "do_hash_" "aggr_finish_" diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc index 49a1ab48b..8d7292e76 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc @@ -1987,18 +1987,9 @@ class SortArraysCodegenKernel : public SortArraysToIndicesKernel::Impl { GetCachedVariablesClear(key_typed_codegen_list); return BaseCodes() + R"( -#include -#include -#include +#include "precompile/wscgapi.hpp" -#include "codegen/arrow_compute/ext/array_item_index.h" -#include "codegen/common/sort_relation.h" -#include "precompile/builder.h" -#include "precompile/type.h" -#include "third_party/ska_sort.hpp" -#include "third_party/timsort.hpp" -using namespace sparkcolumnarplugin::precompile; class TypedSorterImpl : public CodeGenBase { public: diff --git a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc index f5c4fc973..b0d575c10 100644 --- a/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc +++ b/native-sql-engine/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc @@ -306,8 +306,7 @@ class WholeStageCodeGenKernel::Impl { std::string out_list; std::stringstream define_ss; codes_ss << BaseCodes() << std::endl; - codes_ss << R"(#include "precompile/builder.h")" << std::endl; - codes_ss << R"(#include "utils/macros.h")" << std::endl; + codes_ss << R"(#include "precompile/wscgapi.hpp")" << std::endl; std::vector headers; for (auto codegen_ctx : codegen_ctx_list) { for (auto header : codegen_ctx->header_codes) { diff --git a/native-sql-engine/cpp/src/precompile/wscgapi.hpp b/native-sql-engine/cpp/src/precompile/wscgapi.hpp new file mode 100644 index 000000000..003898963 --- /dev/null +++ b/native-sql-engine/cpp/src/precompile/wscgapi.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + +#include "codegen/arrow_compute/ext/code_generator_base.h" +#include "precompile/array.h" + +#include "precompile/builder.h" +#include "utils/macros.h" +#include "codegen/arrow_compute/ext/array_item_index.h" +#include "codegen/common/hash_relation.h" +#include "codegen/arrow_compute/ext/actions_impl.h" +#include "precompile/hash_map.h" +#include "precompile/sparse_hash_map.h" +#include "codegen/common/sort_relation.h" +#include "third_party/row_wise_memory/unsafe_row.h" + +#include "precompile/type.h" +#include "third_party/ska_sort.hpp" +#include "third_party/timsort.hpp" +#include "precompile/gandiva.h" + +#include + +#include +#include + +#include +#include +using namespace sparkcolumnarplugin::precompile; + +using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;