Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-126] improve codegen with pre-compiled header (#639)
Browse files Browse the repository at this point in the history
* using pch

Signed-off-by: Yuan Zhou <[email protected]>

* fix format

Signed-off-by: Yuan Zhou <[email protected]>

* fix window sort codegen

Signed-off-by: Yuan Zhou <[email protected]>

* fix test

Signed-off-by: Yuan Zhou <[email protected]>
  • Loading branch information
zhouyuan authored Dec 18, 2021
1 parent b2510a1 commit d9318f8
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 71 deletions.
3 changes: 3 additions & 0 deletions native-sql-engine/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -477,9 +477,12 @@ file(COPY codegen/common/result_iterator.h DESTINATION ${root_directory}/release
# Stage the codegen/common headers under releases/include/codegen/common/ so
# they are available next to the released artifacts.
file(COPY codegen/common/relation_column.h DESTINATION ${root_directory}/releases/include/codegen/common/)
file(COPY codegen/common/hash_relation.h DESTINATION ${root_directory}/releases/include/codegen/common/)
file(COPY codegen/common/sort_relation.h DESTINATION ${root_directory}/releases/include/codegen/common/)
# Create releases/nativesql_include as a symbolic link to releases/include.
# NOTE(review): CompileCodes looks for precompile/wscgapi.hpp (and its .gch)
# under "<GetTempPath()>/nativesql_include"; presumably this link models that
# layout for the build tree -- confirm how GetTempPath() relates to this path.
file(CREATE_LINK ${root_directory}/releases/include ${root_directory}/releases/nativesql_include SYMBOLIC)
file(COPY codegen/common/hash_relation_string.h DESTINATION ${root_directory}/releases/include/codegen/common/)
file(COPY codegen/common/hash_relation_number.h DESTINATION ${root_directory}/releases/include/codegen/common/)



# Expose the releases directory to the C++ sources as the string macro
# NATIVESQL_SRC_PATH.
add_definitions(-DNATIVESQL_SRC_PATH="${root_directory}/releases")
# Silence deprecated-declaration and attribute warnings for the whole target.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes")
set(SPARK_COLUMNAR_PLUGIN_SRCS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,7 @@ namespace extra {

// Returns the common preamble text that the code generators put at the very
// top of each generated source: callers either concatenate it in front of
// their own raw-string template (`BaseCodes() + R"(...)"`) or stream it first
// into the code buffer.
//
// NOTE(review): the same includes and using-directive also appear in
// precompile/wscgapi.hpp, which the generators now include right after this
// preamble. This hunk does not mark removed lines, so the literal below may
// already be reduced to an empty preamble in the checked-in file -- confirm
// against the repository before relying on its contents.
std::string BaseCodes() {
return R"(
#include <arrow/compute/api.h>
#include <arrow/record_batch.h>
#include "codegen/arrow_compute/ext/code_generator_base.h"
#include "precompile/array.h"
using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;
)";
}

Expand Down Expand Up @@ -601,6 +596,7 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
mkdir(outpath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
std::string prefix = "/spark-columnar-plugin-codegen-";
std::string cppfile = outpath + prefix + signature + ".cc";
std::string objfile = outpath + prefix + signature + ".o";
std::string libfile = outpath + prefix + signature + ".so";
std::string jarfile = outpath + prefix + signature + ".jar";
std::string logfile = outpath + prefix + signature + ".log";
Expand All @@ -626,13 +622,16 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
}
std::string env_gcc = std::string(env_gcc_);

std::string env_codegen_option = " -O3 -march=native ";
char* env_codegen_option_ = std::getenv("CODEGEN_OPTION");

if (env_codegen_option_ == nullptr) {
env_codegen_option_ = " -O3 -march=native ";
if (env_codegen_option_ != nullptr) {
env_codegen_option = std::string(env_codegen_option_);
}
std::string env_codegen_option = std::string(env_codegen_option_);

std::string libwscgfile = GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp";
std::string libwscg_pch =
GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp.gch";
const char* env_arrow_dir = std::getenv("LIBARROW_DIR");
std::string arrow_header;
std::string arrow_lib, arrow_lib2;
Expand All @@ -646,14 +645,33 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
arrow_lib2 = " -L" + std::string(env_arrow_dir) + "/lib ";
}
// compile the code
std::string cmd = env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 +
nativesql_lib + cppfile + " -o " + libfile + env_codegen_option +
" -shared -fPIC -lspark_columnar_jni 2> " + logfile;
std::string base_dir = GetTempPath();
chdir(base_dir.c_str());
std::string cmd = "";
struct stat pch_stat;
auto ret = stat(libwscg_pch.c_str(), &pch_stat);
if (ret == -1) {
cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 + " -c " +
libwscgfile + env_codegen_option + " -fPIC && ";
}

cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
nativesql_header + nativesql_header_2 + " -c " + cppfile + " -o " + objfile +
env_codegen_option + "-fPIC && ";
// linking
cmd += env_gcc + arrow_lib + arrow_lib2 + nativesql_lib + objfile + " -o " + libfile +
" -lspark_columnar_jni -shared && ";

// package
cmd += "cd " + outpath + " && jar -cf spark-columnar-plugin-codegen-precompile-" +
signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so 2>" +
logfile;

#ifdef DEBUG
std::cout << cmd << std::endl;
#endif
int ret;

int elapse_time = 0;
TIME_MICRO(elapse_time, ret, system(cmd.c_str()));
#ifdef DEBUG
Expand All @@ -664,15 +682,6 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
std::cout << cmd << std::endl;
return arrow::Status::Invalid("compilation failed, see ", logfile);
}
cmd = "cd " + outpath + "; jar -cf spark-columnar-plugin-codegen-precompile-" +
signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so";
#ifdef DEBUG
std::cout << cmd << std::endl;
#endif
ret = system(cmd.c_str());
if (WEXITSTATUS(ret) != EXIT_SUCCESS) {
return arrow::Status::Invalid("package jar failed");
}

#ifdef DEBUG
struct stat tstat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,6 @@ class ConditionedMergeJoinKernel::Impl {
auto codegen_ctx = std::make_shared<CodeGenContext>();
bool use_relation_for_stream = input.empty();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");

std::vector<std::string> prepare_list;
bool cond_check = false;
if (condition_) cond_check = true;
Expand All @@ -119,7 +116,6 @@ class ConditionedMergeJoinKernel::Impl {
std::stringstream sort_define_ss;
std::vector<gandiva::FieldVector> field_list = {left_field_list_, right_field_list_};

codegen_ctx->header_codes.push_back(R"(#include "codegen/common/sort_relation.h")");
int idx = 0;
for (auto relation_id : relation_id_) {
auto relation_list_name = "sort_relation_" + std::to_string(relation_id) + "_";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,6 @@ class ConditionedProbeKernel::Impl {
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
auto codegen_ctx = std::make_shared<CodeGenContext>();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");

std::vector<std::string> prepare_list;
bool cond_check = false;
if (condition_) cond_check = true;
Expand All @@ -192,7 +189,6 @@ class ConditionedProbeKernel::Impl {
hash_prepare_ss << "RETURN_NOT_OK(typed_dependent_iter_list_" << hash_relation_id_
<< "->Next("
<< "&" << relation_list_name << "));" << std::endl;
codegen_ctx->header_codes.push_back(R"(#include "codegen/common/hash_relation.h")");

hash_define_ss << "std::shared_ptr<HashRelation> " << relation_list_name << ";"
<< std::endl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,6 @@ class HashAggregateKernel::Impl {
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
auto codegen_ctx = std::make_shared<CodeGenContext>();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");
codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/actions_impl.h")");

std::vector<std::string> prepare_list;
// 1.0 prepare aggregate input expressions
std::stringstream prepare_ss;
Expand All @@ -243,7 +238,6 @@ class HashAggregateKernel::Impl {
// 1. Get action list and action_prepare_project_list
if (key_node_list.size() > 0 &&
key_node_list[0]->return_type()->id() == arrow::Type::DECIMAL128) {
codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")");
aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< GetTypeString(key_node_list[0]->return_type(), "")
<< "HashMap>(ctx_->memory_pool());" << std::endl;
Expand All @@ -254,7 +248,6 @@ class HashAggregateKernel::Impl {
} else if (key_node_list.size() > 1 ||
(key_node_list.size() > 0 &&
key_node_list[0]->return_type()->id() == arrow::Type::STRING)) {
codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")");
aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< GetTypeString(arrow::utf8(), "")
<< "HashMap>(ctx_->memory_pool());" << std::endl;
Expand All @@ -263,7 +256,7 @@ class HashAggregateKernel::Impl {

} else if (key_node_list.size() > 0) {
auto type = key_node_list[0]->return_type();
codegen_ctx->header_codes.push_back(R"(#include "precompile/sparse_hash_map.h")");

aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< "SparseHashMap<" << GetCTypeString(type)
<< ">>(ctx_->memory_pool());" << std::endl;
Expand Down Expand Up @@ -308,8 +301,6 @@ class HashAggregateKernel::Impl {
prepare_ss << "auto " << unsafe_row_name_validity << " = "
<< project_output_list[i].first.first << "_validity;" << std::endl;
} else {
codegen_ctx->header_codes.push_back(
R"(#include "third_party/row_wise_memory/unsafe_row.h")");
std::stringstream unsafe_row_define_ss;
unsafe_row_define_ss << "std::shared_ptr<UnsafeRow> " << unsafe_row_name
<< "_unsafe_row = std::make_shared<UnsafeRow>("
Expand Down Expand Up @@ -562,7 +553,7 @@ class HashAggregateKernel::Impl {
if (!result_expr_list_.empty()) {
codegen_ctx->gandiva_projector = std::make_shared<GandivaProjector>(
ctx_, arrow::schema(result_field_list_), GetGandivaKernel(result_expr_list_));
codegen_ctx->header_codes.push_back(R"(#include "precompile/gandiva_projector.h")");

finish_ss << "RETURN_NOT_OK(gandiva_projector_list_[gp_idx++]->Evaluate(&"
"do_hash_"
"aggr_finish_"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1312,12 +1312,7 @@ typedef )" + item_content_str +
GetListContentStr(multiple_cols, left_key_index_list.size());

return BaseCodes() + R"(
#include <numeric>
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "precompile/builder.h"
#include "precompile/gandiva.h"
using namespace sparkcolumnarplugin::precompile;
#include "precompile/wscgapi.hpp"
)" + hash_map_include_str +
R"(
class FVector {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1987,18 +1987,9 @@ class SortArraysCodegenKernel : public SortArraysToIndicesKernel::Impl {
GetCachedVariablesClear(key_typed_codegen_list);

return BaseCodes() + R"(
#include <arrow/buffer.h>
#include <algorithm>
#include <cmath>
#include "precompile/wscgapi.hpp"
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "codegen/common/sort_relation.h"
#include "precompile/builder.h"
#include "precompile/type.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
using namespace sparkcolumnarplugin::precompile;
class TypedSorterImpl : public CodeGenBase {
public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,7 @@ class WholeStageCodeGenKernel::Impl {
std::string out_list;
std::stringstream define_ss;
codes_ss << BaseCodes() << std::endl;
codes_ss << R"(#include "precompile/builder.h")" << std::endl;
codes_ss << R"(#include "utils/macros.h")" << std::endl;
codes_ss << R"(#include "precompile/wscgapi.hpp")" << std::endl;
std::vector<std::string> headers;
for (auto codegen_ctx : codegen_ctx_list) {
for (auto header : codegen_ctx->header_codes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,18 +215,7 @@ class WindowSortKernel::Impl {
std::string typed_res_array_str = GetTypedResArray(shuffle_typed_codegen_list.size());

return BaseCodes() + R"(
#include <arrow/array.h>
#include <arrow/buffer.h>
#include <arrow/builder.h>
#include <algorithm>
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "precompile/builder.h"
#include "precompile/type.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
using namespace sparkcolumnarplugin::precompile;
#include "precompile/wscgapi.hpp"
class TypedSorterImpl : public CodeGenBase {
public:
Expand Down
30 changes: 30 additions & 0 deletions native-sql-engine/cpp/src/precompile/wscgapi.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#pragma once

// Umbrella header for runtime-generated code.
//
// Generated sources include this single header ("precompile/wscgapi.hpp")
// instead of listing Arrow, standard-library and project headers one by one.
// CompileCodes checks for "wscgapi.hpp.gch" next to this file and, when it is
// missing, compiles this header on its own before compiling the generated
// source, so the header set below is parsed once rather than per generated
// file.
//
// Keep this list a superset of what any generator emits code against: a
// header dropped here is no longer reachable from generated sources that rely
// on this file alone.

// Arrow APIs used by generated kernels.
#include <arrow/buffer.h>
#include <arrow/compute/api.h>
#include <arrow/record_batch.h>

// Standard-library facilities referenced by generated code.
#include <algorithm>
#include <cmath>
#include <numeric>
#include <tuple>

// Project headers: codegen base classes and relations, precompiled
// array/builder/hash-map wrappers, and bundled third-party helpers.
#include "codegen/arrow_compute/ext/actions_impl.h"
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "codegen/arrow_compute/ext/code_generator_base.h"
#include "codegen/common/hash_relation.h"
#include "codegen/common/sort_relation.h"
#include "precompile/array.h"
#include "precompile/builder.h"
#include "precompile/gandiva.h"
#include "precompile/gandiva_projector.h"
#include "precompile/hash_map.h"
#include "precompile/sparse_hash_map.h"
#include "precompile/type.h"
#include "third_party/row_wise_memory/unsafe_row.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
#include "utils/macros.h"
// Header-scope using-directives are deliberate here: the generator templates
// refer to project types unqualified (e.g. `CodeGenBase`, `HashRelation`).
// NOTE(review): because of these directives this header is presumably meant
// only for generated translation units, not for inclusion from regular
// project headers -- confirm.
using namespace sparkcolumnarplugin::precompile;

using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;


0 comments on commit d9318f8

Please sign in to comment.