Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
using pch
Browse files Browse the repository at this point in the history
Signed-off-by: Yuan Zhou <[email protected]>
  • Loading branch information
zhouyuan committed Dec 17, 2021
1 parent f0655cc commit e0b21d4
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,7 @@ namespace extra {

std::string BaseCodes() {
return R"(
#include <arrow/compute/api.h>
#include <arrow/record_batch.h>
#include "codegen/arrow_compute/ext/code_generator_base.h"
#include "precompile/array.h"
using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;
)";
}

Expand Down Expand Up @@ -601,6 +596,7 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
mkdir(outpath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
std::string prefix = "/spark-columnar-plugin-codegen-";
std::string cppfile = outpath + prefix + signature + ".cc";
std::string objfile = outpath + prefix + signature + ".o";
std::string libfile = outpath + prefix + signature + ".so";
std::string jarfile = outpath + prefix + signature + ".jar";
std::string logfile = outpath + prefix + signature + ".log";
Expand All @@ -626,13 +622,15 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
}
std::string env_gcc = std::string(env_gcc_);

std::string env_codegen_option = " -O3 -march=native ";
char* env_codegen_option_ = std::getenv("CODEGEN_OPTION");

if (env_codegen_option_ == nullptr) {
env_codegen_option_ = " -O3 -march=native ";
if (env_codegen_option_ != nullptr) {
env_codegen_option = std::string(env_codegen_option_);
}
std::string env_codegen_option = std::string(env_codegen_option_);

std::string libwscgfile = GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp";
std::string libwscg_pch = GetTempPath() + "/nativesql_include/precompile/wscgapi.hpp.gch";
const char* env_arrow_dir = std::getenv("LIBARROW_DIR");
std::string arrow_header;
std::string arrow_lib, arrow_lib2;
Expand All @@ -646,14 +644,35 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
arrow_lib2 = " -L" + std::string(env_arrow_dir) + "/lib ";
}
// compile the code
std::string cmd = env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 +
nativesql_lib + cppfile + " -o " + libfile + env_codegen_option +
" -shared -fPIC -lspark_columnar_jni 2> " + logfile;
std::string base_dir = GetTempPath();
chdir(base_dir.c_str());
std::string cmd = "";
struct stat pch_stat;
auto ret = stat(libwscg_pch.c_str(), &pch_stat);
if (ret == -1) {
cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2
+ " -c " +
libwscgfile + env_codegen_option + " -fPIC && ";

}

cmd += env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
nativesql_header + nativesql_header_2 + " -c " +
cppfile + " -o "+ objfile + env_codegen_option + "-fPIC && ";
// linking
cmd += env_gcc + arrow_lib + arrow_lib2 + nativesql_lib +
objfile + " -o " + libfile + " -lspark_columnar_jni -shared && ";

// package
cmd += "cd " + outpath + " && jar -cf spark-columnar-plugin-codegen-precompile-" +
signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so 2>" + logfile;


#ifdef DEBUG
std::cout << cmd << std::endl;
#endif
int ret;

int elapse_time = 0;
TIME_MICRO(elapse_time, ret, system(cmd.c_str()));
#ifdef DEBUG
Expand All @@ -664,15 +683,6 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
std::cout << cmd << std::endl;
return arrow::Status::Invalid("compilation failed, see ", logfile);
}
cmd = "cd " + outpath + "; jar -cf spark-columnar-plugin-codegen-precompile-" +
signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so";
#ifdef DEBUG
std::cout << cmd << std::endl;
#endif
ret = system(cmd.c_str());
if (WEXITSTATUS(ret) != EXIT_SUCCESS) {
return arrow::Status::Invalid("package jar failed");
}

#ifdef DEBUG
struct stat tstat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,7 @@ class ConditionedMergeJoinKernel::Impl {
auto codegen_ctx = std::make_shared<CodeGenContext>();
bool use_relation_for_stream = input.empty();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");


std::vector<std::string> prepare_list;
bool cond_check = false;
Expand All @@ -119,7 +118,7 @@ class ConditionedMergeJoinKernel::Impl {
std::stringstream sort_define_ss;
std::vector<gandiva::FieldVector> field_list = {left_field_list_, right_field_list_};

codegen_ctx->header_codes.push_back(R"(#include "codegen/common/sort_relation.h")");

int idx = 0;
for (auto relation_id : relation_id_) {
auto relation_list_name = "sort_relation_" + std::to_string(relation_id) + "_";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,7 @@ class ConditionedProbeKernel::Impl {
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
auto codegen_ctx = std::make_shared<CodeGenContext>();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");


std::vector<std::string> prepare_list;
bool cond_check = false;
Expand All @@ -192,7 +191,7 @@ class ConditionedProbeKernel::Impl {
hash_prepare_ss << "RETURN_NOT_OK(typed_dependent_iter_list_" << hash_relation_id_
<< "->Next("
<< "&" << relation_list_name << "));" << std::endl;
codegen_ctx->header_codes.push_back(R"(#include "codegen/common/hash_relation.h")");


hash_define_ss << "std::shared_ptr<HashRelation> " << relation_list_name << ";"
<< std::endl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,7 @@ class HashAggregateKernel::Impl {
std::shared_ptr<CodeGenContext>* codegen_ctx_out, int* var_id) {
auto codegen_ctx = std::make_shared<CodeGenContext>();

codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/array_item_index.h")");
codegen_ctx->header_codes.push_back(
R"(#include "codegen/arrow_compute/ext/actions_impl.h")");


std::vector<std::string> prepare_list;
// 1.0 prepare aggregate input expressions
Expand All @@ -243,7 +240,7 @@ class HashAggregateKernel::Impl {
// 1. Get action list and action_prepare_project_list
if (key_node_list.size() > 0 &&
key_node_list[0]->return_type()->id() == arrow::Type::DECIMAL128) {
codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")");

aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< GetTypeString(key_node_list[0]->return_type(), "")
<< "HashMap>(ctx_->memory_pool());" << std::endl;
Expand All @@ -254,7 +251,7 @@ class HashAggregateKernel::Impl {
} else if (key_node_list.size() > 1 ||
(key_node_list.size() > 0 &&
key_node_list[0]->return_type()->id() == arrow::Type::STRING)) {
codegen_ctx->header_codes.push_back(R"(#include "precompile/hash_map.h")");

aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< GetTypeString(arrow::utf8(), "")
<< "HashMap>(ctx_->memory_pool());" << std::endl;
Expand All @@ -263,7 +260,7 @@ class HashAggregateKernel::Impl {

} else if (key_node_list.size() > 0) {
auto type = key_node_list[0]->return_type();
codegen_ctx->header_codes.push_back(R"(#include "precompile/sparse_hash_map.h")");

aggr_prepare_ss << "aggr_hash_table_" << level << " = std::make_shared<"
<< "SparseHashMap<" << GetCTypeString(type)
<< ">>(ctx_->memory_pool());" << std::endl;
Expand Down Expand Up @@ -308,8 +305,7 @@ class HashAggregateKernel::Impl {
prepare_ss << "auto " << unsafe_row_name_validity << " = "
<< project_output_list[i].first.first << "_validity;" << std::endl;
} else {
codegen_ctx->header_codes.push_back(
R"(#include "third_party/row_wise_memory/unsafe_row.h")");

std::stringstream unsafe_row_define_ss;
unsafe_row_define_ss << "std::shared_ptr<UnsafeRow> " << unsafe_row_name
<< "_unsafe_row = std::make_shared<UnsafeRow>("
Expand Down Expand Up @@ -562,7 +558,7 @@ class HashAggregateKernel::Impl {
if (!result_expr_list_.empty()) {
codegen_ctx->gandiva_projector = std::make_shared<GandivaProjector>(
ctx_, arrow::schema(result_field_list_), GetGandivaKernel(result_expr_list_));
codegen_ctx->header_codes.push_back(R"(#include "precompile/gandiva_projector.h")");

finish_ss << "RETURN_NOT_OK(gandiva_projector_list_[gp_idx++]->Evaluate(&"
"do_hash_"
"aggr_finish_"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1987,18 +1987,9 @@ class SortArraysCodegenKernel : public SortArraysToIndicesKernel::Impl {
GetCachedVariablesClear(key_typed_codegen_list);

return BaseCodes() + R"(
#include <arrow/buffer.h>
#include <algorithm>
#include <cmath>
#include "precompile/wscgapi.hpp"
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "codegen/common/sort_relation.h"
#include "precompile/builder.h"
#include "precompile/type.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
using namespace sparkcolumnarplugin::precompile;
class TypedSorterImpl : public CodeGenBase {
public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,7 @@ class WholeStageCodeGenKernel::Impl {
std::string out_list;
std::stringstream define_ss;
codes_ss << BaseCodes() << std::endl;
codes_ss << R"(#include "precompile/builder.h")" << std::endl;
codes_ss << R"(#include "utils/macros.h")" << std::endl;
codes_ss << R"(#include "precompile/wscgapi.hpp")" << std::endl;
std::vector<std::string> headers;
for (auto codegen_ctx : codegen_ctx_list) {
for (auto header : codegen_ctx->header_codes) {
Expand Down
33 changes: 33 additions & 0 deletions native-sql-engine/cpp/src/precompile/wscgapi.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once

// Umbrella header for generated whole-stage-codegen sources: it gathers the
// Arrow, project and third-party headers in one place so that generated code
// only needs to include "precompile/wscgapi.hpp".
// NOTE(review): CompileCodes appears to build a "wscgapi.hpp.gch" precompiled
// header from this file when none exists yet -- confirm before reordering or
// adding conditional includes, since that would invalidate a cached .gch.

// Arrow public API.
#include <arrow/compute/api.h>
#include <arrow/record_batch.h>

// Base class and array wrappers used by generated kernels.
#include "codegen/arrow_compute/ext/code_generator_base.h"
#include "precompile/array.h"

// Builders, macros, relations, aggregate actions, hash maps and row helpers.
#include "precompile/builder.h"
#include "utils/macros.h"
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "codegen/common/hash_relation.h"
#include "codegen/arrow_compute/ext/actions_impl.h"
#include "precompile/hash_map.h"
#include "precompile/sparse_hash_map.h"
#include "codegen/common/sort_relation.h"
#include "third_party/row_wise_memory/unsafe_row.h"

// Type helpers and sorting back-ends.
#include "precompile/type.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
// NOTE(review): the aggregate kernel used to emit
// "precompile/gandiva_projector.h"; confirm "precompile/gandiva.h" provides
// the same declarations.
#include "precompile/gandiva.h"

#include <arrow/buffer.h>

#include <algorithm>
#include <cmath>

#include <tuple>
#include <numeric>
// NOTE(review): using-directives at header scope leak these namespaces into
// every includer. This looks intentional, since generated sources rely on
// unqualified names, but keep this header out of hand-written code.
using namespace sparkcolumnarplugin::precompile;

using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;

0 comments on commit e0b21d4

Please sign in to comment.