diff --git a/cpp/src/codegen/arrow_compute/ext/codegen_common.cc b/cpp/src/codegen/arrow_compute/ext/codegen_common.cc index 2985b14d4..1c4d0bb1d 100644 --- a/cpp/src/codegen/arrow_compute/ext/codegen_common.cc +++ b/cpp/src/codegen/arrow_compute/ext/codegen_common.cc @@ -523,6 +523,7 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { mkdir(outpath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); std::string prefix = "/spark-columnar-plugin-codegen-"; std::string cppfile = outpath + prefix + signature + ".cc"; + std::string tmplibfile = outpath + prefix + signature + ".o"; std::string libfile = outpath + prefix + signature + ".so"; std::string jarfile = outpath + prefix + signature + ".jar"; std::string logfile = outpath + prefix + signature + ".log"; @@ -541,12 +542,15 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { out.flush(); out.close(); + + std::string libwscgfile = GetTempPath() + "/nativesql_include/precompile/libwscg.hpp"; // compile the code const char* env_gcc_ = std::getenv("CC"); if (env_gcc_ == nullptr) { env_gcc_ = "gcc"; } std::string env_gcc = std::string(env_gcc_); + std::string env_ccache_prefix = "CCACHE_SLOPPINESS=pch_defines,file_macro,time_macros,include_file_mtime CCACHE_NOHASHDIR=1 CCACHE_BASEDIR=" + GetTempPath() + " "; const char* env_arrow_dir = std::getenv("LIBARROW_DIR"); std::string arrow_header; @@ -561,10 +565,24 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { arrow_lib2 = " -L" + std::string(env_arrow_dir) + "/lib "; } // compile the code - std::string cmd = env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header + - arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 + - nativesql_lib + cppfile + " -o " + libfile + - " -O3 -march=native -shared -fPIC -lspark_columnar_jni 2> " + logfile; + std::string base_dir = GetTempPath(); + chdir(base_dir.c_str()); + std::string cmd = env_ccache_prefix + env_gcc +" -std=c++14 -Wno-deprecated-declarations " + arrow_header + + nativesql_header + nativesql_header_2 + " -c " + + libwscgfile + " -O3 -march=native -fPIC && ";; + + cmd += env_ccache_prefix + env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header + + nativesql_header + nativesql_header_2 + " -c " + + cppfile + " -o "+ tmplibfile + " -O3 -march=native -fPIC && "; + + // linking + cmd += env_gcc + arrow_lib + arrow_lib2 + nativesql_lib + + tmplibfile + " -o " + libfile + " -lspark_columnar_jni -shared && "; + + // package + cmd += "cd " + outpath + " && jar -cf spark-columnar-plugin-codegen-precompile-" + + signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so 2>" + logfile; + #ifdef DEBUG std::cout << cmd << std::endl; #endif @@ -577,28 +595,8 @@ arrow::Status CompileCodes(std::string codes, std::string signature) { if (WEXITSTATUS(ret) != EXIT_SUCCESS) { std::cout << "compilation failed, see " << logfile << std::endl; std::cout << cmd << std::endl; - /*cmd = "ls -R -l " + GetTempPath() + "; cat " + logfile; - system(cmd.c_str());*/ return arrow::Status::Invalid("compilation failed, see ", logfile); - // exit(EXIT_FAILURE); } - cmd = "cd " + outpath + "; jar -cf spark-columnar-plugin-codegen-precompile-" + - signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so"; -#ifdef DEBUG - std::cout << cmd << std::endl; -#endif - ret = system(cmd.c_str()); - if (WEXITSTATUS(ret) != EXIT_SUCCESS) { - exit(EXIT_FAILURE); - } - - struct stat tstat; - ret = stat(libfile.c_str(), &tstat); - if (ret == -1) { - std::cout << "stat failed: " << strerror(errno) << std::endl; - exit(EXIT_FAILURE); - } - return arrow::Status::OK(); } diff --git a/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc b/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc index 7e8b4e46e..0f99dadfa 100644 --- a/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc +++ b/cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc @@ -1190,8 +1190,7 @@ typedef )" + item_content_str + " item_content;"; std::vector content_tuple_types; if (multiple_cols) { - list_tiem_str = R"( - #include )"; + list_tiem_str = ""; for (auto& key : left_key_index_list) { tuple_types.push_back("std::shared_ptr<" + GetTypeString(left_field_list[key]->type(), "Array") + ">"); @@ -1291,13 +1290,14 @@ typedef )" + item_content_str + " item_content;"; auto make_list_str = GetListStr(multiple_cols, left_key_index_list.size()); auto make_list_content_str = GetListContentStr(multiple_cols, left_key_index_list.size()); - return BaseCodes() + R"( -#include "codegen/arrow_compute/ext/array_item_index.h" -#include "precompile/builder.h" -#include -using namespace sparkcolumnarplugin::precompile; -)" + hash_map_include_str + - R"( +// return BaseCodes() + R"( +//#include "codegen/arrow_compute/ext/array_item_index.h" +//#include "precompile/builder.h" +//#include +//using namespace sparkcolumnarplugin::precompile; +//)" + hash_map_include_str + + return R"(#include "precompile/libwscg.hpp" +)" + hash_map_include_str + R"( class FVector { public: diff --git a/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc b/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc index ca8a6e30f..2edac5a16 100644 --- a/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc +++ b/cpp/src/codegen/arrow_compute/ext/sort_kernel.cc @@ -281,18 +281,20 @@ class SortArraysToIndicesKernel::Impl { std::string cached_variables_define_str = GetCachedVariablesDefine(key_typed_codegen_list); - return BaseCodes() + R"( -#include - -#include -#include - -#include "codegen/arrow_compute/ext/array_item_index.h" -#include "codegen/common/sort_relation.h" -#include "precompile/builder.h" -#include "precompile/type.h" -#include "third_party/ska_sort.hpp" -#include "third_party/timsort.hpp" +// return BaseCodes() + R"( +//#include +// +//#include +//#include +// +//#include "codegen/arrow_compute/ext/array_item_index.h" +//#include "codegen/common/sort_relation.h" +//#include "precompile/builder.h" +//#include "precompile/type.h" +//#include "third_party/ska_sort.hpp" +//#include "third_party/timsort.hpp" +return R"( +#include "precompile/libwscg.hpp" using namespace sparkcolumnarplugin::precompile; class TypedSorterImpl : public CodeGenBase { diff --git a/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc b/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc index b6a38a531..6066c2786 100644 --- a/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc +++ b/cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc @@ -295,9 +295,10 @@ class WholeStageCodeGenKernel::Impl { std::stringstream codes_ss; std::string out_list; std::stringstream define_ss; - codes_ss << BaseCodes() << std::endl; - codes_ss << R"(#include "precompile/builder.h")" << std::endl; - codes_ss << R"(#include "utils/macros.h")" << std::endl; + codes_ss << R"(#include "precompile/libwscg.hpp")" << std::endl; + //codes_ss << BaseCodes() << std::endl; + //codes_ss << R"(#include "precompile/builder.h")" << std::endl; + //codes_ss << R"(#include "utils/macros.h")" << std::endl; std::vector headers; for (auto codegen_ctx : codegen_ctx_list) { for (auto header : codegen_ctx->header_codes) { @@ -308,9 +309,9 @@ class WholeStageCodeGenKernel::Impl { if (codegen_ctx->gandiva_projector) gandiva_projector_list_.push_back(codegen_ctx->gandiva_projector); } - for (auto header : headers) { - codes_ss << header << std::endl; - } + //for (auto header : headers) { + // codes_ss << header << std::endl; + //} if (is_aggr_) { for (auto codegen_ctx : codegen_ctx_list) { @@ -663,4 +664,4 @@ std::string WholeStageCodeGenKernel::GetSignature() { return impl_->GetSignature } // namespace extra } // namespace arrowcompute } // namespace codegen -} // namespace sparkcolumnarplugin \ No newline at end of file +} // namespace sparkcolumnarplugin diff --git a/cpp/src/precompile/libwscg.hpp b/cpp/src/precompile/libwscg.hpp new file mode 100644 index 000000000..ecda06e7d --- /dev/null +++ b/cpp/src/precompile/libwscg.hpp @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include "codegen/arrow_compute/ext/code_generator_base.h" +#include "precompile/array.h" +using namespace sparkcolumnarplugin::codegen::arrowcompute::extra; + +#include "precompile/builder.h" +#include "utils/macros.h" +#include "codegen/arrow_compute/ext/array_item_index.h" +#include "codegen/common/hash_relation.h" +#include "codegen/arrow_compute/ext/actions_impl.h" +#include "precompile/hash_map.h" +#include "precompile/sparse_hash_map.h" +#include "codegen/common/sort_relation.h" +#include "third_party/row_wise_memory/unsafe_row.h" + +#include "precompile/type.h" +#include "third_party/ska_sort.hpp" +#include "third_party/timsort.hpp" +#include "precompile/gandiva.h" + +#include + +#include +#include + +#include +#include +using namespace sparkcolumnarplugin::precompile;