Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[DNM][NSE-126] Set up ccache to speed up codegen #127

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 22 additions & 24 deletions cpp/src/codegen/arrow_compute/ext/codegen_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -523,6 +523,7 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
mkdir(outpath.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
std::string prefix = "/spark-columnar-plugin-codegen-";
std::string cppfile = outpath + prefix + signature + ".cc";
std::string tmplibfile = outpath + prefix + signature + ".o";
std::string libfile = outpath + prefix + signature + ".so";
std::string jarfile = outpath + prefix + signature + ".jar";
std::string logfile = outpath + prefix + signature + ".log";
Expand All @@ -541,12 +542,15 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
out.flush();
out.close();


std::string libwscgfile = GetTempPath() + "/nativesql_include/precompile/libwscg.hpp";
// compile the code
const char* env_gcc_ = std::getenv("CC");
if (env_gcc_ == nullptr) {
env_gcc_ = "gcc";
}
std::string env_gcc = std::string(env_gcc_);
std::string env_ccache_prefix = "CCACHE_SLOPPINESS=pch_defines,file_macro,time_macros,include_file_mtime CCACHE_NOHASHDIR=1 CCACHE_BASEDIR=" + GetTempPath() + " ";

const char* env_arrow_dir = std::getenv("LIBARROW_DIR");
std::string arrow_header;
Expand All @@ -561,10 +565,24 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
arrow_lib2 = " -L" + std::string(env_arrow_dir) + "/lib ";
}
// compile the code
std::string cmd = env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
arrow_lib + arrow_lib2 + nativesql_header + nativesql_header_2 +
nativesql_lib + cppfile + " -o " + libfile +
" -O3 -march=native -shared -fPIC -lspark_columnar_jni 2> " + logfile;
std::string base_dir = GetTempPath();
chdir(base_dir.c_str());
std::string cmd = env_ccache_prefix + env_gcc +" -std=c++14 -Wno-deprecated-declarations " + arrow_header +
nativesql_header + nativesql_header_2 + " -c " +
libwscgfile + " -O3 -march=native -fPIC && ";;

cmd += env_ccache_prefix + env_gcc + " -std=c++14 -Wno-deprecated-declarations " + arrow_header +
nativesql_header + nativesql_header_2 + " -c " +
cppfile + " -o "+ tmplibfile + " -O3 -march=native -fPIC && ";

// linking
cmd += env_gcc + arrow_lib + arrow_lib2 + nativesql_lib +
tmplibfile + " -o " + libfile + " -lspark_columnar_jni -shared && ";

// package
cmd += "cd " + outpath + " && jar -cf spark-columnar-plugin-codegen-precompile-" +
signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so 2>" + logfile;

#ifdef DEBUG
std::cout << cmd << std::endl;
#endif
Expand All @@ -577,28 +595,8 @@ arrow::Status CompileCodes(std::string codes, std::string signature) {
if (WEXITSTATUS(ret) != EXIT_SUCCESS) {
std::cout << "compilation failed, see " << logfile << std::endl;
std::cout << cmd << std::endl;
/*cmd = "ls -R -l " + GetTempPath() + "; cat " + logfile;
system(cmd.c_str());*/
return arrow::Status::Invalid("compilation failed, see ", logfile);
// exit(EXIT_FAILURE);
}
cmd = "cd " + outpath + "; jar -cf spark-columnar-plugin-codegen-precompile-" +
signature + ".jar spark-columnar-plugin-codegen-" + signature + ".so";
#ifdef DEBUG
std::cout << cmd << std::endl;
#endif
ret = system(cmd.c_str());
if (WEXITSTATUS(ret) != EXIT_SUCCESS) {
exit(EXIT_FAILURE);
}

struct stat tstat;
ret = stat(libfile.c_str(), &tstat);
if (ret == -1) {
std::cout << "stat failed: " << strerror(errno) << std::endl;
exit(EXIT_FAILURE);
}

return arrow::Status::OK();
}

Expand Down
18 changes: 9 additions & 9 deletions cpp/src/codegen/arrow_compute/ext/merge_join_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1190,8 +1190,7 @@ typedef )" + item_content_str + " item_content;";
std::vector<std::string> content_tuple_types;

if (multiple_cols) {
list_tiem_str = R"(
#include <tuple>)";
list_tiem_str = "";

for (auto& key : left_key_index_list) {
tuple_types.push_back("std::shared_ptr<" + GetTypeString(left_field_list[key]->type(), "Array") + ">");
Expand Down Expand Up @@ -1291,13 +1290,14 @@ typedef )" + item_content_str + " item_content;";
auto make_list_str = GetListStr(multiple_cols, left_key_index_list.size());
auto make_list_content_str = GetListContentStr(multiple_cols, left_key_index_list.size());

return BaseCodes() + R"(
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "precompile/builder.h"
#include <numeric>
using namespace sparkcolumnarplugin::precompile;
)" + hash_map_include_str +
R"(
// return BaseCodes() + R"(
//#include "codegen/arrow_compute/ext/array_item_index.h"
//#include "precompile/builder.h"
//#include <numeric>
//using namespace sparkcolumnarplugin::precompile;
//)" + hash_map_include_str +
return R"(#include "precompile/libwscg.hpp"
)" + hash_map_include_str + R"(
class FVector {

public:
Expand Down
26 changes: 14 additions & 12 deletions cpp/src/codegen/arrow_compute/ext/sort_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,18 +281,20 @@ class SortArraysToIndicesKernel::Impl {
std::string cached_variables_define_str =
GetCachedVariablesDefine(key_typed_codegen_list);

return BaseCodes() + R"(
#include <arrow/buffer.h>

#include <algorithm>
#include <cmath>

#include "codegen/arrow_compute/ext/array_item_index.h"
#include "codegen/common/sort_relation.h"
#include "precompile/builder.h"
#include "precompile/type.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
// return BaseCodes() + R"(
//#include <arrow/buffer.h>
//
//#include <algorithm>
//#include <cmath>
//
//#include "codegen/arrow_compute/ext/array_item_index.h"
//#include "codegen/common/sort_relation.h"
//#include "precompile/builder.h"
//#include "precompile/type.h"
//#include "third_party/ska_sort.hpp"
//#include "third_party/timsort.hpp"
return R"(
#include "precompile/libwscg.hpp"
using namespace sparkcolumnarplugin::precompile;

class TypedSorterImpl : public CodeGenBase {
Expand Down
15 changes: 8 additions & 7 deletions cpp/src/codegen/arrow_compute/ext/whole_stage_codegen_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,10 @@ class WholeStageCodeGenKernel::Impl {
std::stringstream codes_ss;
std::string out_list;
std::stringstream define_ss;
codes_ss << BaseCodes() << std::endl;
codes_ss << R"(#include "precompile/builder.h")" << std::endl;
codes_ss << R"(#include "utils/macros.h")" << std::endl;
codes_ss << R"(#include "precompile/libwscg.hpp")" << std::endl;
//codes_ss << BaseCodes() << std::endl;
//codes_ss << R"(#include "precompile/builder.h")" << std::endl;
//codes_ss << R"(#include "utils/macros.h")" << std::endl;
std::vector<std::string> headers;
for (auto codegen_ctx : codegen_ctx_list) {
for (auto header : codegen_ctx->header_codes) {
Expand All @@ -308,9 +309,9 @@ class WholeStageCodeGenKernel::Impl {
if (codegen_ctx->gandiva_projector)
gandiva_projector_list_.push_back(codegen_ctx->gandiva_projector);
}
for (auto header : headers) {
codes_ss << header << std::endl;
}
//for (auto header : headers) {
// codes_ss << header << std::endl;
//}

if (is_aggr_) {
for (auto codegen_ctx : codegen_ctx_list) {
Expand Down Expand Up @@ -663,4 +664,4 @@ std::string WholeStageCodeGenKernel::GetSignature() { return impl_->GetSignature
} // namespace extra
} // namespace arrowcompute
} // namespace codegen
} // namespace sparkcolumnarplugin
} // namespace sparkcolumnarplugin
48 changes: 48 additions & 0 deletions cpp/src/precompile/libwscg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Umbrella header for runtime-generated kernel sources.
//
// The merge-join, sort and whole-stage code generators emit a single
// `#include "precompile/libwscg.hpp"` at the top of the generated source
// instead of emitting each include individually. CompileCodes also compiles
// this header on its own (`-c`) before compiling the generated source.
// NOTE(review): presumably that step produces a precompiled header that
// ccache can reuse across generated sources -- confirm.
#pragma once

#include <arrow/compute/context.h>
#include <arrow/record_batch.h>

#include "codegen/arrow_compute/ext/code_generator_base.h"
#include "precompile/array.h"
// NOTE(review): header-scope using-directive; it leaks into every includer.
// Presumably acceptable because only generated translation units include this
// header -- confirm before including it from hand-written code.
using namespace sparkcolumnarplugin::codegen::arrowcompute::extra;

// Project headers that the generators used to emit one by one (e.g. builder.h,
// macros.h, array_item_index.h, sort_relation.h).
// NOTE(review): the remaining entries presumably cover headers that used to be
// contributed through each codegen context's header_codes -- confirm the list
// is complete.
#include "precompile/builder.h"
#include "utils/macros.h"
#include "codegen/arrow_compute/ext/array_item_index.h"
#include "codegen/common/hash_relation.h"
#include "codegen/arrow_compute/ext/actions_impl.h"
#include "precompile/hash_map.h"
#include "precompile/sparse_hash_map.h"
#include "codegen/common/sort_relation.h"
#include "third_party/row_wise_memory/unsafe_row.h"

// type.h, ska_sort.hpp and timsort.hpp were previously emitted by the sort
// kernel generator.
#include "precompile/type.h"
#include "third_party/ska_sort.hpp"
#include "third_party/timsort.hpp"
#include "precompile/gandiva.h"

// Previously emitted by the sort kernel generator.
#include <arrow/buffer.h>

#include <algorithm>
#include <cmath>

// Previously emitted by the merge-join generator (<tuple> only for
// multi-column keys, <numeric> unconditionally).
#include <tuple>
#include <numeric>
// Generated sources used to emit this using-directive themselves; it is
// centralized here, with the same header-scope caveat as the one above.
using namespace sparkcolumnarplugin::precompile;