diff --git a/cpp/src/gandiva/codegen/CMakeLists.txt b/cpp/src/gandiva/codegen/CMakeLists.txt index c0166a26c1591..a5164c7b97dcd 100644 --- a/cpp/src/gandiva/codegen/CMakeLists.txt +++ b/cpp/src/gandiva/codegen/CMakeLists.txt @@ -31,6 +31,7 @@ add_library(gandiva SHARED llvm_types.cc node.cc tree_expr_builder.cc + status.cc ${BC_FILE_PATH_CC}) # For users of gandiva library (including integ tests), include-dir is : @@ -85,12 +86,13 @@ install( #args: label test-file src-files add_gandiva_unit_test(dex_llvm_test.cc) -add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc ${BC_FILE_PATH_CC}) +add_gandiva_unit_test(engine_llvm_test.cc engine.cc llvm_types.cc status.cc ${BC_FILE_PATH_CC}) add_gandiva_unit_test(function_signature_test.cc) add_gandiva_unit_test(function_registry_test.cc function_registry.cc) add_gandiva_unit_test(llvm_types_test.cc llvm_types.cc) -add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc function_registry.cc annotator.cc ${BC_FILE_PATH_CC}) +add_gandiva_unit_test(llvm_generator_test.cc llvm_generator.cc engine.cc llvm_types.cc function_registry.cc annotator.cc status.cc ${BC_FILE_PATH_CC}) add_gandiva_unit_test(annotator_test.cc annotator.cc) add_gandiva_unit_test(tree_expr_test.cc tree_expr_builder.cc node.cc annotator.cc function_registry.cc) +add_gandiva_unit_test(status_test.cc status.cc) add_gandiva_integ_test(evaluator_test.cc) diff --git a/cpp/src/gandiva/codegen/codegen_exception.h b/cpp/src/gandiva/codegen/codegen_exception.h deleted file mode 100644 index abdafc47f1f09..0000000000000 --- a/cpp/src/gandiva/codegen/codegen_exception.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2017-2018 Dremio Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef GANDIVA_CODEGEN_EXCEPTION_H -#define GANDIVA_CODEGEN_EXCEPTION_H - -#include -#include - -namespace gandiva { - -/* - * Exceptions from LLVMGenerator. - */ -class CodeGenException : public std::runtime_error { - public: - explicit CodeGenException(const std::string &msg) - : std::runtime_error(msg) {} -}; - -#endif // GANDIVA_CODEGEN_EXCEPTION_H - -} // namespace gandiva diff --git a/cpp/src/gandiva/codegen/engine.cc b/cpp/src/gandiva/codegen/engine.cc index 03e002c427b9b..684f72b0b8d8f 100644 --- a/cpp/src/gandiva/codegen/engine.cc +++ b/cpp/src/gandiva/codegen/engine.cc @@ -34,7 +34,6 @@ #include #include #include -#include "codegen/codegen_exception.h" #include "codegen/engine.h" namespace gandiva { @@ -58,33 +57,39 @@ void Engine::InitOnce() { init_once_done_ = true; } -Engine::Engine() - : module_finalized_(false) { - std::call_once(init_once_flag, InitOnce); - context_.reset(new llvm::LLVMContext()); - ir_builder_.reset(new llvm::IRBuilder<>(*context())); +/// factory method to construct the engine. 
+Status Engine::Make(std::unique_ptr *engine) { + std::unique_ptr engine_obj(new Engine()); + + std::call_once(init_once_flag, [&engine_obj] {engine_obj->InitOnce();}); + engine_obj->context_.reset(new llvm::LLVMContext()); + engine_obj->ir_builder_.reset(new llvm::IRBuilder<>(*(engine_obj->context()))); /* Create the execution engine */ - std::unique_ptr cg_module(new llvm::Module("codegen", *context())); - module_ = cg_module.get(); + std::unique_ptr cg_module(new llvm::Module("codegen", + *(engine_obj->context()))); + engine_obj->module_ = cg_module.get(); llvm::EngineBuilder engineBuilder(std::move(cg_module)); engineBuilder.setEngineKind(llvm::EngineKind::JIT); engineBuilder.setOptLevel(llvm::CodeGenOpt::Aggressive); - engineBuilder.setErrorStr(&llvm_error_); - execution_engine_.reset(engineBuilder.create()); - if (execution_engine_ == NULL) { - module_ = NULL; - throw CodeGenException(llvm_error_); + engineBuilder.setErrorStr(&(engine_obj->llvm_error_)); + engine_obj->execution_engine_.reset(engineBuilder.create()); + if (engine_obj->execution_engine_ == NULL) { + engine_obj->module_ = NULL; + return Status::CodeGenError(engine_obj->llvm_error_); } - LoadPreCompiledIRFiles(); + Status result = engine_obj->LoadPreCompiledIRFiles(); + GANDIVA_RETURN_NOT_OK(result); + *engine = std::move(engine_obj); + return Status::OK(); } /* * Handling for pre-compiled IR libraries. */ -void Engine::LoadPreCompiledIRFiles() { +Status Engine::LoadPreCompiledIRFiles() { /// Read from file into memory buffer. 
llvm::ErrorOr> buffer_or_error = llvm::MemoryBuffer::getFile(kByteCodeFilePath); @@ -92,7 +97,7 @@ void Engine::LoadPreCompiledIRFiles() { std::stringstream ss; ss << "Could not load module from IR " << kByteCodeFilePath << ": " << buffer_or_error.getError().message(); - throw CodeGenException(ss.str()); + return Status::CodeGenError(ss.str()); } std::unique_ptr buffer = move(buffer_or_error.get()); @@ -104,26 +109,26 @@ void Engine::LoadPreCompiledIRFiles() { llvm::handleAllErrors(module_or_error.takeError(), [&](llvm::ErrorInfoBase &eib) { error_string = eib.message(); }); - throw CodeGenException(error_string); + return Status::CodeGenError(error_string); } std::unique_ptr ir_module = move(module_or_error.get()); /// Verify the IR module if (llvm::verifyModule(*ir_module.get(), &llvm::errs())) { - throw CodeGenException("verify of IR Module failed"); + return Status::CodeGenError("verify of IR Module failed"); } // Link this to the primary module. if (llvm::Linker::linkModules(*module_, move(ir_module))) { - throw CodeGenException("failed to link IR Modules"); + return Status::CodeGenError("failed to link IR Modules"); } + return Status::OK(); } /* * Optimise and compile the module. 
*/ -void -Engine::FinalizeModule(bool optimise_ir, bool dump_ir) { +Status Engine::FinalizeModule(bool optimise_ir, bool dump_ir) { if (dump_ir) { DumpIR("Before optimise"); } @@ -178,16 +183,17 @@ Engine::FinalizeModule(bool optimise_ir, bool dump_ir) { } if (llvm::verifyModule(*module_, &llvm::errs())) { - throw CodeGenException("verify of module failed after optimisation passes"); + return Status::CodeGenError("verify of module failed after optimisation passes"); } // do the compilation execution_engine_->finalizeObject(); module_finalized_ = true; + return Status::OK(); } void *Engine::CompiledFunction(llvm::Function *irFunction) { - assert(module_finalized_); + DCHECK(module_finalized_); return execution_engine_->getPointerToFunction(irFunction); } diff --git a/cpp/src/gandiva/codegen/engine.h b/cpp/src/gandiva/codegen/engine.h index a31dde3d9f8e0..ee0cc7ec8870d 100644 --- a/cpp/src/gandiva/codegen/engine.h +++ b/cpp/src/gandiva/codegen/engine.h @@ -24,19 +24,24 @@ #include #include #include "gandiva/logging.h" +#include "gandiva/status.h" namespace gandiva { /// \brief LLVM Execution engine wrapper. class Engine { public: - Engine(); - llvm::LLVMContext *context() { return context_.get(); } llvm::IRBuilder<> &ir_builder() { return *ir_builder_.get(); } llvm::Module *module() { return module_; } + /// factory method to create and initialize the engine + /// object. + /// + /// @param engine (out) : the created engine. + static Status Make(std::unique_ptr *engine); + /// Add the function to the list of IR functions that need to be compiled. /// Compiling only the functions that are used by the module saves time. void AddFunctionToCompile(const std::string &fname) { @@ -45,29 +50,33 @@ class Engine { } /// Optimise and compile the module. - void FinalizeModule(bool optimise_ir, bool dump_ir); + Status FinalizeModule(bool optimise_ir, bool dump_ir); /// Get the compiled function corresponding to the irfunction. 
void *CompiledFunction(llvm::Function *irFunction); private: - // do one time inits. + /// private constructor to ensure engine is created + /// only through the factory. + Engine() : module_finalized_(false) {} + + /// do one time inits. static void InitOnce(); static bool init_once_done_; llvm::ExecutionEngine &execution_engine() { return *execution_engine_.get(); } - // load pre-compiled modules and merge them into the main module. - void LoadPreCompiledIRFiles(); + /// load pre-compiled modules and merge them into the main module. + Status LoadPreCompiledIRFiles(); - // dump the IR code to stdout with the prefix string. + /// dump the IR code to stdout with the prefix string. void DumpIR(std::string prefix); std::unique_ptr context_; std::unique_ptr execution_engine_; std::unique_ptr> ir_builder_; - llvm::Module *module_; // This is owned by the execution_engine_, so doesn't need to be - // explicitly deleted. + llvm::Module *module_; /// This is owned by the execution_engine_, so doesn't need to be + /// explicitly deleted. 
std::vector functions_to_compile_; diff --git a/cpp/src/gandiva/codegen/engine_llvm_test.cc b/cpp/src/gandiva/codegen/engine_llvm_test.cc index dccaad75ba6a8..6d8234dfe17c6 100644 --- a/cpp/src/gandiva/codegen/engine_llvm_test.cc +++ b/cpp/src/gandiva/codegen/engine_llvm_test.cc @@ -17,7 +17,6 @@ #include #include "codegen/engine.h" #include "codegen/llvm_types.h" -#include "codegen/codegen_exception.h" namespace gandiva { @@ -103,26 +102,28 @@ llvm::Function *TestEngine::BuildVecAdd(Engine *engine, LLVMTypes *types) { } TEST_F(TestEngine, TestAddUnoptimised) { - Engine engine; - LLVMTypes types(*engine.context()); - llvm::Function *ir_func = BuildVecAdd(&engine, &types); - engine.FinalizeModule(false, false); + std::unique_ptr engine; + ASSERT_TRUE(Engine::Make(&engine).ok()); + LLVMTypes types(*engine->context()); + llvm::Function *ir_func = BuildVecAdd(engine.get(), &types); + ASSERT_TRUE(engine->FinalizeModule(false, false).ok()); add_vector_func_t add_func = - reinterpret_cast(engine.CompiledFunction(ir_func)); + reinterpret_cast(engine->CompiledFunction(ir_func)); int64_t my_array[] = {1, 3, -5, 8, 10}; EXPECT_EQ(add_func(my_array, 5), 17); } TEST_F(TestEngine, TestAddOptimised) { - Engine engine; - LLVMTypes types(*engine.context()); - llvm::Function *ir_func = BuildVecAdd(&engine, &types); - engine.FinalizeModule(true, false); + std::unique_ptr engine; + ASSERT_TRUE(Engine::Make(&engine).ok()); + LLVMTypes types(*engine->context()); + llvm::Function *ir_func = BuildVecAdd(engine.get(), &types); + ASSERT_TRUE(engine->FinalizeModule(true, false).ok()); add_vector_func_t add_func = - reinterpret_cast(engine.CompiledFunction(ir_func)); + reinterpret_cast(engine->CompiledFunction(ir_func)); int64_t my_array[] = {1, 3, -5, 8, 10}; EXPECT_EQ(add_func(my_array, 5), 17); diff --git a/cpp/src/gandiva/codegen/evaluator.cc b/cpp/src/gandiva/codegen/evaluator.cc index 753e9b49c5094..fc4a1c49d152e 100644 --- a/cpp/src/gandiva/codegen/evaluator.cc +++ b/cpp/src/gandiva/codegen/evaluator.cc @@ -31,15 +31,17 @@ 
Evaluator::Evaluator(std::unique_ptr llvm_generator, output_fields_(output_fields), pool_(pool) {} -// TODO : exceptions -std::shared_ptr Evaluator::Make(SchemaPtr schema, - const ExpressionVector &exprs, - arrow::MemoryPool *pool) { +Status Evaluator::Make(SchemaPtr schema, + const ExpressionVector &exprs, + arrow::MemoryPool *pool, + std::shared_ptr *evaluator) { // TODO: validate schema // TODO : validate expressions (fields, function signatures, output types, ..) // Build LLVM generator, and generate code for the specified expressions - std::unique_ptr llvm_gen(new LLVMGenerator()); + std::unique_ptr llvm_gen; + Status status = LLVMGenerator::Make(&llvm_gen); + GANDIVA_RETURN_NOT_OK(status); - llvm_gen->Build(exprs); + GANDIVA_RETURN_NOT_OK(llvm_gen->Build(exprs)); // save the output field types. Used for validation at Evaluate() time. @@ -49,10 +51,11 @@ std::shared_ptr Evaluator::Make(SchemaPtr schema, } // Instantiate the evaluator with the completely built llvm generator - return std::shared_ptr(new Evaluator(std::move(llvm_gen), - schema, - output_fields, - pool)); + *evaluator = std::shared_ptr(new Evaluator(std::move(llvm_gen), + schema, + output_fields, + pool)); + return Status::OK(); } arrow::ArrayVector Evaluator::Evaluate(const arrow::RecordBatch &batch) { diff --git a/cpp/src/gandiva/codegen/evaluator.h b/cpp/src/gandiva/codegen/evaluator.h index a97bb4dad63ff..3f1b410af0303 100644 --- a/cpp/src/gandiva/codegen/evaluator.h +++ b/cpp/src/gandiva/codegen/evaluator.h @@ -21,6 +21,7 @@ #include #include "gandiva/arrow.h" #include "gandiva/expression.h" +#include "gandiva/status.h" namespace gandiva { @@ -33,9 +34,10 @@ class LLVMGenerator; class Evaluator { public: /// Build an evaluator for the given schema to evaluate the vector of expressions. 
- static std::shared_ptr Make(SchemaPtr schema, - const ExpressionVector &exprs, - arrow::MemoryPool *pool); + static Status Make(SchemaPtr schema, + const ExpressionVector &exprs, + arrow::MemoryPool *pool, + std::shared_ptr *evaluator); /// Evaluate the specified record batch, and fill the output vectors. /// TODO : need a zero-copy variant if the caller can alloc the output vectors. diff --git a/cpp/src/gandiva/codegen/evaluator_test.cc b/cpp/src/gandiva/codegen/evaluator_test.cc index 193ca0c8c6887..8c8311ce29135 100644 --- a/cpp/src/gandiva/codegen/evaluator_test.cc +++ b/cpp/src/gandiva/codegen/evaluator_test.cc @@ -27,7 +27,6 @@ namespace gandiva { using arrow::int32; using arrow::float32; using arrow::boolean; -using arrow::Status; class TestEvaluator : public ::testing::Test { public: @@ -72,10 +71,9 @@ TEST_F(TestEvaluator, TestIntSumSub) { auto sub_expr = TreeExprBuilder::MakeExpression("subtract", {field0, field1}, field_sub); - /* - * Build an evaluator for the expressions. - */ - auto evaluator = Evaluator::Make(schema, {sum_expr, sub_expr}, pool_); + std::shared_ptr evaluator; + Status status = Evaluator::Make(schema, {sum_expr, sub_expr}, pool_, &evaluator); + EXPECT_TRUE(status.ok()); /* Create a row-batch with some sample data */ int num_records = 4; @@ -118,7 +116,10 @@ TEST_F(TestEvaluator, TestFloatLessThan) { /* * Build an evaluator for the expressions. */ - auto evaluator = Evaluator::Make(schema, {lt_expr}, pool_); + std::shared_ptr evaluator; + Status status = Evaluator::Make(schema, {lt_expr}, pool_, &evaluator); + EXPECT_TRUE(status.ok()); + /* Create a row-batch with some sample data */ int num_records = 3; @@ -157,7 +158,9 @@ TEST_F(TestEvaluator, TestIsNotNull) { /* * Build an evaluator for the expressions. 
*/ - auto evaluator = Evaluator::Make(schema, {myexpr}, pool_); + std::shared_ptr evaluator; + Status status = Evaluator::Make(schema, {myexpr}, pool_, &evaluator); + EXPECT_TRUE(status.ok()); /* Create a row-batch with some sample data */ int num_records = 3; diff --git a/cpp/src/gandiva/codegen/llvm_generator.cc b/cpp/src/gandiva/codegen/llvm_generator.cc index c160718b8080e..901d85a8a5773 100644 --- a/cpp/src/gandiva/codegen/llvm_generator.cc +++ b/cpp/src/gandiva/codegen/llvm_generator.cc @@ -17,8 +17,8 @@ #include #include #include +#include #include "gandiva/expression.h" -#include "codegen/codegen_exception.h" #include "codegen/dex.h" #include "codegen/function_registry.h" #include "codegen/llvm_generator.h" @@ -26,21 +26,29 @@ namespace gandiva { -LLVMGenerator::LLVMGenerator() - : engine_(new Engine()), - types_(*engine_->context()), - in_replay_(false), - optimise_ir_(true), - enable_ir_traces_(false) {} +LLVMGenerator::LLVMGenerator() : + in_replay_(false), + optimise_ir_(true), + enable_ir_traces_(false) {} + +Status LLVMGenerator::Make(std::unique_ptr *llvm_generator) { + std::unique_ptr llvmgen_obj(new LLVMGenerator()); + Status status = Engine::Make(&(llvmgen_obj->engine_)); + GANDIVA_RETURN_NOT_OK(status); + llvmgen_obj->types_ = new LLVMTypes(*(llvmgen_obj->engine_)->context()); + *llvm_generator = std::move(llvmgen_obj); + return Status::OK(); +} LLVMGenerator::~LLVMGenerator() { for (auto it = compiled_exprs_.begin(); it != compiled_exprs_.end(); ++it) { delete *it; } + delete types_; } -void LLVMGenerator::Add(const ExpressionPtr expr, - const FieldDescriptorPtr output) { +Status LLVMGenerator::Add(const ExpressionPtr expr, + const FieldDescriptorPtr output) { int idx = compiled_exprs_.size(); // decompose the expression to separate out value and validities. @@ -48,18 +56,23 @@ void LLVMGenerator::Add(const ExpressionPtr expr, annotator_); // Generate the IR function for the decomposed expression. 
- llvm::Function *ir_function = CodeGenExprValue(value_validity->value_expr(), - output, - idx); + llvm::Function *ir_function = nullptr; + + Status status = CodeGenExprValue(value_validity->value_expr(), + output, + idx, + &ir_function); + GANDIVA_RETURN_NOT_OK(status); CompiledExpr *compiled_expr = new CompiledExpr(value_validity, output, ir_function); compiled_exprs_.push_back(compiled_expr); + return Status::OK(); } /* * Build and optimise module for projection expression. */ -void LLVMGenerator::Build(const ExpressionVector &exprs) { +Status LLVMGenerator::Build(const ExpressionVector &exprs) { for (auto it = exprs.begin(); it != exprs.end(); it++) { ExpressionPtr expr = *it; @@ -68,7 +81,8 @@ void LLVMGenerator::Build(const ExpressionVector &exprs) { } // optimise, compile and finalize the module - engine_->FinalizeModule(optimise_ir_, in_replay_); + Status result = engine_->FinalizeModule(optimise_ir_, in_replay_); + GANDIVA_RETURN_NOT_OK(result); // setup the jit functions for each expression. for (auto it = compiled_exprs_.begin(); it != compiled_exprs_.end(); it++) { @@ -77,12 +91,13 @@ void LLVMGenerator::Build(const ExpressionVector &exprs) { EvalFunc fn = reinterpret_cast(engine_->CompiledFunction(ir_func)); compiled_expr->set_jit_function(fn); } + return Status::OK(); } /* * Execute the compiled module against the provided vectors. 
*/ -int LLVMGenerator::Execute(const arrow::RecordBatch &record_batch, +Status LLVMGenerator::Execute(const arrow::RecordBatch &record_batch, const arrow::ArrayVector &outputs) { DCHECK_GT(record_batch.num_rows(), 0); @@ -101,7 +116,7 @@ int LLVMGenerator::Execute(const arrow::RecordBatch &record_batch, ComputeBitMapsForExpr(compiled_expr, eval_batch->buffers(), eval_batch->num_buffers(), record_batch.num_rows()); } - return 0; + return Status::OK(); } llvm::Value *LLVMGenerator::LoadVectorAtIndex(llvm::Value *arg_addrs, @@ -109,7 +124,7 @@ llvm::Value *LLVMGenerator::LoadVectorAtIndex(llvm::Value *arg_addrs, const std::string &name) { llvm::IRBuilder<> &builder = ir_builder(); llvm::Value *offset = builder.CreateGEP(arg_addrs, - types_.i32_constant(idx), + types_->i32_constant(idx), name + "_mem_addr"); return builder.CreateLoad(offset, name + "_mem"); } @@ -122,7 +137,7 @@ llvm::Value *LLVMGenerator::GetValidityReference(llvm::Value *arg_addrs, FieldPtr field) { const std::string &name = field->name(); llvm::Value *load = LoadVectorAtIndex(arg_addrs, idx, name); - return ir_builder().CreateIntToPtr(load, types_.i64_ptr_type(), name + "_varray"); + return ir_builder().CreateIntToPtr(load, types_->i64_ptr_type(), name + "_varray"); } /* @@ -133,8 +148,8 @@ llvm::Value *LLVMGenerator::GetDataReference(llvm::Value *arg_addrs, FieldPtr field) { const std::string &name = field->name(); llvm::Value *load = LoadVectorAtIndex(arg_addrs, idx, name); - llvm::Type *base_type = types_.DataVecType(field->type()); - llvm::Type *pointer_type = types_.ptr_type(base_type); + llvm::Type *base_type = types_->DataVecType(field->type()); + llvm::Type *pointer_type = types_->ptr_type(base_type); return ir_builder().CreateIntToPtr(load, pointer_type, name + "_darray"); } @@ -189,31 +204,32 @@ llvm::Value *LLVMGenerator::GetDataReference(llvm::Value *arg_addrs, * } * */ -llvm::Function *LLVMGenerator::CodeGenExprValue(DexPtr value_expr, - FieldDescriptorPtr output, - int suffix_idx) { 
+Status LLVMGenerator::CodeGenExprValue(DexPtr value_expr, + FieldDescriptorPtr output, + int suffix_idx, + llvm::Function **fn) { llvm::IRBuilder<> &builder = ir_builder(); // Create fn prototype : // int expr_1 (long **addrs, int nrec) std::vector arguments; - arguments.push_back(types_.i64_ptr_type()); - arguments.push_back(types_.i32_type()); - llvm::FunctionType *prototype = llvm::FunctionType::get(types_.i32_type(), + arguments.push_back(types_->i64_ptr_type()); + arguments.push_back(types_->i32_type()); + llvm::FunctionType *prototype = llvm::FunctionType::get(types_->i32_type(), arguments, false /*isVarArg*/); // Create fn std::string func_name = "expr_" + std::to_string(suffix_idx); engine_->AddFunctionToCompile(func_name); - llvm::Function *fn = llvm::Function::Create(prototype, - llvm::GlobalValue::ExternalLinkage, - func_name, - module()); - assert(fn != NULL); - + *fn = llvm::Function::Create(prototype, + llvm::GlobalValue::ExternalLinkage, + func_name, + module()); + GANDIVA_RETURN_FAILURE_IF_FALSE((*fn != NULL), + Status::CodeGenError("Error creating function.")); // Name the arguments - llvm::Function::arg_iterator args = fn->arg_begin(); + llvm::Function::arg_iterator args = (*fn)->arg_begin(); llvm::Value *arg_addrs = &*args; arg_addrs->setName("args"); ++args; @@ -221,9 +237,9 @@ llvm::Function *LLVMGenerator::CodeGenExprValue(DexPtr value_expr, arg_nrecords->setName("nrecords"); ++args; - llvm::BasicBlock *loop_entry = llvm::BasicBlock::Create(context(), "entry", fn); - llvm::BasicBlock *loop_body = llvm::BasicBlock::Create(context(), "loop", fn); - llvm::BasicBlock *loop_exit = llvm::BasicBlock::Create(context(), "exit", fn); + llvm::BasicBlock *loop_entry = llvm::BasicBlock::Create(context(), "entry", *fn); + llvm::BasicBlock *loop_body = llvm::BasicBlock::Create(context(), "loop", *fn); + llvm::BasicBlock *loop_exit = llvm::BasicBlock::Create(context(), "exit", *fn); // Add reference to output vector (in entry block) 
builder.SetInsertPoint(loop_entry); @@ -235,15 +251,15 @@ llvm::Function *LLVMGenerator::CodeGenExprValue(DexPtr value_expr, builder.SetInsertPoint(loop_body); // define loop_var : start with 0, +1 after each iter - llvm::PHINode *loop_var = builder.CreatePHI(types_.i32_type(), 2, "loop_var"); - loop_var->addIncoming(types_.i32_constant(0), loop_entry); + llvm::PHINode *loop_var = builder.CreatePHI(types_->i32_type(), 2, "loop_var"); + loop_var->addIncoming(types_->i32_constant(0), loop_entry); llvm::Value *loop_update = builder.CreateAdd(loop_var, - types_.i32_constant(1), + types_->i32_constant(1), "loop_var+1"); loop_var->addIncoming(loop_update, loop_body); // The visitor can add code to both the entry/loop blocks. - Visitor visitor(this, fn, loop_entry, loop_body, arg_addrs, loop_var); + Visitor visitor(this, *fn, loop_entry, loop_body, arg_addrs, loop_var); value_expr->Accept(&visitor); LValuePtr output_value = visitor.result(); @@ -270,8 +286,8 @@ llvm::Function *LLVMGenerator::CodeGenExprValue(DexPtr value_expr, // Loop exit builder.SetInsertPoint(loop_exit); - builder.CreateRet(types_.i32_constant(0)); - return fn; + builder.CreateRet(types_->i32_constant(0)); + return Status::OK(); } /* @@ -282,9 +298,9 @@ llvm::Value *LLVMGenerator::GetPackedBitValue(llvm::Value *bitmap, AddTrace("fetch bit at position %T", position); llvm::Value *bitmap8 = ir_builder().CreateBitCast(bitmap, - types_.ptr_type(types_.i8_type()), + types_->ptr_type(types_->i8_type()), "bitMapCast"); - return AddFunctionCall("bitMapGetBit", types_.i1_type(), {bitmap8, position}); + return AddFunctionCall("bitMapGetBit", types_->i1_type(), {bitmap8, position}); } /* @@ -297,9 +313,9 @@ void LLVMGenerator::SetPackedBitValue(llvm::Value *bitmap, AddTrace(" to value %T ", value); llvm::Value *bitmap8 = ir_builder().CreateBitCast(bitmap, - types_.ptr_type(types_.i8_type()), + types_->ptr_type(types_->i8_type()), "bitMapCast"); - AddFunctionCall("bitMapSetBit", types_.void_type(), {bitmap8, 
position, value}); + AddFunctionCall("bitMapSetBit", types_->void_type(), {bitmap8, position, value}); } /* @@ -387,7 +403,7 @@ llvm::Value *LLVMGenerator::AddFunctionCall(const std::string &full_name, // find the llvm function. llvm::Function *fn = module()->getFunction(full_name); - assert(fn != NULL); + DCHECK(fn != NULL); if (enable_ir_traces_ && full_name.compare("printf") && @@ -402,7 +418,7 @@ llvm::Value *LLVMGenerator::AddFunctionCall(const std::string &full_name, return ir_builder().CreateCall(fn, args); } else { llvm::Value *value = ir_builder().CreateCall(fn, args, full_name); - assert(value->getType() == ret_type); + DCHECK(value->getType() == ret_type); return value; } } @@ -464,7 +480,7 @@ void LLVMGenerator::Visitor::Visit(const LiteralDex &dex) { void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex &dex) { AddTrace("visit NonNullableFunc base function " + dex.func_descriptor()->name()); - LLVMTypes &types = generator_->types_; + LLVMTypes *types = generator_->types_; // build the function params. std::vector args; @@ -477,7 +493,7 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex &dex) { } const NativeFunction *native_function = dex.native_function(); - llvm::Type *ret_type = types.IRType(native_function->signature().ret_type()->id()); + llvm::Type *ret_type = types->IRType(native_function->signature().ret_type()->id()); llvm::Value *value = generator_->AddFunctionCall(native_function->pc_name(), ret_type, args); @@ -486,7 +502,7 @@ void LLVMGenerator::Visitor::Visit(const NonNullableFuncDex &dex) { void LLVMGenerator::Visitor::Visit(const NullableNeverFuncDex &dex) { AddTrace("visit NullableNever base function " + dex.func_descriptor()->name()); - LLVMTypes &types = generator_->types_; + LLVMTypes *types = generator_->types_; // build the function params, along with the validities. 
std::vector args; @@ -504,7 +520,7 @@ void LLVMGenerator::Visitor::Visit(const NullableNeverFuncDex &dex) { } const NativeFunction *native_function = dex.native_function(); - llvm::Type *ret_type = types.IRType(native_function->signature().ret_type()->id()); + llvm::Type *ret_type = types->IRType(native_function->signature().ret_type()->id()); llvm::Value *value = generator_->AddFunctionCall(native_function->pc_name(), ret_type, args); @@ -516,9 +532,9 @@ void LLVMGenerator::Visitor::Visit(const NullableNeverFuncDex &dex) { */ llvm::Value *LLVMGenerator::Visitor::BuildCombinedValidity(const DexVector &validities) { llvm::IRBuilder<> &builder = ir_builder(); - LLVMTypes &types = generator_->types_; + LLVMTypes *types = generator_->types_; - llvm::Value *isValid = types.true_constant(); + llvm::Value *isValid = types->true_constant(); for (auto it = validities.begin(); it != validities.end(); it++) { (*it)->Accept(this); isValid = builder.CreateAnd(isValid, result()->data(), "validityBitAnd"); @@ -581,17 +597,17 @@ void LLVMGenerator::AddTrace(const std::string &msg, llvm::Value *value) { // cast this to an llvm pointer. 
const char *str = trace_strings_.back().c_str(); - llvm::Constant *str_int_cast = types_.i64_constant((int64_t)str); + llvm::Constant *str_int_cast = types_->i64_constant((int64_t)str); llvm::Constant *str_ptr_cast = llvm::ConstantExpr::getIntToPtr( str_int_cast, - types_.ptr_type(types_.i8_type())); + types_->ptr_type(types_->i8_type())); std::vector args; args.push_back(str_ptr_cast); if (value) { args.push_back(value); } - AddFunctionCall(print_fn_name, types_.i32_type(), args); + AddFunctionCall(print_fn_name, types_->i32_type(), args); } } // namespace gandiva diff --git a/cpp/src/gandiva/codegen/llvm_generator.h b/cpp/src/gandiva/codegen/llvm_generator.h index f0ca03513badb..31b3650a12ded 100644 --- a/cpp/src/gandiva/codegen/llvm_generator.h +++ b/cpp/src/gandiva/codegen/llvm_generator.h @@ -36,24 +36,29 @@ namespace gandiva { /// Builds an LLVM module and generates code for the specified set of expressions. class LLVMGenerator { public: - LLVMGenerator(); ~LLVMGenerator(); + /// \brief Factory method to initialize the generator. + static Status Make(std::unique_ptr *llvm_generator); + /// \brief Build the code for the expression trees. Each element in the vector /// represents an expression tree - void Build(const ExpressionVector &exprs); + Status Build(const ExpressionVector &exprs); /// \brief Execute the built expression against the provided arguments. 
- int Execute(const arrow::RecordBatch &record_batch, const arrow::ArrayVector &outputs); + Status Execute(const arrow::RecordBatch &record_batch, + const arrow::ArrayVector &outputs); private: + LLVMGenerator(); + FRIEND_TEST(TestLLVMGenerator, TestAdd); FRIEND_TEST(TestLLVMGenerator, TestIntersectBitMaps); llvm::Module *module() { return engine_->module(); } llvm::LLVMContext &context() { return *(engine_->context()); } llvm::IRBuilder<> &ir_builder() { return engine_->ir_builder(); } - LLVMTypes &types() { return types_; } + LLVMTypes *types() { return types_; } /// Visitor to generate the code for a decomposed expression. class Visitor : public DexVisitor { @@ -95,7 +100,7 @@ class LLVMGenerator { // Generate the code for one expression, with the output of the expression going to // 'output'. - void Add(const ExpressionPtr expr, const FieldDescriptorPtr output); + Status Add(const ExpressionPtr expr, const FieldDescriptorPtr output); /// Generate code to load the vector at specified index in the 'arg_addrs' array. llvm::Value *LoadVectorAtIndex(llvm::Value *arg_addrs, @@ -113,9 +118,10 @@ class LLVMGenerator { FieldPtr field); /// Generate code for the value array of one expression. - llvm::Function *CodeGenExprValue(DexPtr value_expr, - FieldDescriptorPtr output, - int suffix_idx); + Status CodeGenExprValue(DexPtr value_expr, + FieldDescriptorPtr output, + int suffix_idx, + llvm::Function ** fn); /// Generate code to get the bit value at 'position' in the bitmap. 
llvm::Value *GetPackedBitValue(llvm::Value *bitMap, llvm::Value *position); @@ -157,7 +163,7 @@ class LLVMGenerator { std::unique_ptr engine_; std::vector compiled_exprs_; - LLVMTypes types_; + LLVMTypes * types_; FunctionRegistry function_registry_; Annotator annotator_; diff --git a/cpp/src/gandiva/codegen/llvm_generator_test.cc b/cpp/src/gandiva/codegen/llvm_generator_test.cc index 659b14c6917fb..27a2d0bcbd870 100644 --- a/cpp/src/gandiva/codegen/llvm_generator_test.cc +++ b/cpp/src/gandiva/codegen/llvm_generator_test.cc @@ -18,7 +18,6 @@ #include #include #include "gandiva/expression.h" -#include "codegen/codegen_exception.h" #include "codegen/dex.h" #include "codegen/func_descriptor.h" #include "codegen/function_registry.h" @@ -61,9 +60,12 @@ void TestLLVMGenerator::ByteWiseIntersectBitMaps(uint8_t *dst, TEST_F(TestLLVMGenerator, TestAdd) { // Setup LLVM generator to do an arithmetic add of two vectors - LLVMGenerator generator; + std::unique_ptr generator; + Status status = LLVMGenerator::Make(&generator); + EXPECT_TRUE(status.ok()); Annotator annotator; + auto field0 = std::make_shared("f0", arrow::int32()); auto desc0 = annotator.CheckAndAddInputFieldDescriptor(field0); auto validity_dex0 = std::make_shared(desc0); @@ -82,7 +84,7 @@ TEST_F(TestLLVMGenerator, TestAdd) { func_desc->params(), func_desc->return_type()); const NativeFunction *native_func = - generator.function_registry_.LookupSignature(signature); + generator->function_registry_.LookupSignature(signature); std::vector pairs{pair0, pair1}; auto func_dex = std::make_shared(func_desc, native_func, pairs); @@ -90,12 +92,14 @@ TEST_F(TestLLVMGenerator, TestAdd) { auto field_sum = std::make_shared("out", arrow::int32()); auto desc_sum = annotator.CheckAndAddInputFieldDescriptor(field_sum); - llvm::Function *ir_func = generator.CodeGenExprValue(func_dex, desc_sum, 0); + llvm::Function *ir_func = nullptr; - generator.engine_->AddFunctionToCompile("eval_0"); - generator.engine_->FinalizeModule(true, 
false); + status = generator->CodeGenExprValue(func_dex, desc_sum, 0, &ir_func); + ASSERT_TRUE(status.ok()); - EvalFunc eval_func = (EvalFunc)generator.engine_->CompiledFunction(ir_func); + generator->engine_->AddFunctionToCompile("eval_0"); + generator->engine_->FinalizeModule(true, false); + EvalFunc eval_func = (EvalFunc)generator->engine_->CompiledFunction(ir_func); int num_records = 4; uint32_t a0[] = {1, 2, 3, 4}; @@ -114,7 +118,6 @@ TEST_F(TestLLVMGenerator, TestAdd) { reinterpret_cast(&out_bitmap), }; eval_func(addrs, num_records); - uint32_t expected[] = { 6, 8, 10, 12 }; for (int i = 0; i < num_records; i++) { EXPECT_EQ(expected[i], out[i]); diff --git a/cpp/src/gandiva/codegen/status.cc b/cpp/src/gandiva/codegen/status.cc new file mode 100644 index 0000000000000..e10401a636422 --- /dev/null +++ b/cpp/src/gandiva/codegen/status.cc @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2017-2018 Dremio Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Adapted from Apache Arrow. 
+ */
+
+#include <cassert>
+#include <string>
+#include "gandiva/status.h"
+
+namespace gandiva {
+
+// Builds an error status carrying `code` and `msg`.
+// Success is represented by a null `state_`, so passing StatusCode::OK here
+// is a programming error; debug builds assert on it.
+Status::Status(StatusCode code, const std::string& msg) {
+  assert(code != StatusCode::OK);
+  state_ = new State;
+  state_->code = code;
+  state_->msg = msg;
+}
+
+// Replaces this status with a deep copy of `s`.
+// The copy is made before the old state is released, so the call stays
+// correct when `s` aliases `*this`, and `*this` is left untouched if the
+// allocation throws.
+void Status::CopyFrom(const Status& s) {
+  State* copy = (s.state_ == nullptr) ? nullptr : new State(*s.state_);
+  delete state_;
+  state_ = copy;
+}
+
+// Returns the name of the status code, without the message text.
+std::string Status::CodeAsString() const {
+  if (state_ == nullptr) {
+    return "OK";
+  }
+
+  const char* type;
+  switch (code()) {
+    case StatusCode::OK:
+      type = "OK";
+      break;
+    case StatusCode::CodeGenError:
+      type = "CodeGenError";
+      break;
+    default:
+      type = "Unknown";
+      break;
+  }
+  return std::string(type);
+}
+
+// Takes over the state held by `s`, leaving `s` as an OK status.
+void Status::MoveFrom(Status& s) {
+  // Self-move (e.g. `st = std::move(st)`) must be a no-op: without this
+  // guard the state would be deleted and then re-adopted as a dangling
+  // pointer.
+  if (this == &s) {
+    return;
+  }
+  delete state_;
+  state_ = s.state_;
+  s.state_ = nullptr;
+}
+
+// Formats the status as "<code>" on success or "<code>: <message>" on error.
+std::string Status::ToString() const {
+  std::string result(CodeAsString());
+  if (state_ == nullptr) {
+    return result;
+  }
+  result += ": ";
+  result += state_->msg;
+  return result;
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/codegen/status.h b/cpp/src/gandiva/codegen/status.h
new file mode 100644
index 0000000000000..a0f92164bb8dd
--- /dev/null
+++ b/cpp/src/gandiva/codegen/status.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2017-2018 Dremio Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Adapted from Apache Arrow Status.
+ */
+#ifndef GANDIVA_STATUS_H
+#define GANDIVA_STATUS_H
+
+#include <sstream>
+#include <string>
+#include <utility>
+
+// Evaluates `status` exactly once. If the result is not OK, returns from the
+// enclosing function with a Status of the same code whose message is prefixed
+// with the file, line and source text of the failing expression.
+// Names are fully qualified / prefixed so the macro works outside
+// `namespace gandiva` and cannot capture a caller's own `ss` or `_status`.
+#define GANDIVA_RETURN_NOT_OK(status) \
+  do { \
+    ::gandiva::Status _gandiva_status = (status); \
+    if (!_gandiva_status.ok()) { \
+      std::stringstream _gandiva_ss; \
+      _gandiva_ss << __FILE__ << ":" << __LINE__ << " code: " << #status \
+                  << "\n" << _gandiva_status.message(); \
+      return ::gandiva::Status(_gandiva_status.code(), _gandiva_ss.str()); \
+    } \
+  } while (0)
+
+// If `condition` is false, returns from the enclosing function with `status`
+// (message prefixed as in GANDIVA_RETURN_NOT_OK). `status` is evaluated only
+// on failure. `condition` is parenthesized so that compound expressions such
+// as `a == b` or `p && q` are negated as a whole.
+#define GANDIVA_RETURN_FAILURE_IF_FALSE(condition, status) \
+  do { \
+    if (!(condition)) { \
+      ::gandiva::Status _gandiva_status = (status); \
+      std::stringstream _gandiva_ss; \
+      _gandiva_ss << __FILE__ << ":" << __LINE__ << " code: " << #status \
+                  << "\n" << _gandiva_status.message(); \
+      return ::gandiva::Status(_gandiva_status.code(), _gandiva_ss.str()); \
+    } \
+  } while (0)
+
+namespace gandiva {
+
+// Codes a Status can report.
+enum class StatusCode : char {
+  OK = 0,
+  CodeGenError = 1
+};
+
+// Outcome of an operation: either success, or an error code plus a message.
+class Status {
+ public:
+  // Create a success status.
+  Status() : state_(nullptr) {}
+  ~Status() { delete state_; }
+
+  // Create an error status with the given code and message.
+  Status(StatusCode code, const std::string& msg);
+
+  // Copy the specified status.
+  Status(const Status& s);
+  Status& operator=(const Status& s);
+
+  // Move the specified status; the source is left as an OK status.
+  Status(Status&& s) noexcept;
+  Status& operator=(Status&& s) noexcept;
+
+  // AND the statuses: the result is the left operand if it is an error,
+  // otherwise the right operand.
+  Status operator&(const Status& s) const;
+  Status operator&(Status&& s) const;
+  Status& operator&=(const Status& s);
+  Status& operator&=(Status&& s);
+
+  // Return a success status.
+  static Status OK() { return Status(); }
+
+  // Return error status of an appropriate type.
+  static Status CodeGenError(const std::string& msg) {
+    return Status(StatusCode::CodeGenError, msg);
+  }
+
+  // Returns true if the status indicates success.
+  bool ok() const { return (state_ == nullptr); }
+
+  // Returns true if the status carries StatusCode::CodeGenError.
+  bool IsCodeGenError() const { return code() == StatusCode::CodeGenError; }
+
+  // Return a string representation of this status suitable for printing.
+  // Returns the string "OK" for success.
+  std::string ToString() const;
+
+  // Return a string representation of the status code, without the message
+  // text.
+  std::string CodeAsString() const;
+
+  // Returns StatusCode::OK for success, otherwise the stored error code.
+  StatusCode code() const { return ok() ? StatusCode::OK : state_->code; }
+
+  // Returns an empty string for success, otherwise the stored message.
+  std::string message() const { return ok() ? "" : state_->msg; }
+
+ private:
+  struct State {
+    StatusCode code;
+    std::string msg;
+  };
+  // OK status has a null `state_`. Otherwise, `state_` points to
+  // a `State` structure containing the error code and message(s).
+  State* state_;
+
+  void CopyFrom(const Status& s);
+  void MoveFrom(Status& s);
+};
+
+// Streams the same text that ToString() returns.
+inline std::ostream& operator<<(std::ostream& os, const Status& x) {
+  os << x.ToString();
+  return os;
+}
+
+inline Status::Status(const Status& s)
+    : state_((s.state_ == nullptr) ? nullptr : new State(*s.state_)) {}
+
+inline Status& Status::operator=(const Status& s) {
+  // The following condition catches both aliasing (when this == &s),
+  // and the common case where both s and *this are ok.
+  if (state_ != s.state_) {
+    CopyFrom(s);
+  }
+  return *this;
+}
+
+inline Status::Status(Status&& s) noexcept : state_(s.state_) {
+  s.state_ = nullptr;
+}
+
+inline Status& Status::operator=(Status&& s) noexcept {
+  // Guard against self-move: releasing our state and then adopting it again
+  // from `s` would leave `state_` dangling.
+  if (this != &s) {
+    MoveFrom(s);
+  }
+  return *this;
+}
+
+inline Status Status::operator&(const Status& s) const {
+  if (ok()) {
+    return s;
+  } else {
+    return *this;
+  }
+}
+
+inline Status Status::operator&(Status&& s) const {
+  if (ok()) {
+    return std::move(s);
+  } else {
+    return *this;
+  }
+}
+
+inline Status& Status::operator&=(const Status& s) {
+  // Only the first error is kept; an existing error is never overwritten.
+  if (ok() && !s.ok()) {
+    CopyFrom(s);
+  }
+  return *this;
+}
+
+inline Status& Status::operator&=(Status&& s) {
+  // Only the first error is kept; an existing error is never overwritten.
+  if (ok() && !s.ok()) {
+    MoveFrom(s);
+  }
+  return *this;
+}
+
+}  // namespace gandiva
+#endif  // GANDIVA_STATUS_H
diff --git a/cpp/src/gandiva/codegen/status_test.cc b/cpp/src/gandiva/codegen/status_test.cc
new file mode 100644
index 0000000000000..0b46ab96148ab
--- /dev/null
+++ b/cpp/src/gandiva/codegen/status_test.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2017-2018 Dremio Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not
use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Adapted from Apache Arrow Status.
+ */
+#include <sstream>
+
+#include <gtest/gtest.h>
+
+#include "gandiva/status.h"
+
+namespace gandiva {
+
+// A status reports the code and message it was created with; a success
+// status reports StatusCode::OK and an empty message.
+TEST(StatusTest, TestCodeAndMessage) {
+  Status ok = Status::OK();
+  ASSERT_TRUE(ok.ok());
+  ASSERT_EQ(StatusCode::OK, ok.code());
+  ASSERT_EQ("", ok.message());
+
+  Status code_gen_error = Status::CodeGenError("input invalid.");
+  ASSERT_FALSE(code_gen_error.ok());
+  ASSERT_EQ(StatusCode::CodeGenError, code_gen_error.code());
+  ASSERT_EQ("input invalid.", code_gen_error.message());
+}
+
+// ToString() yields "OK" on success and "<code>: <message>" on error, and
+// operator<< streams exactly that text.
+TEST(StatusTest, TestToString) {
+  ASSERT_EQ("OK", Status::OK().ToString());
+
+  Status code_gen_error = Status::CodeGenError("input invalid.");
+  ASSERT_EQ("CodeGenError: input invalid.", code_gen_error.ToString());
+
+  std::stringstream ss;
+  ss << code_gen_error;
+  ASSERT_EQ(code_gen_error.ToString(), ss.str());
+}
+
+// operator& / operator&= keep the first error encountered and stay OK only
+// when every operand is OK.
+TEST(StatusTest, AndStatus) {
+  Status a = Status::OK();
+  Status b = Status::OK();
+  Status c = Status::CodeGenError("invalid value");
+
+  Status res;
+  res = a & b;
+  ASSERT_TRUE(res.ok());
+  res = a & c;
+  ASSERT_TRUE(res.IsCodeGenError());
+  ASSERT_EQ("invalid value", res.message());
+
+  res = Status::OK();
+  res &= c;
+  ASSERT_TRUE(res.IsCodeGenError());
+
+  // With rvalues
+  res = Status::OK() & Status::CodeGenError("foo");
+  ASSERT_TRUE(res.IsCodeGenError());
+  res = Status::CodeGenError("foo") & Status::OK();
+  ASSERT_TRUE(res.IsCodeGenError());
+
+  // The left operand wins when both sides are errors.
+  res = Status::CodeGenError("first") & Status::CodeGenError("second");
+  ASSERT_EQ("first", res.message());
+
+  res = Status::OK();
+  res &= Status::OK();
+  ASSERT_TRUE(res.ok());
+  res &= Status::CodeGenError("foo");
+  ASSERT_TRUE(res.IsCodeGenError());
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 6e6e7f8916e69..bde3d5b6af978 100644
--- a/cpp/src/gandiva/precompiled/types.h +++ b/cpp/src/gandiva/precompiled/types.h @@ -17,7 +17,7 @@ #ifndef PRECOMPILED_TYPES_H #define PRECOMPILED_TYPES_H -#include +#include /* * Use the same names as in arrow data types. Makes it easy to write pre-processor macros.