Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-38589: [C++][Gandiva] Support registering external C functions #38632

Merged
merged 9 commits into from
Nov 17, 2023
2 changes: 2 additions & 0 deletions cpp/src/gandiva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ set(SRC_FILES
expression_registry.cc
exported_funcs_registry.cc
exported_funcs.cc
external_c_functions.cc
filter.cc
function_holder_maker_registry.cc
function_ir_builder.cc
function_registry.cc
function_registry_arithmetic.cc
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/gandiva/cast_time.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

namespace gandiva {

void ExportedTimeFunctions::AddMappings(Engine* engine) const {
arrow::Status ExportedTimeFunctions::AddMappings(Engine* engine) const {
std::vector<llvm::Type*> args;
auto types = engine->types();

Expand All @@ -42,6 +42,7 @@ void ExportedTimeFunctions::AddMappings(Engine* engine) const {
engine->AddGlobalMappingForFunc("gdv_fn_time_with_zone",
types->i32_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_time_with_zone));
return arrow::Status::OK();
}

} // namespace gandiva
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/gandiva/context_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

namespace gandiva {

void ExportedContextFunctions::AddMappings(Engine* engine) const {
arrow::Status ExportedContextFunctions::AddMappings(Engine* engine) const {
std::vector<llvm::Type*> args;
auto types = engine->types();

Expand All @@ -50,6 +50,7 @@ void ExportedContextFunctions::AddMappings(Engine* engine) const {

engine->AddGlobalMappingForFunc("gdv_fn_context_arena_reset", types->void_type(), args,
reinterpret_cast<void*>(gdv_fn_context_arena_reset));
return arrow::Status::OK();
}

} // namespace gandiva
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/gandiva/decimal_xlarge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

namespace gandiva {

void ExportedDecimalFunctions::AddMappings(Engine* engine) const {
arrow::Status ExportedDecimalFunctions::AddMappings(Engine* engine) const {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding mappings for external C interface functions can fail, so I changed the AddGlobalMappings and ExportedFuncsBase::AddMappings functions to return arrow::Status to report the result.

std::vector<llvm::Type*> args;
auto types = engine->types();

Expand Down Expand Up @@ -93,6 +93,7 @@ void ExportedDecimalFunctions::AddMappings(Engine* engine) const {

engine->AddGlobalMappingForFunc("gdv_xlarge_compare", types->i32_type() /*return_type*/,
args, reinterpret_cast<void*>(gdv_xlarge_compare));
return arrow::Status::OK();
}

} // namespace gandiva
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/gandiva/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ Engine::Engine(const std::shared_ptr<Configuration>& conf,
Status Engine::Init() {
std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs);
// Add mappings for global functions that can be accessed from LLVM/IR module.
AddGlobalMappings();
ARROW_RETURN_NOT_OK(AddGlobalMappings());

return Status::OK();
}
Expand Down Expand Up @@ -447,7 +447,11 @@ void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_ty
execution_engine_->addGlobalMapping(fn, function_ptr);
}

void Engine::AddGlobalMappings() { ExportedFuncsRegistry::AddMappings(this); }
arrow::Status Engine::AddGlobalMappings() {
ARROW_RETURN_NOT_OK(ExportedFuncsRegistry::AddMappings(this));
ExternalCFunctions c_funcs(function_registry_);
return c_funcs.AddMappings(this);
}

std::string Engine::DumpIR() {
std::string ir;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class GANDIVA_EXPORT Engine {
Status LoadExternalPreCompiledIR();

// Create and add mappings for cpp functions that can be accessed from LLVM.
void AddGlobalMappings();
arrow::Status AddGlobalMappings();

// Remove unused functions to reduce compile time.
Status RemoveUnusedFunctions();
Expand Down
26 changes: 19 additions & 7 deletions cpp/src/gandiva/exported_funcs.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#pragma once

#include <vector>
#include "gandiva/function_registry.h"
#include "gandiva/visibility.h"

namespace gandiva {
Expand All @@ -29,37 +30,48 @@ class ExportedFuncsBase {
public:
virtual ~ExportedFuncsBase() = default;

virtual void AddMappings(Engine* engine) const = 0;
virtual arrow::Status AddMappings(Engine* engine) const = 0;
};

// Class for exporting Stub functions
class ExportedStubFunctions : public ExportedFuncsBase {
void AddMappings(Engine* engine) const override;
arrow::Status AddMappings(Engine* engine) const override;
};

// Class for exporting Context functions
class ExportedContextFunctions : public ExportedFuncsBase {
void AddMappings(Engine* engine) const override;
arrow::Status AddMappings(Engine* engine) const override;
};

// Class for exporting Time functions
class ExportedTimeFunctions : public ExportedFuncsBase {
void AddMappings(Engine* engine) const override;
arrow::Status AddMappings(Engine* engine) const override;
};

// Class for exporting Decimal functions
class ExportedDecimalFunctions : public ExportedFuncsBase {
void AddMappings(Engine* engine) const override;
arrow::Status AddMappings(Engine* engine) const override;
};

// Class for exporting String functions
class ExportedStringFunctions : public ExportedFuncsBase {
void AddMappings(Engine* engine) const override;
arrow::Status AddMappings(Engine* engine) const override;
};

// Class for exporting Hash functions
class ExportedHashFunctions : public ExportedFuncsBase {
void AddMappings(Engine* engine) const override;
arrow::Status AddMappings(Engine* engine) const override;
};

// Class for exporting external C functions held by a FunctionRegistry
class ExternalCFunctions : public ExportedFuncsBase {
 public:
  // Keeps a shared reference to `function_registry` for later use by AddMappings.
  explicit ExternalCFunctions(std::shared_ptr<FunctionRegistry> function_registry)
      : function_registry_{std::move(function_registry)} {}

  arrow::Status AddMappings(Engine* engine) const override;

 private:
  // Registry this exporter was constructed with.
  std::shared_ptr<FunctionRegistry> function_registry_;
};

GANDIVA_EXPORT void RegisterExportedFuncs();
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/gandiva/exported_funcs_registry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@

namespace gandiva {

void ExportedFuncsRegistry::AddMappings(Engine* engine) {
arrow::Status ExportedFuncsRegistry::AddMappings(Engine* engine) {
for (const auto& entry : *registered()) {
entry->AddMappings(engine);
ARROW_RETURN_NOT_OK(entry->AddMappings(engine));
}
return arrow::Status::OK();
}

const ExportedFuncsRegistry::list_type& ExportedFuncsRegistry::Registered() {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/exported_funcs_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class GANDIVA_EXPORT ExportedFuncsRegistry {
using list_type = std::vector<std::shared_ptr<ExportedFuncsBase>>;

// Add functions from all the registered classes to the engine.
static void AddMappings(Engine* engine);
static arrow::Status AddMappings(Engine* engine);

static bool Register(std::shared_ptr<ExportedFuncsBase> entry) {
registered()->emplace_back(std::move(entry));
Expand Down
8 changes: 5 additions & 3 deletions cpp/src/gandiva/expr_decomposer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@

#include "gandiva/annotator.h"
#include "gandiva/dex.h"
#include "gandiva/function_holder_registry.h"
#include "gandiva/function_holder_maker_registry.h"
#include "gandiva/function_registry.h"
#include "gandiva/function_signature.h"
#include "gandiva/in_holder.h"
#include "gandiva/node.h"
#include "gandiva/regex_functions_holder.h"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this include needed?

Copy link
Contributor Author

@niyue niyue Nov 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I found that LikeHolder is used below but its header is not included, so I added it (it is probably included indirectly through other header files). Let me know if this is not recommended in the project. (In other projects I have run into problems without the direct include: during a refactoring, an indirectly included header file was removed, which caused another file to fail to compile.)


namespace gandiva {

Expand Down Expand Up @@ -81,9 +82,10 @@ Status ExprDecomposer::Visit(const FunctionNode& in_node) {
std::shared_ptr<FunctionHolder> holder;
int holder_idx = -1;
if (native_function->NeedsFunctionHolder()) {
auto status = FunctionHolderRegistry::Make(desc->name(), node, &holder);
auto function_holder_maker_registry = registry_.GetFunctionHolderMakerRegistry();
ARROW_ASSIGN_OR_RAISE(holder,
function_holder_maker_registry.Make(desc->name(), node));
holder_idx = annotator_.AddHolderPointer(holder.get());
ARROW_RETURN_NOT_OK(status);
}

if (native_function->result_nullable_type() == kResultNullIfNull) {
Expand Down
79 changes: 79 additions & 0 deletions cpp/src/gandiva/external_c_functions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <llvm/IR/Type.h>

#include "gandiva/engine.h"
#include "gandiva/exported_funcs.h"

namespace {
// Calculate the number of LLVM-level arguments `func` takes for signature `sig`.
//
// The count is made up of:
//   - one slot if the function needs the execution context,
//   - one slot if the function needs a function holder,
//   - one slot per parameter, or two for a STRING parameter (the second one is
//     the additional length argument),
//   - one trailing slot if the return type is STRING (the output length).
size_t GetNumArgs(const gandiva::FunctionSignature& sig,
                  const gandiva::NativeFunction& func) {
  // Accumulate directly in size_t so the counter matches the return type rather
  // than being deduced as int and implicitly converted on return.
  size_t num_args = 0;
  if (func.NeedsContext()) {
    ++num_args;
  }
  if (func.NeedsFunctionHolder()) {
    ++num_args;
  }
  for (auto const& arg : sig.param_types()) {
    num_args += (arg->id() == arrow::Type::STRING) ? 2 : 1;
  }
  if (sig.ret_type()->id() == arrow::Type::STRING) {
    ++num_args;
  }
  return num_args;
}

// Map from a NativeFunction's signature to the corresponding LLVM signature.
//
// On success returns a pair {argument types, return type}. The argument types
// are laid out in this order:
//   1. an i64 slot if the function needs the execution context,
//   2. an i64 slot if the function needs a function holder,
//   3. the IR type of each parameter, with an extra i32 length slot after every
//      STRING parameter,
//   4. a trailing i32 pointer slot for the output length if the return type is
//      STRING.
//
// Returns NotImplemented if a parameter type or the return type has no LLVM IR
// type, instead of handing a null type on to the caller.
// NOTE(review): this assumes LLVMTypes::IRType() returns nullptr for Arrow types
// it has no mapping for -- confirm against llvm_types.h.
arrow::Result<std::pair<std::vector<llvm::Type*>, llvm::Type*>> MapToLLVMSignature(
    const gandiva::FunctionSignature& sig, const gandiva::NativeFunction& func,
    gandiva::LLVMTypes* types) {
  std::vector<llvm::Type*> arg_llvm_types;
  arg_llvm_types.reserve(GetNumArgs(sig, func));

  if (func.NeedsContext()) {
    arg_llvm_types.push_back(types->i64_type());
  }
  if (func.NeedsFunctionHolder()) {
    arg_llvm_types.push_back(types->i64_type());
  }
  for (auto const& arg : sig.param_types()) {
    llvm::Type* arg_llvm_type = types->IRType(arg->id());
    if (arg_llvm_type == nullptr) {
      return arrow::Status::NotImplemented("external C function ", func.pc_name(),
                                           " has unsupported parameter type ",
                                           arg->ToString());
    }
    arg_llvm_types.push_back(arg_llvm_type);
    if (arg->id() == arrow::Type::STRING) {
      // string type needs an additional length argument
      arg_llvm_types.push_back(types->i32_type());
    }
  }
  if (sig.ret_type()->id() == arrow::Type::STRING) {
    // for string output, the last arg is the output length
    arg_llvm_types.push_back(types->i32_ptr_type());
  }

  llvm::Type* ret_llvm_type = types->IRType(sig.ret_type()->id());
  if (ret_llvm_type == nullptr) {
    return arrow::Status::NotImplemented("external C function ", func.pc_name(),
                                         " has unsupported return type ",
                                         sig.ret_type()->ToString());
  }
  return std::make_pair(std::move(arg_llvm_types), ret_llvm_type);
}
} // namespace

namespace gandiva {
// Adds a global mapping to `engine` for the C functions reported by the function
// registry's GetCFunctions().
//
// For each function, every one of its signatures is translated to an LLVM
// signature and mapped under the function's pc_name() to the function pointer.
// Stops and returns the error if a signature cannot be translated; returns OK
// otherwise.
Status ExternalCFunctions::AddMappings(Engine* engine) const {
  auto const& registered_c_funcs = function_registry_->GetCFunctions();
  auto const llvm_types = engine->types();

  for (auto& [native_func, c_func_ptr] : registered_c_funcs) {
    for (auto const& signature : native_func.signatures()) {
      // llvm_sig.first holds the argument types, llvm_sig.second the return type.
      ARROW_ASSIGN_OR_RAISE(auto llvm_sig,
                            MapToLLVMSignature(signature, native_func, llvm_types));
      engine->AddGlobalMappingForFunc(native_func.pc_name(), llvm_sig.second,
                                      llvm_sig.first, c_func_ptr);
    }
  }
  return Status::OK();
}
} // namespace gandiva
72 changes: 72 additions & 0 deletions cpp/src/gandiva/function_holder_maker_registry.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "gandiva/function_holder_maker_registry.h"

#include <functional>

#include "arrow/util/string.h"
#include "gandiva/function_holder.h"
#include "gandiva/interval_holder.h"
#include "gandiva/random_generator_holder.h"
#include "gandiva/regex_functions_holder.h"
#include "gandiva/to_date_holder.h"

namespace gandiva {

using arrow::internal::AsciiToLower;

// Constructs a registry whose maker table is initialized from the map returned
// by DefaultHolderMakers().
FunctionHolderMakerRegistry::FunctionHolderMakerRegistry()
: function_holder_makers_(DefaultHolderMakers()) {}

// Stores `holder_maker` in the maker table under the AsciiToLower() form of
// `name`. Always returns OK.
// NOTE(review): if MakerMap is a std::map/std::unordered_map, emplace() leaves an
// existing entry untouched, so registering an already-present name would be a
// silent no-op -- confirm whether that is intended.
arrow::Status FunctionHolderMakerRegistry::Register(const std::string& name,
                                                    FunctionHolderMaker holder_maker) {
  auto key = AsciiToLower(name);
  function_holder_makers_.emplace(std::move(key), std::move(holder_maker));
  return arrow::Status::OK();
}

// Adapts the Status-returning, out-parameter style HolderType::Make(node, &out)
// factory to a Result-returning one.
//
// Returns the holder created by HolderType::Make, or the failure status that
// HolderType::Make reported.
template <typename HolderType>
static arrow::Result<FunctionHolderPtr> HolderMaker(const FunctionNode& node) {
  std::shared_ptr<HolderType> created_holder;
  ARROW_RETURN_NOT_OK(HolderType::Make(node, &created_holder));
  return created_holder;
}
Comment on lines +42 to +47
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we change HolderType::Make() to use Result instead of Status (e.g. Status LikeHolder::Make(...) -> Result<std::shared_ptr<LikeHolder>> LikeHolder::Make(...)), we can remove this helper template function?
If so, we can do it as a follow-up task.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. If I make this change, we need to change some existing classes, like LikeHolder/ReplaceHolder/etc. They are GANDIVA_EXPORT classes, but I am not sure if they are really used externally. The code will be simpler after this refactoring. I am glad to submit another PR for this if this is desired.


// Creates the function holder for function `name` from `node`.
//
// The maker is looked up under the AsciiToLower() form of `name`. Returns the
// result of invoking that maker on `node`, or Invalid if no maker is registered
// under that key.
arrow::Result<FunctionHolderPtr> FunctionHolderMakerRegistry::Make(
    const std::string& name, const FunctionNode& node) {
  const auto maker_it = function_holder_makers_.find(AsciiToLower(name));
  if (maker_it != function_holder_makers_.end()) {
    return maker_it->second(node);
  }

  // The message reports the name as given by the caller, not the lowered key.
  return Status::Invalid("function holder not registered for function " + name);
}

// Returns a copy of the built-in table that maps function names to the maker of
// the holder type they use. The table itself is built once, on first call.
FunctionHolderMakerRegistry::MakerMap FunctionHolderMakerRegistry::DefaultHolderMakers() {
  static const MakerMap kDefaultMakers{
      // pattern matching
      {"like", HolderMaker<LikeHolder>},
      // date parsing
      {"to_date", HolderMaker<ToDateHolder>},
      // random number generation ("rand" uses the same holder as "random")
      {"random", HolderMaker<RandomGeneratorHolder>},
      {"rand", HolderMaker<RandomGeneratorHolder>},
      // regular expressions
      {"regexp_replace", HolderMaker<ReplaceHolder>},
      {"regexp_extract", HolderMaker<ExtractHolder>},
      // interval casts
      {"castintervalday", HolderMaker<IntervalDaysHolder>},
      {"castintervalyear", HolderMaker<IntervalYearsHolder>},
  };
  return kDefaultMakers;
}
} // namespace gandiva
Loading
Loading