Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-37753: [C++][Gandiva] Add external function registry support #37787

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cpp/src/gandiva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ set(SRC_FILES
function_registry.cc
function_registry_arithmetic.cc
function_registry_datetime.cc
function_registry_external.cc
function_registry_hash.cc
function_registry_math_ops.cc
function_registry_string.cc
Expand Down Expand Up @@ -232,6 +233,7 @@ add_gandiva_test(internals-test
bitmap_accumulator_test.cc
engine_llvm_test.cc
function_registry_test.cc
function_registry_external_test.cc
function_signature_test.cc
llvm_types_test.cc
llvm_generator_test.cc
Expand All @@ -253,3 +255,4 @@ add_gandiva_test(internals-test

add_subdirectory(precompiled)
add_subdirectory(tests)
add_subdirectory(extension_tests)
84 changes: 84 additions & 0 deletions cpp/src/gandiva/cmake/GenerateBitcode.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Create an LLVM bitcode (.bc) file for each of the given source files.
#
# Arguments:
#   PRECOMPILED_SRC_LIST - list of C++ source files to compile to bitcode
#   OUTPUT_DIR           - output directory prefix, relative to
#                          CMAKE_CURRENT_BINARY_DIR. It is concatenated directly
#                          with the file name, so it must be empty or end in "/"
#   OUTPUT_VAR           - name of the variable in the caller's scope that
#                          receives the list of generated .bc files
function(generate_bitcode PRECOMPILED_SRC_LIST OUTPUT_DIR OUTPUT_VAR)
  set(LOCAL_BC_FILES "")

  if(MSVC)
    # clang pretends to be a particular version of MSVC. The standard
    # library uses C++14 features, so we have to pass an explicit -std version
    # to get the IR compilation to work.
    # See https://cmake.org/cmake/help/latest/variable/MSVC_VERSION.html
    # for MSVC_VERSION and Visual Studio version.
    #
    # The narrower range must be tested first: every version below 1920 is
    # also below 1930, so the opposite order would never select 19.10.
    if(MSVC_VERSION LESS 1920)
      set(FMS_COMPATIBILITY 19.10)
    elseif(MSVC_VERSION LESS 1930)
      set(FMS_COMPATIBILITY 19.20)
    else()
      message(FATAL_ERROR "Unsupported MSVC_VERSION=${MSVC_VERSION}")
    endif()
    set(PLATFORM_CLANG_OPTIONS -std=c++17 -fms-compatibility
                               -fms-compatibility-version=${FMS_COMPATIBILITY})
  else()
    set(PLATFORM_CLANG_OPTIONS -std=c++17)
  endif()

  foreach(SRC_FILE ${PRECOMPILED_SRC_LIST})
    get_filename_component(SRC_BASE ${SRC_FILE} NAME_WE)
    get_filename_component(ABSOLUTE_SRC ${SRC_FILE} ABSOLUTE)
    set(BC_FILE ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_DIR}${SRC_BASE}.bc)
    # clang does not create missing parent directories for its -o argument,
    # so the output directory is created as part of the same build rule.
    get_filename_component(BC_DIR ${BC_FILE} DIRECTORY)
    set(PRECOMPILE_COMMAND)
    if(CMAKE_OSX_SYSROOT)
      # Run clang through "cmake -E env" so that SDKROOT is set on macOS.
      list(APPEND
           PRECOMPILE_COMMAND
           ${CMAKE_COMMAND}
           -E
           env
           SDKROOT=${CMAKE_OSX_SYSROOT})
    endif()
    list(APPEND
         PRECOMPILE_COMMAND
         ${CLANG_EXECUTABLE}
         ${PLATFORM_CLANG_OPTIONS}
         -DGANDIVA_IR
         -DNDEBUG # DCHECK macros not implemented in precompiled code
         -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols
         -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols
         -fno-use-cxa-atexit # Workaround for unresolved __dso_handle
         -emit-llvm
         -O3
         -c
         ${ABSOLUTE_SRC}
         -o
         ${BC_FILE}
         ${ARROW_GANDIVA_PC_CXX_FLAGS}
         -I${CMAKE_SOURCE_DIR}/src
         -I${ARROW_BINARY_DIR}/src)

    if(NOT ARROW_USE_NATIVE_INT128)
      foreach(boost_include_dir ${Boost_INCLUDE_DIRS})
        list(APPEND PRECOMPILE_COMMAND -I${boost_include_dir})
      endforeach()
    endif()
    add_custom_command(OUTPUT ${BC_FILE}
                       COMMAND ${CMAKE_COMMAND} -E make_directory ${BC_DIR}
                       COMMAND ${PRECOMPILE_COMMAND}
                       DEPENDS ${SRC_FILE})
    list(APPEND LOCAL_BC_FILES ${BC_FILE})
  endforeach()
  # Hand the list of generated bitcode files back to the caller.
  set(${OUTPUT_VAR} "${LOCAL_BC_FILES}" PARENT_SCOPE)
endfunction()
51 changes: 47 additions & 4 deletions cpp/src/gandiva/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
#pragma warning(pop)
#endif

#include "arrow/util/io_util.h"
#include "gandiva/configuration.h"
#include "gandiva/decimal_ir.h"
#include "gandiva/exported_funcs_registry.h"
Expand Down Expand Up @@ -137,6 +138,13 @@ Status Engine::LoadFunctionIRs() {
if (!functions_loaded_) {
ARROW_RETURN_NOT_OK(LoadPreCompiledIR());
ARROW_RETURN_NOT_OK(DecimalIR::AddFunctions(this));
auto maybe_ext_dir_env = ::arrow::internal::GetEnvVar("GANDIVA_EXTENSION_DIR");
if (maybe_ext_dir_env.ok()) {
auto ext_dir_env = *maybe_ext_dir_env;
if (!ext_dir_env.empty()) {
ARROW_RETURN_NOT_OK(LoadExtendedPreCompiledIR(ext_dir_env));
}
}
functions_loaded_ = true;
}
return Status::OK();
Expand Down Expand Up @@ -220,6 +228,42 @@ static void SetDataLayout(llvm::Module* module) {
}
// end of the modified method from MLIR

// Load extended IR files from the given directory.
//
// Every regular file with a ".bc" extension directly under dir_path is read,
// parsed as LLVM bitcode, verified and linked into the main module.
//
// Returns a CodeGenError status if the directory cannot be iterated, or if any
// bitcode file fails to load, parse, verify or link. The message names the
// offending file.
Status Engine::LoadExtendedPreCompiledIR(const std::filesystem::path& dir_path) {
  // Use the non-throwing std::filesystem overloads so that a missing or
  // unreadable directory is reported through Status instead of an exception.
  std::error_code ec;
  std::filesystem::directory_iterator dir_it(dir_path, ec);
  if (ec) {
    return Status::CodeGenError("Could not open extension directory: ",
                                dir_path.string(), " Error: ", ec.message());
  }

  const std::filesystem::directory_iterator dir_end;
  while (dir_it != dir_end) {
    const auto& entry = *dir_it;
    // An entry whose file status cannot be queried is skipped: the
    // error_code overload returns false in that case.
    std::error_code status_ec;
    if (entry.is_regular_file(status_ec) && entry.path().extension() == ".bc") {
      const std::string file_path = entry.path().string();

      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer_or_error =
          llvm::MemoryBuffer::getFile(file_path);

      ARROW_RETURN_IF(!buffer_or_error,
                      Status::CodeGenError("Could not load module from IR file: ",
                                           file_path + " Error: " +
                                               buffer_or_error.getError().message()));

      auto buffer = std::move(buffer_or_error.get());

      auto module_or_error =
          llvm::parseBitcodeFile(buffer->getMemBufferRef(), *context());
      if (!module_or_error) {
        std::string str;
        llvm::raw_string_ostream stream(str);
        stream << module_or_error.takeError();
        return Status::CodeGenError("Failed to parse bitcode file: " + file_path +
                                    " Error: " + stream.str());
      }
      auto ir_module = std::move(module_or_error.get());

      // Capture the verifier diagnostics so they travel with the returned
      // Status rather than only being printed to stderr.
      std::string verify_msg;
      llvm::raw_string_ostream verify_stream(verify_msg);
      if (llvm::verifyModule(*ir_module, &verify_stream)) {
        return Status::CodeGenError("verify of IR Module failed for file: ", file_path,
                                    " Error: ", verify_stream.str());
      }

      ARROW_RETURN_IF(llvm::Linker::linkModules(*module_, std::move(ir_module)),
                      Status::CodeGenError("failed to link IR Modules from file: ",
                                           file_path));
    }

    dir_it.increment(ec);
    if (ec) {
      return Status::CodeGenError("Could not iterate extension directory: ",
                                  dir_path.string(), " Error: ", ec.message());
    }
  }

  return Status::OK();
}

// Handling for pre-compiled IR libraries.
Status Engine::LoadPreCompiledIR() {
auto bitcode = llvm::StringRef(reinterpret_cast<const char*>(kPrecompiledBitcode),
Expand All @@ -233,11 +277,10 @@ Status Engine::LoadPreCompiledIR() {
Status::CodeGenError("Could not load module from IR: ",
buffer_or_error.getError().message()));

std::unique_ptr<llvm::MemoryBuffer> buffer = std::move(buffer_or_error.get());
auto buffer = std::move(buffer_or_error.get());

/// Parse the IR module.
llvm::Expected<std::unique_ptr<llvm::Module>> module_or_error =
llvm::getOwningLazyBitcodeModule(std::move(buffer), *context());
auto module_or_error = llvm::getOwningLazyBitcodeModule(std::move(buffer), *context());
if (!module_or_error) {
// NOTE: llvm::handleAllErrors() fails linking with RTTI-disabled LLVM builds
// (ARROW-5148)
Expand All @@ -246,7 +289,7 @@ Status Engine::LoadPreCompiledIR() {
stream << module_or_error.takeError();
return Status::CodeGenError(stream.str());
}
std::unique_ptr<llvm::Module> ir_module = std::move(module_or_error.get());
auto ir_module = std::move(module_or_error.get());

// set dataLayout
SetDataLayout(ir_module.get());
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/gandiva/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <filesystem>
#include <memory>
#include <set>
#include <string>
Expand Down Expand Up @@ -92,6 +93,7 @@ class GANDIVA_EXPORT Engine {
/// load pre-compiled IR modules from precompiled_bitcode.cc and merge them into
/// the main module.
Status LoadPreCompiledIR();
Status LoadExtendedPreCompiledIR(const std::filesystem::path& dir_path);

// Create and add mappings for cpp functions that can be accessed from LLVM.
void AddGlobalMappings();
Expand Down
47 changes: 47 additions & 0 deletions cpp/src/gandiva/extension_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Nothing in this directory is needed when tests are disabled: stop processing
# this CMakeLists.txt as soon as NO_TESTS is set.
if(NO_TESTS)
return()
endif()

# Copy the testing data into the build directory.
# The whole of this source directory is copied to
# ${CMAKE_BINARY_DIR}/gandiva_extension_tests, the same path that is exposed to
# the tests through the GANDIVA_EXTENSION_TEST_DIR compile definition.
add_custom_target(extension-tests-data
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_BINARY_DIR}/gandiva_extension_tests)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that CMAKE_CURRENT_BINARY_DIR is better. Why do we want to use the top build directory for this?


include(../cmake/GenerateBitcode.cmake)

set(TEST_EXT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/extended_funcs)
set(TEST_PRECOMPILED_SRCS ${TEST_EXT_DIR}/multiply_by_two.cc)
# generate_bitcode() writes its outputs under
# ${CMAKE_CURRENT_BINARY_DIR}/<OUTPUT_DIR>, so the output directory has to be
# given relative to CMAKE_CURRENT_BINARY_DIR. Compute that relative path rather
# than hard-coding "../../../", which is only right for one directory depth.
file(RELATIVE_PATH TEST_BC_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}"
     "${CMAKE_BINARY_DIR}/gandiva_extension_tests/extended_funcs")
generate_bitcode("${TEST_PRECOMPILED_SRCS}" "${TEST_BC_OUTPUT_DIR}/" TEST_BC_FILES)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CMAKE_BINARY_DIR?

Suggested change
"../../../gandiva_extension_tests/extended_funcs/" TEST_BC_FILES)
"${CMAKE_BINARY_DIR}/gandiva_extension_tests/extended_funcs/" TEST_BC_FILES)

# Umbrella target: building it copies the test data and generates the bitcode.
add_custom_target(extension-tests ALL DEPENDS extension-tests-data ${TEST_BC_FILES})

# Define the GANDIVA_EXTENSION_TEST_DIR macro for each test executable so that
# the tests can pass regardless of where they are run from. The corresponding
# extension test data files and bitcode are copied/generated before the test
# executables are built.
set(TARGETS gandiva-internals-test gandiva-projector-test gandiva-projector-test-static)
foreach(EXT_TEST_TARGET IN LISTS TARGETS)
  # Only touch the test executables that exist in this build configuration.
  if(TARGET ${EXT_TEST_TARGET})
    add_dependencies(${EXT_TEST_TARGET} extension-tests)
    target_compile_definitions(${EXT_TEST_TARGET}
                               PRIVATE -DGANDIVA_EXTENSION_TEST_DIR="${CMAKE_BINARY_DIR}/gandiva_extension_tests"
    )
  endif()
endforeach()

add_dependencies(gandiva-tests extension-tests)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"version": "1.0",
"functions": [
{
"name": "greet",
"aliases": [
],
"param_types": [
{
"type": "timestamp",
"unit": "second"
},
{
"type": "list",
"value_type": {
"type": "int32"
}
}
],
"return_type": {
"type": "decimal",
"precision": 10,
"scale": 2
},
"result_nullable": "never",
"can_return_errors": true,
"pc_name": "greet_timestamp_list"
}
]
}
20 changes: 20 additions & 0 deletions cpp/src/gandiva/extension_tests/extended_funcs/multiply_by_two.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "multiply_by_two.h" // NOLINT

// Returns `value` multiplied by two as a 64-bit integer.
// The operand is widened before multiplying: doing the multiplication in
// 32-bit arithmetic would overflow (undefined behaviour) for inputs whose
// doubled value does not fit in int32_t.
int64_t multiply_by_two_int32(int32_t value) { return static_cast<int64_t>(value) * 2; }
24 changes: 24 additions & 0 deletions cpp/src/gandiva/extension_tests/extended_funcs/multiply_by_two.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>

// Declared with C linkage.
// NOTE(review): presumably so that the symbol name stays unmangled and can be
// looked up by the "pc_name" given in registry.json -- confirm.
extern "C" {
/// \brief Multiply a 32-bit integer by two.
/// \param value the integer to double
/// \return the doubled value as a 64-bit integer
int64_t multiply_by_two_int32(int32_t value);
}
19 changes: 19 additions & 0 deletions cpp/src/gandiva/extension_tests/extended_funcs/registry.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"version": "1.0",
"functions": [
{
"name": "multiply_by_two",
"aliases": [
],
"param_types": [
{
"type": "int32"
}
],
"return_type": {
"type": "int64"
},
"pc_name": "multiply_by_two_int32"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"version": "1.0",
"functions": [
{
"name": "say_hello",
"aliases": [
"hello"
],
"param_types": [
{
"type": "utf8"
}
],
"return_type": {
"type": "int64"
},
"result_nullable": "never",
"can_return_errors": true,
"pc_name": "say_hello_utf8"
},
{
"name": "say_goodbye",
"aliases": [
],
"param_types": [
],
"return_type": {
"type": "utf8"
},
"result_nullable": "never",
"can_return_errors": true,
"pc_name": "say_goodbye"
}
]
}
Loading
Loading