Skip to content

Commit

Permalink
Add external function registry support to gandiva. JSON based functio…
Browse files Browse the repository at this point in the history
…n registry can be used for describing the function metadata, and LLVM bitcode can be automatically loaded as pre-compiled external functions.
  • Loading branch information
niyue committed Sep 21, 2023
1 parent 25fa89d commit 2e00633
Show file tree
Hide file tree
Showing 25 changed files with 939 additions and 63 deletions.
3 changes: 3 additions & 0 deletions cpp/src/gandiva/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ set(SRC_FILES
function_registry.cc
function_registry_arithmetic.cc
function_registry_datetime.cc
function_registry_external.cc
function_registry_hash.cc
function_registry_math_ops.cc
function_registry_string.cc
Expand Down Expand Up @@ -232,6 +233,7 @@ add_gandiva_test(internals-test
bitmap_accumulator_test.cc
engine_llvm_test.cc
function_registry_test.cc
function_registry_external_test.cc
function_signature_test.cc
llvm_types_test.cc
llvm_generator_test.cc
Expand All @@ -253,3 +255,4 @@ add_gandiva_test(internals-test

add_subdirectory(precompiled)
add_subdirectory(tests)
add_subdirectory(extension_tests)
84 changes: 84 additions & 0 deletions cpp/src/gandiva/cmake/GenerateBitcode.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Create an LLVM bitcode (.bc) file for each of the given source files.
#
# Arguments:
#   PRECOMPILED_SRC_LIST - list of source files to compile to bitcode
#   OUTPUT_DIR           - path prefix, relative to CMAKE_CURRENT_BINARY_DIR,
#                          prepended verbatim to each output file name (so it
#                          is expected to end with a "/" when non-empty)
#   OUTPUT_VAR           - name of the variable, set in the caller's scope,
#                          that receives the list of generated .bc files
function(generate_bitcode PRECOMPILED_SRC_LIST OUTPUT_DIR OUTPUT_VAR)
  set(LOCAL_BC_FILES "")

  if(MSVC)
    # clang pretends to be a particular version of MSVC. The standard
    # library uses C++14 features, so we have to use that -std version
    # to get the IR compilation to work.
    # See https://cmake.org/cmake/help/latest/variable/MSVC_VERSION.html
    # for MSVC_VERSION and Visual Studio version.
    #
    # The narrower range must be tested first: every value below 1920 is
    # also below 1930, so testing "LESS 1930" first would make the 19.10
    # branch unreachable.
    if(MSVC_VERSION LESS 1920)
      set(FMS_COMPATIBILITY 19.10)
    elseif(MSVC_VERSION LESS 1930)
      set(FMS_COMPATIBILITY 19.20)
    else()
      message(FATAL_ERROR "Unsupported MSVC_VERSION=${MSVC_VERSION}")
    endif()
    set(PLATFORM_CLANG_OPTIONS -std=c++17 -fms-compatibility
                               -fms-compatibility-version=${FMS_COMPATIBILITY})
  else()
    set(PLATFORM_CLANG_OPTIONS -std=c++17)
  endif()

  foreach(SRC_FILE ${PRECOMPILED_SRC_LIST})
    get_filename_component(SRC_BASE ${SRC_FILE} NAME_WE)
    get_filename_component(ABSOLUTE_SRC ${SRC_FILE} ABSOLUTE)
    set(BC_FILE ${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_DIR}${SRC_BASE}.bc)
    set(PRECOMPILE_COMMAND)
    # When an OSX sysroot is configured, run clang through "cmake -E env" so
    # that SDKROOT is set in its environment.
    if(CMAKE_OSX_SYSROOT)
      list(APPEND
           PRECOMPILE_COMMAND
           ${CMAKE_COMMAND}
           -E
           env
           SDKROOT=${CMAKE_OSX_SYSROOT})
    endif()
    list(APPEND
         PRECOMPILE_COMMAND
         ${CLANG_EXECUTABLE}
         ${PLATFORM_CLANG_OPTIONS}
         -DGANDIVA_IR
         -DNDEBUG # DCHECK macros not implemented in precompiled code
         -DARROW_STATIC # Do not set __declspec(dllimport) on MSVC on Arrow symbols
         -DGANDIVA_STATIC # Do not set __declspec(dllimport) on MSVC on Gandiva symbols
         -fno-use-cxa-atexit # Workaround for unresolved __dso_handle
         -emit-llvm
         -O3
         -c
         ${ABSOLUTE_SRC}
         -o
         ${BC_FILE}
         ${ARROW_GANDIVA_PC_CXX_FLAGS}
         -I${CMAKE_SOURCE_DIR}/src
         -I${ARROW_BINARY_DIR}/src)

    # Without native int128 support the Boost include directories are added
    # to the compile command.
    if(NOT ARROW_USE_NATIVE_INT128)
      foreach(boost_include_dir ${Boost_INCLUDE_DIRS})
        list(APPEND PRECOMPILE_COMMAND -I${boost_include_dir})
      endforeach()
    endif()
    add_custom_command(OUTPUT ${BC_FILE}
                       COMMAND ${PRECOMPILE_COMMAND}
                       DEPENDS ${SRC_FILE})
    list(APPEND LOCAL_BC_FILES ${BC_FILE})
  endforeach()
  # Hand the list of generated bitcode files back to the caller.
  set(${OUTPUT_VAR} "${LOCAL_BC_FILES}" PARENT_SCOPE)
endfunction()
40 changes: 40 additions & 0 deletions cpp/src/gandiva/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ Status Engine::LoadFunctionIRs() {
if (!functions_loaded_) {
ARROW_RETURN_NOT_OK(LoadPreCompiledIR());
ARROW_RETURN_NOT_OK(DecimalIR::AddFunctions(this));
const char* ext_dir_env = std::getenv("GANDIVA_EXTENSION_DIR");
if (ext_dir_env && strcmp(ext_dir_env, "") != 0) {
ARROW_RETURN_NOT_OK(LoadExtendedPreCompiledIR(ext_dir_env));
}
functions_loaded_ = true;
}
return Status::OK();
Expand Down Expand Up @@ -220,6 +224,42 @@ static void SetDataLayout(llvm::Module* module) {
}
// end of the modified method from MLIR

// Load extended pre-compiled IR from the given directory.
//
// Every regular file with a ".bc" extension found directly under `dir_path`
// (the scan is not recursive) is read, parsed as LLVM bitcode, verified and
// linked into the engine's main module. Loading stops at the first failure,
// which is reported as a CodeGenError status.
//
// The std::error_code overloads of the std::filesystem calls are used so that
// a missing or unreadable directory is reported through the returned Status
// instead of escaping as a std::filesystem::filesystem_error exception.
Status Engine::LoadExtendedPreCompiledIR(const std::filesystem::path& dir_path) {
  std::error_code ec;
  std::filesystem::directory_iterator dir_it(dir_path, ec);
  ARROW_RETURN_IF(ec, Status::CodeGenError("Could not open extension directory: ",
                                           dir_path.string() + " Error: " + ec.message()));

  const std::filesystem::directory_iterator dir_end;
  while (dir_it != dir_end) {
    const std::filesystem::directory_entry& entry = *dir_it;
    // Copy the path now: `entry` is invalidated once the iterator advances.
    const std::string file_path = entry.path().string();

    const bool is_regular = entry.is_regular_file(ec);
    ARROW_RETURN_IF(ec, Status::CodeGenError("Could not inspect directory entry: ",
                                             file_path + " Error: " + ec.message()));

    if (is_regular && entry.path().extension() == ".bc") {
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer_or_error =
          llvm::MemoryBuffer::getFile(file_path);

      ARROW_RETURN_IF(
          !buffer_or_error,
          Status::CodeGenError("Could not load module from IR file: ",
                               file_path + " Error: " +
                                   buffer_or_error.getError().message()));

      std::unique_ptr<llvm::MemoryBuffer> buffer = std::move(buffer_or_error.get());

      llvm::Expected<std::unique_ptr<llvm::Module>> module_or_error =
          llvm::parseBitcodeFile(buffer->getMemBufferRef(), *context());
      if (!module_or_error) {
        // Render the llvm::Error into text so it can be carried in the Status.
        std::string str;
        llvm::raw_string_ostream stream(str);
        stream << module_or_error.takeError();
        return Status::CodeGenError("Failed to parse bitcode file: " + file_path +
                                    " Error: " + stream.str());
      }
      std::unique_ptr<llvm::Module> ir_module = std::move(module_or_error.get());

      // Both verifyModule and linkModules return true on failure.
      ARROW_RETURN_IF(llvm::verifyModule(*ir_module, &llvm::errs()),
                      Status::CodeGenError("verify of IR Module failed"));
      ARROW_RETURN_IF(llvm::Linker::linkModules(*module_, std::move(ir_module)),
                      Status::CodeGenError("failed to link IR Modules"));
    }

    dir_it.increment(ec);
    ARROW_RETURN_IF(ec, Status::CodeGenError("Could not iterate extension directory: ",
                                             dir_path.string() + " Error: " + ec.message()));
  }

  return Status::OK();
}

// Handling for pre-compiled IR libraries.
Status Engine::LoadPreCompiledIR() {
auto bitcode = llvm::StringRef(reinterpret_cast<const char*>(kPrecompiledBitcode),
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/gandiva/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <filesystem>
#include <memory>
#include <set>
#include <string>
Expand Down Expand Up @@ -92,6 +93,7 @@ class GANDIVA_EXPORT Engine {
/// load pre-compiled IR modules from precompiled_bitcode.cc and merge them into
/// the main module.
Status LoadPreCompiledIR();
Status LoadExtendedPreCompiledIR(const std::filesystem::path& dir_path);

// Create and add mappings for cpp functions that can be accessed from LLVM.
void AddGlobalMappings();
Expand Down
47 changes: 47 additions & 0 deletions cpp/src/gandiva/extension_tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Nothing below is needed when tests are disabled.
if(NO_TESTS)
  return()
endif()

include(../cmake/GenerateBitcode.cmake)

# Target that copies this source directory (the testing data) into the
# build directory.
add_custom_target(extension-tests-data
                  COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}
                          ${CMAKE_BINARY_DIR}/gandiva_extension_tests)

# Compile the sample external function to LLVM bitcode.
set(EXTENDED_FUNCS_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/extended_funcs)
set(EXTENDED_FUNCS_SRCS ${EXTENDED_FUNCS_SRC_DIR}/multiply_by_two.cc)
generate_bitcode("${EXTENDED_FUNCS_SRCS}"
                 "../../../gandiva_extension_tests/extended_funcs/" EXTENDED_FUNCS_BC_FILES)
add_custom_target(extension-tests ALL DEPENDS extension-tests-data
                                              ${EXTENDED_FUNCS_BC_FILES})

# Define the GANDIVA_EXTENSION_TEST_DIR macro for each test target that exists,
# so that the tests can pass regardless of where they are run from. Depending on
# extension-tests ensures the data files are copied and the bitcode is generated.
foreach(test_target gandiva-internals-test gandiva-projector-test
                    gandiva-projector-test-static)
  if(TARGET ${test_target})
    add_dependencies(${test_target} extension-tests)
    target_compile_definitions(${test_target}
                               PRIVATE -DGANDIVA_EXTENSION_TEST_DIR="${CMAKE_BINARY_DIR}/gandiva_extension_tests"
    )
  endif()
endforeach()

add_dependencies(gandiva-tests extension-tests)
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"version": "1.0",
"functions": [
{
"name": "greet",
"aliases": [
],
"param_types": [
{
"type": "timestamp",
"unit": "second"
},
{
"type": "list",
"value_type": {
"type": "int32"
}
}
],
"return_type": {
"type": "decimal",
"precision": 10,
"scale": 2
},
"result_nullable": "never",
"can_return_errors": true,
"pc_name": "greet_timestamp_list"
}
]
}
20 changes: 20 additions & 0 deletions cpp/src/gandiva/extension_tests/extended_funcs/multiply_by_two.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "multiply_by_two.h"

// Returns twice `value` as a 64-bit integer.
//
// The operand is widened to int64_t before multiplying: doing the
// multiplication in int32_t would overflow (undefined behaviour) for inputs
// whose double does not fit in 32 bits, even though the return type can hold it.
int64_t multiply_by_two_int32(int32_t value) { return static_cast<int64_t>(value) * 2; }
24 changes: 24 additions & 0 deletions cpp/src/gandiva/extension_tests/extended_funcs/multiply_by_two.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstdint>

/// Sample external function: takes a 32-bit integer and returns a 64-bit
/// integer. Declared with C linkage so the symbol name is not C++-mangled.
extern "C" int64_t multiply_by_two_int32(int32_t value);
19 changes: 19 additions & 0 deletions cpp/src/gandiva/extension_tests/extended_funcs/registry.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"version": "1.0",
"functions": [
{
"name": "multiply_by_two",
"aliases": [
],
"param_types": [
{
"type": "int32"
}
],
"return_type": {
"type": "int64"
},
"pc_name": "multiply_by_two_int32"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"version": "1.0",
"functions": [
{
"name": "say_hello",
"aliases": [
"hello"
],
"param_types": [
{
"type": "utf8"
}
],
"return_type": {
"type": "int64"
},
"result_nullable": "never",
"can_return_errors": true,
"pc_name": "say_hello_utf8"
},
{
"name": "say_goodbye",
"aliases": [
],
"param_types": [
],
"return_type": {
"type": "utf8"
},
"result_nullable": "never",
"can_return_errors": true,
"pc_name": "say_goodbye"
}
]
}
21 changes: 21 additions & 0 deletions cpp/src/gandiva/extension_tests/multiple_registries/reg_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"version": "1.0",
"functions": [
{
"name": "say_hello",
"aliases": [
"hello"
],
"param_types": [
{
"type": "utf8"
}
],
"return_type": {
"type": "int64"
},
"result_nullable": "never",
"pc_name": "say_hello_utf8"
}
]
}
Loading

0 comments on commit 2e00633

Please sign in to comment.