From b6a306d3b41251a972f89baa21ac4ec47be43c7f Mon Sep 17 00:00:00 2001 From: Yue Date: Tue, 3 Oct 2023 15:21:45 +0800 Subject: [PATCH] GH-37834: [Gandiva] Migrate to new LLVM PassManager API (#37867) ### Rationale for this change In https://github.com/apache/arrow/issues/37834, to support LLVM 17, we need to migrate to use the new LLVM PassManager API. ### What changes are included in this PR? This PR tries to migrate the legacy PassManager to the new PassManager. ### Are these changes tested? It should be covered by existing unit tests. But more performance tests may be needed to verify this change. ### Are there any user-facing changes? No * Closes: #37834 Lead-authored-by: Yue Ni Co-authored-by: Sutou Kouhei Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/CMakeLists.txt | 1 + cpp/cmake_modules/FindLLVMAlt.cmake | 24 ++--- cpp/src/gandiva/CMakeLists.txt | 2 - cpp/src/gandiva/engine.cc | 130 +++++++++++++++++++++------- 4 files changed, 115 insertions(+), 42 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f2906b960eba6..f0acab0389b19 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -152,6 +152,7 @@ set(ARROW_DOC_DIR "share/doc/${PROJECT_NAME}") set(BUILD_SUPPORT_DIR "${CMAKE_SOURCE_DIR}/build-support") set(ARROW_LLVM_VERSIONS + "17.0" "16.0" "15.0" "14.0" diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake index e980f53fd3407..69f680824b082 100644 --- a/cpp/cmake_modules/FindLLVMAlt.cmake +++ b/cpp/cmake_modules/FindLLVMAlt.cmake @@ -86,16 +86,20 @@ if(LLVM_FOUND) target_link_libraries(LLVM::LLVM_LIBS INTERFACE LLVM) else() # Find the libraries that correspond to the LLVM components - llvm_map_components_to_libnames(LLVM_LIBS - core - mcjit - native - ipo - bitreader - target - linker - analysis - debuginfodwarf) + set(LLVM_TARGET_COMPONENTS + analysis + bitreader + core + debuginfodwarf + ipo + linker + mcjit + native + target) + if(LLVM_VERSION_MAJOR GREATER_EQUAL 14) + 
list(APPEND LLVM_TARGET_COMPONENTS passes) + endif() + llvm_map_components_to_libnames(LLVM_LIBS ${LLVM_TARGET_COMPONENTS}) target_link_libraries(LLVM::LLVM_LIBS INTERFACE ${LLVM_LIBS}) if(TARGET LLVMSupport AND NOT ARROW_ZSTD_USE_SHARED) diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index db260b5acc933..d2810c39f723c 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -31,8 +31,6 @@ if(ARROW_WITH_ZSTD AND "${zstd_SOURCE}" STREQUAL "SYSTEM") provide_find_module(zstdAlt "Gandiva") endif() -add_definitions(-DGANDIVA_LLVM_VERSION=${LLVM_VERSION_MAJOR}) - # Set the path where the bitcode file generated, see precompiled/CMakeLists.txt set(GANDIVA_PRECOMPILED_BC_PATH "${CMAKE_CURRENT_BINARY_DIR}/irhelpers.bc") set(GANDIVA_PRECOMPILED_CC_PATH "${CMAKE_CURRENT_BINARY_DIR}/precompiled_bitcode.cc") diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index 7d75793a3e9e7..b6c78da89d575 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -53,18 +53,33 @@ #include #include #include +#if LLVM_VERSION_MAJOR >= 17 +#include +#else #include +#endif +#include #include #include +#include +#include #if LLVM_VERSION_MAJOR >= 14 +#include #include +#include +#include +#include +#include +#include +#include +#include #else #include +#include #endif #include #include #include -#include #include #include #include @@ -268,49 +283,104 @@ Status Engine::LoadPreCompiledIR() { // a pass for dead code elimination. 
Status Engine::RemoveUnusedFunctions() { // Setup an optimiser pipeline - std::unique_ptr<llvm::legacy::PassManager> pass_manager( - new llvm::legacy::PassManager()); + llvm::PassBuilder pass_builder; + llvm::ModuleAnalysisManager module_am; + + pass_builder.registerModuleAnalyses(module_am); + llvm::ModulePassManager module_pm; std::unordered_set<std::string> used_functions; used_functions.insert(functions_to_compile_.begin(), functions_to_compile_.end()); - pass_manager->add( - llvm::createInternalizePass([&used_functions](const llvm::GlobalValue& func) { - return (used_functions.find(func.getName().str()) != used_functions.end()); + module_pm.addPass( + llvm::InternalizePass([&used_functions](const llvm::GlobalValue& variable) -> bool { + return used_functions.find(variable.getName().str()) != used_functions.end(); })); - pass_manager->add(llvm::createGlobalDCEPass()); - pass_manager->run(*module_); + module_pm.addPass(llvm::GlobalDCEPass()); + + module_pm.run(*module_, module_am); return Status::OK(); } +// several passes requiring LLVM 14+ that are not available in the legacy pass manager +#if LLVM_VERSION_MAJOR >= 14 +static void OptimizeModuleWithNewPassManager(llvm::Module& module, + llvm::TargetIRAnalysis target_analysis) { + // Setup an optimiser pipeline + llvm::PassBuilder pass_builder; + llvm::LoopAnalysisManager loop_am; + llvm::FunctionAnalysisManager function_am; + llvm::CGSCCAnalysisManager cgscc_am; + llvm::ModuleAnalysisManager module_am; + + function_am.registerPass([&] { return target_analysis; }); + + // Register required analysis managers + pass_builder.registerModuleAnalyses(module_am); + pass_builder.registerCGSCCAnalyses(cgscc_am); + pass_builder.registerFunctionAnalyses(function_am); + pass_builder.registerLoopAnalyses(loop_am); + pass_builder.crossRegisterProxies(loop_am, function_am, cgscc_am, module_am); + + pass_builder.registerPipelineStartEPCallback([&](llvm::ModulePassManager& module_pm, + llvm::OptimizationLevel Level) { + module_pm.addPass(llvm::ModuleInlinerPass()); +
+ llvm::FunctionPassManager function_pm; + function_pm.addPass(llvm::InstCombinePass()); + function_pm.addPass(llvm::PromotePass()); + function_pm.addPass(llvm::GVNPass()); + function_pm.addPass(llvm::NewGVNPass()); + function_pm.addPass(llvm::SimplifyCFGPass()); + function_pm.addPass(llvm::LoopVectorizePass()); + function_pm.addPass(llvm::SLPVectorizerPass()); + module_pm.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(function_pm))); + + module_pm.addPass(llvm::GlobalOptPass()); + }); + + pass_builder.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O3) + .run(module, module_am); +} +#else +static void OptimizeModuleWithLegacyPassManager(llvm::Module& module, + llvm::TargetIRAnalysis target_analysis) { + std::unique_ptr<llvm::legacy::PassManager> pass_manager( + new llvm::legacy::PassManager()); + + pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis)); + pass_manager->add(llvm::createFunctionInliningPass()); + pass_manager->add(llvm::createInstructionCombiningPass()); + pass_manager->add(llvm::createPromoteMemoryToRegisterPass()); + pass_manager->add(llvm::createGVNPass()); + pass_manager->add(llvm::createNewGVNPass()); + pass_manager->add(llvm::createCFGSimplificationPass()); + pass_manager->add(llvm::createLoopVectorizePass()); + pass_manager->add(llvm::createSLPVectorizerPass()); + pass_manager->add(llvm::createGlobalOptimizerPass()); + + // run the optimiser + llvm::PassManagerBuilder pass_builder; + pass_builder.OptLevel = 3; + pass_builder.populateModulePassManager(*pass_manager); + pass_manager->run(module); +} +#endif + // Optimise and compile the module. Status Engine::FinalizeModule() { if (!cached_) { ARROW_RETURN_NOT_OK(RemoveUnusedFunctions()); if (optimize_) { - // misc passes to allow for inlining, vectorization, ..
- std::unique_ptr<llvm::legacy::PassManager> pass_manager( - new llvm::legacy::PassManager()); - - llvm::TargetIRAnalysis target_analysis = - execution_engine_->getTargetMachine()->getTargetIRAnalysis(); - pass_manager->add(llvm::createTargetTransformInfoWrapperPass(target_analysis)); - pass_manager->add(llvm::createFunctionInliningPass()); - pass_manager->add(llvm::createInstructionCombiningPass()); - pass_manager->add(llvm::createPromoteMemoryToRegisterPass()); - pass_manager->add(llvm::createGVNPass()); - pass_manager->add(llvm::createNewGVNPass()); - pass_manager->add(llvm::createCFGSimplificationPass()); - pass_manager->add(llvm::createLoopVectorizePass()); - pass_manager->add(llvm::createSLPVectorizerPass()); - pass_manager->add(llvm::createGlobalOptimizerPass()); - - // run the optimiser - llvm::PassManagerBuilder pass_builder; - pass_builder.OptLevel = 3; - pass_builder.populateModulePassManager(*pass_manager); - pass_manager->run(*module_); + auto target_analysis = execution_engine_->getTargetMachine()->getTargetIRAnalysis(); + +// misc passes to allow for inlining, vectorization, .. +#if LLVM_VERSION_MAJOR >= 14 + OptimizeModuleWithNewPassManager(*module_, target_analysis); +#else + OptimizeModuleWithLegacyPassManager(*module_, target_analysis); +#endif } ARROW_RETURN_IF(llvm::verifyModule(*module_, &llvm::errs()),