From 7bebb1bb61d058818050314336e20664a6c2ad73 Mon Sep 17 00:00:00 2001 From: Eyal Rozenberg Date: Thu, 20 Apr 2023 23:55:09 +0300 Subject: [PATCH] Fixes #500: CUDA 11.x's "NVVM" is now called "LTO IR" --- src/cuda/rtc/compilation_options.hpp | 7 ++-- src/cuda/rtc/compilation_output.hpp | 56 ++++++++++++++++++---------- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/src/cuda/rtc/compilation_options.hpp b/src/cuda/rtc/compilation_options.hpp index 61975e5b..3d136f50 100644 --- a/src/cuda/rtc/compilation_options.hpp +++ b/src/cuda/rtc/compilation_options.hpp @@ -323,8 +323,9 @@ class compilation_options_t final : bool use_fast_math { false }; /** - * Do not compile fully into PTX/Cubin. Instead, only generate NVVM (the LLVM IR variant), which is - * combined with other NVVM pieces from LTO-compiled "objects", at device link time. + * Do not compile fully into PTX/Cubin. Instead, only generate NVIDIA's "LTO IR", which is + * combined with other LTO IR pieces from object files compiled with LTO support, at + * device link time. */ bool link_time_optimization { false }; @@ -335,7 +336,7 @@ class compilation_options_t final : bool source_dirs_in_include_path { true }; /** - * Enables more aggressive device code vectorization in the NVVM optimizer. + * Enables more aggressive device code vectorization in the LTO IR optimizer. 
*/ bool extra_device_vectorization { false }; diff --git a/src/cuda/rtc/compilation_output.hpp b/src/cuda/rtc/compilation_output.hpp index 8b9618de..7a7c5766 100644 --- a/src/cuda/rtc/compilation_output.hpp +++ b/src/cuda/rtc/compilation_output.hpp @@ -151,22 +151,29 @@ inline void get_ptx(char* buffer, program::handle_t program_handle, co #if CUDA_VERSION >= 11040 -inline size_t get_nvvm_size(program::handle_t program_handle, const char *program_name = nullptr) +inline size_t get_lto_ir_size(program::handle_t program_handle, const char *program_name = nullptr) { size_t size; +#if CUDA_VERSION >= 12000 + auto status = nvrtcGetLTOIRSize(program_handle, &size); +#else auto status = nvrtcGetNVVMSize(program_handle, &size); - throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining output NVVM size for compilation of " +#endif + throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining output LTO IR size for compilation of " + identify(program_handle, program_name)); return size; } -inline void get_nvvm(char* buffer, program::handle_t program_handle, const char *program_name = nullptr) +inline void get_lto_ir(char* buffer, program::handle_t program_handle, const char *program_name = nullptr) { +#if CUDA_VERSION >= 12000 + auto status = nvrtcGetLTOIR(program_handle, buffer); +#else auto status = nvrtcGetNVVM(program_handle, buffer); - throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining output NVVM for compilation of " +#endif + throw_if_rtc_error_lazy(cuda_cpp, status, "Failed obtaining output LTO IR code for compilation of " + identify(program_handle, program_name)); } - #endif // CUDA_VERSION >= 11040 } // namespace detail_ @@ -401,47 +408,58 @@ class compilation_output_t : public compilation_output_base_t= 11040 /** - * Obtain a copy of the nvvm intermediate format result of the last compilation + * Obtain a copy of the LTO IR result of the last compilation - the intermediate + * representation used for link-time optimization * * @throws 
::std::invalid_argument if the supplied buffer is too small to hold - * the program's NVVM. + * the program's LTO IR. * * @param[inout] buffer A writable buffer large enough to contain the compiled - * program's NVVM code. + * program's LTO IR code. * @return The sub-buffer, starting at the beginning of @p buffer, containing - * exactly the compiled program's NVVM (i.e. sized down to fit the contents) + * exactly the compiled program's LTO IR (i.e. sized down to fit the contents) + * + * @note LTO IR was called NVVM in CUDA 11.x. */ /// @{ - span nvvm(span buffer) const + span lto_ir(span buffer) const { - size_t size = program::detail_::get_nvvm_size(program_handle_, program_name_.c_str()); + size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str()); if (buffer.size() < size) { - throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's NVVM (" + throw ::std::invalid_argument("Provided buffer size is insufficient for the compiled program's LTO IR (" + ::std::to_string(buffer.size()) + " < " + ::std::to_string(size) + ": " + compilation_output::detail_::identify(*this)); } - program::detail_::get_nvvm(buffer.data(), program_handle_, program_name_.c_str()); + program::detail_::get_lto_ir(buffer.data(), program_handle_, program_name_.c_str()); return { buffer.data(), size }; } - dynarray nvvm() const + dynarray lto_ir() const { - size_t size = program::detail_::get_nvvm_size(program_handle_, program_name_.c_str()); + size_t size = program::detail_::get_lto_ir_size(program_handle_, program_name_.c_str()); dynarray result(size); - program::detail_::get_nvvm(result.data(), program_handle_, program_name_.c_str()); + program::detail_::get_lto_ir(result.data(), program_handle_, program_name_.c_str()); return result; } /// @} - bool has_nvvm() const + /** + * Check whether the compilation also resulted in LTO IR - intermediate representation + * for link-time optimization + */ + bool has_lto_ir() const { 
size_t size; +#if CUDA_VERSION >= 12000 + auto status = nvrtcGetLTOIRSize(program_handle_, &size); +#else auto status = nvrtcGetNVVMSize(program_handle_, &size); +#endif if (status == NVRTC_ERROR_INVALID_PROGRAM) { return false; } - throw_if_rtc_error_lazy(cuda_cpp, status, "Failed determining whether the NVRTC program has a compiled NVVM result: " + throw_if_rtc_error_lazy(cuda_cpp, status, "Failed determining whether the NVRTC program has a compiled LTO IR result: " + compilation_output::detail_::identify(*this)); if (size == 0) { - throw ::std::logic_error("NVVM size reported as 0 by NVRTC for program: " + throw ::std::logic_error("LTO IR size reported as 0 by NVRTC for program: " + compilation_output::detail_::identify(*this)); } return true;