From 8edefaa89e41a93af43e5d1f5a5ab2459555c4c1 Mon Sep 17 00:00:00 2001
From: Eyal Rozenberg
Date: Mon, 8 Jan 2024 01:46:51 +0200
Subject: [PATCH] Regards #570: Added support for CUDA libraries

Caveat: Not yet supporting library kernels as first-class citizens; they
are currently only intermediate entities for obtaining context-associated
kernels (CUfunction's)
---
 src/cuda/api.hpp         |   3 +
 src/cuda/api/library.hpp | 434 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 437 insertions(+)
 create mode 100644 src/cuda/api/library.hpp

diff --git a/src/cuda/api.hpp b/src/cuda/api.hpp
index ae64047a..ddc318d2 100644
--- a/src/cuda/api.hpp
+++ b/src/cuda/api.hpp
@@ -40,6 +40,9 @@
 #include "api/event.hpp"
 #include "api/kernel.hpp"
 #include "api/module.hpp"
+#if CUDA_VERSION >= 12000
+#include "api/library.hpp"
+#endif
 #include "api/link.hpp"
 #include "api/current_device.hpp"

diff --git a/src/cuda/api/library.hpp b/src/cuda/api/library.hpp
new file mode 100644
index 00000000..a351785f
--- /dev/null
+++ b/src/cuda/api/library.hpp
@@ -0,0 +1,434 @@
+/**
+ * @file
+ *
+ * @brief Wrappers for working with "libraries" of compiled CUDA code (which are similar
+ * to modules, but not associated with any CUDA context).
+ */
+#pragma once
+#ifndef CUDA_API_WRAPPERS_LIBRARY_HPP_
+#define CUDA_API_WRAPPERS_LIBRARY_HPP_
+
+#if CUDA_VERSION >= 12000
+
+#include "module.hpp"
+
+#if __cplusplus >= 201703L
+#include <filesystem>
+#endif
+
+namespace cuda {
+
+///@cond
+class device_t;
+class context_t;
+class module_t;
+class library_t;
+class kernel_t;
+///@endcond
+
+namespace library {
+
+using handle_t = CUlibrary;
+
+namespace detail_ {
+
+using option_t = CUlibraryOption;
+using kernel_handle_t = CUkernel; // Don't be confused; a context-associated kernel is a CUfunction :-(
+
+} // namespace detail_
+
+class kernel_t; // A kernel stored within a library; strangely, a context-associated kernel is a CUfunction.
+
+namespace detail_ {
+
+inline library_t wrap(
+	handle_t handle,
+	const link::options_t& link_options,
+	bool code_is_preserved,
+	bool take_ownership = false) noexcept;
+
+inline ::std::string identify(const library::handle_t &handle)
+{
+	return ::std::string("library ") + cuda::detail_::ptr_as_hex(handle);
+}
+
+::std::string identify(const library_t &library);
+
+} // namespace detail_
+
+/**
+ * Create a CUDA driver library of compiled code from raw image data.
+ *
+ * @param[in] library_data the opaque, raw binary data for the library - in a contiguous container
+ * such as a span, a cuda::dynarray etc.
+ */
+///@{
+template <typename ContiguousContainer,
+	cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> = true >
+library_t create(
+	ContiguousContainer library_data,
+	optional<link::options_t> link_options,
+	bool code_is_preserved);
+///@}
+
+} // namespace library
+
+memory::region_t get_global(const context_t& context, const library_t& library, const char* name);
+kernel_t get_kernel(const library_t& library, const char* name);
+kernel_t get_kernel(const context_t& context, const library_t& library, const char* name);
+memory::region_t get_managed_region(const library_t& library, const char* name);
+
+namespace module {
+
+module_t create(const context_t& context, const library_t& library);
+module_t create(const library_t& library);
+
+} // namespace module
+
+void* get_unified_function(const library_t& library, const char* symbol);
+void* get_unified_function(const context_t& context, const library_t& library, const char* symbol);
+
+
+/**
+ * Wrapper class for a CUDA compiled code library (like a @ref module_t , but not associated
+ * with a context)
+ */
+class library_t {
+
+public: // getters
+
+	library::handle_t handle() const { return handle_; }
+	const link::options_t& link_options() const { return link_options_; }
+	bool code_is_preserved() const { return code_is_preserved_; }
+
+	/**
+	 * Obtains an already-compiled kernel previously associated with
+	 * this library, in the current context.
+	 *
+	 * @param name The function name, in case of a C-style function,
+	 * or the mangled function signature, in case of a C++-style
+	 * function.
+	 *
+	 * @return An enqueueable kernel proxy object for the requested kernel,
+	 * in the current context.
+	 */
+	cuda::kernel_t get_kernel(const char* name) const
+	{
+		return cuda::get_kernel(context::current::get(), *this, name);
+	}
+
+	cuda::kernel_t get_kernel(const ::std::string& name) const
+	{
+		return get_kernel(name.c_str());
+	}
+
+	memory::region_t get_global(const char* name) const
+	{
+		return cuda::get_global(context::current::get(), *this, name);
+	}
+
+	memory::region_t get_global(const ::std::string& name) const
+	{
+		return get_global(name.c_str());
+	}
+
+	memory::region_t get_managed_region(const char* name)
+	{
+		return cuda::get_managed_region(*this, name);
+	}
+
+	memory::region_t get_managed_region(const ::std::string& name)
+	{
+		return get_managed_region(name.c_str());
+	}
+
+	module_t as_module()
+	{
+		return module::create(context::current::get(), *this);
+	}
+
+	void* get_unified_function(const char* symbol)
+	{
+		return cuda::get_unified_function(*this, symbol);
+	}
+
+	void* get_unified_function(const ::std::string& symbol)
+	{
+		return get_unified_function(symbol.c_str());
+	}
+
+protected: // constructors
+
+	library_t(library::handle_t handle, link::options_t link_options, bool code_is_preserved, bool owning) noexcept
+	: handle_(handle), link_options_(::std::move(link_options)), code_is_preserved_(code_is_preserved), owning_(owning)
+	{ }
+
+public: // friendship
+
+	friend library_t library::detail_::wrap(library::handle_t, const link::options_t&, bool, bool) noexcept;
+
+public: // constructors and destructor
+
+	library_t(const library_t&) = delete;
+
+	library_t(library_t&& other) noexcept :
+		library_t(other.handle_, other.link_options_, other.code_is_preserved_, other.owning_)
+	{
+		other.owning_ = false;
+	}
+
+	~library_t() noexcept(false)
+	{
+		if (owning_) {
+			auto status = cuLibraryUnload(handle_);
+			throw_if_error_lazy(status, "Failed unloading " + library::detail_::identify(handle_));
+		}
+	}
+
+public: // operators
+
+	library_t& operator=(const library_t&) = delete;
+	library_t& operator=(library_t&& other) noexcept
+	{
+		::std::swap(handle_, other.handle_);
+		::std::swap(link_options_, other.link_options_);
+		::std::swap(code_is_preserved_, other.code_is_preserved_);
+		::std::swap(owning_, other.owning_);
+		return *this;
+	}
+
+protected: // data members
+	library::handle_t handle_;
+	link::options_t link_options_;
+	bool code_is_preserved_;
+	bool owning_;
+	// this field is mutable only for enabling move construction; other
+	// than in that case it must not be altered
+};
+
+inline memory::region_t get_global(const context_t& context, const library_t& library, const char* name)
+{
+	CUdeviceptr dptr;
+	size_t size;
+	auto result = cuLibraryGetGlobal(&dptr, &size, library.handle(), name);
+	throw_if_error_lazy(result,
+		::std::string("Obtaining the memory address and size for the global object '") + name + "' from "
+		+ library::detail_::identify(library) + " in context " + context::detail_::identify(context));
+	return { memory::as_pointer(dptr), size };
+	// Note: Nothing is holding a PC refcount unit here!
+}
+
+// Implement other get's
+
+inline kernel_t get_kernel(const context_t& context, const library_t& library, const char* name)
+{
+	CAW_SET_SCOPE_CONTEXT(context.handle());
+	library::detail_::kernel_handle_t new_handle;
+	auto status = cuLibraryGetKernel(&new_handle, library.handle(), name);
+	throw_if_error_lazy(status, ::std::string("Failed obtaining kernel '") + name
+		+ "' from " + library::detail_::identify(library));
+	kernel::handle_t new_proper_kernel_handle;
+	status = cuKernelGetFunction(&new_proper_kernel_handle, new_handle);
+	throw_if_error_lazy(status, ::std::string("Failed obtaining a context-associated kernel ")
+		+ "from kernel '" + name + "' in " + library::detail_::identify(library));
+	return kernel::wrap(context.device_id(), context.handle(),
+		new_proper_kernel_handle, do_hold_primary_context_refcount_unit);
+}
+
+inline kernel_t get_kernel(const library_t& library, const char* name)
+{
+	return get_kernel(context::current::get(), library, name);
+}
+
+inline memory::region_t get_managed_region(const library_t& library, const char* name)
+{
+	memory::device::address_t region_start;
+	size_t region_size;
+	auto status = cuLibraryGetManaged(&region_start, &region_size, library.handle(), name);
+	throw_if_error_lazy(status, ::std::string("Failed obtaining the managed memory region '") + name
+		+ "' from " + library::detail_::identify(library));
+	return { memory::as_pointer(region_start), region_size };
+}
+
+namespace module {
+
+inline module_t create(const context_t& context, const library_t& library)
+{
+	CAW_SET_SCOPE_CONTEXT(context.handle());
+	module::handle_t new_handle;
+	auto status = cuLibraryGetModule(&new_handle, library.handle());
+	throw_if_error_lazy(status, ::std::string("Failed creating a module from ")
+		+ library::detail_::identify(library) + " in " + context::detail_::identify(context));
+	constexpr const bool is_owning { true };
+	return module::detail_::wrap(context.device_id(), context.handle(), new_handle, library.link_options(),
+		is_owning, do_hold_primary_context_refcount_unit);
+	// TODO: We could consider adding a variant of this function taking a context&&, and using that
+	// to decide whether or not to hold a PC refcount unit
+}
+
+inline module_t create(const library_t& library)
+{
+	return create(context::current::get(), library);
+}
+
+} // namespace module
+
+inline void* get_unified_function(const library_t& library, const char* symbol)
+{
+	void* function_ptr;
+	auto status = cuLibraryGetUnifiedFunction(&function_ptr, library.handle(), symbol);
+	throw_if_error_lazy(status, ::std::string("Failed obtaining a pointer for function '") + symbol
+		+ "' from " + library::detail_::identify(library));
+	return function_ptr;
+}
+
+inline void* get_unified_function(const context_t& context, const library_t& library, const char* symbol)
+{
+	CAW_SET_SCOPE_CONTEXT(context.handle());
+	void* function_ptr;
+	auto status = cuLibraryGetUnifiedFunction(&function_ptr, library.handle(), symbol);
+	throw_if_error_lazy(status, ::std::string("Failed obtaining a pointer for function '") + symbol
+		+ "' from " + library::detail_::identify(library) + " in " + context::detail_::identify(context));
+	return function_ptr;
+}
+
+namespace library {
+
+namespace detail_ {
+
+
+template <typename Creator, typename DataSource, typename ErrorStringGenerator>
+library_t create(
+	Creator creator,
+	DataSource data_source,
+	ErrorStringGenerator error_string_generator,
+	const link::options_t& link_options = {},
+	bool code_is_preserved = false)
+{
+	handle_t new_lib_handle;
+	auto raw_link_opts = marshal(link_options);
+	struct {
+		detail_::option_t options[1];
+		void* values[1];
+		unsigned count;
+	} raw_opts = { { CU_LIBRARY_BINARY_IS_PRESERVED }, { &code_is_preserved }, 1 };
+	auto status = creator(
+		&new_lib_handle, data_source,
+		const_cast<CUjit_option*>(raw_link_opts.options()),
+		const_cast<void**>(raw_link_opts.values()), raw_link_opts.count(),
+		raw_opts.options, raw_opts.values, raw_opts.count
+	);
+	throw_if_error_lazy(status,
+		::std::string("Failed loading a compiled CUDA code library from ") + error_string_generator());
+	bool do_take_ownership{true};
+	return detail_::wrap(new_lib_handle, link_options, code_is_preserved, do_take_ownership);
+}
+
+} // namespace detail_
+
+/**
+ * Load a library from an appropriate compiled or semi-compiled file, allocating all
+ * relevant resources for it.
+ *
+ * @param path of a cubin, PTX, or fatbin file constituting the library to be loaded.
+ * @return the loaded library
+ *
+ * @note this covers cuModuleLoadFatBinary() even though that's not directly used
+ *
+ * @todo: When switching to the C++17 standard, use string_view's instead of the const char*
+ */
+///@{
+inline library_t load_from_file(
+	const char* path,
+	const link::options_t& link_options = {},
+	bool code_is_preserved = false)
+{
+	return detail_::create(
+		cuLibraryLoadFromFile, path,
+		[path]() { return ::std::string("file ") + path; },
+		link_options, code_is_preserved);
+}
+
+inline library_t load_from_file(
+	const ::std::string& path,
+	const link::options_t& link_options = {},
+	bool code_is_preserved = false)
+{
+	return load_from_file(path.c_str(), link_options, code_is_preserved);
+}
+
+#if __cplusplus >= 201703L
+
+inline library_t load_from_file(
+	const ::std::filesystem::path& path,
+	const link::options_t& link_options = {},
+	bool code_is_preserved = false)
+{
+	return load_from_file(path.c_str(), link_options, code_is_preserved);
+}
+
+#endif
+///@}
+
+namespace detail_ {
+
+inline library_t wrap(
+	handle_t handle,
+	const link::options_t& link_options,
+	bool code_is_preserved,
+	bool take_ownership
+) noexcept
+{
+	return library_t{handle, link_options, code_is_preserved, take_ownership};
+}
+
+} // namespace detail_
+
+/**
+ * Creates a new library from raw compiled code in memory
+ *
+ * @param library_data The raw compiled code for the library.
+ * @param link_options Potential options for the PTX compilation and device linking of the code.
+ * @param code_is_preserved passed to the driver as the CU_LIBRARY_BINARY_IS_PRESERVED library loading option
+ */
+inline library_t create(
+	const void* library_data,
+	const link::options_t& link_options = {},
+	bool code_is_preserved = false)
+{
+	return detail_::create(
+		cuLibraryLoadData, library_data,
+		[library_data]() { return ::std::string("data at ") + cuda::detail_::ptr_as_hex(library_data); },
+		link_options, code_is_preserved);
+}
+
+
+// TODO: Use an optional to reduce the number of functions here... when the
+// library starts requiring C++14.
+
+namespace detail_ {
+
+inline ::std::string identify(const library_t& library)
+{
+	return identify(library.handle());
+}
+
+} // namespace detail_
+
+template <typename ContiguousContainer,
+	cuda::detail_::enable_if_t<cuda::detail_::is_kinda_like_contiguous_container<ContiguousContainer>::value, bool> >
+library_t create(
+	ContiguousContainer library_data,
+	optional<link::options_t> link_options,
+	bool code_is_preserved)
+{
+	return create(library_data.data(), link_options ? *link_options : link::options_t{}, code_is_preserved);
+}
+
+} // namespace library
+
+} // namespace cuda
+
+#endif // CUDA_VERSION >= 12000
+
+#endif // CUDA_API_WRAPPERS_LIBRARY_HPP_
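
For reference, below is a minimal usage sketch of the API this patch introduces (illustrative only, not part of the patch). It assumes CUDA 12 or later, a device with a valid current context, and a placeholder compiled-code file "saxpy.fatbin" containing a kernel named "saxpy"; both names are hypothetical.

#include <cuda/api.hpp>
#include <iostream>

int main()
{
	// Load a compiled code library (cubin / PTX / fatbin); the path is a placeholder
	auto library = cuda::library::load_from_file("saxpy.fatbin");

	// Obtain a context-associated kernel (a CUfunction) from the library, in the
	// current context; the kernel name is a placeholder
	auto kernel = library.get_kernel("saxpy");

	// A library can also be materialized as a module in the current context
	auto module = library.as_module();

	std::cout << "Loaded the library; obtained a kernel and a module from it\n";
}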