From 9ad1a532117f3512561a767858cd96540661de6a Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Wed, 12 Jun 2024 15:21:04 -0700 Subject: [PATCH 1/9] Remove the compile-time link dependency for the Vulkan loader, and resolve the instance methods dynamically. Update the Vulkan readme to match the latest information regarding the SDK packages. --- Makefile | 7 ---- README_vulkan.md | 4 +-- cmake/HalideGeneratorHelpers.cmake | 5 --- src/runtime/mini_vulkan.h | 4 +-- src/runtime/vulkan_context.h | 49 ++++++++++++++++++++++++---- src/runtime/vulkan_functions.h | 3 +- src/runtime/vulkan_interface.h | 52 +++++++++++++++++++++++++++--- 7 files changed, 96 insertions(+), 28 deletions(-) diff --git a/Makefile b/Makefile index 761dfb058b75..775e45803a73 100644 --- a/Makefile +++ b/Makefile @@ -335,9 +335,6 @@ endif ifneq ($(TEST_OPENCL), ) OPENCL_LD_FLAGS ?= -lOpenCL endif -ifneq ($(TEST_VULKAN), ) -VULKAN_LD_FLAGS ?= -lvulkan -endif HOST_OS=linux endif @@ -349,10 +346,6 @@ endif ifneq ($(TEST_OPENCL), ) OPENCL_LD_FLAGS ?= -framework OpenCL endif -ifneq ($(TEST_VULKAN), ) -# The Vulkan loader is distributed as a dylib on OSX (not a framework) -VULKAN_LD_FLAGS ?= -lvulkan -endif ifneq ($(TEST_METAL), ) METAL_LD_FLAGS ?= -framework Metal -framework Foundation endif diff --git a/README_vulkan.md b/README_vulkan.md index 017dd56aed73..fa626afc37d2 100644 --- a/README_vulkan.md +++ b/README_vulkan.md @@ -57,9 +57,9 @@ For Vulkan device drivers, consult the appropriate hardware vendor for your devi ## Linux -On Ubuntu Linux v22.04, the vulkan runtime is distributed in the `vulkan-tools` package. For earlier versions of Ubuntu (e.g. v20.x or v18.x) the contents of the `vulkan-tools` package was distributed as `vulkan-utils` so use that package instead. +The Vulkan SDK packages are now being maintained by LunarG. These include the Vulkan Loader library, as well as the Vulkan Tools packages. Instructions for installing these can be found on their [Getting Started Guide](https://vulkan.lunarg.com/doc/view/latest/linux/getting_started_ubuntu.html). -Proprietary drivers can be installed via 'apt' using PPA's for each vendor. Examples for AMD and NVIDIA are provided below. +Once the SDK has been installed, you need to install the appropriate driver for your device. Proprietary drivers can be installed via 'apt' using PPA's for each vendor. Examples for AMD and NVIDIA are provided below. For AMD on Ubuntu v22.04: ``` diff --git a/cmake/HalideGeneratorHelpers.cmake b/cmake/HalideGeneratorHelpers.cmake index 3aa380da450e..ae2698cf1ce2 100644 --- a/cmake/HalideGeneratorHelpers.cmake +++ b/cmake/HalideGeneratorHelpers.cmake @@ -712,11 +712,6 @@ function(_Halide_add_targets_to_runtime TARGET) endfunction() function(_Halide_target_link_gpu_libs TARGET VISIBILITY) - if ("${ARGN}" MATCHES "vulkan") - find_package(Vulkan REQUIRED) - target_link_libraries(${TARGET} ${VISIBILITY} Vulkan::Vulkan) - endif () - if ("${ARGN}" MATCHES "metal") find_library(FOUNDATION_LIBRARY Foundation REQUIRED) find_library(METAL_LIBRARY Metal REQUIRED) diff --git a/src/runtime/mini_vulkan.h b/src/runtime/mini_vulkan.h index 1eff0ad7310b..c02f6044a308 100644 --- a/src/runtime/mini_vulkan.h +++ b/src/runtime/mini_vulkan.h @@ -2639,18 +2639,16 @@ typedef void(VKAPI_PTR *PFN_vkCmdNextSubpass)(VkCommandBuffer commandBuffer, VkS typedef void(VKAPI_PTR *PFN_vkCmdEndRenderPass)(VkCommandBuffer commandBuffer); typedef void(VKAPI_PTR *PFN_vkCmdExecuteCommands)(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCommandBuffers); -// This appears to be exported by the loader +#ifndef VK_NO_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateInstance( const VkInstanceCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkInstance *pInstance); -// Same as above ... these two methods are the only prototypes we depend upon VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vkGetInstanceProcAddr( VkInstance instance, const char *pName); -#ifndef VK_NO_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkDestroyInstance( VkInstance instance, const VkAllocationCallbacks *pAllocator); diff --git a/src/runtime/vulkan_context.h b/src/runtime/vulkan_context.h index 80165318643e..ad24befff5db 100644 --- a/src/runtime/vulkan_context.h +++ b/src/runtime/vulkan_context.h @@ -88,27 +88,52 @@ int vk_find_compute_capability(void *user_context, int *major, int *minor) { VkPhysicalDevice physical_device = nullptr; uint32_t queue_family_index = 0; + if (!lib_vulkan) { + // If the Vulkan loader can't be found, we want to return compute + // capability of (0, 0) ... this is not considered an error. So we + // should be very careful about looking for Vulkan without tripping + // any errors in the rest of this runtime. + void *sym = halide_vulkan_get_symbol(user_context, "vkCreateInstance"); + if (!sym) { + *major = *minor = 0; + return halide_error_code_success; + } + } + + if (vkCreateInstance == nullptr) { + vk_load_vulkan_loader_functions(user_context); + if(vkCreateInstance == nullptr) { + debug(user_context) << " no valid vulkan loader library was found ...\n"; + *major = *minor = 0; + return halide_error_code_success; + } + } + StringTable requested_layers; vk_get_requested_layers(user_context, requested_layers); + // Attempt to find a suitable instance that can support the requested layers const VkAllocationCallbacks *alloc_callbacks = halide_vulkan_get_allocation_callbacks(user_context); int status = vk_create_instance(user_context, requested_layers, &instance, alloc_callbacks); if (status != halide_error_code_success) { debug(user_context) << " no valid vulkan runtime was found ...\n"; - *major = 0; - *minor = 0; + *major = *minor = 0; return 0; } if (vkCreateDevice == nullptr) { - vk_load_vulkan_functions(instance); + vk_load_vulkan_functions(user_context, instance); + if(vkCreateDevice == nullptr) { + debug(user_context) << " no valid vulkan runtime was found ...\n"; + *major = *minor = 0; + return halide_error_code_success; + } } status = vk_select_device_for_context(user_context, &instance, &device, &physical_device, &queue_family_index); if (status != halide_error_code_success) { debug(user_context) << " no valid vulkan device was found ...\n"; - *major = 0; - *minor = 0; + *major = *minor = 0; return 0; } @@ -425,6 +450,14 @@ int vk_create_context(void *user_context, VulkanMemoryAllocator **allocator, debug(user_context) << " vk_create_context (user_context: " << user_context << ")\n"; + if (vkCreateInstance == nullptr) { + vk_load_vulkan_loader_functions(user_context); + if(vkCreateInstance == nullptr) { + error(user_context) << "Vulkan: Failed to resolve loader library methods to create instance!\n"; + return halide_error_code_symbol_not_found; + } + } + StringTable requested_layers; uint32_t requested_layer_count = vk_get_requested_layers(user_context, requested_layers); debug(user_context) << " requested " << requested_layer_count << " layers for instance!\n"; @@ -440,7 +473,11 @@ int vk_create_context(void *user_context, VulkanMemoryAllocator **allocator, } if (vkCreateDevice == nullptr) { - vk_load_vulkan_functions(*instance); + vk_load_vulkan_functions(user_context, *instance); + if (vkCreateDevice == nullptr) { + error(user_context) << "Vulkan: Failed to resolve API library methods to create device!\n"; + return halide_error_code_symbol_not_found; + } } error_code = vk_select_device_for_context(user_context, instance, device, physical_device, queue_family_index); diff --git a/src/runtime/vulkan_functions.h b/src/runtime/vulkan_functions.h index d1c0a8bfd32c..568a10dcc74f 100644 --- a/src/runtime/vulkan_functions.h +++ b/src/runtime/vulkan_functions.h @@ -1,4 +1,5 @@ -// NOTE: vkCreateInstance is already defined in the mini_vulkan header +// NOTE: vkCreateInstance and vkGetInstanceProcAddr are defined in the loader library and will be resolved seperately +// The rest of these are resolved via vkGetInstanceProcAddr which the loader exports and maps to the driver implementation VULKAN_FN(vkDestroyInstance) VULKAN_FN(vkCreateDevice) VULKAN_FN(vkDestroyDevice) diff --git a/src/runtime/vulkan_interface.h b/src/runtime/vulkan_interface.h index 676c8548f6fc..000bfe2ced22 100644 --- a/src/runtime/vulkan_interface.h +++ b/src/runtime/vulkan_interface.h @@ -37,25 +37,69 @@ namespace Vulkan { // -------------------------------------------------------------------------- -// Halide device interface struct for runtime specific funtion table +// Halide device interface struct for runtime specific function table extern WEAK halide_device_interface_t vulkan_device_interface; // -------------------------------------------------------------------------- +// The default implementation of halide_vulkan_get_symbol attempts to load +// the Vulkan loader shared library/DLL, and then get the symbol from it. +WEAK void *lib_vulkan = nullptr; + +extern "C" WEAK void *halide_vulkan_get_symbol(void *user_context, const char *name) { + // Only try to load the library if the library isn't already + // loaded, or we can't load the symbol from the process already. + void *symbol = halide_get_library_symbol(lib_vulkan, name); + if (symbol) { + return symbol; + } + + const char *lib_names[] = { +#if defined(_WIN32) + "vulkan-1.dll", +#elif defined(__APPLE__) + "libvulkan.1.dylib", +#else + "libvulkan.so.1" +#endif + }; + for (auto &lib_name : lib_names) { + lib_vulkan = halide_load_library(lib_name); + if (lib_vulkan) { + debug(user_context) << " Loaded Vulkan loader library: " << lib_name << "\n"; + break; + } + } + + return halide_get_library_symbol(lib_vulkan, name); +} + +// Declare all the function pointers for the Vulkan API methods that will be resolved dynamically // clang-format off #define VULKAN_FN(fn) WEAK PFN_##fn fn; +VULKAN_FN(vkCreateInstance) +VULKAN_FN(vkGetInstanceProcAddr) #include "vulkan_functions.h" #undef VULKAN_FN // clang-format on -void WEAK vk_load_vulkan_functions(VkInstance instance) { +// Get the function pointers from the Vulkan loader to create an Instance (to find all available driver implementations) +void WEAK vk_load_vulkan_loader_functions(void *user_context) { + debug(user_context) << " vk_load_vulkan_loader_functions (user_context: " << user_context << ")\n"; +#define VULKAN_FN(fn) fn = (PFN_##fn)halide_vulkan_get_symbol(user_context, #fn); +VULKAN_FN(vkCreateInstance) +VULKAN_FN(vkGetInstanceProcAddr) +#undef VULKAN_FN +} + +// Get the function pointers from the Vulkan instance for the resolved driver API methods. +void WEAK vk_load_vulkan_functions(void *user_context, VkInstance instance) { + debug(user_context) << " vk_load_vulkan_functions (user_context: " << user_context << ")\n"; #define VULKAN_FN(fn) fn = (PFN_##fn)vkGetInstanceProcAddr(instance, #fn); #include "vulkan_functions.h" #undef VULKAN_FN } -// -- - // -------------------------------------------------------------------------- } // namespace Vulkan From 3a876900c91885b3bea130b35e04b1c170182995 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Wed, 12 Jun 2024 16:00:07 -0700 Subject: [PATCH 2/9] Formatting pass.w --- src/runtime/vulkan_context.h | 10 +++++----- src/runtime/vulkan_interface.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/runtime/vulkan_context.h b/src/runtime/vulkan_context.h index ad24befff5db..bd58d556b325 100644 --- a/src/runtime/vulkan_context.h +++ b/src/runtime/vulkan_context.h @@ -90,8 +90,8 @@ int vk_find_compute_capability(void *user_context, int *major, int *minor) { if (!lib_vulkan) { // If the Vulkan loader can't be found, we want to return compute - // capability of (0, 0) ... this is not considered an error. So we - // should be very careful about looking for Vulkan without tripping + // capability of (0, 0) ... this is not considered an error. So we + // should be very careful about looking for Vulkan without tripping // any errors in the rest of this runtime. void *sym = halide_vulkan_get_symbol(user_context, "vkCreateInstance"); if (!sym) { @@ -102,7 +102,7 @@ int vk_find_compute_capability(void *user_context, int *major, int *minor) { if (vkCreateInstance == nullptr) { vk_load_vulkan_loader_functions(user_context); - if(vkCreateInstance == nullptr) { + if (vkCreateInstance == nullptr) { debug(user_context) << " no valid vulkan loader library was found ...\n"; *major = *minor = 0; return halide_error_code_success; @@ -123,7 +123,7 @@ int vk_find_compute_capability(void *user_context, int *major, int *minor) { if (vkCreateDevice == nullptr) { vk_load_vulkan_functions(user_context, instance); - if(vkCreateDevice == nullptr) { + if (vkCreateDevice == nullptr) { debug(user_context) << " no valid vulkan runtime was found ...\n"; *major = *minor = 0; return halide_error_code_success; @@ -452,7 +452,7 @@ int vk_create_context(void *user_context, VulkanMemoryAllocator **allocator, if (vkCreateInstance == nullptr) { vk_load_vulkan_loader_functions(user_context); - if(vkCreateInstance == nullptr) { + if (vkCreateInstance == nullptr) { error(user_context) << "Vulkan: Failed to resolve loader library methods to create instance!\n"; return halide_error_code_symbol_not_found; } diff --git a/src/runtime/vulkan_interface.h b/src/runtime/vulkan_interface.h index 000bfe2ced22..6ec06e787f7d 100644 --- a/src/runtime/vulkan_interface.h +++ b/src/runtime/vulkan_interface.h @@ -87,8 +87,8 @@ VULKAN_FN(vkGetInstanceProcAddr) void WEAK vk_load_vulkan_loader_functions(void *user_context) { debug(user_context) << " vk_load_vulkan_loader_functions (user_context: " << user_context << ")\n"; #define VULKAN_FN(fn) fn = (PFN_##fn)halide_vulkan_get_symbol(user_context, #fn); -VULKAN_FN(vkCreateInstance) -VULKAN_FN(vkGetInstanceProcAddr) + VULKAN_FN(vkCreateInstance) + VULKAN_FN(vkGetInstanceProcAddr) #undef VULKAN_FN } From 754cc7e8e35e3f9885df64ce6130f6148210c950 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Wed, 12 Jun 2024 16:00:28 -0700 Subject: [PATCH 3/9] Add runtime check to verify shared memory amount used in pipeline can be run on device --- src/runtime/vulkan_memory.h | 3 +++ src/runtime/vulkan_resources.h | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/runtime/vulkan_memory.h b/src/runtime/vulkan_memory.h index 055fbef72277..1a88bc141cbf 100644 --- a/src/runtime/vulkan_memory.h +++ b/src/runtime/vulkan_memory.h @@ -79,6 +79,9 @@ class VulkanMemoryAllocator { VkPhysicalDevice current_physical_device() const { return this->physical_device; } + VkPhysicalDeviceLimits current_physical_device_limits() const { + return this->physical_device_limits; + } const VkAllocationCallbacks *callbacks() const { return this->alloc_callbacks; } diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index d4b7bf866d11..016e1e3843be 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -923,6 +923,21 @@ int vk_setup_compute_pipeline(void *user_context, dispatch_constant_values[dispatch_constant_index] = dynamic_array_size; dispatch_constant_index++; } + + // verify the device can actually support the necessary amount of shared memory requested + if (allocator->current_physical_device_limits().maxComputeSharedMemorySize > 0) { + uint64_t device_shared_mem_size = allocator->current_physical_device_limits().maxComputeSharedMemorySize; + if (static_shared_mem_bytes > device_shared_mem_size) { + error(user_context) << "Vulkan: Amount of static shared memory used exceeds device limit! " + << static_shared_mem_bytes << " > " << device_shared_mem_size << " bytes\n"; + return halide_error_code_incompatible_device_interface; + } + if (dispatch_data->shared_mem_bytes > device_shared_mem_size) { + error(user_context) << "Vulkan: Amount of dynamic shared memory exceeds device limit! " + << (uint64_t)dispatch_data->shared_mem_bytes << " > " << device_shared_mem_size << " bytes\n"; + return halide_error_code_incompatible_device_interface; + } + } } // locate the mapping for overriding any dynamic workgroup local sizes From 42871735074fdb2158e50ad161389542b945910b Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Thu, 13 Jun 2024 15:51:15 -0700 Subject: [PATCH 4/9] Fix platform ifdefs for Vulkan library names (normal ones arent defined when the runtime is compiled). --- src/runtime/vulkan_interface.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/runtime/vulkan_interface.h b/src/runtime/vulkan_interface.h index 6ec06e787f7d..d7ff99c860ea 100644 --- a/src/runtime/vulkan_interface.h +++ b/src/runtime/vulkan_interface.h @@ -55,12 +55,11 @@ extern "C" WEAK void *halide_vulkan_get_symbol(void *user_context, const char *n } const char *lib_names[] = { -#if defined(_WIN32) +#ifdef WINDOWS "vulkan-1.dll", -#elif defined(__APPLE__) - "libvulkan.1.dylib", #else - "libvulkan.so.1" + "libvulkan.so.1", + "libvulkan.1.dylib", #endif }; for (auto &lib_name : lib_names) { @@ -68,6 +67,8 @@ extern "C" WEAK void *halide_vulkan_get_symbol(void *user_context, const char *n if (lib_vulkan) { debug(user_context) << " Loaded Vulkan loader library: " << lib_name << "\n"; break; + } else { + debug(user_context) << " Missing Vulkan loader library: " << lib_name << "\n"; } } From 4f7f64072dfa79c4f4edcdf5e9b537fb90723b16 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Thu, 13 Jun 2024 15:55:58 -0700 Subject: [PATCH 5/9] Detect if VK_LAYER_KHRONOS_validation is enabled, and bypass the module destructor which calls halide_vulkan_device_release() to avoid a segfault (at the cost of leaking!). See https://github.com/halide/Halide/issues/8290. Refactor and cleanup halide_vulkan_device_release(). Add vk_destroy_context() methods. --- src/runtime/vulkan.cpp | 58 +++++++++++++++++----------------- src/runtime/vulkan_context.h | 30 ++++++++++++++++-- src/runtime/vulkan_functions.h | 1 + src/runtime/vulkan_internal.h | 12 +++++++ src/runtime/vulkan_resources.h | 6 +++- 5 files changed, 75 insertions(+), 32 deletions(-) diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp index a4765e4f56fa..cb79950f7421 100644 --- a/src/runtime/vulkan.cpp +++ b/src/runtime/vulkan.cpp @@ -210,47 +210,36 @@ WEAK int halide_vulkan_device_release(void *user_context) { debug(user_context) << "halide_vulkan_device_release (user_context: " << user_context << ")\n"; - VulkanMemoryAllocator *allocator; - VkInstance instance; - VkDevice device; - VkCommandPool command_pool; - VkPhysicalDevice physical_device; - VkQueue queue; - uint32_t _throwaway; - + VulkanMemoryAllocator *allocator = nullptr; + VkInstance instance = nullptr; + VkDevice device = nullptr; + VkCommandPool command_pool = VkInvalidCommandPool; + VkPhysicalDevice physical_device = nullptr; + VkQueue queue = nullptr; + uint32_t queue_family_index = 0; + + int destroy_status = halide_error_code_success; int acquire_status = halide_vulkan_acquire_context(user_context, reinterpret_cast(&allocator), - &instance, &device, &physical_device, &command_pool, &queue, &_throwaway, false); + &instance, &device, &physical_device, &command_pool, &queue, &queue_family_index, false); - if ((acquire_status == halide_error_code_success) && (instance != nullptr)) { - vkQueueWaitIdle(queue); - if (command_pool == cached_command_pool) { - cached_command_pool = 0; - } - if (reinterpret_cast(allocator) == cached_allocator) { + if (acquire_status == halide_error_code_success) { + // Destroy the context if we created it + if ((instance == cached_instance) && (device == cached_device)) { + destroy_status = vk_destroy_context(user_context, allocator, instance, device, physical_device, command_pool, queue); + cached_command_pool = VkInvalidCommandPool; cached_allocator = nullptr; - } - - vk_destroy_command_pool(user_context, allocator, command_pool); - vk_destroy_shader_modules(user_context, allocator); - vk_destroy_memory_allocator(user_context, allocator); - - if (device == cached_device) { cached_device = nullptr; cached_physical_device = nullptr; cached_queue = nullptr; cached_queue_family_index = 0; - } - vkDestroyDevice(device, nullptr); - - if (instance == cached_instance) { cached_instance = nullptr; } - vkDestroyInstance(instance, nullptr); + halide_vulkan_release_context(user_context, instance, device, queue); } - return halide_error_code_success; + return destroy_status; } WEAK int halide_vulkan_device_malloc(void *user_context, halide_buffer_t *buf) { @@ -1409,7 +1398,18 @@ WEAK __attribute__((constructor)) void register_vulkan_allocation_pool() { } WEAK __attribute__((destructor)) void halide_vulkan_cleanup() { - halide_vulkan_device_release(nullptr); + // FIXME: When the VK_LAYER_KHRONOS_validation is intercepting calls to the API, it will + // cause a segfault if it's invoked inside the destructor since it uses it's own global + // state to track object usage which is no longer valid once this call is invoked. + // Calling this destructor with the validator hooks in place will cause an uncaught + // exception for an uninitialized mutex lock. We can avoid the crash on exit by \ + // bypassing the device release call and leak (!!!!) + // ISSUE: https://github.com/halide/Halide/issues/8290 + const char *layer_names = vk_get_layer_names_internal(nullptr); + bool has_validation_layer = strstr(layer_names, "VK_LAYER_KHRONOS_validation"); + if (!has_validation_layer) { + halide_vulkan_device_release(nullptr); + } } // -------------------------------------------------------------------------- diff --git a/src/runtime/vulkan_context.h b/src/runtime/vulkan_context.h index bd58d556b325..f95cb441fb4a 100644 --- a/src/runtime/vulkan_context.h +++ b/src/runtime/vulkan_context.h @@ -24,7 +24,7 @@ halide_vulkan_memory_allocator *WEAK cached_allocator = nullptr; // Cached instance related handles for device resources VkInstance WEAK cached_instance = nullptr; VkDevice WEAK cached_device = nullptr; -VkCommandPool WEAK cached_command_pool = 0; +VkCommandPool WEAK cached_command_pool = VkInvalidCommandPool; VkQueue WEAK cached_queue = nullptr; VkPhysicalDevice WEAK cached_physical_device = nullptr; uint32_t WEAK cached_queue_family_index = 0; @@ -42,7 +42,7 @@ class VulkanContext { VulkanMemoryAllocator *allocator = nullptr; VkInstance instance = nullptr; VkDevice device = nullptr; - VkCommandPool command_pool = 0; + VkCommandPool command_pool = VkInvalidCommandPool; VkPhysicalDevice physical_device = nullptr; VkQueue queue = nullptr; uint32_t queue_family_index = 0; // used for operations requiring queue family @@ -507,6 +507,32 @@ int vk_create_context(void *user_context, VulkanMemoryAllocator **allocator, return halide_error_code_success; } +// Destroys the context and all associated resources (used by halide_vulkan_device_release) +// NOTE: This should be called inside an acquire_context/release_context scope +int vk_destroy_context(void *user_context, VulkanMemoryAllocator *allocator, + VkInstance instance, VkDevice device, VkPhysicalDevice physical_device, + VkCommandPool command_pool, VkQueue queue) { + + debug(user_context) + << "vk_destroy_context (user_context: " << user_context << ")\n"; + + if (device != nullptr) { + vkDeviceWaitIdle(device); + } + if ((command_pool != VkInvalidCommandPool) && (allocator != nullptr)) { + vk_destroy_command_pool(user_context, allocator, command_pool); + vk_destroy_shader_modules(user_context, allocator); + vk_destroy_memory_allocator(user_context, allocator); + } + if (device != nullptr) { + vkDestroyDevice(device, nullptr); + } + if (instance != nullptr) { + vkDestroyInstance(instance, nullptr); + } + return halide_error_code_success; +} + // -------------------------------------------------------------------------- } // namespace diff --git a/src/runtime/vulkan_functions.h b/src/runtime/vulkan_functions.h index 568a10dcc74f..22cb28d750e5 100644 --- a/src/runtime/vulkan_functions.h +++ b/src/runtime/vulkan_functions.h @@ -2,6 +2,7 @@ // The rest of these are resolved via vkGetInstanceProcAddr which the loader exports and maps to the driver implementation VULKAN_FN(vkDestroyInstance) VULKAN_FN(vkCreateDevice) +VULKAN_FN(vkDeviceWaitIdle) VULKAN_FN(vkDestroyDevice) VULKAN_FN(vkGetDeviceQueue) VULKAN_FN(vkCreateBuffer) diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h index 05eb03361d15..df2ac962590f 100644 --- a/src/runtime/vulkan_internal.h +++ b/src/runtime/vulkan_internal.h @@ -58,6 +58,15 @@ int vk_create_context( VkCommandPool *command_pool, VkQueue *queue, uint32_t *queue_family_index); +int vk_destroy_context( + void *user_context, + VulkanMemoryAllocator *allocator, + VkInstance instance, + VkDevice device, + VkPhysicalDevice physical_device, + VkCommandPool command_pool, + VkQueue queue); + int vk_find_compute_capability(void *user_context, int *major, int *minor); int vk_create_instance(void *user_context, const StringTable &requested_layers, VkInstance *instance, const VkAllocationCallbacks *alloc_callbacks); @@ -92,6 +101,9 @@ bool vk_validate_required_extension_support(void *user_context, int vk_create_command_pool(void *user_context, VulkanMemoryAllocator *allocator, uint32_t queue_index, VkCommandPool *command_pool); int vk_destroy_command_pool(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool); +// Command pools are uint64_t and zero may be valid, so use this as a sentinel for invalid +static const VkCommandPool VkInvalidCommandPool(uint64_t(-1)); + // -- Command Buffer int vk_create_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool pool, VkCommandBuffer *command_buffer); int vk_destroy_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool, VkCommandBuffer command_buffer); diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index 016e1e3843be..838e6669a0b7 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -113,7 +113,11 @@ int vk_destroy_command_pool(void *user_context, VulkanMemoryAllocator *allocator error(user_context) << "Vulkan: Failed to destroy command pool ... invalid allocator pointer!\n"; return halide_error_code_generic_error; } - + if (command_pool == VkInvalidCommandPool) { + debug(user_context) << "Vulkan: Command pool already destroyed.\n"; + return halide_error_code_success; + } + vkResetCommandPool(allocator->current_device(), command_pool, VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT); vkDestroyCommandPool(allocator->current_device(), command_pool, allocator->callbacks()); return halide_error_code_success; } From 2296e643d30ba2a041abda7bc83a35ab782697bf Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Thu, 13 Jun 2024 15:59:22 -0700 Subject: [PATCH 6/9] Fix GPU object lifetime AOT test to use TEST_VULKAN macro. --- test/generator/gpu_object_lifetime_aottest.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/generator/gpu_object_lifetime_aottest.cpp b/test/generator/gpu_object_lifetime_aottest.cpp index 0ebfaaa9b20b..d0d0a7d544f3 100644 --- a/test/generator/gpu_object_lifetime_aottest.cpp +++ b/test/generator/gpu_object_lifetime_aottest.cpp @@ -10,6 +10,8 @@ #include "HalideRuntimeOpenCL.h" #elif defined(TEST_METAL) #include "HalideRuntimeMetal.h" +#elif defined(TEST_VULKAN) +#include "HalideRuntimeVulkan.h" #endif #include "gpu_object_lifetime.h" @@ -34,6 +36,8 @@ int main(int argc, char **argv) { printf("TEST_OPENCL enabled for gpu_object_lifetime testing...\n"); #elif defined(TEST_METAL) printf("TEST_METAL enabled for gpu_object_lifetime testing...\n"); +#elif defined(TEST_VULKAN) + printf("TEST_VULKAN enabled for gpu_object_lifetime testing...\n"); #else // TODO: we can't support WebGPU here (yet) because our WebGPU runtime doesn't // (yet) support halide_webgpu_wrap_native(); when it does, we should be able @@ -211,6 +215,8 @@ int main(int argc, char **argv) { halide_device_release(nullptr, halide_opencl_device_interface()); #elif defined(TEST_METAL) halide_device_release(nullptr, halide_metal_device_interface()); +#elif defined(TEST_VULKAN) + halide_device_release(nullptr, halide_vulkan_device_interface()); #endif } From 5468f7ff09267c24764b189d9c4d6287980321a4 Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Fri, 14 Jun 2024 09:31:00 -0700 Subject: [PATCH 7/9] Fix clang tidy warning for usage of static in anonymous namespace --- src/runtime/vulkan_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h index df2ac962590f..319d8caf63bf 100644 --- a/src/runtime/vulkan_internal.h +++ b/src/runtime/vulkan_internal.h @@ -102,7 +102,7 @@ int vk_create_command_pool(void *user_context, VulkanMemoryAllocator *allocator, int vk_destroy_command_pool(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool command_pool); // Command pools are uint64_t and zero may be valid, so use this as a sentinel for invalid -static const VkCommandPool VkInvalidCommandPool(uint64_t(-1)); +const VkCommandPool VkInvalidCommandPool(uint64_t(-1)); // -- Command Buffer int vk_create_command_buffer(void *user_context, VulkanMemoryAllocator *allocator, VkCommandPool pool, VkCommandBuffer *command_buffer); From 96ab45158a70c00183b63f33e62a4600a629b61d Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Fri, 14 Jun 2024 10:06:30 -0700 Subject: [PATCH 8/9] Disable Vulkan validation layer for leak tests (or we'll leak). --- test/correctness/gpu_object_lifetime_1.cpp | 8 ++++++++ test/correctness/gpu_object_lifetime_2.cpp | 7 +++++++ test/correctness/gpu_object_lifetime_3.cpp | 7 +++++++ test/correctness/leak_device_memory.cpp | 7 +++++++ 4 files changed, 29 insertions(+) diff --git a/test/correctness/gpu_object_lifetime_1.cpp b/test/correctness/gpu_object_lifetime_1.cpp index d955d822d068..ce11522aed6f 100644 --- a/test/correctness/gpu_object_lifetime_1.cpp +++ b/test/correctness/gpu_object_lifetime_1.cpp @@ -1,6 +1,7 @@ #include "Halide.h" #include "gpu_object_lifetime_tracker.h" +#include #include using namespace Halide; @@ -18,6 +19,13 @@ int main(int argc, char *argv[]) { Target target = get_jit_target_from_environment(); + // Disable the Vulkan validation layer or we'll leak + // https://github.com/halide/Halide/issues/8290 + if (target.has_feature(Target::Vulkan)) { + char clear_env_var[] = "VK_INSTANCE_LAYERS="; + putenv(clear_env_var); + } + // We need to hook the default handler too, to catch the frees done by release_all JITHandlers handlers; handlers.custom_print = halide_print; diff --git a/test/correctness/gpu_object_lifetime_2.cpp b/test/correctness/gpu_object_lifetime_2.cpp index 17010576fb99..ade23b1d15e8 100644 --- a/test/correctness/gpu_object_lifetime_2.cpp +++ b/test/correctness/gpu_object_lifetime_2.cpp @@ -18,6 +18,13 @@ int main(int argc, char *argv[]) { Target target = get_jit_target_from_environment(); + // Disable the Vulkan validation layer or we'll leak + // https://github.com/halide/Halide/issues/8290 + if (target.has_feature(Target::Vulkan)) { + char clear_env_var[] = "VK_INSTANCE_LAYERS="; + putenv(clear_env_var); + } + // We need to hook the default handler too, to catch the frees done by release_all JITHandlers handlers; handlers.custom_print = halide_print; diff --git a/test/correctness/gpu_object_lifetime_3.cpp b/test/correctness/gpu_object_lifetime_3.cpp index edaee04ba9ff..aea0b6280292 100644 --- a/test/correctness/gpu_object_lifetime_3.cpp +++ b/test/correctness/gpu_object_lifetime_3.cpp @@ -18,6 +18,13 @@ int main(int argc, char *argv[]) { Target target = get_jit_target_from_environment(); + // Disable the Vulkan validation layer or we'll leak + // https://github.com/halide/Halide/issues/8290 + if (target.has_feature(Target::Vulkan)) { + char clear_env_var[] = "VK_INSTANCE_LAYERS="; + putenv(clear_env_var); + } + // We need to hook the default handler too, to catch the frees done by release_all JITHandlers handlers; handlers.custom_print = halide_print; diff --git a/test/correctness/leak_device_memory.cpp b/test/correctness/leak_device_memory.cpp index 567aeddb5fd8..fcded0edac54 100644 --- a/test/correctness/leak_device_memory.cpp +++ b/test/correctness/leak_device_memory.cpp @@ -21,6 +21,13 @@ int main(int argc, char **argv) { Target target = get_jit_target_from_environment(); + // Disable the Vulkan validation layer or we'll leak + // https://github.com/halide/Halide/issues/8290 + if (target.has_feature(Target::Vulkan)) { + char clear_env_var[] = "VK_INSTANCE_LAYERS="; + putenv(clear_env_var); + } + // We need debug output to record object creation. target.set_feature(Target::Debug); From f5b4ed0b06fa2dc76413ed4b5a807d97808afdfc Mon Sep 17 00:00:00 2001 From: Derek Gerstmann Date: Fri, 14 Jun 2024 12:03:59 -0700 Subject: [PATCH 9/9] Add vk_validate_shader_for_device() method to check shader bindings against device limits prior to compiling to verify shader compatibility. --- src/runtime/vulkan_resources.h | 86 +++++++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 6 deletions(-) diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h index 838e6669a0b7..f28548700457 100644 --- a/src/runtime/vulkan_resources.h +++ b/src/runtime/vulkan_resources.h @@ -754,6 +754,16 @@ int vk_create_pipeline_layout(void *user_context, return halide_error_code_generic_error; } + if (allocator->current_physical_device_limits().maxBoundDescriptorSets > 0) { + uint64_t max_bound_descriptor_sets = allocator->current_physical_device_limits().maxBoundDescriptorSets; + if (descriptor_set_count > max_bound_descriptor_sets) { + error(user_context) << "Vulkan: Number of descriptor sets for pipeline layout exceeds the number that can be bound by device!\n" + << " requested: " << descriptor_set_count << "," + << " available: " << max_bound_descriptor_sets << "\n"; + return halide_error_code_incompatible_device_interface; + } + } + VkPipelineLayoutCreateInfo pipeline_layout_info = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // structure type nullptr, // pointer to a structure extending this @@ -912,16 +922,18 @@ int vk_setup_compute_pipeline(void *user_context, } } uint32_t shared_mem_bytes_avail = (dispatch_data->shared_mem_bytes - static_shared_mem_bytes); +#ifdef DEBUG_RUNTIME debug(user_context) << " pipeline uses " << static_shared_mem_bytes << " bytes of static shared memory\n"; debug(user_context) << " dispatch requests " << dispatch_data->shared_mem_bytes << " bytes of shared memory\n"; debug(user_context) << " dynamic shared memory " << shared_mem_bytes_avail << " bytes available\n"; - +#endif // setup the dynamic array size if ((shared_mem_constant_id > 0) && (shared_mem_bytes_avail > 0)) { uint32_t dynamic_array_size = (uint32_t)shared_mem_bytes_avail / shared_mem_type_size; +#ifdef DEBUG_RUNTIME debug(user_context) << " setting shared memory to " << (uint32_t)dynamic_array_size << " elements " << "(or " << (uint32_t)shared_mem_bytes_avail << " bytes)\n"; - +#endif // save the shared mem specialization constant in the first slot dispatch_constant_ids[dispatch_constant_index] = shared_mem_constant_id; dispatch_constant_values[dispatch_constant_index] = dynamic_array_size; @@ -932,13 +944,15 @@ int vk_setup_compute_pipeline(void *user_context, if (allocator->current_physical_device_limits().maxComputeSharedMemorySize > 0) { uint64_t device_shared_mem_size = allocator->current_physical_device_limits().maxComputeSharedMemorySize; if (static_shared_mem_bytes > device_shared_mem_size) { - error(user_context) << "Vulkan: Amount of static shared memory used exceeds device limit! " - << static_shared_mem_bytes << " > " << device_shared_mem_size << " bytes\n"; + error(user_context) << "Vulkan: Amount of static shared memory used exceeds device limit!\n" + << " requested: " << static_shared_mem_bytes << " bytes," + << " available: " << device_shared_mem_size << " bytes\n"; return halide_error_code_incompatible_device_interface; } if (dispatch_data->shared_mem_bytes > device_shared_mem_size) { - error(user_context) << "Vulkan: Amount of dynamic shared memory exceeds device limit! " - << (uint64_t)dispatch_data->shared_mem_bytes << " > " << device_shared_mem_size << " bytes\n"; + error(user_context) << "Vulkan: Amount of dynamic shared memory used exceeds device limit!\n" + << " requested: " << dispatch_data->shared_mem_bytes << " bytes," + << " available: " << device_shared_mem_size << " bytes\n"; return halide_error_code_incompatible_device_interface; } } @@ -959,9 +973,11 @@ int vk_setup_compute_pipeline(void *user_context, uint32_t found_index = invalid_index; for (uint32_t sc = 0; sc < shader_bindings->specialization_constants_count; sc++) { if (shader_bindings->specialization_constants[sc].constant_id == dispatch_constant_ids[dc]) { +#ifdef DEBUG_RUNTIME debug(user_context) << " binding specialization constant [" << dispatch_constant_ids[dc] << "] " << "'" << shader_bindings->specialization_constants[sc].constant_name << "' " << " => " << dispatch_constant_values[dc] << "\n"; +#endif found_index = sc; break; } @@ -1273,6 +1289,56 @@ VulkanShaderBinding *vk_decode_shader_bindings(void *user_context, VulkanMemoryA return shader_bindings; } +int vk_validate_shader_for_device(void *user_context, VulkanMemoryAllocator *allocator, + const VulkanShaderBinding *shader_bindings, uint32_t shader_count) { +#ifdef DEBUG_RUNTIME + debug(user_context) + << " vk_validate_shader_for_device (user_context: " << user_context << ", " + << "allocator: " << (void *)allocator << ", " + << "shader_bindings: " << (void *)shader_bindings << ", " + << "shader_count: " << shader_count << ")\n"; +#endif + + // validate that the shared memory used is less than the available amount on device + if (shader_bindings->shared_memory_allocations_count) { + + uint32_t static_shared_mem_bytes = 0; + + for (uint32_t sm = 0; sm < shader_bindings->shared_memory_allocations_count; sm++) { + VulkanSharedMemoryAllocation *allocation = &(shader_bindings->shared_memory_allocations[sm]); + if (allocation->constant_id == 0) { + // static fixed-size allocation + static_shared_mem_bytes += allocation->type_size * allocation->array_size; + } else { + // dynamic allocation (can't determine this until runtime) + } + } + + // verify the device can actually support the necessary amount of shared memory requested + if (allocator->current_physical_device_limits().maxComputeSharedMemorySize > 0) { + uint64_t device_shared_mem_size = allocator->current_physical_device_limits().maxComputeSharedMemorySize; + if (static_shared_mem_bytes > device_shared_mem_size) { + error(user_context) << "Vulkan: Amount of static shared memory used exceeds device limit!\n" + << " requested: " << static_shared_mem_bytes << " bytes," + << " available: " << device_shared_mem_size << " bytes\n"; + return halide_error_code_incompatible_device_interface; + } + } + } + + // validate the number of descriptor sets used is within the amount supported by the device + if (allocator->current_physical_device_limits().maxPerStageDescriptorStorageBuffers > 0) { + uint64_t max_descriptors = allocator->current_physical_device_limits().maxPerStageDescriptorStorageBuffers; + if (shader_count > max_descriptors) { + error(user_context) << "Vulkan: Number of required descriptor sets exceeds the amount available for device!\n" + << " requested: " << shader_count << "," + << " available: " << max_descriptors << "\n"; + return halide_error_code_incompatible_device_interface; + } + } + return halide_error_code_success; +} + VulkanCompilationCacheEntry *vk_compile_shader_module(void *user_context, VulkanMemoryAllocator *allocator, const char *ptr, int size) { #ifdef DEBUG_RUNTIME @@ -1342,6 +1408,14 @@ VulkanCompilationCacheEntry *vk_compile_shader_module(void *user_context, Vulkan return nullptr; } + // validate that the compiled shader can be executed by the device with the requested resources + int valid_status = vk_validate_shader_for_device(user_context, allocator, decoded_bindings, shader_count); + if (valid_status != halide_error_code_success) { + vk_host_free(user_context, cache_entry->shader_bindings, allocator->callbacks()); + vk_host_free(user_context, cache_entry, allocator->callbacks()); + return nullptr; + } + // save the shader bindings in the cache entry cache_entry->shader_bindings = decoded_bindings; cache_entry->shader_count = shader_count;