From 91026e2a4dea379e706fffda60423a791b53d4b9 Mon Sep 17 00:00:00 2001
From: Krzysztof Swiecicki
Date: Tue, 15 Oct 2024 09:51:45 +0000
Subject: [PATCH] [L0] Add initial USM alloc enqueue API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Michał Staniewski

---
 source/adapters/level_zero/context.cpp     |  43 +++
 source/adapters/level_zero/context.hpp     |   9 +
 source/adapters/level_zero/queue.cpp       |   2 +-
 source/adapters/level_zero/usm.cpp         | 317 +++++++++++++++------
 test/adapters/level_zero/CMakeLists.txt    |   8 +
 test/adapters/level_zero/enqueue_alloc.cpp | 140 +++++++++
 6 files changed, 424 insertions(+), 95 deletions(-)
 create mode 100644 test/adapters/level_zero/enqueue_alloc.cpp

diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp
index 7c1c412ee4..596d267d35 100644
--- a/source/adapters/level_zero/context.cpp
+++ b/source/adapters/level_zero/context.cpp
@@ -204,6 +204,17 @@ ur_result_t ur_context_handle_t_::initialize() {
                             .Configs[usm::DisjointPoolMemType::Device])
                             .second));
 
+    MemProvider = umf::memoryProviderMakeUnique<L0DeviceMemoryProvider>(
+                      reinterpret_cast<ur_context_handle_t>(this), Device)
+                      .second;
+    AsyncDeviceMemPools.emplace(
+        std::piecewise_construct, std::make_tuple(Device->ZeDevice),
+        std::make_tuple(umf::poolMakeUniqueFromOps(
+                            umfDisjointPoolOps(), std::move(MemProvider),
+                            &DisjointPoolConfigInstance
+                                 .Configs[usm::DisjointPoolMemType::Device])
+                            .second));
+
     MemProvider = umf::memoryProviderMakeUnique<L0SharedMemoryProvider>(
                       reinterpret_cast<ur_context_handle_t>(this), Device)
                       .second;
@@ -215,6 +226,17 @@ ur_result_t ur_context_handle_t_::initialize() {
                             .Configs[usm::DisjointPoolMemType::Shared])
                             .second));
 
+    MemProvider = umf::memoryProviderMakeUnique<L0SharedMemoryProvider>(
+                      reinterpret_cast<ur_context_handle_t>(this), Device)
+                      .second;
+    AsyncSharedMemPools.emplace(
+        std::piecewise_construct, std::make_tuple(Device->ZeDevice),
+        std::make_tuple(umf::poolMakeUniqueFromOps(
+                            umfDisjointPoolOps(), std::move(MemProvider),
+                            &DisjointPoolConfigInstance
+                                 .Configs[usm::DisjointPoolMemType::Shared])
+                            .second));
+
     MemProvider = umf::memoryProviderMakeUnique<L0SharedReadOnlyMemoryProvider>(
                       reinterpret_cast<ur_context_handle_t>(this), Device)
                       .second;
@@ -227,6 +249,18 @@ ur_result_t ur_context_handle_t_::initialize() {
                 .Configs[usm::DisjointPoolMemType::SharedReadOnly])
             .second));
 
+    MemProvider = umf::memoryProviderMakeUnique<L0SharedReadOnlyMemoryProvider>(
+                      reinterpret_cast<ur_context_handle_t>(this), Device)
+                      .second;
+    AsyncSharedReadOnlyMemPools.emplace(
+        std::piecewise_construct, std::make_tuple(Device->ZeDevice),
+        std::make_tuple(
+            umf::poolMakeUniqueFromOps(
+                umfDisjointPoolOps(), std::move(MemProvider),
+                &DisjointPoolConfigInstance
+                     .Configs[usm::DisjointPoolMemType::SharedReadOnly])
+                .second));
+
     MemProvider = umf::memoryProviderMakeUnique<L0DeviceMemoryProvider>(
                       reinterpret_cast<ur_context_handle_t>(this), Device)
                       .second;
@@ -279,6 +313,15 @@ ur_result_t ur_context_handle_t_::initialize() {
           &DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Host])
           .second;
 
+  MemProvider = umf::memoryProviderMakeUnique<L0HostMemoryProvider>(
+                    reinterpret_cast<ur_context_handle_t>(this), nullptr)
+                    .second;
+  AsyncHostMemPool =
+      umf::poolMakeUniqueFromOps(
+          umfDisjointPoolOps(), std::move(MemProvider),
+          &DisjointPoolConfigInstance.Configs[usm::DisjointPoolMemType::Host])
+          .second;
+
   MemProvider = umf::memoryProviderMakeUnique<L0HostMemoryProvider>(
                     reinterpret_cast<ur_context_handle_t>(this), nullptr)
                     .second;
diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp
index 0d3b2846e2..fbcbe3dbdd 100644
--- a/source/adapters/level_zero/context.hpp
+++ b/source/adapters/level_zero/context.hpp
@@ -124,6 +124,15 @@ struct ur_context_handle_t_ : _ur_object {
       SharedReadOnlyMemProxyPools;
   umf::pool_unique_handle_t HostMemProxyPool;
 
+  // USM pools for async allocations.
+  std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
+      AsyncDeviceMemPools;
+  std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
+      AsyncSharedMemPools;
+  std::unordered_map<ze_device_handle_t, umf::pool_unique_handle_t>
+      AsyncSharedReadOnlyMemPools;
+  umf::pool_unique_handle_t AsyncHostMemPool;
+
   // Map associating pools created with urUsmPoolCreate and internal pools
   std::list<ur_usm_pool_handle_t> UsmPoolHandles{};
 
diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp
index c4598f3472..6a75ef1e4a 100644
--- a/source/adapters/level_zero/queue.cpp
+++ b/source/adapters/level_zero/queue.cpp
@@ -1333,7 +1333,7 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList,
       Device->Platform->ContextsMutex, std::defer_lock);
 
   if (IndirectAccessTrackingEnabled) {
-    // We are going to submit kernels for execution. If indirect access flag is 
+    // We are going to submit kernels for execution. If indirect access flag is
     // set for a kernel then we need to make a snapshot of existing memory
     // allocations in all contexts in the platform. We need to lock the mutex
     // guarding the list of contexts in the platform to prevent creation of new
diff --git a/source/adapters/level_zero/usm.cpp b/source/adapters/level_zero/usm.cpp
index 0f266fc13b..20965615b9 100644
--- a/source/adapters/level_zero/usm.cpp
+++ b/source/adapters/level_zero/usm.cpp
@@ -788,124 +788,253 @@ ur_result_t urUSMReleaseExp(ur_context_handle_t Context, void *HostPtr) {
   return UR_RESULT_SUCCESS;
 }
 
+enum class USMAllocType { Host = 0, Device = 1, Shared = 2 };
+
+static ur_result_t USMAllocHelper(ur_context_handle_t Context,
+                                  ur_device_handle_t Device, size_t Size,
+                                  void **RetMem, USMAllocType Type) {
+  auto &Platform = Device->Platform;
+
+  // TODO: Should alignment be passed in 'ur_exp_async_usm_alloc_properties_t'?
+  uint32_t Alignment = 0;
+
+  std::shared_lock<ur_shared_mutex> ContextLock(Context->Mutex,
+                                                std::defer_lock);
+  std::unique_lock<ur_shared_mutex> IndirectAccessTrackingLock(
+      Platform->ContextsMutex, std::defer_lock);
+  if (IndirectAccessTrackingEnabled) {
+    IndirectAccessTrackingLock.lock();
+    UR_CALL(ur::level_zero::urContextRetain(Context));
+  } else {
+    ContextLock.lock();
+  }
+
+  umf_memory_pool_handle_t hPoolInternal = nullptr;
+  switch (Type) {
+  case USMAllocType::Host:
+    hPoolInternal = Context->AsyncHostMemPool.get();
+    break;
+  case USMAllocType::Device: {
+    auto It = Context->AsyncDeviceMemPools.find(Device->ZeDevice);
+    if (It == Context->AsyncDeviceMemPools.end()) {
+      return UR_RESULT_ERROR_INVALID_VALUE;
+    }
+    hPoolInternal = It->second.get();
+  } break;
+  case USMAllocType::Shared: {
+    auto It = Context->AsyncSharedMemPools.find(Device->ZeDevice);
+    if (It == Context->AsyncSharedMemPools.end()) {
+      return UR_RESULT_ERROR_INVALID_VALUE;
+    }
+    hPoolInternal = It->second.get();
+  } break;
+  };
+
+  *RetMem = umfPoolAlignedMalloc(hPoolInternal, Size, Alignment);
+  if (*RetMem == nullptr) {
+    auto umfRet = umfPoolGetLastAllocationError(hPoolInternal);
+    return umf2urResult(umfRet);
+  }
+
+  if (IndirectAccessTrackingEnabled) {
+    // Keep track of all memory allocations in the context
+    Context->MemAllocs.emplace(std::piecewise_construct,
+                               std::forward_as_tuple(*RetMem),
+                               std::forward_as_tuple(Context));
+  }
+
+  return UR_RESULT_SUCCESS;
+}
+
+static ur_result_t enqueueUSMAllocHelper(
+    ur_queue_handle_t Queue, ur_usm_pool_handle_t Pool, const size_t Size,
+    const ur_exp_enqueue_usm_alloc_properties_t *Properties,
+    uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
+    void **RetMem, ur_event_handle_t *OutEvent, USMAllocType Type) {
+  std::ignore = Pool;
+  std::ignore = Properties;
+
+  std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
+
+  bool UseCopyEngine = false;
+  _ur_ze_event_list_t TmpWaitList;
+  UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
+      NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));
+
+  // Get a new command list to be used on this call
+  ur_command_list_ptr_t CommandList{};
+  UR_CALL(Queue->Context->getAvailableCommandList(
+      Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList));
+
+  ze_event_handle_t ZeEvent = nullptr;
+  ur_event_handle_t InternalEvent{};
+  bool IsInternal = OutEvent == nullptr;
+  ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
+
+  ur_command_t CommandType = UR_COMMAND_FORCE_UINT32;
+  switch (Type) {
+  case USMAllocType::Host:
+    CommandType = UR_COMMAND_ENQUEUE_USM_HOST_ALLOC_EXP;
+    break;
+  case USMAllocType::Device:
+    CommandType = UR_COMMAND_ENQUEUE_USM_DEVICE_ALLOC_EXP;
+    break;
+  case USMAllocType::Shared:
+    CommandType = UR_COMMAND_ENQUEUE_USM_SHARED_ALLOC_EXP;
+    break;
+  }
+  UR_CALL(createEventAndAssociateQueue(Queue, Event, CommandType, CommandList,
+                                       IsInternal, false));
+  ZeEvent = (*Event)->ZeEvent;
+  (*Event)->WaitList = TmpWaitList;
+
+  // Allocate USM memory
+  auto Ret = USMAllocHelper(Queue->Context, Queue->Device, Size, RetMem, Type);
+  if (Ret) {
+    return Ret;
+  }
+
+  // Signal that USM allocation event was finished
+  ZE2UR_CALL(zeCommandListAppendSignalEvent, (CommandList->first, ZeEvent));
+
+  UR_CALL(Queue->executeCommandList(CommandList, false));
+
+  return UR_RESULT_SUCCESS;
+}
+
 ur_result_t urEnqueueUSMDeviceAllocExp(
-    ur_queue_handle_t hQueue, ///< [in] handle of the queue object
-    ur_usm_pool_handle_t
-        pPool, ///< [in][optional] handle of the USM memory pool
-    const size_t size, ///< [in] minimum size in bytes of the USM memory object
+    ur_queue_handle_t Queue,   ///< [in] handle of the queue object
+    ur_usm_pool_handle_t Pool, ///< [in][optional] USM pool descriptor
+    const size_t Size, ///< [in] minimum size in bytes of the USM memory object
                        ///< to be allocated
     const ur_exp_enqueue_usm_alloc_properties_t
-        *pProperties, ///< [in][optional] pointer to the enqueue asynchronous
-                      ///< USM allocation properties
-    uint32_t numEventsInWaitList, ///< [in] size of the event wait list
+        *Properties, ///< [in][optional] pointer to the enqueue async alloc
+                     ///< properties
+    uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
     const ur_event_handle_t
-        *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
-                          ///< pointer to a list of events that must be complete
-                          ///< before the kernel execution. If nullptr, the
-                          ///< numEventsInWaitList must be 0, indicating no wait
-                          ///< events.
-    void **ppMem, ///< [out] pointer to USM memory object
-    ur_event_handle_t
-        *phEvent ///< [out][optional] return an event object that identifies the
-                 ///< asynchronous USM device allocation
+        *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+                        ///< pointer to a list of events that must be complete
+                        ///< before the kernel execution. If nullptr, the
+                        ///< numEventsInWaitList must be 0, indicating no wait
+                        ///< events.
+    void **Mem, ///< [out] pointer to USM memory object
+    ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
+                                ///< identifies the async alloc
 ) {
-  std::ignore = hQueue;
-  std::ignore = pPool;
-  std::ignore = size;
-  std::ignore = pProperties;
-  std::ignore = numEventsInWaitList;
-  std::ignore = phEventWaitList;
-  std::ignore = ppMem;
-  std::ignore = phEvent;
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
+                               NumEventsInWaitList, EventWaitList, Mem,
+                               OutEvent, USMAllocType::Device);
 }
 
 ur_result_t urEnqueueUSMSharedAllocExp(
-    ur_queue_handle_t hQueue, ///< [in] handle of the queue object
-    ur_usm_pool_handle_t
-        pPool, ///< [in][optional] handle of the USM memory pool
-    const size_t size, ///< [in] minimum size in bytes of the USM memory object
+    ur_queue_handle_t Queue,   ///< [in] handle of the queue object
+    ur_usm_pool_handle_t Pool, ///< [in][optional] USM pool descriptor
+    const size_t Size, ///< [in] minimum size in bytes of the USM memory object
                        ///< to be allocated
     const ur_exp_enqueue_usm_alloc_properties_t
-        *pProperties, ///< [in][optional] pointer to the enqueue asynchronous
-                      ///< USM allocation properties
-    uint32_t numEventsInWaitList, ///< [in] size of the event wait list
+        *Properties, ///< [in][optional] pointer to the enqueue async alloc
+                     ///< properties
+    uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
     const ur_event_handle_t
-        *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
-                          ///< pointer to a list of events that must be complete
-                          ///< before the kernel execution. If nullptr, the
-                          ///< numEventsInWaitList must be 0, indicating no wait
-                          ///< events.
-    void **ppMem, ///< [out] pointer to USM memory object
-    ur_event_handle_t
-        *phEvent ///< [out][optional] return an event object that identifies the
-                 ///< asynchronous USM shared allocation
+        *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+                        ///< pointer to a list of events that must be complete
+                        ///< before the kernel execution. If nullptr, the
+                        ///< numEventsInWaitList must be 0, indicating no wait
+                        ///< events.
+    void **Mem, ///< [out] pointer to USM memory object
+    ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
+                                ///< identifies the async alloc
 ) {
-  std::ignore = hQueue;
-  std::ignore = pPool;
-  std::ignore = size;
-  std::ignore = pProperties;
-  std::ignore = numEventsInWaitList;
-  std::ignore = phEventWaitList;
-  std::ignore = ppMem;
-  std::ignore = phEvent;
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
+                               NumEventsInWaitList, EventWaitList, Mem,
+                               OutEvent, USMAllocType::Shared);
 }
 
 ur_result_t urEnqueueUSMHostAllocExp(
-    ur_queue_handle_t hQueue, ///< [in] handle of the queue object
-    ur_usm_pool_handle_t
-        pPool, ///< [in][optional] handle of the USM memory pool
-    const size_t size, ///< [in] minimum size in bytes of the USM memory object
+    ur_queue_handle_t Queue,   ///< [in] handle of the queue object
+    ur_usm_pool_handle_t Pool, ///< [in][optional] USM pool descriptor
+    const size_t Size, ///< [in] minimum size in bytes of the USM memory object
                        ///< to be allocated
     const ur_exp_enqueue_usm_alloc_properties_t
-        *pProperties, ///< [in][optional] pointer to the enqueue asynchronous
-                      ///< USM allocation properties
-    uint32_t numEventsInWaitList, ///< [in] size of the event wait list
+        *Properties, ///< [in][optional] pointer to the enqueue async alloc
+                     ///< properties
+    uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
     const ur_event_handle_t
-        *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
-                          ///< pointer to a list of events that must be complete
-                          ///< before the kernel execution. If nullptr, the
-                          ///< numEventsInWaitList must be 0, indicating no wait
-                          ///< events.
-    void **ppMem, ///< [out] pointer to USM memory object
-    ur_event_handle_t
-        *phEvent ///< [out][optional] return an event object that identifies the
-                 ///< asynchronous USM host allocation
+        *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+                        ///< pointer to a list of events that must be complete
+                        ///< before the kernel execution. If nullptr, the
+                        ///< numEventsInWaitList must be 0, indicating no wait
+                        ///< events.
+    void **Mem, ///< [out] pointer to USM memory object
+    ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
+                                ///< identifies the async alloc
 ) {
-  std::ignore = hQueue;
-  std::ignore = pPool;
-  std::ignore = size;
-  std::ignore = pProperties;
-  std::ignore = numEventsInWaitList;
-  std::ignore = phEventWaitList;
-  std::ignore = ppMem;
-  std::ignore = phEvent;
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  return enqueueUSMAllocHelper(Queue, Pool, Size, Properties,
+                               NumEventsInWaitList, EventWaitList, Mem,
+                               OutEvent, USMAllocType::Host);
 }
 
 ur_result_t urEnqueueUSMFreeExp(
-    ur_queue_handle_t hQueue, ///< [in] handle of the queue object
-    ur_usm_pool_handle_t
-        pPool, ///< [in][optional] handle of the USM memory pool
-    void *pMem, ///< [in] pointer to USM memory object
-    uint32_t numEventsInWaitList, ///< [in] size of the event wait list
+    ur_queue_handle_t Queue,   ///< [in] handle of the queue object
+    ur_usm_pool_handle_t Pool, ///< [in][optional] USM pool descriptor
+    void *Mem, ///< [in] pointer to USM memory object
+    uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
     const ur_event_handle_t
-        *phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
-                          ///< pointer to a list of events that must be complete
-                          ///< before the kernel execution. If nullptr, the
-                          ///< numEventsInWaitList must be 0, indicating no wait
-                          ///< events.
-    ur_event_handle_t *phEvent ///< [out][optional] return an event object that
-                               ///< identifies the asynchronous USM deallocation
+        *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
+                        ///< pointer to a list of events that must be complete
+                        ///< before the kernel execution. If nullptr, the
+                        ///< numEventsInWaitList must be 0, indicating no wait
+                        ///< events.
+    ur_event_handle_t *OutEvent ///< [out][optional] return an event object that
+                                ///< identifies the async free
 ) {
-  std::ignore = hQueue;
-  std::ignore = pPool;
-  std::ignore = pMem;
-  std::ignore = numEventsInWaitList;
-  std::ignore = phEventWaitList;
-  std::ignore = phEvent;
-  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
+  std::ignore = Pool;
+
+  std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
+
+  bool UseCopyEngine = false;
+  _ur_ze_event_list_t TmpWaitList;
+  UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
+      NumEventsInWaitList, EventWaitList, Queue, UseCopyEngine));
+
+  // Get a new command list to be used on this call
+  ur_command_list_ptr_t CommandList{};
+  UR_CALL(Queue->Context->getAvailableCommandList(
+      Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList));
+
+  ze_event_handle_t ZeEvent = nullptr;
+  ur_event_handle_t InternalEvent{};
+  bool IsInternal = OutEvent == nullptr;
+  ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
+
+  UR_CALL(createEventAndAssociateQueue(Queue, Event,
+                                       UR_COMMAND_ENQUEUE_USM_FREE_EXP,
+                                       CommandList, IsInternal, false));
+  ZeEvent = (*Event)->ZeEvent;
+  (*Event)->WaitList = TmpWaitList;
+
+  const auto &ZeCommandList = CommandList->first;
+  const auto &WaitList = (*Event)->WaitList;
+  if (WaitList.Length) {
+    ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
+               (ZeCommandList, WaitList.Length, WaitList.ZeEventList));
+  }
+
+  // Wait for commands execution until USM can be freed
+  UR_CALL(Queue->executeCommandList(CommandList, true)); // Blocking
+
+  // Free USM memory
+  auto Ret = USMFreeHelper(Queue->Context, Mem);
+  if (Ret) {
+    return Ret;
+  }
+
+  // Signal that USM free event was finished
+  ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));
+
+  UR_CALL(Queue->executeCommandList(CommandList, false));
+
+  return UR_RESULT_SUCCESS;
 }
 } // namespace ur::level_zero
diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt
index bfb02d37c2..cf8885edeb 100644
--- a/test/adapters/level_zero/CMakeLists.txt
+++ b/test/adapters/level_zero/CMakeLists.txt
@@ -39,6 +39,14 @@ if(UR_BUILD_ADAPTER_L0)
 
     add_dependencies(test-adapter-level_zero
         generate_device_binaries kernel_names_header)
+
+    add_adapter_test(level_zero_enqueue_alloc
+        FIXTURE KERNELS
+        SOURCES
+            enqueue_alloc.cpp
+        ENVIRONMENT
+            "UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
+    )
 endif()
 
 if(NOT WIN32 AND NOT UR_STATIC_ADAPTER_L0)
diff --git a/test/adapters/level_zero/enqueue_alloc.cpp b/test/adapters/level_zero/enqueue_alloc.cpp
new file mode 100644
index 0000000000..4cb965ffa0
--- /dev/null
+++ b/test/adapters/level_zero/enqueue_alloc.cpp
@@ -0,0 +1,140 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <uur/fixtures.h>
+
+struct urL0EnqueueAllocTest : uur::urKernelExecutionTest {
+    void SetUp() override {
+        program_name = "fill_usm";
+        UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp());
+    }
+
+    void ValidateEnqueueFree(void *ptr) {
+        ur_event_handle_t freeEvent = nullptr;
+        ASSERT_NE(ptr, nullptr);
+        ASSERT_SUCCESS(
+            urEnqueueUSMFreeExp(queue, nullptr, ptr, 0, nullptr, &freeEvent));
+        ASSERT_NE(freeEvent, nullptr);
+        ASSERT_SUCCESS(urQueueFinish(queue));
+    }
+
+    static constexpr size_t ARRAY_SIZE = 16;
+    static constexpr uint32_t DATA = 0xC0FFEE;
+};
+UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urL0EnqueueAllocTest);
+
+TEST_P(urL0EnqueueAllocTest, SuccessHost) {
+    ur_device_usm_access_capability_flags_t hostUSMSupport = 0;
+    ASSERT_SUCCESS(uur::GetDeviceUSMHostSupport(device, hostUSMSupport));
+    if (!hostUSMSupport) {
+        GTEST_SKIP() << "Host USM is not supported.";
+    }
+
+    void *ptr = nullptr;
+    ur_event_handle_t allocEvent = nullptr;
+    ASSERT_SUCCESS(urEnqueueUSMHostAllocExp(queue, nullptr, sizeof(uint32_t),
+                                            nullptr, 0, nullptr, &ptr,
+                                            &allocEvent));
+    ASSERT_SUCCESS(urQueueFinish(queue));
+    ASSERT_NE(ptr, nullptr);
+    ASSERT_NE(allocEvent, nullptr);
+    *(uint32_t *)ptr = DATA;
+    ValidateEnqueueFree(ptr);
+}
+
+// Disable temporarily until user pool handling is implemented
+// TEST_P(urL0EnqueueAllocTest, SuccessHostPoolAlloc) {
+//     ur_device_usm_access_capability_flags_t hostUSMSupport = 0;
+//     ASSERT_SUCCESS(uur::GetDeviceUSMHostSupport(device, hostUSMSupport));
+//     if (!hostUSMSupport) {
+//         GTEST_SKIP() << "Host USM is not supported.";
+//     }
+
+//     ur_usm_pool_handle_t pool = nullptr;
+//     ASSERT_SUCCESS(urUSMPoolCreate(context, nullptr, &pool));
+
+//     void *ptr = nullptr;
+//     ur_event_handle_t allocEvent = nullptr;
+//     ASSERT_SUCCESS(urEnqueueUSMHostAllocExp(queue, pool, sizeof(uint32_t),
+//                                             nullptr, 0, nullptr, &ptr,
+//                                             &allocEvent));
+//     ASSERT_SUCCESS(urQueueFinish(queue));
+//     ASSERT_NE(ptr, nullptr);
+//     ASSERT_NE(allocEvent, nullptr);
+//     *static_cast<uint32_t *>(ptr) = DATA;
+//     ValidateEnqueueFree(ptr, pool);
+// }
+
+TEST_P(urL0EnqueueAllocTest, SuccessDevice) {
+    ur_device_usm_access_capability_flags_t deviceUSMSupport = 0;
+    ASSERT_SUCCESS(uur::GetDeviceUSMDeviceSupport(device, deviceUSMSupport));
+    if (!(deviceUSMSupport & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+        GTEST_SKIP() << "Device USM is not supported.";
+    }
+
+    void *ptr = nullptr;
+    ur_event_handle_t allocEvent = nullptr;
+    ASSERT_SUCCESS(urEnqueueUSMDeviceAllocExp(
+        queue, nullptr, ARRAY_SIZE * sizeof(uint32_t), nullptr, 0, nullptr,
+        &ptr, &allocEvent));
+    ASSERT_NE(ptr, nullptr);
+    ASSERT_NE(allocEvent, nullptr);
+    ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, ptr));
+    ASSERT_SUCCESS(
+        urKernelSetArgValue(kernel, 1, sizeof(DATA), nullptr, &DATA));
+    Launch1DRange(ARRAY_SIZE);
+    ValidateEnqueueFree(ptr);
+}
+
+TEST_P(urL0EnqueueAllocTest, SuccessDeviceRepeat) {
+    ur_device_usm_access_capability_flags_t deviceUSMSupport = 0;
+    ASSERT_SUCCESS(uur::GetDeviceUSMDeviceSupport(device, deviceUSMSupport));
+    if (!(deviceUSMSupport & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+        GTEST_SKIP() << "Device USM is not supported.";
+    }
+
+    void *ptr = nullptr;
+    ASSERT_SUCCESS(urEnqueueUSMDeviceAllocExp(
+        queue, nullptr, ARRAY_SIZE * sizeof(uint32_t), nullptr, 0, nullptr,
+        &ptr, nullptr));
+    ASSERT_NE(ptr, nullptr);
+    ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, ptr));
+    ASSERT_SUCCESS(
+        urKernelSetArgValue(kernel, 1, sizeof(DATA), nullptr, &DATA));
+    Launch1DRange(ARRAY_SIZE);
+    ASSERT_SUCCESS(
+        urEnqueueUSMFreeExp(queue, nullptr, ptr, 0, nullptr, nullptr));
+
+    void *ptr2 = nullptr;
+    ASSERT_SUCCESS(urEnqueueUSMDeviceAllocExp(
+        queue, nullptr, ARRAY_SIZE * sizeof(uint32_t), nullptr, 0, nullptr,
+        &ptr2, nullptr));
+    ASSERT_NE(ptr2, nullptr);
+    ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, ptr2));
+    ASSERT_SUCCESS(
+        urKernelSetArgValue(kernel, 1, sizeof(DATA), nullptr, &DATA));
+    Launch1DRange(ARRAY_SIZE);
+    ValidateEnqueueFree(ptr2);
+}
+
+TEST_P(urL0EnqueueAllocTest, SuccessShared) {
+    ur_device_usm_access_capability_flags_t sharedUSMSupport = 0;
+    ASSERT_SUCCESS(
+        uur::GetDeviceUSMSingleSharedSupport(device, sharedUSMSupport));
+    if (!(sharedUSMSupport & UR_DEVICE_USM_ACCESS_CAPABILITY_FLAG_ACCESS)) {
+        GTEST_SKIP() << "Shared USM is not supported.";
+    }
+
+    void *ptr = nullptr;
+    ur_event_handle_t allocEvent = nullptr;
+    ASSERT_SUCCESS(urEnqueueUSMSharedAllocExp(
+        queue, nullptr, ARRAY_SIZE * sizeof(uint32_t), nullptr, 0, nullptr,
+        &ptr, &allocEvent));
+    ASSERT_NE(ptr, nullptr);
+    ASSERT_NE(allocEvent, nullptr);
+    ASSERT_SUCCESS(urKernelSetArgPointer(kernel, 0, nullptr, ptr));
+    ASSERT_SUCCESS(
+        urKernelSetArgValue(kernel, 1, sizeof(DATA), nullptr, &DATA));
+    Launch1DRange(ARRAY_SIZE);
+    ValidateEnqueueFree(ptr);
+}
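
Reviewer note (not part of the patch): below is a minimal sketch of how a client might drive the entry points added above, patterned after the SuccessDevice test. It assumes `queue` and `kernel` were created through the usual urQueueCreate / urKernelCreate flow and that the kernel takes a USM pointer plus a fill value, like the `fill_usm` test kernel; error-code checking is omitted for brevity, and the function name is hypothetical.

#include <ur_api.h>

void fillWithEnqueueAlloc(ur_queue_handle_t queue, ur_kernel_handle_t kernel) {
    constexpr size_t arraySize = 16;
    constexpr uint32_t data = 0xC0FFEE;

    // Enqueue the allocation on the queue: no pool, no properties, no wait
    // events; the returned event is not needed for in-order usage here.
    void *ptr = nullptr;
    urEnqueueUSMDeviceAllocExp(queue, /*pool=*/nullptr,
                               arraySize * sizeof(uint32_t),
                               /*properties=*/nullptr, 0, nullptr, &ptr,
                               /*event=*/nullptr);

    // The pointer is returned immediately; commands enqueued afterwards on
    // the same queue may use it.
    urKernelSetArgPointer(kernel, 0, nullptr, ptr);
    urKernelSetArgValue(kernel, 1, sizeof(data), nullptr, &data);

    const size_t globalOffset = 0;
    const size_t globalSize = arraySize;
    urEnqueueKernelLaunch(queue, kernel, 1, &globalOffset, &globalSize,
                          /*localSize=*/nullptr, 0, nullptr, nullptr);

    // The free is ordered after the kernel; in this initial implementation
    // urEnqueueUSMFreeExp waits for prior commands to finish before returning
    // the memory to the async pool, then signals its completion event.
    urEnqueueUSMFreeExp(queue, nullptr, ptr, 0, nullptr, nullptr);
    urQueueFinish(queue);
}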