From b8dc1db97931a97a5635d0ee763873d9c0a48a4f Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Tue, 27 Aug 2024 15:01:03 -0700 Subject: [PATCH] [L0] Use zesInit for SysMan API usage - Switch to zesInit and the zes data structures for accessing L0 SysMan functionality. - Update Platform & Devices to store zes handles if SysMan support is available. - If the user sets the legacy ZES_ENABLE_SYSMAN environment variable, fall back to the old functionality. - Fix the return code on error to be consistently UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION. Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/adapter.cpp | 56 +++++++++++++++++++++---- source/adapters/level_zero/adapter.hpp | 1 + source/adapters/level_zero/device.cpp | 55 ++++++++++++++++++------ source/adapters/level_zero/platform.hpp | 12 ++++++ 4 files changed, 102 insertions(+), 22 deletions(-) diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index ed52254ec3..167240a24b 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -52,7 +52,30 @@ class ur_legacy_sink : public logger::Sink { }; }; -ur_result_t initPlatforms(PlatformVec &platforms) noexcept try { +// Find the corresponding ZesDevice Handle for a given ZeDevice +ur_result_t getZesDeviceHandle(zes_uuid_t coreDeviceUuid, + zes_device_handle_t *ZesDevice, + uint32_t *SubDeviceId, ze_bool_t *SubDevice) { + uint32_t ZesDriverCount = 0; + std::vector ZesDrivers; + std::vector ZesDevices; + ze_result_t ZesResult = ZE_RESULT_ERROR_INVALID_ARGUMENT; + ZE2UR_CALL(zesDriverGet, (&ZesDriverCount, nullptr)); + ZesDrivers.resize(ZesDriverCount); + ZE2UR_CALL(zesDriverGet, (&ZesDriverCount, ZesDrivers.data())); + for (uint32_t I = 0; I < ZesDriverCount; ++I) { + ZesResult = ZE_CALL_NOCHECK( + zesDriverGetDeviceByUuidExp, + (ZesDrivers[I], coreDeviceUuid, ZesDevice, SubDevice, SubDeviceId)); + if (ZesResult == ZE_RESULT_SUCCESS) { + return UR_RESULT_SUCCESS; + } + } + return
UR_RESULT_ERROR_INVALID_ARGUMENT; +} + +ur_result_t initPlatforms(PlatformVec &platforms, + ze_result_t ZesResult) noexcept try { uint32_t ZeDriverCount = 0; ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, nullptr)); if (ZeDriverCount == 0) { @@ -65,24 +88,37 @@ ur_result_t initPlatforms(PlatformVec &platforms) noexcept try { ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data())); for (uint32_t I = 0; I < ZeDriverCount; ++I) { + bool DriverInit = false; ze_device_properties_t device_properties{}; device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; uint32_t ZeDeviceCount = 0; ZE2UR_CALL(zeDeviceGet, (ZeDrivers[I], &ZeDeviceCount, nullptr)); ZeDevices.resize(ZeDeviceCount); ZE2UR_CALL(zeDeviceGet, (ZeDrivers[I], &ZeDeviceCount, ZeDevices.data())); + auto platform = std::make_unique(ZeDrivers[I]); // Check if this driver has GPU Devices for (uint32_t D = 0; D < ZeDeviceCount; ++D) { ZE2UR_CALL(zeDeviceGetProperties, (ZeDevices[D], &device_properties)); - if (ZE_DEVICE_TYPE_GPU == device_properties.type) { - // If this Driver is a GPU, save it as a usable platform. - auto platform = std::make_unique(ZeDrivers[I]); - UR_CALL(platform->initialize()); + if (!DriverInit) { + // If this Driver is a GPU, save it as a usable platform. + UR_CALL(platform->initialize()); - // Save a copy in the cache for future uses. - platforms.push_back(std::move(platform)); - break; + // Save a copy in the cache for future uses. 
+ platforms.push_back(std::move(platform)); + DriverInit = true; + } + if (ZesResult == ZE_RESULT_SUCCESS) { + ur_zes_device_handle_data_t ZesDeviceData; + zes_uuid_t ZesUUID; + std::memcpy(&ZesUUID, &device_properties.uuid, sizeof(zes_uuid_t)); + if (getZesDeviceHandle( + ZesUUID, &ZesDeviceData.ZesDevice, &ZesDeviceData.SubDeviceId, + &ZesDeviceData.SubDevice) == UR_RESULT_SUCCESS) { + platforms.back()->ZedeviceToZesDeviceMap.insert( + std::make_pair(ZeDevices[D], std::move(ZesDeviceData))); + } + } } } } @@ -172,7 +208,9 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() return; } - ur_result_t err = initPlatforms(platforms); + GlobalAdapter->ZesResult = ZE_CALL_NOCHECK(zesInit, (0)); + + ur_result_t err = initPlatforms(platforms, *GlobalAdapter->ZesResult); if (err == UR_RESULT_SUCCESS) { result = std::move(platforms); } else { diff --git a/source/adapters/level_zero/adapter.hpp b/source/adapters/level_zero/adapter.hpp index 273cdb4193..9dbd03e262 100644 --- a/source/adapters/level_zero/adapter.hpp +++ b/source/adapters/level_zero/adapter.hpp @@ -27,6 +27,7 @@ struct ur_adapter_handle_t_ { std::mutex Mutex; std::optional ZeResult; + std::optional ZesResult; ZeCache> PlatformCache; logger::Logger &logger; }; diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index de2bee3789..6bea2ce6dd 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -701,11 +701,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( } case UR_DEVICE_INFO_GLOBAL_MEM_FREE: { - if (getenv("ZES_ENABLE_SYSMAN") == nullptr) { - setErrorMessage("Set ZES_ENABLE_SYSMAN=1 to obtain free memory", - UR_RESULT_ERROR_UNINITIALIZED, + bool SysManEnv = getenv_tobool("ZES_ENABLE_SYSMAN", false); + if ((Device->Platform->ZedeviceToZesDeviceMap.size() == 0) && !SysManEnv) { + setErrorMessage("SysMan support is unavailable on this system. 
Please " + "check your level zero driver installation.", + UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION, static_cast(ZE_RESULT_ERROR_UNINITIALIZED)); - return UR_RESULT_ERROR_ADAPTER_SPECIFIC; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } // Calculate the global memory size as the max limit that can be reported as // "free" memory for the user to allocate. @@ -714,30 +716,57 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( // Currently this is only the one enumerated with ordinal 0. uint64_t FreeMemory = 0; uint32_t MemCount = 0; - ZE2UR_CALL(zesDeviceEnumMemoryModules, (ZeDevice, &MemCount, nullptr)); + + zes_device_handle_t ZesDevice = Device->ZeDevice; + struct ur_zes_device_handle_data_t ZesDeviceData = {}; + // If legacy sysman is enabled thru the environment variable, then zesInit + // will fail, but sysman is still usable so go the legacy route. + if (!SysManEnv) { + auto It = Device->Platform->ZedeviceToZesDeviceMap.find(Device->ZeDevice); + if (It == Device->Platform->ZedeviceToZesDeviceMap.end()) { + // no matching device + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } else { + ZesDeviceData = + Device->Platform->ZedeviceToZesDeviceMap[Device->ZeDevice]; + ZesDevice = ZesDeviceData.ZesDevice; + } + } + + ZE2UR_CALL(zesDeviceEnumMemoryModules, (ZesDevice, &MemCount, nullptr)); if (MemCount != 0) { std::vector ZesMemHandles(MemCount); ZE2UR_CALL(zesDeviceEnumMemoryModules, - (ZeDevice, &MemCount, ZesMemHandles.data())); + (ZesDevice, &MemCount, ZesMemHandles.data())); for (auto &ZesMemHandle : ZesMemHandles) { ZesStruct ZesMemProperties; ZE2UR_CALL(zesMemoryGetProperties, (ZesMemHandle, &ZesMemProperties)); // For root-device report memory from all memory modules since that // is what totally available in the default implicit scaling mode. // For sub-devices only report memory local to them. 
- if (!Device->isSubDevice() || Device->ZeDeviceProperties->subdeviceId == - ZesMemProperties.subdeviceId) { - - ZesStruct ZesMemState; - ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); - FreeMemory += ZesMemState.free; + if (SysManEnv) { + if (!Device->isSubDevice() || + Device->ZeDeviceProperties->subdeviceId == + ZesMemProperties.subdeviceId) { + + ZesStruct ZesMemState; + ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); + FreeMemory += ZesMemState.free; + } + } else { + if (ZesDeviceData.SubDeviceId == ZesMemProperties.subdeviceId || + !ZesDeviceData.SubDevice) { + ZesStruct ZesMemState; + ZE2UR_CALL(zesMemoryGetState, (ZesMemHandle, &ZesMemState)); + FreeMemory += ZesMemState.free; + } } } } if (MemCount > 0) { return ReturnValue(std::min(GlobalMemSize, FreeMemory)); } else { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } } case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: { diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index b5a4d97aeb..b53b55bb23 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -12,11 +12,18 @@ #include "common.hpp" #include "ur_api.h" #include "ze_api.h" +#include "zes_api.h" struct ur_device_handle_t_; typedef size_t DeviceId; +struct ur_zes_device_handle_data_t { + zes_device_handle_t ZesDevice; + uint32_t SubDeviceId; + ze_bool_t SubDevice = false; +}; + struct ur_platform_handle_t_ : public _ur_platform { ur_platform_handle_t_(ze_driver_handle_t Driver) : ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} @@ -27,6 +34,11 @@ struct ur_platform_handle_t_ : public _ur_platform { // a pretty good fit to keep here. ze_driver_handle_t ZeDriver; + // Cache of the ZesDevices mapped to the ZeDevices for use in zes apis calls + // based on a ze device handle. 
+ std::unordered_map + ZedeviceToZesDeviceMap; + // Given a multi driver scenario, the driver handle must be translated to the // internal driver handle to allow calls to driver experimental apis. ze_driver_handle_t ZeDriverHandleExpTranslated;