diff --git a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp
index 11cc6fab4bce25..c3f4cc14eb4945 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp
@@ -61,6 +61,8 @@ class ZeroExecutor final : public IExecutor {
     }
 
 private:
+    void initialize_graph_through_command_list() const;
+
     const Config _config;
     Logger _logger;
 
@@ -72,7 +74,6 @@ class ZeroExecutor final : public IExecutor {
     const uint32_t _group_ordinal;
 
     ze_graph_handle_t _graph = nullptr;
-    ze_graph_properties_t _props{};
 
     std::vector _input_descriptors;
     std::vector _output_descriptors;
diff --git a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp
index a2b50639cf97a0..ca65d99eff806b 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp
@@ -45,7 +45,7 @@ class ZeroRemoteTensor : public RemoteTensor {
     void* _mem = nullptr;
     void* _data = nullptr;
 
-    bool _external_memory_support = true;
+    bool _external_memory_support = false;
 };
 
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/backend/include/zero_types.hpp b/src/plugins/intel_npu/src/backend/include/zero_types.hpp
index 834d66a45a80d9..32c67988bfb565 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_types.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_types.hpp
@@ -16,7 +16,7 @@
 /**
  * @brief Last version of Table of Graph Extension functions used within plugin
  */
-using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_7_t;
+using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_8_t;
 /**
  * @brief Last version of the Command Queue functions used within plugin
  */
@@ -157,10 +157,24 @@ struct ze_graph_dditable_ext_decorator final {
     }
 
     // version 1.7
-    ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, size_t* pSize, uint8_t** pGraphNativeBinary) {
+    ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph,
+                                               size_t* pSize,
+                                               const uint8_t** pGraphNativeBinary) {
         throwWhenUnsupported("pfnGetNativeBinary2", ZE_GRAPH_EXT_VERSION_1_7);
         return _impl->pfnGetNativeBinary2(hGraph, pSize, pGraphNativeBinary);
     }
+
+    // version 1.8
+    // The name given to throwWhenUnsupported is the wrapped entry point itself, as in the 1.7 wrapper above.
+    ze_result_t ZE_APICALL pfnGetProperties2(ze_graph_handle_t hGraph, ze_graph_properties_2_t* pGraphProperties) {
+        throwWhenUnsupported("pfnGetProperties2", ZE_GRAPH_EXT_VERSION_1_8);
+        return _impl->pfnGetProperties2(hGraph, pGraphProperties);
+    }
+
+    ze_result_t ZE_APICALL pfnGraphInitialize(ze_graph_handle_t hGraph) {
+        throwWhenUnsupported("pfnGraphInitialize", ZE_GRAPH_EXT_VERSION_1_8);
+        return _impl->pfnGraphInitialize(hGraph);
+    }
 };
 
 /**
diff --git a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp
index ade476c5649e53..3655f0b611d5f9 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp
@@ -37,23 +37,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i
                                                      _initStructs->getCommandQueueDdiTable(),
                                                      _config,
                                                      group_ordinal)} {
-    _logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list");
-    OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor");
-    CommandList graph_command_list(_initStructs->getDevice(),
-                                   _initStructs->getContext(),
-                                   _graph_ddi_table_ext,
-                                   _config,
-                                   _group_ordinal);
-    _logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue");
-    CommandQueue graph_command_queue(_initStructs->getDevice(),
-                                     _initStructs->getContext(),
-                                     ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
-                                     _initStructs->getCommandQueueDdiTable(),
-                                     _config,
-                                     _group_ordinal);
-    _logger.debug("ZeroExecutor::ZeroExecutor - create fence");
-    Fence fence(graph_command_queue, _config);
-
     _logger.debug("ZeroExecutor::ZeroExecutor - create graph");
     OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate");
 
@@ -79,7 +62,10 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i
 
     OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties");
     _logger.debug("performing pfnGetProperties");
-    zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &_props));
+    ze_graph_properties_t props{};
+    props.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
+
+    zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &props));
     auto targetDriverExtVersion = _graph_ddi_table_ext.version();
     if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) {
         OPENVINO_THROW("Incompatibility between the NPU plugin and driver! The driver version is too old, please "
@@ -88,8 +74,9 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i
 
     OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3");
     _logger.debug("performing pfnGetArgumentProperties3");
-    for (uint32_t index = 0; index < _props.numGraphArgs; ++index) {
-        ze_graph_argument_properties_3_t arg3;
+    for (uint32_t index = 0; index < props.numGraphArgs; ++index) {
+        ze_graph_argument_properties_3_t arg3{};
+        arg3.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES;
         zeroUtils::throwOnFail("pfnGetArgumentProperties3",
                                _graph_ddi_table_ext.pfnGetArgumentProperties3(_graph, index, &arg3));
 
@@ -100,6 +87,56 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i
         }
     }
 
+    // Graph extension versions older than 1.8 do not provide pfnGetProperties2/pfnGraphInitialize, so the graph
+    // is always initialized through a command list there. Newer drivers report the stages they need.
+    if (_graph_ddi_table_ext.version() < ZE_GRAPH_EXT_VERSION_1_8) {
+        initialize_graph_through_command_list();
+    } else {
+        ze_graph_properties_2_t properties = {};
+        properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
+        // Results are checked: a failed query could leave initStageRequired zeroed and silently skip initialization.
+        zeroUtils::throwOnFail("pfnGetProperties2", _graph_ddi_table_ext.pfnGetProperties2(_graph, &properties));
+
+        if (properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) {
+            OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGraphInitialize");
+            zeroUtils::throwOnFail("pfnGraphInitialize", _graph_ddi_table_ext.pfnGraphInitialize(_graph));
+        }
+
+        if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) {
+            initialize_graph_through_command_list();
+        }
+    }
+
+    if (config.has()) {
+        setWorkloadType(config.get());
+    }
+}
+
+// Initializes _graph through a locally created command list, command queue and fence: appendGraphInitialize is
+// recorded into the list and the call blocks in fence.hostSynchronize().
+void ZeroExecutor::initialize_graph_through_command_list() const {
+    OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH,
+                      itt::domains::LevelZeroBackend,
+                      "Executor::ZeroExecutor",
+                      "initialize_graph_through_command_list");
+
+    _logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list");
+    OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor");
+    CommandList graph_command_list(_initStructs->getDevice(),
+                                   _initStructs->getContext(),
+                                   _graph_ddi_table_ext,
+                                   _config,
+                                   _group_ordinal);
+    _logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue");
+    CommandQueue graph_command_queue(_initStructs->getDevice(),
+                                     _initStructs->getContext(),
+                                     ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
+                                     _initStructs->getCommandQueueDdiTable(),
+                                     _config,
+                                     _group_ordinal);
+    _logger.debug("ZeroExecutor::ZeroExecutor - create fence");
+    Fence fence(graph_command_queue, _config);
+
     OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize");
     _logger.debug("ZeroExecutor::ZeroExecutor - performing appendGraphInitialize");
     graph_command_list.appendGraphInitialize(_graph);
@@ -112,10 +149,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i
     _logger.debug("ZeroExecutor::ZeroExecutor - performing hostSynchronize");
     fence.hostSynchronize();
     _logger.debug("ZeroExecutor::ZeroExecutor - hostSynchronize completed");
-
-    if (config.has()) {
-        setWorkloadType(config.get());
-    }
 }
 
 void ZeroExecutor::setWorkloadType(const ov::WorkloadType workloadType) const {
diff --git a/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp
index 4ac1d75fe57f10..e47b454e11c427 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp
@@ -44,9 +44,16 @@ ZeroRemoteTensor::ZeroRemoteTensor(std::shared_ptr context,
     ze_device_external_memory_properties_t desc = {};
     desc.stype = ZE_STRUCTURE_TYPE_DEVICE_EXTERNAL_MEMORY_PROPERTIES;
     auto res = zeDeviceGetExternalMemoryProperties(_init_structs->getDevice(), &desc);
-    if (res != ZE_RESULT_SUCCESS || (desc.memoryAllocationImportTypes != ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF &&
-                                     desc.memoryAllocationImportTypes != ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32)) {
-        _external_memory_support = false;
+    if (res == ZE_RESULT_SUCCESS) {
+#ifdef _WIN32
+        if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32) {
+            _external_memory_support = true;
+        }
+#else
+        if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) {
+            _external_memory_support = true;
+        }
+#endif
     }
 
     allocate(byte_size);
diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
index 523fc87a7f9dd3..65f7e8e3bee6c7 100644
--- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
+++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
@@ -132,14 +132,14 @@ class LevelZeroCompilerInDriver final : public ICompiler {
     void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
                          ze_graph_handle_t graphHandle,
                          std::vector& blob,
-                         uint8_t*& blobPtr,
+                         const uint8_t*& blobPtr,
                          size_t& blobSize) const;
 
     template = true>
     void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
                          ze_graph_handle_t graphHandle,
                          std::vector& /* unusedBlob */,
-                         uint8_t*& blobPtr,
+                         const uint8_t*& blobPtr,
                          size_t& blobSize) const;
 
     template = true>
diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
index 1f2a23539a99f5..ad7efd701cfde6 100644
--- a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp
@@ -70,6 +70,12 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr>(driverHandle,
+                                                   deviceHandle,
+                                                   zeContext,
+                                                   graph_ddi_table_ext);
+        break;
     default:
         apiAdapter = std::make_shared>(driverHandle,
                                                    deviceHandle,
diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
index af7682bc1ae064..d768a4322f7a9b 100644
--- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
@@ -371,7 +371,7 @@ template >
 void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
                                                 ze_graph_handle_t graphHandle,
                                                 std::vector& blob,
-                                                uint8_t*& blobPtr,
+                                                const uint8_t*& blobPtr,
                                                 size_t& blobSize) const {
     // Get blob size first
     auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
@@ -408,7 +408,7 @@ template >
 void LevelZeroCompilerInDriver::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
                                                 ze_graph_handle_t graphHandle,
                                                 std::vector& /* unusedBlob */,
-                                                uint8_t*& blobPtr,
+                                                const uint8_t*& blobPtr,
                                                 size_t& blobSize) const {
     // Get blob ptr and size
     auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
@@ -431,7 +431,7 @@ CompiledNetwork LevelZeroCompilerInDriver::getCompiledNetwork(
     _logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
     ze_graph_handle_t graphHandle = static_cast(networkDescription.metadata.graphHandle);
 
-    uint8_t* blobPtr = nullptr;
+    const uint8_t* blobPtr = nullptr;
     size_t blobSize = -1;
     std::vector blob;
 
@@ -1243,6 +1243,7 @@ template class LevelZeroCompilerInDriver;
 template class LevelZeroCompilerInDriver;
 template class LevelZeroCompilerInDriver;
 template class LevelZeroCompilerInDriver;
+template class LevelZeroCompilerInDriver;
 
 }  // namespace driverCompilerAdapter
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/thirdparty/level-zero-ext b/src/plugins/intel_npu/thirdparty/level-zero-ext
index 816b5ce120096c..cdb761dd63b1d4 160000
--- a/src/plugins/intel_npu/thirdparty/level-zero-ext
+++ b/src/plugins/intel_npu/thirdparty/level-zero-ext
@@ -1 +1 @@
-Subproject commit 816b5ce120096cbc115b56ed43f8a030eb420b19
+Subproject commit cdb761dd63b1d47230d501e631a2d725db09ba0d