From 7b98b9688230c4bcdf628002202b6f5049b7744e Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Thu, 3 Oct 2024 16:25:18 +0300 Subject: [PATCH] Use different graph allocator --- .../src/backend/include/zero_executor.hpp | 3 +- .../src/backend/src/zero_executor.cpp | 76 +++++++++++++------ 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp index 11cc6fab4bce25..05d81d04c0f81e 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp @@ -61,6 +61,8 @@ class ZeroExecutor final : public IExecutor { } private: + void initilize_graph_through_command_list() const; + const Config _config; Logger _logger; @@ -72,7 +74,6 @@ class ZeroExecutor final : public IExecutor { const uint32_t _group_ordinal; ze_graph_handle_t _graph = nullptr; - ze_graph_properties_t _props{}; std::vector _input_descriptors; std::vector _output_descriptors; diff --git a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp index ade476c5649e53..3654dcb9b43f68 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp @@ -37,23 +37,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i _initStructs->getCommandQueueDdiTable(), _config, group_ordinal)} { - _logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list"); - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor"); - CommandList graph_command_list(_initStructs->getDevice(), - _initStructs->getContext(), - _graph_ddi_table_ext, - _config, - _group_ordinal); - _logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue"); - CommandQueue graph_command_queue(_initStructs->getDevice(), - _initStructs->getContext(), - ZE_COMMAND_QUEUE_PRIORITY_NORMAL, - _initStructs->getCommandQueueDdiTable(), - _config, - _group_ordinal); - _logger.debug("ZeroExecutor::ZeroExecutor - create fence"); - Fence fence(graph_command_queue, _config); - _logger.debug("ZeroExecutor::ZeroExecutor - create graph"); OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate"); @@ -79,7 +62,10 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties"); _logger.debug("performing pfnGetProperties"); - zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &_props)); + ze_graph_properties_t props{}; + props.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; + + zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &props)); auto targetDriverExtVersion = _graph_ddi_table_ext.version(); if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) { OPENVINO_THROW("Incompatibility between the NPU plugin and driver! The driver version is too old, please " @@ -88,8 +74,9 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3"); _logger.debug("performing pfnGetArgumentProperties3"); - for (uint32_t index = 0; index < _props.numGraphArgs; ++index) { - ze_graph_argument_properties_3_t arg3; + for (uint32_t index = 0; index < props.numGraphArgs; ++index) { + ze_graph_argument_properties_3_t arg3{}; + arg3.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES; zeroUtils::throwOnFail("pfnGetArgumentProperties3", _graph_ddi_table_ext.pfnGetArgumentProperties3(_graph, index, &arg3)); @@ -100,6 +87,51 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i } } + if (_graph_ddi_table_ext.version() < ZE_GRAPH_EXT_VERSION_1_8) { + initilize_graph_through_command_list(); + } else { + ze_graph_properties_2_t properties = {}; + properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; + _graph_ddi_table_ext.pfnGetProperties2(_graph, &properties); + + if (properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) { + OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGraphInitialize"); + _graph_ddi_table_ext.pfnGraphInitialize(_graph); + + if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) { + initilize_graph_through_command_list(); + } + } + } + + if (config.has()) { + setWorkloadType(config.get()); + } +} + +void ZeroExecutor::initilize_graph_through_command_list() const { + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, + itt::domains::LevelZeroBackend, + "Executor::ZeroExecutor", + "initilize_graph_through_command_list"); + + _logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list"); + OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor"); + CommandList graph_command_list(_initStructs->getDevice(), + _initStructs->getContext(), + _graph_ddi_table_ext, + _config, + _group_ordinal); + _logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue"); + CommandQueue graph_command_queue(_initStructs->getDevice(), + _initStructs->getContext(), + ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + _initStructs->getCommandQueueDdiTable(), + _config, + _group_ordinal); + _logger.debug("ZeroExecutor::ZeroExecutor - create fence"); + Fence fence(graph_command_queue, _config); + OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize"); _logger.debug("ZeroExecutor::ZeroExecutor - performing appendGraphInitialize"); graph_command_list.appendGraphInitialize(_graph); @@ -112,10 +144,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i _logger.debug("ZeroExecutor::ZeroExecutor - performing hostSynchronize"); fence.hostSynchronize(); _logger.debug("ZeroExecutor::ZeroExecutor - hostSynchronize completed"); - - if (config.has()) { - setWorkloadType(config.get()); - } } void ZeroExecutor::setWorkloadType(const ov::WorkloadType workloadType) const {