diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
index 0175949db1ae73..a2f6285233b157 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
@@ -30,37 +30,29 @@ namespace intel_npu {
  * upon applying a transposition corresponding to the legacy layout value. Use the "transposedShape" one if not sure
  * which one you need.
  */
-struct IONodeDescriptor {
-    std::string legacyName;
-    std::string currentNodeName;
+struct IODescriptor {
+    std::string nameFromCompiler;
+    std::string nodeFriendlyName;
     std::unordered_set<std::string> outputTensorNames;
     ov::element::Type_t precision;
-    ov::PartialShape originalShape;
-    ov::PartialShape transposedShape;
+    ov::PartialShape shape;
+    bool isStateInput;
+    bool isStateOutput;
+    bool isShapeTensor;
 };
 
-/**
- * @brief A helper map to represent descriptions for inputs and outputs
- * of a network
- */
-using IONodeDescriptorMap = std::unordered_map<std::string, IONodeDescriptor>;
-
 struct NetworkMetadata final {
     std::string name;
 
-    std::vector<std::string> inputNames;
-    std::vector<std::string> outputNames;
-    std::vector<std::string> stateNames;
-    std::vector<std::string> shapeNames;
-
-    IONodeDescriptorMap parameters;
-    IONodeDescriptorMap results;
-    IONodeDescriptorMap states;
-    IONodeDescriptorMap shapes;
-    IONodeDescriptorMap profilingOutputs;
-
-    std::unordered_map<std::string, size_t> inputOrder;
-    std::unordered_map<std::string, size_t> outputOrder;
+    std::vector<IODescriptor> inputs;
+    std::vector<IODescriptor> outputs;
+    std::vector<IODescriptor> profilingOutputs;
 
     int numStreams = 1;
diff --git a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp
index 34e9239e8430d1..b1639ac1879d1c 100644
--- a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp
+++ b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp
@@ -177,7 +177,7 @@ class SyncInferRequest : public ov::IInferRequest {
      * @param allocator If provided, the tensor uses the custom allocator instead of using the default one.
      */
     void allocate_tensor(std::string tensorName,
-                         const IONodeDescriptor& descriptor,
+                         const IODescriptor& descriptor,
                          TensorType tensorType = TensorType::InputOrOutput,
                          const ov::Allocator& allocator = {});
diff --git a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp
index 5b50caf8f3fe3d..61967215342639 100644
--- a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp
@@ -160,7 +160,7 @@ void SyncInferRequest::check_tensors() const {
 }
 
 void SyncInferRequest::allocate_tensor(std::string tensorName,
-                                       const IONodeDescriptor& descriptor,
+                                       const IODescriptor& descriptor,
                                        TensorType tensorType,
                                        const ov::Allocator& allocator) {
     std::shared_ptr<ov::ITensor> tensor;
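Note on the map-to-vector switch above: NetworkMetadata no longer offers name-keyed lookups such as _metadata.parameters.at(inputName), so call sites either walk the descriptors in argument order or search by nameFromCompiler. A minimal stand-alone sketch of what such a lookup could look like (the find_io helper and the trimmed struct are illustrative, not part of this patch):

// Sketch only: name-based lookup once IONodeDescriptorMap is gone.
// "find_io" is a hypothetical helper, not part of the patch.
#include <algorithm>
#include <cstddef>
#include <optional>
#include <string>
#include <vector>

struct IODescriptor {  // trimmed copy of the new struct, for illustration
    std::string nameFromCompiler;
};

std::optional<std::size_t> find_io(const std::vector<IODescriptor>& ios, const std::string& name) {
    const auto it = std::find_if(ios.begin(), ios.end(), [&](const IODescriptor& io) {
        return io.nameFromCompiler == name;
    });
    if (it == ios.end()) {
        return std::nullopt;  // unknown name: the old map's .at() would have thrown here
    }
    return static_cast<std::size_t>(it - ios.begin());  // positional index into the vector
}

The returned index doubles as the Level Zero argument index, which is the main point of keeping the vector in compiler order.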
diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index c53174e8c14f1c..55478a7f1dd969 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -24,7 +24,7 @@ constexpr std::size_t BATCH_AXIS = 0;
  * @param zeDescriptor The Level Zero specific structure used for comparison.
  * @param name Tensor identifier used for error logging.
  */
-void checkLevelZeroAttributesMatch(const IONodeDescriptor& nodeDescriptor,
+void checkLevelZeroAttributesMatch(const IODescriptor& nodeDescriptor,
                                    const ZeroExecutor::ArgumentDescriptor& zeDescriptor,
                                    const std::string& name) {
     const ov::element::Type_t ovPrecision = nodeDescriptor.precision;
@@ -197,7 +197,7 @@ ZeroInferRequest::ZeroInferRequest
     }
 
     for (const std::string& inputName : _metadata.inputNames) {
-        IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName);
+        IODescriptor& parameterDescriptor = _metadata.parameters.at(inputName);
         checkLevelZeroAttributesMatch(parameterDescriptor, executorInputDescriptors.at(inputName), inputName);
 
         ov::Allocator inputAllocator;
@@ -218,7 +218,7 @@ ZeroInferRequest::ZeroInferRequest
         if (contains(_metadata.shapeNames, inputName)) {
             const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + inputName;
-            const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(inputName);
+            const IODescriptor& shapeDescriptor = _metadata.shapes.at(inputName);
 
             checkLevelZeroAttributesMatch(shapeDescriptor,
                                           executorInputDescriptors.at(shapeBufferName),
@@ -244,7 +244,7 @@ ZeroInferRequest::ZeroInferRequest
         if (shapeNameMatch != _nodeNameToLegacyName.end()) {
             if (contains(_metadata.shapeNames, shapeNameMatch->second)) {
                 const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + shapeNameMatch->second;
-                const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second);
+                const IODescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second);
 
                 checkLevelZeroAttributesMatch(shapeDescriptor,
                                               executorOutputDescriptors.at(shapeBufferName),
@@ -266,7 +266,7 @@ ZeroInferRequest::ZeroInferRequest
             OPENVINO_THROW("Invalid graph output descriptor key: " + stateOutputBufferName);
         }
 
-        const IONodeDescriptor& stateDescriptor = _metadata.states.at(stateName);
+        const IODescriptor& stateDescriptor = _metadata.states.at(stateName);
         checkLevelZeroAttributesMatch(stateDescriptor,
                                       executorInputDescriptors.at(stateInputBufferName),
                                       stateInputBufferName);
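The shape/state special-casing above is what the new isShapeTensor/isStateInput/isStateOutput flags are meant to absorb: instead of consulting the separate shapeNames/stateNames lists plus the shapes/states side maps, a request can branch on the descriptor itself. A stand-alone sketch of the resulting control flow (the sample names are invented):

// Stand-alone sketch: the flag checks replace the
// contains(_metadata.shapeNames, ...) / contains(_metadata.stateNames, ...)
// lookups seen in the hunks above.
#include <iostream>
#include <string>
#include <vector>

struct IODescriptor {  // trimmed copy of the new struct, for illustration
    std::string nameFromCompiler;
    bool isStateInput = false;
    bool isStateOutput = false;
    bool isShapeTensor = false;
};

int main() {
    const std::vector<IODescriptor> inputs = {
        {"data", false, false, false},         // regular network input
        {"state_in", true, false, false},      // read-value input of a stateful model
        {"shape_of_data", false, false, true}  // dynamic-shape companion tensor
    };

    for (const IODescriptor& desc : inputs) {
        if (desc.isShapeTensor) {
            std::cout << desc.nameFromCompiler << ": bind shape-tensor buffer\n";
        } else if (desc.isStateInput) {
            std::cout << desc.nameFromCompiler << ": bind state buffer\n";
        } else {
            std::cout << desc.nameFromCompiler << ": bind regular input buffer\n";
        }
    }
    return 0;
}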
diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
index fc9157469e383a..ff6ee0a8cb93ef 100644
--- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
+++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp
@@ -104,9 +104,9 @@ class LevelZeroCompilerInDriver final : public IExternalCompiler {
      * be extracted.
      */
    template <typename T>
-    void getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters,
-                                    IONodeDescriptorMap& results,
-                                    IONodeDescriptorMap& states,
+    void getLayoutOrStateDescriptor(std::vector<IODescriptor>& parameters,
+                                    std::vector<IODescriptor>& results,
+                                    std::vector<IODescriptor>& states,
                                     std::vector<std::string>& stateNames,
                                     const T& arg) const;
@@ -117,9 +117,9 @@ class LevelZeroCompilerInDriver final : public IExternalCompiler {
                      std::vector<std::string>& inputNames,
                      std::vector<std::string>& outputNames,
                      std::vector<std::string>& stateNames,
-                     IONodeDescriptorMap& parameters,
-                     IONodeDescriptorMap& results,
-                     IONodeDescriptorMap& state) const;
+                     std::vector<IODescriptor>& parameters,
+                     std::vector<IODescriptor>& results,
+                     std::vector<IODescriptor>& state) const;
 
    template <typename T = TableExtension, std::enable_if_t<!NotSupportArgumentMetadata(T), bool> = true>
    void getMetadata(TableExtension* graphDdiTableExt,
@@ -128,9 +128,9 @@ class LevelZeroCompilerInDriver final : public IExternalCompiler {
                      std::vector<std::string>& inputNames,
                      std::vector<std::string>& outputNames,
                      std::vector<std::string>& stateNames,
-                     IONodeDescriptorMap& parameters,
-                     IONodeDescriptorMap& results,
-                     IONodeDescriptorMap& state) const;
+                     std::vector<IODescriptor>& parameters,
+                     std::vector<IODescriptor>& results,
+                     std::vector<IODescriptor>& state) const;
 
    // ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory)
    template <typename T = TableExtension, std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
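For readers unfamiliar with the template <..., std::enable_if_t<...> = true> context lines above: the two getMetadata declarations are SFINAE-selected overloads, one for driver extension tables that lack argument-metadata support and one for those that expose it. A self-contained illustration of the pattern (the table types and the trait are invented here, not the plugin's actual definitions):

// Self-contained illustration of the SFINAE overload pattern used by the
// getMetadata declarations above.
#include <iostream>
#include <type_traits>

struct OldTable {};  // stands in for an extension table without argument metadata
struct NewTable {};  // stands in for a table that also exposes argument metadata

template <typename T>
constexpr bool NotSupportArgumentMetadata() {
    return std::is_same_v<T, OldTable>;
}

// Selected when the table cannot report argument metadata.
template <typename T, std::enable_if_t<NotSupportArgumentMetadata<T>(), bool> = true>
void getMetadata() {
    std::cout << "fallback: argument properties only\n";
}

// Selected when the table can report argument metadata as well.
template <typename T, std::enable_if_t<!NotSupportArgumentMetadata<T>(), bool> = true>
void getMetadata() {
    std::cout << "full path: properties + argument metadata\n";
}

int main() {
    getMetadata<OldTable>();  // prints the fallback line
    getMetadata<NewTable>();  // prints the full-path line
    return 0;
}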
diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
index 9e1c35c5743c8b..47d51263ed50b6 100644
--- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
+++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
@@ -841,9 +841,9 @@ uint32_t LevelZeroCompilerInDriver<TableExtension>::getSupportedOpset() const {
 
 template <typename TableExtension>
 template <typename T>
-void LevelZeroCompilerInDriver<TableExtension>::getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters,
-                                                                           IONodeDescriptorMap& results,
-                                                                           IONodeDescriptorMap& states,
+void LevelZeroCompilerInDriver<TableExtension>::getLayoutOrStateDescriptor(std::vector<IODescriptor>& parameters,
+                                                                           std::vector<IODescriptor>& results,
+                                                                           std::vector<IODescriptor>& states,
                                                                            std::vector<std::string>& stateNames,
                                                                            const T& arg) const {
     std::string legacyName = arg.name;
@@ -885,7 +885,7 @@ void LevelZeroCompilerInDriver<TableExtension>::getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters,
  * @param names The I/O identifiers shall be stored here in the order found within the compiled model.
  * @param metadata The Level Zero structure from which the descriptors will be extracted.
  */
-static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors,
+static void getNodeDescriptor(std::vector<IODescriptor>& nodeDescriptors,
                               std::vector<std::string>& names,
                               ze_graph_argument_properties_3_t& arg) {
     ov::element::Type_t precision = toOVElementType(arg.devicePrecision);
@@ -907,7 +907,7 @@ static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors,
         {legacyName, arg.debug_friendly_name, std::move(outputTensorNames), precision, shape, shape};
 }
 
-static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors,
+static void getNodeDescriptor(std::vector<IODescriptor>& nodeDescriptors,
                               std::vector<std::string>& names,
                               ze_graph_argument_properties_3_t& arg,
                               ze_graph_argument_metadata_t& metadata) {
@@ -942,9 +942,9 @@ void LevelZeroCompilerInDriver<TableExtension>::getMetadata(TableExtension* graphDdiTableExt,
                                                             std::vector<std::string>& inputNames,
                                                             std::vector<std::string>& outputNames,
                                                             std::vector<std::string>& stateNames,
-                                                            IONodeDescriptorMap& parameters,
-                                                            IONodeDescriptorMap& results,
-                                                            IONodeDescriptorMap& states) const {
+                                                            std::vector<IODescriptor>& parameters,
+                                                            std::vector<IODescriptor>& results,
+                                                            std::vector<IODescriptor>& states) const {
     ze_graph_argument_properties_3_t arg;
     auto result = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &arg);
     if (ZE_RESULT_SUCCESS != result) {
@@ -977,9 +977,9 @@ void LevelZeroCompilerInDriver<TableExtension>::getMetadata(TableExtension* graphDdiTableExt,
                                                             std::vector<std::string>& inputNames,
                                                             std::vector<std::string>& outputNames,
                                                             std::vector<std::string>& stateNames,
-                                                            IONodeDescriptorMap& parameters,
-                                                            IONodeDescriptorMap& results,
-                                                            IONodeDescriptorMap& states) const {
+                                                            std::vector<IODescriptor>& parameters,
+                                                            std::vector<IODescriptor>& results,
+                                                            std::vector<IODescriptor>& states) const {
     ze_graph_argument_properties_3_t arg;
     auto result = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &arg);
     if (ZE_RESULT_SUCCESS != result) {
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index e2e895019d19f5..50e5e50b3f5e15 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -39,8 +39,8 @@ const char* NPU_PLUGIN_LIB_NAME = "openvino_intel_npu_plugin";
  * @param outputNames The names of the outputs registered in the order given by the model.
  * @param isBatchingSupported Newer driver versions support batching mode on the plugin.
  */
-std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& parameterDescriptors,
-                                              const IONodeDescriptorMap& resultDescriptors,
+std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& parameterDescriptors,
+                                              const std::vector<IODescriptor>& resultDescriptors,
                                               const std::vector<std::string>& inputNames,
                                               const std::vector<std::string>& outputNames,
                                               bool isBatchingSupported) {
@@ -48,7 +48,7 @@ std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& parameterDescriptors,
     ov::ParameterVector parameters;
     ov::NodeVector results;
 
     for (const std::string& inputName : inputNames) {
-        const IONodeDescriptor& parameterDescriptor = parameterDescriptors.at(inputName);
+        const IODescriptor& parameterDescriptor = parameterDescriptors.at(inputName);
 
         std::shared_ptr<ov::op::v0::Parameter> parameter = [&] {
             if (isBatchingSupported) {
@@ -70,7 +70,7 @@ std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& parameterDescriptors,
     // constant can't have dynamic shape). The dummy tensor was also brought in order to register the correct,
     // potentially dynamic, output shape.
     for (const std::string& outputName : outputNames) {
-        const IONodeDescriptor& resultDescriptor = resultDescriptors.at(outputName);
+        const IODescriptor& resultDescriptor = resultDescriptors.at(outputName);
         std::shared_ptr<ov::Node> constantDummy =
             std::make_shared<ov::op::v0::Constant>(resultDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);
         constantDummy->set_friendly_name(resultDescriptor.legacyName);
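A possible follow-up that this refactor enables in create_dummy_model: since the descriptors now arrive in argument order, the (inputNames, parameterDescriptors.at(name)) indirection can collapse into a single loop over the vector. A sketch against the OpenVINO node API, assuming the patched IODescriptor from icompiler.hpp; the helper itself is hypothetical and not part of this diff:

// Hypothetical rewrite of the input loop once the ordered vector replaces
// the (names, map) pair.
#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

#include "openvino/core/partial_shape.hpp"
#include "openvino/core/type/element_type.hpp"
#include "openvino/op/parameter.hpp"

struct IODescriptor {  // as introduced by this patch
    std::string nameFromCompiler;
    std::string nodeFriendlyName;
    std::unordered_set<std::string> outputTensorNames;
    ov::element::Type_t precision;
    ov::PartialShape shape;
    bool isStateInput;
    bool isStateOutput;
    bool isShapeTensor;
};

ov::ParameterVector dummy_parameters(const std::vector<IODescriptor>& descriptors) {
    ov::ParameterVector parameters;
    for (const IODescriptor& desc : descriptors) {
        // One dummy Parameter per descriptor, registered with the friendly
        // name and tensor names the compiler reported.
        auto parameter = std::make_shared<ov::op::v0::Parameter>(desc.precision, desc.shape);
        parameter->set_friendly_name(desc.nodeFriendlyName);
        parameter->output(0).get_tensor().set_names(desc.outputTensorNames);
        parameters.push_back(parameter);
    }
    return parameters;
}

Iterating the vector directly also removes the ordering maps (inputOrder/outputOrder) that the old layout needed to recover argument positions.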