diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp index 0175949db1ae73..a2f6285233b157 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp @@ -30,37 +30,27 @@ namespace intel_npu { * upon applying a transposition corresponding to the legacy layout value. Use the "transposedShape" one if not sure * which one you need. */ -struct IONodeDescriptor { - std::string legacyName; - std::string currentNodeName; +struct IODescriptor { + std::string nameFromCompiler; + std::string nodeFriendlyName; std::unordered_set outputTensorNames; ov::element::Type_t precision; - ov::PartialShape originalShape; - ov::PartialShape transposedShape; + ov::PartialShape shape; + bool isStateInput; + bool isStateOutput; + bool isShapeTensor; }; -/** - * @brief A helper map to represent descriptions for inputs and outputs - * of a network - */ -using IONodeDescriptorMap = std::unordered_map; - +/** + * @brief Describes a compiled network: its name, the descriptors of its inputs, outputs and profiling outputs, + * and a stream count (defaults to 1). + */ struct NetworkMetadata final { std::string name; - std::vector inputNames; - std::vector outputNames; - std::vector stateNames; - std::vector shapeNames; - - IONodeDescriptorMap parameters; - IONodeDescriptorMap results; - IONodeDescriptorMap states; - IONodeDescriptorMap shapes; - IONodeDescriptorMap profilingOutputs; - - std::unordered_map inputOrder; - std::unordered_map outputOrder; + std::vector inputs; + std::vector outputs; + std::vector profilingOutputs; int numStreams = 1; }; diff --git a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp index 34e9239e8430d1..b1639ac1879d1c 100644 --- a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp @@ -177,7 +177,7 @@ class SyncInferRequest : public ov::IInferRequest { * @param allocator If provided, the
tensor uses the custom allocator instead of using the default one. */ void allocate_tensor(std::string tensorName, - const IONodeDescriptor& descriptor, + const IODescriptor& descriptor, TensorType tensorType = TensorType::InputOrOutput, const ov::Allocator& allocator = {}); diff --git a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp index 7a0bfbbb3a61ff..37461be7a7a7f9 100644 --- a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp @@ -159,7 +159,7 @@ void SyncInferRequest::check_tensors() const { } void SyncInferRequest::allocate_tensor(std::string tensorName, - const IONodeDescriptor& descriptor, + const IODescriptor& descriptor, TensorType tensorType, const ov::Allocator& allocator) { std::shared_ptr tensor; diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 89445ebcaf15f4..543e08f642847b 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -22,7 +22,7 @@ namespace { * @param zeDescriptor The Level Zero specific structure used for comparison. * @param name Tensor identifier used for error logging. 
*/ -void check_level_zero_attributes_match(const IONodeDescriptor& nodeDescriptor, +void check_level_zero_attributes_match(const IODescriptor& nodeDescriptor, const ZeroExecutor::ArgumentDescriptor& zeDescriptor, const std::string& name) { const ov::element::Type_t ovPrecision = nodeDescriptor.precision; @@ -98,7 +98,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& OPENVINO_THROW("Invalid graph input descriptor key: " + inputName); } - const IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName); + const IODescriptor& parameterDescriptor = _metadata.parameters.at(inputName); check_level_zero_attributes_match(parameterDescriptor, executorInputDescriptors.at(inputName), inputName); ov::Allocator inputAllocator; @@ -113,7 +113,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& if (contains(_metadata.shapeNames, inputName)) { const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + inputName; - const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(inputName); + const IODescriptor& shapeDescriptor = _metadata.shapes.at(inputName); check_level_zero_attributes_match(shapeDescriptor, executorInputDescriptors.at(shapeBufferName), @@ -128,7 +128,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& OPENVINO_THROW("Invalid graph output descriptor key: " + outputName); } - const IONodeDescriptor& resultDescriptor = _metadata.results.at(outputName); + const IODescriptor& resultDescriptor = _metadata.results.at(outputName); check_level_zero_attributes_match(resultDescriptor, executorOutputDescriptors.at(outputName), outputName); allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, allocator); @@ -137,7 +137,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& if (shapeNameMatch != _nodeNameToLegacyName.end()) { if (contains(_metadata.shapeNames, shapeNameMatch->second)) { const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + shapeNameMatch->second; - const IONodeDescriptor& 
shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second); + const IODescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second); check_level_zero_attributes_match(shapeDescriptor, executorOutputDescriptors.at(shapeBufferName), @@ -159,7 +159,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& OPENVINO_THROW("Invalid graph output descriptor key: " + stateOutputBufferName); } - const IONodeDescriptor& stateDescriptor = _metadata.states.at(stateName); + const IODescriptor& stateDescriptor = _metadata.states.at(stateName); check_level_zero_attributes_match(stateDescriptor, executorInputDescriptors.at(stateInputBufferName), stateInputBufferName); diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp index b85c9cd21983d5..4f4040381140b4 100644 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp @@ -101,9 +101,9 @@ class LevelZeroCompilerInDriver final : public IExternalCompiler { * extracted. 
*/ template - void getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states, + void getLayoutOrStateDescriptor(std::vector& parameters, + std::vector& results, + std::vector& states, std::vector& stateNames, const T& arg) const; @@ -127,9 +127,9 @@ class LevelZeroCompilerInDriver final : public IExternalCompiler { */ template ::value, bool> = true> - void getNodeOrStateDescriptorLegacy(IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states, + void getNodeOrStateDescriptorLegacy(std::vector& parameters, + std::vector& results, + std::vector& states, std::vector& inputNames, std::vector& outputNames, std::vector& stateNames, @@ -141,9 +141,9 @@ class LevelZeroCompilerInDriver final : public IExternalCompiler { std::vector& inputNames, std::vector& outputNames, std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& state) const; + std::vector& parameters, + std::vector& results, + std::vector& state) const; // ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) template = true> diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp index ff7b51443acdc0..85a6b7e36102bf 100644 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp @@ -873,9 +873,9 @@ uint32_t LevelZeroCompilerInDriver::getSupportedOpset() const { template template -void LevelZeroCompilerInDriver::getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states, +void LevelZeroCompilerInDriver::getLayoutOrStateDescriptor(std::vector& parameters, + std::vector& results, + std::vector& states, std::vector& stateNames, const T& arg) const { std::string legacyName = 
arg.name; @@ -913,9 +913,9 @@ void LevelZeroCompilerInDriver::getLayoutOrStateDescriptor(IONod template template ::value, bool>> void LevelZeroCompilerInDriver::getNodeOrStateDescriptorLegacy( - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states, + std::vector& parameters, + std::vector& results, + std::vector& states, std::vector& inputNames, std::vector& outputNames, std::vector& stateNames, @@ -961,7 +961,7 @@ void LevelZeroCompilerInDriver::getNodeOrStateDescriptorLegacy( * @param names The I/O identifiers shall be stored here in the order found within the compiled model. * @param metadata The Level Zero structure fomr which the descriptors will be extracted. */ -static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors, +static void getNodeDescriptor(std::vector& nodeDescriptors, std::vector& names, ze_graph_argument_metadata_t& metadata) { const ov::element::Type_t precision = toOVElementType(metadata.data_type); @@ -981,7 +981,7 @@ static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors, {legacyName, metadata.friendly_name, std::move(outputTensorNames), precision, shape, shape}; } -static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors, +static void getNodeDescriptor(std::vector& nodeDescriptors, std::vector& names, ze_graph_argument_properties_3_t& arg) { ov::element::Type_t precision = toOVElementType(arg.devicePrecision); @@ -1008,9 +1008,9 @@ void LevelZeroCompilerInDriver::getMetadata(ze_graph_dd std::vector& inputNames, std::vector& outputNames, std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states) const { + std::vector& parameters, + std::vector& results, + std::vector& states) const { ze_graph_argument_properties_t arg; auto result = graphDdiTableExt->pfnGetArgumentProperties(graphHandle, index, &arg); if (ZE_RESULT_SUCCESS != result) { @@ -1032,9 +1032,9 @@ void 
LevelZeroCompilerInDriver::getMetadata(ze_grap std::vector& inputNames, std::vector& outputNames, std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states) const { + std::vector& parameters, + std::vector& results, + std::vector& states) const { ze_graph_argument_properties_2_t arg; auto result = graphDdiTableExt->pfnGetArgumentProperties2(graphHandle, index, &arg); if (ZE_RESULT_SUCCESS != result) { @@ -1078,9 +1078,9 @@ void LevelZeroCompilerInDriver::getMetadata(TableExtension* grap std::vector& inputNames, std::vector& outputNames, std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states) const { + std::vector& parameters, + std::vector& results, + std::vector& states) const { ze_graph_argument_properties_3_t arg; auto result = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &arg); if (ZE_RESULT_SUCCESS != result) { diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index bcc5a6fde6513a..0871c89561be37 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -38,15 +38,15 @@ const char* NPU_PLUGIN_LIB_NAME = "openvino_intel_npu_plugin"; * @param inputNames The names of the inputs registered in the order given by the model. * @param outputNames The names of the outputs registered in the order given by the model. 
*/ -std::shared_ptr create_dummy_model(const IONodeDescriptorMap& parameterDescriptors, - const IONodeDescriptorMap& resultDescriptors, +std::shared_ptr create_dummy_model(const std::vector& parameterDescriptors, + const std::vector& resultDescriptors, const std::vector& inputNames, const std::vector& outputNames) { ov::ParameterVector parameters; ov::NodeVector results; for (const std::string& inputName : inputNames) { - const IONodeDescriptor& parameterDescriptor = parameterDescriptors.at(inputName); + const IODescriptor& parameterDescriptor = parameterDescriptors.at(inputName); std::shared_ptr parameter = std::make_shared(parameterDescriptor.precision, parameterDescriptor.transposedShape); parameter->set_friendly_name(parameterDescriptor.currentNodeName); @@ -60,7 +60,7 @@ std::shared_ptr create_dummy_model(const IONodeDescriptorMap& paramet // constant can't have dynamic shape). The dummy tensor was also brought in order to register the correct, // potentially dynamic, output shape. for (const std::string& outputName : outputNames) { - const IONodeDescriptor& resultDescriptor = resultDescriptors.at(outputName); + const IODescriptor& resultDescriptor = resultDescriptors.at(outputName); std::shared_ptr constantDummy = std::make_shared(resultDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE); constantDummy->set_friendly_name(resultDescriptor.legacyName);