Skip to content

Commit

Permalink
[NPU] Switching the I/O identification convention to indices (openvin…
Browse files Browse the repository at this point in the history
…otoolkit#24248)

### Details:
- Please see PR#10348 (compiler repository) for a detailed description
and some extra validation.

### Tickets:
 - *CVS-142751*
  • Loading branch information
razvanapetroaie authored Aug 12, 2024
1 parent 033a515 commit e567c9e
Show file tree
Hide file tree
Showing 22 changed files with 1,098 additions and 902 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp"

#include "common_test_utils/test_constants.hpp"

using namespace ExecutionGraphTests;

namespace {

// Registers ExecGraphDuplicateInputsOutputsNames for the CPU device.
// Judging by the suite name, this presumably exercises models whose inputs/outputs
// carry duplicate names — confirm against duplicate_inputs_outputs_names.hpp.
// getTestCaseName produces the human-readable parameterized test name.
INSTANTIATE_TEST_SUITE_P(smoke_duplicateInputsOutputsNames,
ExecGraphDuplicateInputsOutputsNames,
::testing::Values(ov::test::utils::DEVICE_CPU),
ExecGraphDuplicateInputsOutputsNames::getTestCaseName);

} // namespace
124 changes: 92 additions & 32 deletions src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>

Expand All @@ -22,48 +23,107 @@
namespace intel_npu {

/**
* @brief A helper structure used for storing the metadata found within the I/O nodes.
* @details The "legacyName" attribute holds the name most commonly used as map key for multiple structures.
* This value also corresponds to the identifier used by the OpenVINO 1.0 API.
*
* "originalShape" corresponds to the shape registered in the graph, while "transposedShape" holds the shape obtained
* upon applying a transposition corresponding to the legacy layout value. Use the "transposedShape" one if not sure
* which one you need.
* @brief A helper structure used for storing metadata corresponding to one input/output entry.
*/
struct IONodeDescriptor {
std::string legacyName;
std::string currentNodeName;
struct IODescriptor {
/**
* @brief The name of the input/output assigned by the compiler.
* @details This value may differ from other name attributes:
* - The compiler could have created additional inputs/outputs (e.g. for representing states). These are not
* found in the original IR model.
* - The compiler may append indices to names in the case where duplicate names are found.
* @note The prefixes introduced by the compiler in order to differentiate the special cases (e.g. states and shape
* tensors) were removed prior to initializing this field.
*/
std::string nameFromCompiler;

ov::element::Type precision;

ov::PartialShape shapeFromCompiler;

/**
* @brief If set to "true", the current object describes a buffer which may be used for altering a state tensor.
* @details This flag is set if the compiler prefixed the name using a "read value" prefix. The state input and
* state output descriptors are also tied using the "relatedDescriptorIndex" attribute.
*/
bool isStateInput = false;

/**
* @brief If set to "true", the current object describes a buffer which reflects the value of a state tensor.
* @details This flag is set if the compiler prefixed the name using an "assign" prefix. The state input and
* state output descriptors are also tied using the "relatedDescriptorIndex" attribute.
*/
bool isStateOutput = false;

/**
* @brief If set to "true", the buffer of the tensor described here contains as value the shape of the referenced
* tensor.
* @details This flag is set if the compiler prefixed the name using a "shape" prefix.
*
* The referenced tensor bears the same name ("nameFromCompiler"), but its "isShapeTensor" value is set to
* "false". The two descriptors are also tied using the "relatedDescriptorIndex" attribute.
*/
bool isShapeTensor = false;

/**
* @brief Points towards a related descriptor.
* @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor)
* pairs.
*/
std::optional<size_t> relatedDescriptorIndex;

/**
* @brief The friendly name of the node extracted from the IR model.
* @details In some cases, this field is required for constructing a dummy model which uses the same input/output
* metadata as the original IR model.
*
* This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the
* compiler).
*/
std::string nodeFriendlyName;

/**
* @brief The names of the output tensors extracted from the IR model.
* @details In some cases, this field is required for constructing a dummy model which uses the same input/output
* metadata as the original IR model.
*
* This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the
* compiler).
*/
std::unordered_set<std::string> outputTensorNames;
ov::element::Type_t precision;
ov::PartialShape originalShape;
ov::PartialShape transposedShape;
};

/**
* @brief A helper map to represent descriptions for inputs and outputs
* of a network
*/
using IONodeDescriptorMap = std::unordered_map<std::string, IONodeDescriptor>;
/**
* @brief The shape extracted from the IR model.
* @details The values may differ from the ones found in "shapeFromCompiler" if batching is to be handled by the
* plugin.
*
* This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added
* by the compiler).
*/
std::optional<ov::PartialShape> shapeFromIRModel = std::nullopt;
};

struct NetworkMetadata final {
std::string name;

std::vector<std::string> inputNames;
std::vector<std::string> outputNames;
std::vector<std::string> stateNames;
std::vector<std::string> shapeNames;
std::vector<IODescriptor> inputs;
std::vector<IODescriptor> outputs;
std::vector<IODescriptor> profilingOutputs;

IONodeDescriptorMap parameters;
IONodeDescriptorMap results;
IONodeDescriptorMap states;
IONodeDescriptorMap shapes;
IONodeDescriptorMap profilingOutputs;
size_t numStreams = 1;

std::unordered_map<std::string, size_t> inputOrder;
std::unordered_map<std::string, size_t> outputOrder;
/**
* @brief Binds the (state input, state output) and (dynamic tensor, shape tensor) pairs using the
* "relatedDescriptorIndex" attribute.
* @details For state inputs, the "relatedDescriptorIndex" value is set to the index of the output which bears the
* same name. The reverse is also applied.
*
* For shape tensors, the lookup is performed in the same container (inputs or outputs). The value is once again set
* to the index of the entry which bears the same name.
*/
void bindRelatedDescriptors();

int numStreams = 1;
};
}; // namespace intel_npu

/**
* @struct NetworkDescription
Expand Down
113 changes: 42 additions & 71 deletions src/plugins/intel_npu/src/al/include/sync_infer_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,56 +92,32 @@ class SyncInferRequest : public ov::IInferRequest {
*/
void initialize_states();

protected:
/**
* @return The state tensors accessible by their names.
*/
std::unordered_map<std::string, std::shared_ptr<VariableState>>& get_variable_states() {
return _variableStates;
}

/**
* @return The names used by the inputs in the order registered inside the model.
*/
std::vector<std::string> get_input_names() {
return _metadata.inputNames;
}

/**
* @return The names used by the outputs in the order registered inside the model.
*/
std::vector<std::string> get_output_names() {
return _metadata.outputNames;
}

/**
* @return The names used by the state variables in the order registered inside the model.
* @see ov::ISyncInferRequest
*/
std::vector<std::string> get_state_names() {
return _metadata.stateNames;
}
struct FoundPort {
size_t idx;
enum class Type { NOT_FOUND = 0, INPUT, OUTPUT } type;

/**
* @return The names used by the shape variables in the order registered inside the model.
*/
std::vector<std::string> get_shape_names() {
return _metadata.shapeNames;
}
bool found() {
return type != Type::NOT_FOUND;
}
bool is_input() {
return type == Type::INPUT;
}
bool is_output() {
return !is_input();
}
};

/**
* @return A map holding references towards all tensors used by the current inference request object.
* @brief Finds input or output port
* @return structure which contains index of Input/Output or report that port wasn't found
* @see ov::ISyncInferRequest
*/
std::unordered_map<std::string, std::shared_ptr<ov::ITensor>>& get_all_tensors() {
return _allTensors;
}
FoundPort find_port(const ov::Output<const ov::Node>& port) const;

/**
* @return A map holding references towards all shapes tensors used by the current inference request object.
*/
std::unordered_map<std::string, std::shared_ptr<ov::ITensor>>& get_shapes_tensors() {
return _shapesTensors;
}

protected:
/**
* @brief Basic checks for input/output tensor
*
Expand All @@ -163,45 +139,40 @@ class SyncInferRequest : public ov::IInferRequest {
virtual void check_network_precision(const ov::element::Type_t precision) const = 0;

/**
* @brief Indicates a kind of provided tensor. Marks special tensors, used for internal implementation
*/
enum class TensorType { InputOrOutput, Shape, State };

/**
* @brief Allocates a tensor on host and stores the reference inside the "_allTensors" attribute. If a buffer
* address is provided, then the tensor is built upon it and no additional data buffer is allocated.
* @param tensorName The name by which the tensor shall be identified
* @brief Allocates a tensor on host and stores the reference inside multiple attributes.
* @param descriptor Tensor's metadata
* @param isState If true, the tensor shall also be stored inside the state variables map. In this case, adding the
* tensor to this structure would be required in order to correctly answer the state queries.
* @param index The index which the allocated tensor shall use.
* @param isInput Determines the containers in which the newly allocated tensors will be stored.
* @param allocator If provided, the tensor uses the custom allocator instead of using the default one.
* @param batchSize If provided, the value of the shape on the 0th axis is overridden with this value.
* @return Pointer towards the allocated tensor
*/
void allocate_tensor(std::string tensorName,
const IONodeDescriptor& descriptor,
TensorType tensorType = TensorType::InputOrOutput,
const ov::Allocator& allocator = {}) const;

// Mutable to return reference to ov::Tensor
mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _allTensors;
mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _shapesTensors;
// A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
// memory area for the tensor.
mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _copyAllTensors;

mutable std::unordered_map<std::string, std::shared_ptr<VariableState>> _variableStates;
std::shared_ptr<ov::ITensor> allocate_tensor(const IODescriptor& descriptor,
const size_t index,
const bool isInput,
const ov::Allocator& allocator = {},
const std::optional<std::size_t> batchSize = std::nullopt) const;

// This is intel_npu::ICompiledModel pointer, but need to use OV base class because
// ov::IInferRequest::get_compiled_model returns a reference to shared_ptr!
std::shared_ptr<const ov::ICompiledModel> _compiledModel;

NetworkMetadata _metadata;

// Stored in order to avoid additional processing when launching inferences
std::vector<std::string> _inputAndStateInputNames;
std::vector<std::string> _outputAndStateOutputNames;
mutable std::vector<std::shared_ptr<ov::ITensor>> _userInputTensors;
mutable std::vector<std::shared_ptr<ov::ITensor>> _userOutputTensors;

std::unordered_map<std::string, std::string> _nodeNameToLegacyName;
std::unordered_map<std::string, std::string> _legacyNameToNodeName;
mutable std::vector<ov::SoPtr<ov::IVariableState>> _variableStates;

/**
* @see ov::ISyncInferRequest
*/
mutable std::unordered_map<size_t, FoundPort> _cachedPorts;

/**
* @see ov::ISyncInferRequest
*/
mutable std::mutex _cacheMutex;
};

} // namespace intel_npu
67 changes: 67 additions & 0 deletions src/plugins/intel_npu/src/al/src/icompiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "intel_npu/al/icompiler.hpp"

#include <algorithm>
#include <iterator>

namespace intel_npu {

void NetworkMetadata::bindRelatedDescriptors() {
size_t ioIndex = 0;

for (IODescriptor& input : inputs) {
if (input.relatedDescriptorIndex.has_value()) {
++ioIndex;
continue;
}

if (input.isStateInput) {
const auto relatedDescriptorIterator =
std::find_if(outputs.begin(), outputs.end(), [&](const IODescriptor& output) {
return output.isStateOutput && (output.nameFromCompiler == input.nameFromCompiler);
});

if (relatedDescriptorIterator != outputs.end()) {
input.relatedDescriptorIndex = std::distance(outputs.begin(), relatedDescriptorIterator);
outputs.at(*input.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex;
}
} else if (input.isShapeTensor) {
const auto relatedDescriptorIterator =
std::find_if(inputs.begin(), inputs.end(), [&](const IODescriptor& candidate) {
return !candidate.isShapeTensor && (candidate.nameFromCompiler == input.nameFromCompiler);
});

if (relatedDescriptorIterator != inputs.end()) {
input.relatedDescriptorIndex = std::distance(inputs.begin(), relatedDescriptorIterator);
inputs.at(*input.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex;
}
}

++ioIndex;
}

ioIndex = 0;

for (IODescriptor& output : outputs) {
if (output.relatedDescriptorIndex.has_value()) {
++ioIndex;
continue;
}

if (output.isShapeTensor) {
const auto relatedDescriptorIterator =
std::find_if(outputs.begin(), outputs.end(), [&](const IODescriptor& candidate) {
return !candidate.isShapeTensor && (candidate.nameFromCompiler == output.nameFromCompiler);
});

if (relatedDescriptorIterator != outputs.end()) {
output.relatedDescriptorIndex = std::distance(outputs.begin(), relatedDescriptorIterator);
outputs.at(*output.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex;
}
}

++ioIndex;
}
}

} // namespace intel_npu
Loading

0 comments on commit e567c9e

Please sign in to comment.