diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp new file mode 100644 index 00000000000000..c6d3e14f82e3df --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp" + +#include "common_test_utils/test_constants.hpp" + +using namespace ExecutionGraphTests; + +namespace { + +INSTANTIATE_TEST_SUITE_P(smoke_duplicateInputsOutputsNames, + ExecGraphDuplicateInputsOutputsNames, + ::testing::Values(ov::test::utils::DEVICE_CPU), + ExecGraphDuplicateInputsOutputsNames::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp index 0175949db1ae73..25361c0dc957c5 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -22,48 +23,107 @@ namespace intel_npu { /** - * @brief A helper structure used for storing the metadata found within the I/O nodes. - * @details The "legacyName" attribute holds the name most commonly used as map key for multiple structures. - * This value also corresponds to the identifier used by the OpenVINO 1.0 API. - * - * "originalShape" corresponds to the shape registered in the graph, while "transposedShape" holds the shape obtained - * upon applying a transposition corresponding to the legacy layout value. Use the "transposedShape" one if not sure - * which one you need. + * @brief A helper structure used for storing metadata corresponding to one input/output entry. */ -struct IONodeDescriptor { - std::string legacyName; - std::string currentNodeName; +struct IODescriptor { + /** + * @brief The name of the input/output assigned by the compiler. + * @details This value may differ from other name attributes: + * - The compiler could have created additional inputs/outputs (e.g. for representing states). These are not + * found in the original IR model. + * - The compiler may append indices to names in the case where duplicate names are found. + * @note The prefixes introduced by the compiler in order to differentiate the special cases (e.g. states and shape + * tensors) were removed prior to initializing this field. + */ + std::string nameFromCompiler; + + ov::element::Type precision; + + ov::PartialShape shapeFromCompiler; + + /** + * @brief If set to "true", the current object describes a buffer which may be used for altering a state tensor. + * @details This flag is set if the compiler prefixed the name using a "read value" prefix. The state input and + * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isStateInput = false; + + /** + * @brief If set to "true", the current object describes a buffer which reflects the value of a state tensor. + * @details This flag is set if the compiler prefixed the name using an "assign" prefix. The state input and + * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. 
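+     * For example, an LSTM cell state would typically surface as a "read value" input and an "assign"
+     * output sharing the same "nameFromCompiler" (the naming here is purely illustrative).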
+ */ + bool isStateOutput = false; + + /** + * @brief If set to "true", the buffer of the tensor described here contains as value the shape of the referenced + * tensor. + * @details This flag is set if the compiler prefixed the name using a "shape" prefix. + * + * The referenced tensor bears the same name ("nameFromCompiler"), but its "isShapeTensor" value is set to + * "false". The two descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isShapeTensor = false; + + /** + * @brief Points towards a related descriptor. + * @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor) + * pairs. + */ + std::optional relatedDescriptorIndex; + + /** + * @brief The friendly name of the node extracted from the IR model. + * @details In some cases, this field is required for constructing a dummy model which uses the same input/output + * metadata as the original IR model. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the + * compiler). + */ + std::string nodeFriendlyName; + + /** + * @brief The names of the output tensors extracted from the IR model. + * @details In some cases, this field is required for constructing a dummy model which uses the same input/output + * metadata as the original IR model. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the + * compiler). + */ std::unordered_set outputTensorNames; - ov::element::Type_t precision; - ov::PartialShape originalShape; - ov::PartialShape transposedShape; -}; -/** - * @brief A helper map to represent descriptions for inputs and outputs - * of a network - */ -using IONodeDescriptorMap = std::unordered_map; + /** + * @brief The shape extracted from the IR model. + * @details The values may differ from the ones found in "shapeFromCompiler" if batching is to be handled by the + * plugin. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added + * by the compiler). + */ + std::optional shapeFromIRModel = std::nullopt; +}; struct NetworkMetadata final { std::string name; - std::vector inputNames; - std::vector outputNames; - std::vector stateNames; - std::vector shapeNames; + std::vector inputs; + std::vector outputs; + std::vector profilingOutputs; - IONodeDescriptorMap parameters; - IONodeDescriptorMap results; - IONodeDescriptorMap states; - IONodeDescriptorMap shapes; - IONodeDescriptorMap profilingOutputs; + size_t numStreams = 1; - std::unordered_map inputOrder; - std::unordered_map outputOrder; + /** + * @brief Binds the (state input, state output) and (dynamic tensor, shape tensor) pairs using the + * "relatedDescriptorIndex" attribute. + * @details For state inputs, the "relatedDescriptorIndex" value is set to the index of the output which bears the + * same name. The reverse is also applied. + * + * For shape tensors, the lookup is performed in the same container (inputs or outputs). The value is once again set + * to the index of the entry which bears the same name. 
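+     *
+     * A minimal usage sketch (illustrative only, entry names and indices invented for the example):
+     * @code
+     * NetworkMetadata metadata;
+     * IODescriptor stateInput;
+     * stateInput.nameFromCompiler = "cell_state";
+     * stateInput.isStateInput = true;
+     * IODescriptor stateOutput;
+     * stateOutput.nameFromCompiler = "cell_state";
+     * stateOutput.isStateOutput = true;
+     * metadata.inputs.push_back(stateInput);
+     * metadata.outputs.push_back(stateOutput);
+     * metadata.bindRelatedDescriptors();
+     * // metadata.inputs[0].relatedDescriptorIndex  == 0  (index of the paired state output)
+     * // metadata.outputs[0].relatedDescriptorIndex == 0  (index of the paired state input)
+     * @endcode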
+ */ + void bindRelatedDescriptors(); - int numStreams = 1; -}; +}; // namespace intel_npu /** * @struct NetworkDescription diff --git a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp index 7272a67faafff1..bf9e0f20af3b78 100644 --- a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp @@ -92,56 +92,32 @@ class SyncInferRequest : public ov::IInferRequest { */ void initialize_states(); +protected: /** - * @return The state tensors accessible by their names. - */ - std::unordered_map>& get_variable_states() { - return _variableStates; - } - - /** - * @return The names used by the inputs in the order registered inside the model. - */ - std::vector get_input_names() { - return _metadata.inputNames; - } - - /** - * @return The names used by the outputs in the order registered inside the model. - */ - std::vector get_output_names() { - return _metadata.outputNames; - } - - /** - * @return The names used by the state variables in the order registered inside the model. + * @see ov::ISyncInferRequest */ - std::vector get_state_names() { - return _metadata.stateNames; - } + struct FoundPort { + size_t idx; + enum class Type { NOT_FOUND = 0, INPUT, OUTPUT } type; - /** - * @return The names used by the shape variables in the order registered inside the model. - */ - std::vector get_shape_names() { - return _metadata.shapeNames; - } + bool found() { + return type != Type::NOT_FOUND; + } + bool is_input() { + return type == Type::INPUT; + } + bool is_output() { + return !is_input(); + } + }; /** - * @return A map holding references towards all tensors used by the current inference request object. + * @brief Finds input or output port + * @return structure which contains index of Input/Output or report that port wasn't found + * @see ov::ISyncInferRequest */ - std::unordered_map>& get_all_tensors() { - return _allTensors; - } + FoundPort find_port(const ov::Output& port) const; - /** - * @return A map holding references towards all shapes tensors used by the current inference request object. - */ - std::unordered_map>& get_shapes_tensors() { - return _shapesTensors; - } - -protected: /** * @brief Basic checks for input/output tensor * @@ -163,32 +139,19 @@ class SyncInferRequest : public ov::IInferRequest { virtual void check_network_precision(const ov::element::Type_t precision) const = 0; /** - * @brief Indicates a kind of provided tensor. Marks special tensors, used for internal implementation - */ - enum class TensorType { InputOrOutput, Shape, State }; - - /** - * @brief Allocates a tensor on host and stores the reference inside the "_allTensors" attribute. If a buffer - * address is provided, then the tensor is built upon it and no additional data buffer is allocated. - * @param tensorName The name by which the tensor shall be identified + * @brief Allocates a tensor on host and stores the reference inside multiple attributes. * @param descriptor Tensor's metadata - * @param isState If true, the tensor shall also be stored inside the state variables map. In this case, adding the - * tensor to this structure would be required in order to correctly answer the state queries. + * @param index The index which the allocated tensor shall use. + * @param isInput Determines the containers in which the newly allocated tensors will be stored. * @param allocator If provided, the tensor uses the custom allocator instead of using the default one. 
+ * @param batchSize If provided, the value of the shape on the 0th axis is overriden with this value. + * @return Pointer towards the allocated tensor */ - void allocate_tensor(std::string tensorName, - const IONodeDescriptor& descriptor, - TensorType tensorType = TensorType::InputOrOutput, - const ov::Allocator& allocator = {}) const; - - // Mutable to return reference to ov::Tensor - mutable std::unordered_map> _allTensors; - mutable std::unordered_map> _shapesTensors; - // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another - // memory area for the tensor. - mutable std::unordered_map> _copyAllTensors; - - mutable std::unordered_map> _variableStates; + std::shared_ptr allocate_tensor(const IODescriptor& descriptor, + const size_t index, + const bool isInput, + const ov::Allocator& allocator = {}, + const std::optional batchSize = std::nullopt) const; // This is intel_npu::ICompiledModel pointer, but need to use OV base class because // ov::IInferRequest::get_compiled_model returns a refernce to shared_ptr! @@ -196,12 +159,20 @@ class SyncInferRequest : public ov::IInferRequest { NetworkMetadata _metadata; - // Stored in order to avoid additional processing when launching inferences - std::vector _inputAndStateInputNames; - std::vector _outputAndStateOutputNames; + mutable std::vector> _userInputTensors; + mutable std::vector> _userOutputTensors; - std::unordered_map _nodeNameToLegacyName; - std::unordered_map _legacyNameToNodeName; + mutable std::vector> _variableStates; + + /** + * @see ov::ISyncInferRequest + */ + mutable std::unordered_map _cachedPorts; + + /** + * @see ov::ISyncInferRequest + */ + mutable std::mutex _cacheMutex; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/src/icompiler.cpp b/src/plugins/intel_npu/src/al/src/icompiler.cpp new file mode 100644 index 00000000000000..632a466d17d442 --- /dev/null +++ b/src/plugins/intel_npu/src/al/src/icompiler.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_npu/al/icompiler.hpp" + +namespace intel_npu { + +void NetworkMetadata::bindRelatedDescriptors() { + size_t ioIndex = 0; + + for (IODescriptor& input : inputs) { + if (input.relatedDescriptorIndex.has_value()) { + ++ioIndex; + continue; + } + + if (input.isStateInput) { + const auto relatedDescriptorIterator = + std::find_if(outputs.begin(), outputs.end(), [&](const IODescriptor& output) { + return output.isStateOutput && (output.nameFromCompiler == input.nameFromCompiler); + }); + + if (relatedDescriptorIterator != outputs.end()) { + input.relatedDescriptorIndex = std::distance(outputs.begin(), relatedDescriptorIterator); + outputs.at(*input.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex; + } + } else if (input.isShapeTensor) { + const auto relatedDescriptorIterator = + std::find_if(inputs.begin(), inputs.end(), [&](const IODescriptor& candidate) { + return !candidate.isShapeTensor && (candidate.nameFromCompiler == input.nameFromCompiler); + }); + + if (relatedDescriptorIterator != inputs.end()) { + input.relatedDescriptorIndex = std::distance(inputs.begin(), relatedDescriptorIterator); + inputs.at(*input.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex; + } + } + + ++ioIndex; + } + + ioIndex = 0; + + for (IODescriptor& output : outputs) { + if (output.relatedDescriptorIndex.has_value()) { + ++ioIndex; + continue; + } + + if (output.isShapeTensor) { + const auto relatedDescriptorIterator = + 
std::find_if(outputs.begin(), outputs.end(), [&](const IODescriptor& candidate) { + return !candidate.isShapeTensor && (candidate.nameFromCompiler == output.nameFromCompiler); + }); + + if (relatedDescriptorIterator != outputs.end()) { + output.relatedDescriptorIndex = std::distance(outputs.begin(), relatedDescriptorIterator); + outputs.at(*output.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex; + } + } + + ++ioIndex; + } +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp index 04b930b7ca63ff..08d5b518b98cad 100644 --- a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp @@ -8,53 +8,90 @@ #include "openvino/op/util/op_types.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/plugin_itt.hpp" +#include "openvino/util/common_util.hpp" #include "transformations/utils/utils.hpp" +namespace { + +constexpr size_t BATCH_AXIS = 0; + +} + namespace intel_npu { SyncInferRequest::SyncInferRequest(const std::shared_ptr& compiledModel) : _compiledModel(compiledModel), - _metadata(compiledModel->get_network_metadata()) { + _metadata(compiledModel->get_network_metadata()), + _userInputTensors(_metadata.inputs.size(), nullptr), + _userOutputTensors(_metadata.outputs.size(), nullptr) { OPENVINO_ASSERT(_compiledModel); - const std::vector>& outputs = get_outputs(); - - if (outputs.empty()) { + if (get_outputs().empty()) { OPENVINO_THROW("Inference request creation: no output found for network " + _metadata.name); } - // Map the node names to the legacy ones used by the I/O tensors in order to allow an easier access to the tensors' - // contents - for (const auto& [name, resultDescriptor] : _metadata.results) { - _nodeNameToLegacyName[name] = resultDescriptor.legacyName; - _legacyNameToNodeName[resultDescriptor.legacyName] = name; - } - - _inputAndStateInputNames = _metadata.inputNames; - _outputAndStateOutputNames = _metadata.outputNames; - - for (const std::string& stateName : _metadata.stateNames) { - // State variables shall be identified by specific prefixes in order to avoid a potential tensor name collision - _inputAndStateInputNames.push_back(READVALUE_PREFIX + stateName); - _outputAndStateOutputNames.push_back(ASSIGN_PREFIX + stateName); + // Create map of empty tensors and cache ports from the compiled model + // See the ov::ISyncInferRequest constructor + auto portType = SyncInferRequest::FoundPort::Type::INPUT; + for (const auto& ports : {get_inputs(), get_outputs()}) { + for (size_t i = 0; i < ports.size(); i++) { + const auto& port = ports[i]; + size_t portHash = ov::util::hash_combine(std::vector{std::hash()(port.get_node()), + std::hash()(port.get_index())}); + _cachedPorts[portHash] = {i, portType}; + } + portType = SyncInferRequest::FoundPort::Type::OUTPUT; } +} - const auto contains = [](const auto& container, const auto& value) { - return std::find(container.begin(), container.end(), value) != container.end(); +SyncInferRequest::FoundPort SyncInferRequest::find_port(const ov::Output& port) const { + // check if the tensor names of target port is a subset of source port's tensor names + auto check_tensor_names = [](const std::unordered_set& source, + const std::unordered_set& target) { + for (auto const& name : target) { + if (source.find(name) == source.end()) { + return false; + } + } + return true; }; - for (const auto& shapeName : _metadata.shapeNames) { - if 
(contains(_inputAndStateInputNames, shapeName)) { - _inputAndStateInputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName); + // This function is hotspot, need optimization. + auto check_nodes = [](const ov::Node* node1, const ov::Node* node2) { + return node1 == node2 || + (node1->outputs().size() == node2->outputs().size() && + node1->inputs().size() == node2->inputs().size() && node1->get_type_info() == node2->get_type_info() && + node1->get_friendly_name() == node2->get_friendly_name()); + }; + // Find port without caching work slow because we need each time iterate over all ports and compare different + // strings So use WA with caching in order to make 2+ calls for the same ports faster. + // Calculate hash for the port + size_t port_hash = ov::util::hash_combine( + std::vector{std::hash()(port.get_node()), std::hash()(port.get_index())}); + { + std::lock_guard lock(_cacheMutex); + if (_cachedPorts.find(port_hash) != _cachedPorts.end()) { + // Cached port for the hash was found + return _cachedPorts[port_hash]; } - - const auto& shapeNameMatch = _legacyNameToNodeName.find(shapeName); - if (shapeNameMatch != _legacyNameToNodeName.end()) { - if (contains(_outputAndStateOutputNames, shapeNameMatch->second)) { - _outputAndStateOutputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName); + } + SyncInferRequest::FoundPort::Type type = SyncInferRequest::FoundPort::Type::INPUT; + for (const auto& ports : {get_inputs(), get_outputs()}) { + for (size_t i = 0; i < ports.size(); i++) { + // The order of the arguments might matter for the "check_tensor_names" call. If the "CompiledModel" object + // was obtained via "import_model", then the number of tensor names could be cut to 32 due to limitations + // inside the NPU stack. For this particular scenario, we are checking if all tensor names corresponding to + // the "CompiledModel" are found in the provided port instead of doing the opposite. 
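+            // For instance (names invented for illustration), a compiled-model port restricted to the
+            // name set {"output0"} still matches a user port carrying {"output0", "Result_0/sink_port_0"},
+            // because every compiled-model name is found among the user port's names.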
+ if (ports[i].get_index() == port.get_index() && check_nodes(ports[i].get_node(), port.get_node()) && + check_tensor_names(port.get_names(), ports[i].get_names())) { + std::lock_guard lock(_cacheMutex); + _cachedPorts[port_hash] = {i, type}; + return _cachedPorts[port_hash]; } } + type = SyncInferRequest::FoundPort::Type::OUTPUT; } + return {0, SyncInferRequest::FoundPort::Type::NOT_FOUND}; } const std::vector>& SyncInferRequest::get_inputs() const { @@ -70,34 +107,41 @@ const std::shared_ptr& SyncInferRequest::get_compiled_ } void SyncInferRequest::initialize_states() { - for (const std::string& stateName : _metadata.stateNames) { - _variableStates.at(stateName)->reset(); + for (const ov::SoPtr& variableState : _variableStates) { + variableState->reset(); } } std::vector> SyncInferRequest::query_state() const { - std::vector> queryResult; - - for (const std::string& stateName : _metadata.stateNames) { - queryResult.push_back(_variableStates.at(stateName)); - } - - return queryResult; + return _variableStates; } ov::SoPtr SyncInferRequest::get_tensor(const ov::Output& port) const { - return _allTensors.at(port.get_node()->get_friendly_name()); + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); + + if (foundPort.is_input()) { + return _userInputTensors.at(foundPort.idx); + } + return _userOutputTensors.at(foundPort.idx); } void SyncInferRequest::set_tensor(const ov::Output& port, const ov::SoPtr& tensor) { OV_ITT_SCOPED_TASK(ov::itt::domains::Plugin, "set_tensor"); + + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); try { check_tensor(port, tensor); } catch (const ov::Exception& ex) { OPENVINO_THROW("Failed to set tensor. ", ex.what()); } - _allTensors[port.get_node()->get_friendly_name()] = tensor._ptr; + if (foundPort.is_input()) { + _userInputTensors.at(foundPort.idx) = tensor._ptr; + } else { + _userOutputTensors.at(foundPort.idx) = tensor._ptr; + } } std::vector> SyncInferRequest::get_tensors(const ov::Output& /*port*/) const { @@ -151,54 +195,59 @@ void SyncInferRequest::check_tensor(const ov::Output& port, void SyncInferRequest::check_tensors() const { const auto& inputs = _compiledModel->inputs(); for (size_t i = 0; i < inputs.size(); i++) { - if (_allTensors.find(inputs[i].get_node()->get_friendly_name()) != _allTensors.end()) { - check_tensor(inputs[i], _allTensors.at(inputs[i].get_node()->get_friendly_name())); + if (_userInputTensors.at(i)) { + check_tensor(inputs[i], _userInputTensors.at(i)); } } const auto& outputs = _compiledModel->outputs(); for (size_t i = 0; i < outputs.size(); i++) { - if (_allTensors.find(outputs[i].get_node()->get_friendly_name()) != _allTensors.end()) { - check_tensor(outputs[i], _allTensors.at(outputs[i].get_node()->get_friendly_name())); + if (_userOutputTensors.at(i)) { + check_tensor(outputs[i], _userOutputTensors.at(i)); } } } -void SyncInferRequest::allocate_tensor(std::string tensorName, - const IONodeDescriptor& descriptor, - TensorType tensorType, - const ov::Allocator& allocator) const { +std::shared_ptr SyncInferRequest::allocate_tensor(const IODescriptor& descriptor, + const size_t index, + const bool isInput, + const ov::Allocator& allocator, + const std::optional batchSize) const { + check_network_precision(descriptor.precision); + std::shared_ptr tensor; + ov::Shape allocatedTensorShape = descriptor.shapeFromCompiler.get_max_shape(); - check_network_precision(descriptor.precision); + if (batchSize.has_value()) { + 
allocatedTensorShape[BATCH_AXIS] = *batchSize; + } - if (allocator) { - tensor = ov::make_tensor(descriptor.precision, descriptor.transposedShape.get_max_shape(), allocator); + if (descriptor.isStateOutput) { + // Only one buffer is required for each (state input, state output) pair, acting as an input before running the + // inference and as an output after performing it. Thus both the "state input" and "state output" entries shall + // point to the same buffer. + OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(), + "The link between state descriptors is missing, state name: ", + descriptor.nameFromCompiler); + tensor = _userInputTensors.at(*descriptor.relatedDescriptorIndex); + } else if (allocator) { + tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape, allocator); } else { - tensor = ov::make_tensor(descriptor.precision, descriptor.transposedShape.get_max_shape()); + tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape); } - if (tensorType == TensorType::Shape) { - _shapesTensors[tensorName] = tensor; - tensorName = SHAPE_TENSOR_PREFIX + tensorName; - } + if (isInput) { + if (_userInputTensors.at(index) == nullptr) { + _userInputTensors.at(index) = tensor; + } - if (tensorType == TensorType::State) { - _variableStates[tensorName] = std::make_shared(tensorName, tensor); - - // State variables shall be identified by specific prefixes in order to avoid a potential tensor name collision. - // Additionally, only one buffer is required in the whole flow, acting as an input before running the inference - // and as an output after performing it. Thus both the "state input" and "state output" entries shall point to - // the same buffer. - _copyAllTensors[READVALUE_PREFIX + tensorName] = std::move(tensor); - _copyAllTensors[ASSIGN_PREFIX + tensorName] = _copyAllTensors[READVALUE_PREFIX + tensorName]; - _allTensors[READVALUE_PREFIX + tensorName] = _copyAllTensors[READVALUE_PREFIX + tensorName]; - _allTensors[ASSIGN_PREFIX + tensorName] = _copyAllTensors[READVALUE_PREFIX + tensorName]; - } else { - _copyAllTensors[tensorName] = std::move(tensor); - if (_allTensors.find(tensorName) == _allTensors.end()) { - _allTensors[tensorName] = _copyAllTensors[tensorName]; + if (descriptor.isStateInput) { + _variableStates.push_back(std::make_shared(descriptor.nameFromCompiler, tensor)); } + } else if (_userOutputTensors.at(index) == nullptr) { + _userOutputTensors.at(index) = tensor; } + + return tensor; } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp index 7ab180f6ced5e4..10b5c66233f369 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp @@ -53,11 +53,11 @@ class ZeroExecutor final : public IExecutor { inline const uint32_t& get_group_ordinal() const { return _group_ordinal; } - inline const std::unordered_map& inputs_desc_map() const { - return _inputs_desc_map; + inline const std::vector& get_input_descriptors() const { + return _input_descriptors; } - inline const std::unordered_map& outputs_desc_map() const { - return _outputs_desc_map; + inline const std::vector& get_output_descriptors() const { + return _output_descriptors; } private: @@ -74,8 +74,8 @@ class ZeroExecutor final : public IExecutor { ze_graph_handle_t _graph = nullptr; ze_graph_properties_t _props{}; - std::unordered_map _inputs_desc_map; - std::unordered_map _outputs_desc_map; + std::vector 
_input_descriptors; + std::vector _output_descriptors; std::array, stage::COUNT> _command_queues; diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index 8e9262fd2374c4..f6d15d2c2aed5e 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -16,10 +16,6 @@ #include "zero_utils.hpp" #include "zero_wrappers.hpp" -namespace { -constexpr std::size_t DEFAULT_BATCH_SIZE = 1; -} // namespace - namespace intel_npu { class ZeroInferRequest final : public SyncInferRequest { @@ -41,21 +37,40 @@ class ZeroInferRequest final : public SyncInferRequest { std::vector get_profiling_info() const override; std::vector get_raw_profiling_data() const; + /** + * @brief Determines if batching can be addressed inside the plugin. In the positive case, the batch size used by + * the model will also be deduced and returned. + * @details Batching can be handled by the plugin only if: + * - The batch axis is the first axis. + * - The batch size received by the compiler takes the default value of 1. + * - The batch size found in the IR model matches for all inputs/outputs and takes a value different than the + * default one. + * + * If any of the previous conditions is not fulfilled, the functon will return the default batch size, thus no + * custom algorithm will be applied inside the plugin in order to address batching. + * + * @param metadata Metadata containing the shape values as seen by both the compiler and IR model. These will + * ultimately be used for determining the batch size. + * @returns The batch size deduced by the algorithm or the default value of 1 if batching cannot be performed inside + * the plugin. + */ + std::optional getBatchSize(const NetworkMetadata& metadata); + /** * @brief Check the received tensor and set the Level Zero tensor accordingly * @param tensor Reference to a tensor. - * @param name Friendly name of the tensor. - * @param isParameter True if tensor is a parameter. + * @param index The index corresponding to the position of the tensor inside the I/O structures. + * @param isInput Used for identifying the structures to which the tensor belongs. */ - void set_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter); + void set_tensor_data(const std::shared_ptr tensor, const size_t index, const bool isInput); /** * @brief Check the received remote tensor and copy it to the Level Zero tensor * @param tensor Reference to a tensor. - * @param name Friendly name of the tensor. - * @param isParameter True if tensor is a parameter. + * @param index The index corresponding to the position of the tensor inside the I/O structures. + * @param isInput Used for identifying the structures to which the tensor belongs. */ - void set_remote_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter); + void set_remote_tensor_data(const std::shared_ptr tensor, const size_t index, const bool isInput); void check_network_precision(const ov::element::Type_t precision) const override; void create_pipeline(); @@ -66,6 +81,14 @@ class ZeroInferRequest final : public SyncInferRequest { const Config _config; Logger _logger; + // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another + // memory area for the tensor. 
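As an aside, a minimal sketch of the batch-size deduction rule documented for getBatchSize() above. The shapes are invented and plain vectors stand in for the plugin's metadata structures; the real implementation additionally requires the same candidate batch size across all inputs/outputs and rejects dynamic shapes.

#include <cstddef>
#include <optional>
#include <vector>

// Plugin-side batching applies only when the compiler kept a batch of 1 on axis 0
// while the IR model reports a larger batch there, e.g. {1, 3, 224, 224} vs {4, 3, 224, 224}.
std::optional<std::size_t> deduce_batch_size(const std::vector<std::size_t>& shapeFromCompiler,
                                             const std::vector<std::size_t>& shapeFromIRModel) {
    if (shapeFromCompiler.empty() || shapeFromIRModel.empty()) {
        return std::nullopt;  // rank-0 shapes are not supported
    }
    if (shapeFromCompiler.front() != 1) {
        return std::nullopt;  // batching was already handled by the compiler
    }
    const std::size_t candidate = shapeFromIRModel.front();
    return candidate > 1 ? std::optional<std::size_t>(candidate) : std::nullopt;
}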
+ mutable std::vector> _levelZeroInputTensors; + mutable std::vector> _levelZeroOutputTensors; + + mutable std::vector> _inputTensorsData; + mutable std::vector> _outputTensorsData; + ze_device_properties_t _properties = {}; std::shared_ptr _inputAllocator; std::shared_ptr _outputAllocator; @@ -74,11 +97,22 @@ class ZeroInferRequest final : public SyncInferRequest { zeroProfiling::ProfilingQuery _profilingQuery; std::shared_ptr _npuProfiling; std::unique_ptr _pipeline; - mutable std::unordered_map _tensorsData; - // If batching is handled on the compiler side then batching on the plugin shall be set to 1, we don't do any - // specific operations on the plugin in this case. - size_t _batchSize = DEFAULT_BATCH_SIZE; + /** + * @brief Indicates how many command lists will be used inside the pipeline. + * @details Leveraging multiple command lists implies distributing the input/output buffers accross the batch axis + * between these lists. + * + * If batching is handled on compiler's side then a single command list shall be used, we don't do any + * specific operation inside the plugin in this case. + */ + size_t _numberOfCommandLists = 1; + + /** + * @brief The batch size used by the corresponding model. + * @details The attribute contains a value only if the plugin performs the batches splitting operation. + */ + std::optional _batchSize = std::nullopt; bool _pipelineIsCreated = false; }; diff --git a/src/plugins/intel_npu/src/backend/include/zero_memory.hpp b/src/plugins/intel_npu/src/backend/include/zero_memory.hpp index 93c9252aecd23b..a0110c3c74e4e7 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_memory.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_memory.hpp @@ -96,7 +96,7 @@ class HostMemAllocator final { struct MemoryManagementUnit { MemoryManagementUnit() = default; - void appendArgument(const std::string& name, const std::size_t argSize); + void appendArgument(const std::size_t argSize); void allocate(const ze_device_handle_t device_handle, const ze_context_handle_t context); @@ -104,7 +104,7 @@ struct MemoryManagementUnit { const void* getDeviceMemRegion() const; void* getDeviceMemRegion(); - void* getDevicePtr(const std::string& name); + void* getDevicePtr(const size_t index); bool checkHostPtr(const void* ptr) const; @@ -112,7 +112,7 @@ struct MemoryManagementUnit { std::size_t _size = 0; std::unique_ptr _device; - std::map _offsets; + std::vector _offsets; static const std::size_t alignment = STANDARD_PAGE_SIZE; }; diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index b8724dcdd53f73..ad946579f11c84 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -27,11 +27,11 @@ struct Pipeline { Pipeline& operator=(Pipeline&&) = delete; virtual ~Pipeline() = default; - virtual void push(size_t batch_index) = 0; - virtual void pull(size_t batch_index) = 0; - virtual void reset(size_t batch_index) const = 0; + virtual void push() = 0; + virtual void pull() = 0; + virtual void reset() const = 0; - virtual void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) = 0; + virtual void updateCommandList(const TensorData& tensorsData, const uint32_t index) = 0; protected: zeroMemory::MemoryManagementUnit _deviceInputs; @@ -43,6 +43,7 @@ std::unique_ptr makePipeline(const std::shared_ptr& e zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& 
profiling_query, std::shared_ptr npu_profiling, - std::unordered_map& tensors_data, - const size_t batch_size); + const std::vector>& inputTensorsData, + const std::vector>& outputTensorsData, + const size_t numberOfCommandLists); } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp index 16e410c35ed382..194ce7024ac6a4 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp @@ -95,22 +95,10 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i zeroUtils::throwOnFail("pfnGetArgumentProperties3", _graph_ddi_table_ext->pfnGetArgumentProperties3(_graph, index, &arg3)); - if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg3.type) { - if (isStateInputName(arg3.name) || isShapeTensorName(arg3.name)) { - _inputs_desc_map.emplace(std::make_pair(std::string(arg3.name), ArgumentDescriptor{arg3, index})); - - } else { - _inputs_desc_map.emplace( - std::make_pair(std::string(arg3.debug_friendly_name), ArgumentDescriptor{arg3, index})); - } + if (arg3.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { + _input_descriptors.push_back(ArgumentDescriptor{arg3, index}); } else { - if (isStateOutputName(arg3.name) || isShapeTensorName(arg3.name)) { - _outputs_desc_map.emplace(std::make_pair(std::string(arg3.name), ArgumentDescriptor{arg3, index})); - - } else { - _outputs_desc_map.emplace( - std::make_pair(std::string(arg3.debug_friendly_name), ArgumentDescriptor{arg3, index})); - } + _output_descriptors.push_back(ArgumentDescriptor{arg3, index}); } } diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 7c36033568591a..27c1aac7eeeff5 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -20,142 +20,124 @@ using namespace intel_npu; namespace { constexpr std::size_t BATCH_AXIS = 0; +constexpr std::size_t DEFAULT_BATCH_SIZE = 1; +constexpr bool INPUT = true; +constexpr bool OUTPUT = false; /** * @brief Checks that the metadata of the provided descriptor corresponds to the values registered in the Level Zero * structure. - * @param nodeDescriptor The OpenVINO API specific I/O descriptor which shall be compared. + * @param ioDescriptor The OpenVINO API specific I/O descriptor which shall be compared. * @param zeDescriptor The Level Zero specific structure used for comparison. - * @param name Tensor identifier used for error logging. 
*/ -void checkLevelZeroAttributesMatch(const IONodeDescriptor& nodeDescriptor, - const ZeroExecutor::ArgumentDescriptor& zeDescriptor, - const std::string& name) { - const ov::element::Type_t ovPrecision = nodeDescriptor.precision; - const ze_graph_argument_precision_t zePrecision = zeDescriptor.info.devicePrecision; - - if (zeroUtils::getZePrecision(ovPrecision) != zePrecision) { - OPENVINO_THROW("Precision mismatch for parameter " + name); - } - - const std::vector& ovDimensions = nodeDescriptor.transposedShape.get_max_shape(); - - if (ovDimensions.size() > ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) { - OPENVINO_THROW( - "Maximum number of dimensions supported: " + std::to_string(ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) + '\n' + - "Given: " + std::to_string(ovDimensions.size())); +void checkLevelZeroAttributesMatch(const IODescriptor& ioDescriptor, + const ZeroExecutor::ArgumentDescriptor& zeDescriptor) { + std::string zeDescriptorName = zeDescriptor.info.name; + + if (isStateInputName(zeDescriptorName)) { + zeDescriptorName = zeDescriptorName.substr(READVALUE_PREFIX.length()); + } else if (isStateOutputName(zeDescriptorName)) { + zeDescriptorName = zeDescriptorName.substr(ASSIGN_PREFIX.length()); + } else if (isShapeTensorName(zeDescriptorName)) { + zeDescriptorName = zeDescriptorName.substr(SHAPE_TENSOR_PREFIX.length()); + } + + OPENVINO_ASSERT(ioDescriptor.nameFromCompiler == zeDescriptorName, + "Name mismatch between the I/O structure used internally and its Level Zero correspondent: ", + ioDescriptor.nameFromCompiler, + " vs. ", + zeDescriptorName, + ". The I/O order may have been altered, which could lead to an erroneous behavior."); + OPENVINO_ASSERT(zeroUtils::getZePrecision(ioDescriptor.precision) == zeDescriptor.info.devicePrecision, + "Precision mismatch for input/output named " + ioDescriptor.nameFromCompiler); + + const std::vector& ovDimensions = ioDescriptor.shapeFromCompiler.get_max_shape(); + OPENVINO_ASSERT(ovDimensions.size() <= ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE, + "Maximum number of dimensions supported: " + std::to_string(ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) + + '\n' + "Given: " + std::to_string(ovDimensions.size())); + + for (size_t index = 0; index < ovDimensions.size(); ++index) { + OPENVINO_ASSERT( + ioDescriptor.shapeFromCompiler.is_dynamic() || ovDimensions[index] == zeDescriptor.info.dims[index], + "Shape mismatch for input/output named " + ioDescriptor.nameFromCompiler); } - for (size_t index = ovDimensions.size(); index < ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE; ++index) { - if (zeDescriptor.info.dims[index] != 0 && zeDescriptor.info.dims[index] != 1) { - OPENVINO_THROW("Shape mismatch for parameter " + name); - } + OPENVINO_ASSERT(zeDescriptor.info.dims[index] == 0 || zeDescriptor.info.dims[index] == 1, + "Shape mismatch for input/output named " + ioDescriptor.nameFromCompiler); } +} - for (size_t index = 1; index < ovDimensions.size(); ++index) { - if (ovDimensions[index] != zeDescriptor.info.dims[index] && !nodeDescriptor.transposedShape.is_dynamic()) { - OPENVINO_THROW("Shape mismatch for parameter " + name); - } +template +Type extract_object(const ov::AnyMap& params, const ov::Property& p) { + auto itrHandle = params.find(p.name()); + ov::Any res = nullptr; + if (itrHandle == params.end()) { + OPENVINO_THROW("No parameter ", p.name(), " found in parameters map"); } + res = itrHandle->second; + return res.as(); } -std::optional getBatchSizeForNode(const IONodeDescriptor& nodeDescriptor, - const ZeroExecutor::ArgumentDescriptor& zeDescriptor) { - Logger 
logger("GetBatchSizeForNode", Logger::global().level()); +} // namespace - if (nodeDescriptor.originalShape.rank().get_length() == 0) { - logger.warning("Networks with empty shapes are not supported when batching is handled by the plugin"); +std::optional ZeroInferRequest::getBatchSize(const NetworkMetadata& metadata) { + if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) { + _logger.warning("Batching on the plugin is not used, batching is handled by the compiler"); return std::nullopt; } - if (nodeDescriptor.originalShape.is_dynamic()) { - logger.warning("Dynamic networks are not supported when batching is handled by the plugin"); + const ov::PartialShape& firstOutputShape = *metadata.outputs.at(0).shapeFromIRModel; + if (firstOutputShape.is_dynamic()) { + _logger.warning("Networks using dynamic shapes are not supported when batching is handled by the plugin"); return std::nullopt; } - - const std::vector& ovDimensions = nodeDescriptor.originalShape.get_shape(); - - if (ovDimensions[BATCH_AXIS] == zeDescriptor.info.dims[BATCH_AXIS] && - ovDimensions[BATCH_AXIS] != DEFAULT_BATCH_SIZE) { - logger.info("Batching on the plugin is not used, batching is handled by the compiler"); + if (firstOutputShape.rank().get_length() == 0) { + _logger.warning( + "Networks using rank 0 shapes for inputs/outputs are not supported when batching is handled by the plugin"); return std::nullopt; } - if (zeDescriptor.info.dims[BATCH_AXIS] == DEFAULT_BATCH_SIZE) { - return ovDimensions[BATCH_AXIS]; + const size_t candidateBatchSize = firstOutputShape[BATCH_AXIS].get_length(); + if (candidateBatchSize == 0 || candidateBatchSize == DEFAULT_BATCH_SIZE) { + _logger.warning("Batching on the plugin is not used, batching is handled by the compiler"); + return std::nullopt; } - return DEFAULT_BATCH_SIZE; -} - -/** - * @brief Get the batch size to be handled on the plugin. - * @details Analyze the shape from the compiled model with the shape from the originalShape and get the originalShape if - * it is different. - * @param metadata A map to represent descriptions for inputs and outputs of a network. - * @param executorInputDescriptors A map to represent Level zero inputs descriptors. - * @param executorOutputDescriptors A map to represent Level zero outputs descriptors. 
- */ - -std::optional getBatchSize( - const NetworkMetadata& metadata, - const std::unordered_map& executorInputDescriptors, - const std::unordered_map& executorOutputDescriptors) { - std::set batch_size; - - Logger logger("getBatchSize", Logger::global().level()); + auto checkDescriptorsUseCandidateBatchSize = [candidateBatchSize](const std::vector& descriptors) { + for (const IODescriptor& descriptor : descriptors) { + OPENVINO_ASSERT(descriptor.shapeFromIRModel.has_value(), + "Missing value for the \"shapeFromIRModel\" attribute, I/O descriptor"); - for (const std::string& inputName : metadata.inputNames) { - auto batchSizeForNode = - getBatchSizeForNode(metadata.parameters.at(inputName), executorInputDescriptors.at(inputName)); + const ov::PartialShape& shapeFromCompiler = descriptor.shapeFromCompiler; + const ov::PartialShape& shapeFromIRModel = *descriptor.shapeFromIRModel; - if (batchSizeForNode.has_value()) { - batch_size.insert(*batchSizeForNode); - } else { - return std::nullopt; - } - } + if (shapeFromCompiler.is_dynamic() || shapeFromCompiler.rank().get_length() == 0 || + *shapeFromCompiler.begin() != DEFAULT_BATCH_SIZE) { + return false; + } - for (const std::string& outputName : metadata.outputNames) { - if (!executorOutputDescriptors.count(outputName)) { - OPENVINO_THROW("Invalid graph output descriptor key: " + outputName); + if (!descriptor.isStateInput && !descriptor.isStateOutput && !descriptor.isShapeTensor) { + if (shapeFromIRModel.is_dynamic() || shapeFromIRModel.rank().get_length() == 0 || + *shapeFromIRModel.begin() != candidateBatchSize) { + return false; + } + } } - auto batchSizeForNode = - getBatchSizeForNode(metadata.results.at(outputName), executorOutputDescriptors.at(outputName)); - if (batchSizeForNode.has_value()) { - batch_size.insert(*batchSizeForNode); - } else { - return std::nullopt; - } - } + return true; + }; - if (batch_size.size() != 1) { - logger.info("Batching works only when we have the same batch size for all tensors!"); + if (!checkDescriptorsUseCandidateBatchSize(metadata.inputs) || + !checkDescriptorsUseCandidateBatchSize(metadata.outputs)) { + _logger.warning("Batching on the plugin is not used, batching is handled by the compiler"); return std::nullopt; } - auto it = batch_size.begin(); - if (*it) { - return *it; - } + _logger.warning("Batching is handled by the plugin"); - return std::nullopt; + return candidateBatchSize; } -template -Type extract_object(const ov::AnyMap& params, const ov::Property& p) { - auto itrHandle = params.find(p.name()); - ov::Any res = nullptr; - if (itrHandle == params.end()) { - OPENVINO_THROW("No parameter ", p.name(), " found in parameters map"); - } - res = itrHandle->second; - return res.as(); -} - -} // namespace - //------------------------------------------------------------------------------ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& initStructs, const std::shared_ptr& compiledModel, @@ -167,15 +149,18 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& _executor(static_cast(_executorPtr.get())), _config(config), _logger("ZeroInferRequest", config.get()), + _levelZeroInputTensors(_metadata.inputs.size(), nullptr), + _levelZeroOutputTensors(_metadata.outputs.size(), nullptr), + _inputTensorsData(_metadata.inputs.size(), std::nullopt), + _outputTensorsData(_metadata.outputs.size(), std::nullopt), _profilingPool(_executor->graph(), zeroProfiling::POOL_SIZE, _executor->getInitStructs()->getProfilingDdiTable()), _profilingQuery(0, _executor->getInitStructs()->getDevice(), 
_executor->getInitStructs()->getProfilingDdiTable()) { _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest"); - const std::unordered_map& executorInputDescriptors = - _executor->inputs_desc_map(); - const std::unordered_map& executorOutputDescriptors = - _executor->outputs_desc_map(); + const std::vector& executorInputDescriptors = _executor->get_input_descriptors(); + const std::vector& executorOutputDescriptors = + _executor->get_output_descriptors(); auto proftype = config.get(); if (proftype == ov::intel_npu::ProfilingType::INFER) { @@ -189,10 +174,6 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& zeroUtils::throwOnFail("zeDeviceGetProperties", zeDeviceGetProperties(_executor->getInitStructs()->getDevice(), &_properties)); - const auto contains = [](const auto& container, const auto& value) { - return std::find(container.begin(), container.end(), value) != container.end(); - }; - _outputAllocator = std::make_shared(_initStructs); _inputAllocator = (_properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) @@ -200,145 +181,103 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED) : _outputAllocator; - _logger.debug("ZeroInferRequest::ZeroInferRequest - performing I/O buffer allocation using Level Zero API"); - for (const std::string& inputName : _metadata.inputNames) { - if (!executorInputDescriptors.count(inputName)) { - OPENVINO_THROW("Invalid graph input descriptor key: " + inputName); - } + if (config.get() != ov::intel_npu::BatchMode::COMPILER) { + _batchSize = getBatchSize(_metadata); } - - for (const std::string& outputName : _metadata.outputNames) { - if (!executorOutputDescriptors.count(outputName)) { - OPENVINO_THROW("Invalid graph output descriptor key: " + outputName); - } + if (_batchSize.has_value()) { + _numberOfCommandLists = *_batchSize; } - if (config.get() != ov::intel_npu::BatchMode::COMPILER) { - auto batchSize = getBatchSize(_metadata, executorInputDescriptors, executorOutputDescriptors); + _logger.debug("ZeroInferRequest::ZeroInferRequest - checking level zero attributes and allocating tensors"); - if (batchSize.has_value()) { - _batchSize = *batchSize; - } - } - - for (const std::string& inputName : _metadata.inputNames) { - IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName); - checkLevelZeroAttributesMatch(parameterDescriptor, executorInputDescriptors.at(inputName), inputName); + size_t ioIndex = 0; + for (const IODescriptor& inputDescriptor : _metadata.inputs) { + checkLevelZeroAttributesMatch(inputDescriptor, executorInputDescriptors.at(ioIndex)); - // When batching is handled by the plugin we need to modify transposed shape with the original batch size since - // it will be forced to 1 at the compilation time - if (_batchSize > DEFAULT_BATCH_SIZE) { - parameterDescriptor.transposedShape[BATCH_AXIS] = _batchSize; + if (!(inputDescriptor.isStateInput || inputDescriptor.isShapeTensor)) { + ++ioIndex; + continue; } - if (contains(_metadata.shapeNames, inputName)) { - const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + inputName; - const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(inputName); + _levelZeroInputTensors.at(ioIndex) = + allocate_tensor(inputDescriptor, ioIndex, INPUT, *_inputAllocator, _batchSize); + _inputTensorsData.at(ioIndex) = + TensorData{_levelZeroInputTensors.at(ioIndex)->data(), _levelZeroInputTensors.at(ioIndex)->get_byte_size()}; - checkLevelZeroAttributesMatch(shapeDescriptor, - 
executorInputDescriptors.at(shapeBufferName), - shapeBufferName); - - allocate_tensor(inputName, shapeDescriptor, TensorType::Shape, *_inputAllocator); - _tensorsData[shapeBufferName] = TensorData{_copyAllTensors.at(shapeBufferName)->data(), - _copyAllTensors.at(shapeBufferName)->get_byte_size()}; - } + ++ioIndex; } - for (const std::string& outputName : _metadata.outputNames) { - IONodeDescriptor& resultDescriptor = _metadata.results.at(outputName); - checkLevelZeroAttributesMatch(resultDescriptor, executorOutputDescriptors.at(outputName), outputName); + ioIndex = 0; + for (const IODescriptor& outputDescriptor : _metadata.outputs) { + checkLevelZeroAttributesMatch(outputDescriptor, executorOutputDescriptors.at(ioIndex)); - // When batching is handled by the plugin we need to modify transposed shape with the original batch size since - // it will be forced to 1 at the compilation time - if (_batchSize > DEFAULT_BATCH_SIZE) { - resultDescriptor.transposedShape[BATCH_AXIS] = _batchSize; + if (!(outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor)) { + ++ioIndex; + continue; } - const auto& shapeNameMatch = _nodeNameToLegacyName.find(outputName); - if (shapeNameMatch != _nodeNameToLegacyName.end()) { - if (contains(_metadata.shapeNames, shapeNameMatch->second)) { - const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + shapeNameMatch->second; - const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second); + _levelZeroOutputTensors.at(ioIndex) = + allocate_tensor(outputDescriptor, ioIndex, OUTPUT, *_outputAllocator, _batchSize); + _outputTensorsData.at(ioIndex) = + std::optional(TensorData{_levelZeroOutputTensors.at(ioIndex)->data(), + _levelZeroOutputTensors.at(ioIndex)->get_byte_size()}); - checkLevelZeroAttributesMatch(shapeDescriptor, - executorOutputDescriptors.at(shapeBufferName), - shapeBufferName); - - allocate_tensor(shapeNameMatch->second, shapeDescriptor, TensorType::Shape, *_outputAllocator); - _tensorsData[shapeBufferName] = TensorData{_copyAllTensors.at(shapeBufferName)->data(), - _copyAllTensors.at(shapeBufferName)->get_byte_size()}; - } - } + ++ioIndex; } - for (const std::string& stateName : _metadata.stateNames) { - const std::string& stateInputBufferName = READVALUE_PREFIX + stateName; - const std::string& stateOutputBufferName = ASSIGN_PREFIX + stateName; - - if (!executorInputDescriptors.count(stateInputBufferName)) { - OPENVINO_THROW("Invalid graph input descriptor key: " + stateInputBufferName); - } - if (!executorOutputDescriptors.count(stateOutputBufferName)) { - OPENVINO_THROW("Invalid graph output descriptor key: " + stateOutputBufferName); - } - - const IONodeDescriptor& stateDescriptor = _metadata.states.at(stateName); - checkLevelZeroAttributesMatch(stateDescriptor, - executorInputDescriptors.at(stateInputBufferName), - stateInputBufferName); - checkLevelZeroAttributesMatch(stateDescriptor, - executorOutputDescriptors.at(stateOutputBufferName), - stateOutputBufferName); - - // Only one buffer per state variable is required, we'll use the "output" one since this one captures the latest - // tensor value - allocate_tensor(stateName, stateDescriptor, TensorType::State, *_outputAllocator); - _tensorsData[stateInputBufferName] = TensorData{_copyAllTensors.at(stateInputBufferName)->data(), - _copyAllTensors.at(stateInputBufferName)->get_byte_size()}; - _tensorsData[stateOutputBufferName] = TensorData{_copyAllTensors.at(stateOutputBufferName)->data(), - _copyAllTensors.at(stateOutputBufferName)->get_byte_size()}; - } + 
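An illustrative aside on the state handling above: allocate_tensor() hands the "state output" entry the very same buffer as its paired "state input", so the value assigned during one inference is what the next inference reads. A stand-alone sketch of that sharing pattern, with generic types and an invented buffer size:

#include <cassert>
#include <memory>
#include <vector>

int main() {
    // Stand-ins for the per-index Level Zero tensor containers.
    std::vector<std::shared_ptr<std::vector<float>>> levelZeroInputs(1);
    std::vector<std::shared_ptr<std::vector<float>>> levelZeroOutputs(1);

    levelZeroInputs[0] = std::make_shared<std::vector<float>>(16, 0.0f);  // "read value" buffer
    levelZeroOutputs[0] = levelZeroInputs[0];                             // "assign" entry reuses it

    (*levelZeroOutputs[0])[0] = 42.0f;          // the model assigns a new state value
    assert((*levelZeroInputs[0])[0] == 42.0f);  // the read-value side observes it
    return 0;
}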
_logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest completed"); } void ZeroInferRequest::create_pipeline() { - for (const std::string& inputName : _metadata.inputNames) { - if (_copyAllTensors.find(inputName) != _copyAllTensors.end()) { - _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", inputName.c_str()); + for (size_t inputIndex = 0; inputIndex < _metadata.inputs.size(); ++inputIndex) { + if (_levelZeroInputTensors.at(inputIndex)) { + _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", + _metadata.inputs.at(inputIndex).nodeFriendlyName.c_str()); continue; } - IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName); - _logger.debug("ZeroInferRequest::create_pipeline - Allocate new tensor"); - allocate_tensor(inputName, parameterDescriptor, TensorType::InputOrOutput, *_inputAllocator); - _tensorsData[inputName] = - TensorData{_copyAllTensors.at(inputName)->data(), _copyAllTensors.at(inputName)->get_byte_size()}; + _levelZeroInputTensors.at(inputIndex) = + allocate_tensor(_metadata.inputs.at(inputIndex), inputIndex, INPUT, *_inputAllocator, _batchSize); + _inputTensorsData.at(inputIndex) = + std::optional(TensorData{_levelZeroInputTensors.at(inputIndex)->data(), + _levelZeroInputTensors.at(inputIndex)->get_byte_size()}); } - for (const std::string& outputName : _metadata.outputNames) { - if (_copyAllTensors.find(outputName) != _copyAllTensors.end()) { - _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", outputName.c_str()); + for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) { + if (_levelZeroOutputTensors.at(outputIndex)) { + _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", + _metadata.outputs.at(outputIndex).nodeFriendlyName.c_str()); continue; } - - IONodeDescriptor& resultDescriptor = _metadata.results.at(outputName); - _logger.debug("ZeroInferRequest::create_pipeline - allocate new tensor"); - allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, *_outputAllocator); - _tensorsData[outputName] = - TensorData{_copyAllTensors.at(outputName)->data(), _copyAllTensors.at(outputName)->get_byte_size()}; + _levelZeroOutputTensors.at(outputIndex) = + allocate_tensor(_metadata.outputs.at(outputIndex), outputIndex, OUTPUT, *_outputAllocator, _batchSize); + _outputTensorsData.at(outputIndex) = + std::optional(TensorData{_levelZeroOutputTensors.at(outputIndex)->data(), + _levelZeroOutputTensors.at(outputIndex)->get_byte_size()}); } _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline"); // Construct pipeline - _pipeline = - makePipeline(_executorPtr, _config, _profilingPool, _profilingQuery, _npuProfiling, _tensorsData, _batchSize); + _pipeline = makePipeline(_executorPtr, + _config, + _profilingPool, + _profilingQuery, + _npuProfiling, + _inputTensorsData, + _outputTensorsData, + _numberOfCommandLists); _logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed"); } -void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter) { +void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor, + const size_t index, + const bool isInput) { OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data"); + auto& levelZeroTensors = isInput ? _levelZeroInputTensors : _levelZeroOutputTensors; + auto& tensorsData = isInput ? 
_inputTensorsData : _outputTensorsData; + bool setTensorData = false; bool levelZeroTensorCreatedLocally = true; @@ -353,7 +292,7 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons case ZE_MEMORY_TYPE_DEVICE: case ZE_MEMORY_TYPE_SHARED: _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context"); - _copyAllTensors[name] = tensor; + levelZeroTensors.at(index) = tensor; levelZeroTensorCreatedLocally = false; setTensorData = true; break; @@ -365,16 +304,18 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons } if (!setTensorData) { - // make sure that the L0 tensor was allocated locally and is not received from the user when receiving random - // tensor - if ((_tensorsData.find(name) != _tensorsData.end()) && !_tensorsData.at(name).levelZeroTensorCreatedLocally) { + // make sure that the L0 tensor was allocated locally and is not received from the user when receiving + // random tensor + if (tensorsData.at(index).has_value() && !tensorsData.at(index)->levelZeroTensorCreatedLocally) { _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor"); OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor"); - allocate_tensor(name, - isParameter ? _metadata.parameters.at(name) : _metadata.results.at(name), - TensorType::InputOrOutput, - isParameter ? *_inputAllocator : *_outputAllocator); + levelZeroTensors.at(index) = + allocate_tensor(isInput ? _metadata.inputs.at(index) : _metadata.outputs.at(index), + index, + isInput, + isInput ? *_inputAllocator : *_outputAllocator, + _batchSize); setTensorData = true; levelZeroTensorCreatedLocally = true; @@ -382,29 +323,24 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons } if (setTensorData) { - _tensorsData[name] = TensorData{_copyAllTensors.at(name)->data(), - _copyAllTensors.at(name)->get_byte_size(), - levelZeroTensorCreatedLocally}; + tensorsData.at(index) = std::optional(TensorData{levelZeroTensors.at(index)->data(), + levelZeroTensors.at(index)->get_byte_size(), + levelZeroTensorCreatedLocally}); if (_pipelineIsCreated) { _logger.debug("ZeroInferRequest::infer_async - update command list"); - intel_npu::ZeroExecutor::ArgumentDescriptor desc; - if (isParameter) { - desc = _executor->inputs_desc_map().at(name); - } else { - desc = _executor->outputs_desc_map().at(name); - } - OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); - _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize); + _pipeline->updateCommandList(*tensorsData.at(index), + isInput ? _executor->get_input_descriptors().at(index).idx + : _executor->get_output_descriptors().at(index).idx); } } } -void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr tensor, - const std::string& name, - bool isParameter) { +void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr tensor, + const size_t index, + const bool isInput) { OV_ITT_TASK_CHAIN(ZERO_SET_REMOTE_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_remote_tensor_data"); auto l0_context = reinterpret_cast( @@ -418,72 +354,82 @@ void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr OPENVINO_THROW("Empty buffer"); } - _copyAllTensors[name] = tensor; - _tensorsData[name] = TensorData{data, tensor->get_byte_size(), false}; + auto& levelZeroTensors = isInput ? _levelZeroInputTensors : _levelZeroOutputTensors; + auto& tensorsData = isInput ? 
_inputTensorsData : _outputTensorsData; + + levelZeroTensors.at(index) = tensor; + tensorsData.at(index) = std::optional(TensorData{data, tensor->get_byte_size(), false}); if (_pipelineIsCreated) { _logger.debug("ZeroInferRequest::infer_async - update command list"); - intel_npu::ZeroExecutor::ArgumentDescriptor desc; - if (isParameter) { - desc = _executor->inputs_desc_map().at(name); - } else { - desc = _executor->outputs_desc_map().at(name); - } - OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList"); - _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize); + _pipeline->updateCommandList(*tensorsData.at(index), + isInput ? _executor->get_input_descriptors().at(index).idx + : _executor->get_output_descriptors().at(index).idx); } } void ZeroInferRequest::set_tensor(const ov::Output& port, const ov::SoPtr& tensor) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "set_tensor"); + + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); try { check_tensor(port, tensor); } catch (const ov::Exception& ex) { OPENVINO_THROW("Failed to set tensor. ", ex.what()); } - _allTensors[port.get_node()->get_friendly_name()] = tensor._ptr; + if (foundPort.is_input()) { + _userInputTensors.at(foundPort.idx) = tensor._ptr; + } else { + _userOutputTensors.at(foundPort.idx) = tensor._ptr; + } if (_initStructs->getMutableCommandListVersion()) { auto remoteTensor = std::dynamic_pointer_cast(tensor._ptr); if (remoteTensor == nullptr) { _logger.debug("ZeroInferRequest::set_tensor - set new tensor"); - set_tensor_data(tensor._ptr, - port.get_node()->get_friendly_name(), - ov::op::util::is_parameter(port.get_node())); + set_tensor_data(tensor._ptr, foundPort.idx, foundPort.is_input()); } else { _logger.debug("ZeroInferRequest::set_tensor - set new remote tensor"); - set_remote_tensor_data(remoteTensor, - port.get_node()->get_friendly_name(), - ov::op::util::is_parameter(port.get_node())); + set_remote_tensor_data(remoteTensor, foundPort.idx, foundPort.is_input()); } } } ov::SoPtr ZeroInferRequest::get_tensor(const ov::Output& port) const { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_tensor"); - const std::string& nodeFriendlyName = port.get_node()->get_friendly_name(); - if (_allTensors.find(nodeFriendlyName) != _allTensors.end()) { + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); + + const size_t ioIndex = foundPort.idx; + const bool isInput = foundPort.is_input(); + auto& userTensors = isInput ? _userInputTensors : _userOutputTensors; + + if (userTensors.at(ioIndex)) { _logger.debug("ZeroInferRequest::get_tensor - tensor allocated, get the tensor"); - return _allTensors.at(nodeFriendlyName); + return userTensors.at(ioIndex); } _logger.debug("ZeroInferRequest::get_tensor - tensor is not allocated, create the tensor"); - const bool isParameter = ov::op::util::is_parameter(port.get_node()); - allocate_tensor(nodeFriendlyName, - isParameter ? _metadata.parameters.at(nodeFriendlyName) : _metadata.results.at(nodeFriendlyName), - TensorType::InputOrOutput, - isParameter ? *_inputAllocator : *_outputAllocator); + auto& levelZeroTensors = isInput ? _levelZeroInputTensors : _levelZeroOutputTensors; + auto& tensorsData = isInput ? 
_inputTensorsData : _outputTensorsData; - _tensorsData[nodeFriendlyName] = - TensorData{_copyAllTensors.at(nodeFriendlyName)->data(), _copyAllTensors.at(nodeFriendlyName)->get_byte_size()}; + levelZeroTensors.at(ioIndex) = + allocate_tensor(isInput ? _metadata.inputs.at(ioIndex) : _metadata.outputs.at(ioIndex), + ioIndex, + isInput, + isInput ? *_inputAllocator : *_outputAllocator, + _batchSize); + tensorsData.at(ioIndex) = + std::optional(TensorData{levelZeroTensors.at(ioIndex)->data(), levelZeroTensors.at(ioIndex)->get_byte_size()}); - return _allTensors.at(nodeFriendlyName); + return levelZeroTensors.at(ioIndex); } void ZeroInferRequest::infer() { @@ -504,102 +450,99 @@ void ZeroInferRequest::infer_async() { } _executor->mutexUnlock(); - for (const std::string& name : _inputAndStateInputNames) { - auto& inputTensor = _allTensors.at(name); - - if (isShapeTensorName(name)) { - const auto actualTensorName = name.substr(SHAPE_TENSOR_PREFIX.size()); - const auto& inputDims = _allTensors.at(actualTensorName)->get_shape(); + size_t inputIndex = 0; + for (const std::shared_ptr& userTensor : _userInputTensors) { + const IODescriptor inputDescriptor = _metadata.inputs.at(inputIndex); + if (inputDescriptor.isShapeTensor) { + OPENVINO_ASSERT(inputDescriptor.relatedDescriptorIndex.has_value(), + "The link between the dynamic tensor and its shape tensor is missing, entry name: ", + inputDescriptor.nameFromCompiler); + const auto& inputDims = _userInputTensors.at(*inputDescriptor.relatedDescriptorIndex)->get_shape(); - for (size_t i = 0; i < inputTensor->get_size(); ++i) { + for (size_t i = 0; i < userTensor->get_size(); ++i) { const auto reverseIdx = inputDims.size() - 1 - i; - inputTensor->data()[i] = static_cast(inputDims[reverseIdx]); + userTensor->data()[i] = static_cast(inputDims[reverseIdx]); } } - auto remoteTensor = std::dynamic_pointer_cast(inputTensor); - void* data = !remoteTensor ? inputTensor->data() - : extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); + auto userRemoteTensor = std::dynamic_pointer_cast(userTensor); + void* userBuffer = !userRemoteTensor + ? 
userTensor->data() + : extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle); - const auto& copyInputTensor = _copyAllTensors.at(name); - auto copyRemoteTensor = std::dynamic_pointer_cast(copyInputTensor); - if (copyRemoteTensor == nullptr) { - void* copyData = copyInputTensor->data(); + const std::shared_ptr& levelZeroTensor = _levelZeroInputTensors.at(inputIndex); + auto levelZeroRemoteTensor = std::dynamic_pointer_cast(levelZeroTensor); + if (levelZeroRemoteTensor == nullptr) { + void* levelZeroBuffer = levelZeroTensor->data(); - if (data != copyData) { - if (data == nullptr || copyData == nullptr) { + if (userBuffer != levelZeroBuffer) { + if (userBuffer == nullptr || levelZeroBuffer == nullptr) { OPENVINO_THROW("Empty buffer"); } _logger.info("Tensor is not allocated in the current Level Zero context"); OV_ITT_TASK_NEXT(ZERO_INFER, "memcpy"); - std::memcpy(copyData, data, inputTensor->get_byte_size()); + std::memcpy(levelZeroBuffer, userBuffer, userTensor->get_byte_size()); } } + + ++inputIndex; } OV_ITT_TASK_NEXT(ZERO_INFER, "push"); - for (size_t i = 0; i < _batchSize; i++) { - _pipeline->push(i); - } + _pipeline->push(); } void ZeroInferRequest::get_result() { OV_ITT_TASK_CHAIN(ZERO_RESULT, itt::domains::LevelZeroBackend, "get_result", "pull"); _logger.debug("InferRequest::get_result start"); - - for (size_t i = 0; i < _batchSize; i++) { - _pipeline->pull(i); - } - - for (const auto& name : _outputAndStateOutputNames) { - const auto& outputTensor = _allTensors.at(name); - - if (isShapeTensorName(name)) { - const auto actualTensorName = name.substr(SHAPE_TENSOR_PREFIX.size()); - const auto& shapeNameMatch = _legacyNameToNodeName.find(actualTensorName); - if (shapeNameMatch != _legacyNameToNodeName.end()) { - ov::Shape actualDims; - actualDims.reserve(outputTensor->get_size()); - - for (size_t i = 0; i < outputTensor->get_size(); ++i) { - const auto reverseIdx = outputTensor->get_size() - 1 - i; - actualDims.push_back(outputTensor->data()[reverseIdx]); - } - auto& tensorToBeReshaped = _allTensors.at(shapeNameMatch->second); - tensorToBeReshaped->set_shape(actualDims); + _pipeline->pull(); + + size_t outputIndex = 0; + for (const std::shared_ptr& userTensor : _userOutputTensors) { + const IODescriptor outputDescriptor = _metadata.outputs.at(outputIndex); + if (outputDescriptor.isShapeTensor) { + OPENVINO_ASSERT(outputDescriptor.relatedDescriptorIndex.has_value(), + "The link between the dynamic tensor and its shape tensor is missing, entry name: ", + outputDescriptor.nameFromCompiler); + + ov::Shape actualDims; + actualDims.reserve(userTensor->get_size()); + + for (size_t i = 0; i < userTensor->get_size(); ++i) { + const auto reverseIdx = userTensor->get_size() - 1 - i; + actualDims.push_back(userTensor->data()[reverseIdx]); } + auto& tensorToBeReshaped = _userOutputTensors.at(*outputDescriptor.relatedDescriptorIndex); + tensorToBeReshaped->set_shape(actualDims); } - auto remoteTensor = std::dynamic_pointer_cast(outputTensor); - void* data = nullptr; - if (remoteTensor == nullptr) { - data = outputTensor->data(); - } else { - data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); - } + auto userRemoteTensor = std::dynamic_pointer_cast(userTensor); + void* userBuffer = !userRemoteTensor + ? 
userTensor->data() + : extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle); - const auto& copyOutputTensor = _copyAllTensors.at(name); - auto copyRemoteTensor = std::dynamic_pointer_cast(copyOutputTensor); - if (copyRemoteTensor == nullptr) { - void* copyData = copyOutputTensor->data(); + const std::shared_ptr& levelZeroTensor = _levelZeroOutputTensors.at(outputIndex); + auto levelZeroRemoteTensor = std::dynamic_pointer_cast(levelZeroTensor); + if (levelZeroRemoteTensor == nullptr) { + void* levelZeroBuffer = levelZeroTensor->data(); - if (data != copyData) { - if (data == nullptr || copyData == nullptr) { + if (userBuffer != levelZeroBuffer) { + if (userBuffer == nullptr || levelZeroBuffer == nullptr) { OPENVINO_THROW("Empty buffer"); } _logger.info("Tensor is not allocated in the current Level Zero context"); OV_ITT_TASK_NEXT(ZERO_RESULT, "memcpy"); - std::memcpy(data, copyData, outputTensor->get_byte_size()); + std::memcpy(userBuffer, levelZeroBuffer, userTensor->get_byte_size()); } } + + ++outputIndex; } OV_ITT_TASK_NEXT(ZERO_RESULT, "reset"); - for (size_t i = 0; i < _batchSize; i++) { - _pipeline->reset(i); - } + _pipeline->reset(); _logger.debug("InferRequest::get_result finished"); } diff --git a/src/plugins/intel_npu/src/backend/src/zero_init.cpp b/src/plugins/intel_npu/src/backend/src/zero_init.cpp index 8490220be6a407..7ed1c66c00911a 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_init.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_init.cpp @@ -29,8 +29,9 @@ static std::tuple queryDriverExtensionVersion( for (uint32_t i = 0; i < count; ++i) { auto& property = extProps[i]; - if (strncmp(property.name, ZE_GRAPH_EXT_NAME, strlen(ZE_GRAPH_EXT_NAME)) != 0) + if (strncmp(property.name, ZE_GRAPH_EXT_NAME, strlen(ZE_GRAPH_EXT_NAME)) != 0) { continue; + } // If the driver version is latest, will just use its name. if (property.version == ZE_GRAPH_EXT_VERSION_CURRENT) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_memory.cpp b/src/plugins/intel_npu/src/backend/src/zero_memory.cpp index 6dea1396c853f0..79a5efab5ee419 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_memory.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_memory.cpp @@ -71,8 +71,8 @@ bool HostMemAllocator::is_equal(const HostMemAllocator& other) const { return (_initStructs == other._initStructs) && (_flag == other._flag); } -void MemoryManagementUnit::appendArgument(const std::string& name, const std::size_t argSize) { - _offsets.emplace(std::make_pair(name, _size)); +void MemoryManagementUnit::appendArgument(const std::size_t argSize) { + _offsets.push_back(_size); _size += argSize + alignment - (argSize % alignment); // is this really necessary? if 0==argSize%alignment -> add 1 * alignment @@ -94,16 +94,16 @@ const void* MemoryManagementUnit::getDeviceMemRegion() const { void* MemoryManagementUnit::getDeviceMemRegion() { return _device ? _device->data() : nullptr; } -void* MemoryManagementUnit::getDevicePtr(const std::string& name) { +void* MemoryManagementUnit::getDevicePtr(const size_t index) { uint8_t* from = static_cast(_device ? _device->data() : nullptr); - if (from == nullptr) { - OPENVINO_THROW("Device memory not allocated yet"); - } - if (!_offsets.count(name)) { - OPENVINO_THROW("Invalid memory offset key: ", name); - } + OPENVINO_ASSERT(from != nullptr, "Device memory not allocated yet"); + OPENVINO_ASSERT(index < _offsets.size(), + "Memory offset index out of bound. 
Received: ", + index, + ", memory offset size: ", + _offsets.size()); - return _offsets.at(name) + from; + return _offsets.at(index) + from; } } // namespace zeroMemory } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index f6def94baf39c4..77d325420ac088 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -25,7 +25,8 @@ struct DiscretePipeline final : public Pipeline { ze_graph_profiling_query_handle_t profiling_handle, const std::array, stage::COUNT>& command_queues, const uint32_t& group_ordinal, - std::unordered_map& tensors_data) + const std::vector>& inputTensorsData, + const std::vector>& outputTensorsData) : _config(config), _command_queues{command_queues}, _command_list{{{device_handle, context, graph_ddi_table_ext, _config, group_ordinal}, @@ -44,23 +45,24 @@ struct DiscretePipeline final : public Pipeline { static const std::size_t alignment = STANDARD_PAGE_SIZE; OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::DiscretePipeline::DiscretePipeline"); - for (const auto& desc : executor->inputs_desc_map()) { - _deviceInputs.appendArgument(desc.first, zeroUtils::getSizeIOBytes(desc.second.info)); + for (const auto& desc : executor->get_input_descriptors()) { + _deviceInputs.appendArgument(zeroUtils::getSizeIOBytes(desc.info)); } _deviceInputs.allocate(device_handle, context); _logger.debug("DiscretePipeline - appending memory copy and set argument value for input"); - for (const auto& desc : executor->inputs_desc_map()) { - const TensorData& inputTensorData = tensors_data.at(desc.first); - const void* tensorBuffer = reinterpret_cast(inputTensorData.mem); + size_t inputIndex = 0; + for (const auto& desc : executor->get_input_descriptors()) { + const void* tensorBuffer = reinterpret_cast(inputTensorsData.at(inputIndex)->mem); - const std::size_t argSize = zeroUtils::getSizeIOBytes(desc.second.info); + const std::size_t argSize = zeroUtils::getSizeIOBytes(desc.info); std::size_t size = argSize + alignment - (argSize % alignment); - _command_list[stage::UPLOAD].appendMemoryCopy(_deviceInputs.getDevicePtr(desc.first), tensorBuffer, size); + _command_list[stage::UPLOAD].appendMemoryCopy(_deviceInputs.getDevicePtr(inputIndex), tensorBuffer, size); - executor->setArgumentValue(desc.second.idx, _deviceInputs.getDevicePtr(desc.first)); + executor->setArgumentValue(desc.idx, _deviceInputs.getDevicePtr(inputIndex)); + ++inputIndex; } _logger.debug("DiscretePipeline - append signal event"); @@ -68,24 +70,26 @@ struct DiscretePipeline final : public Pipeline { _command_list[stage::UPLOAD].appendBarrier(); _event[stage::UPLOAD].AppendSignalEvent(_command_list[stage::UPLOAD]); - for (const auto& desc : executor->outputs_desc_map()) { - _deviceOutputs.appendArgument(desc.first, zeroUtils::getSizeIOBytes(desc.second.info)); + for (const auto& desc : executor->get_output_descriptors()) { + _deviceOutputs.appendArgument(zeroUtils::getSizeIOBytes(desc.info)); } _deviceOutputs.allocate(device_handle, context); _logger.debug("DiscretePipeline - appending memory copy and set argument value for output"); - for (const auto& desc : executor->outputs_desc_map()) { - const TensorData& outputTensorData = tensors_data.at(desc.first); - void* tensorBuffer = reinterpret_cast(outputTensorData.mem); - const std::size_t argSize = zeroUtils::getSizeIOBytes(desc.second.info); + size_t outputIndex = 0; + for (const 
auto& desc : executor->get_output_descriptors()) { + void* tensorBuffer = reinterpret_cast(outputTensorsData.at(outputIndex)->mem); + + const std::size_t argSize = zeroUtils::getSizeIOBytes(desc.info); std::size_t size = argSize + alignment - (argSize % alignment); _command_list[stage::READBACK].appendMemoryCopy(tensorBuffer, - _deviceOutputs.getDevicePtr(desc.first), + _deviceOutputs.getDevicePtr(outputIndex), size); - executor->setArgumentValue(desc.second.idx, _deviceOutputs.getDevicePtr(desc.first)); + executor->setArgumentValue(desc.idx, _deviceOutputs.getDevicePtr(outputIndex)); + ++outputIndex; } _event[stage::UPLOAD].AppendWaitOnEvent(_command_list[stage::EXECUTE]); @@ -104,7 +108,7 @@ struct DiscretePipeline final : public Pipeline { DiscretePipeline& operator=(const DiscretePipeline&) = delete; virtual ~DiscretePipeline() = default; - void push(size_t) override { + void push() override { _logger.debug("DiscretePipeline - push() started"); OV_ITT_TASK_CHAIN(ZERO_INFER_REQUEST_DP_PUSH, itt::domains::LevelZeroBackend, @@ -119,7 +123,7 @@ struct DiscretePipeline final : public Pipeline { _logger.debug("DiscretePipeline - push() completed"); }; - void pull(size_t) override { + void pull() override { _logger.debug("DiscretePipeline - pull() started"); OV_ITT_TASK_CHAIN(ZERO_INFER_REQUEST_DP_PULL, itt::domains::LevelZeroBackend, @@ -136,14 +140,14 @@ struct DiscretePipeline final : public Pipeline { _logger.debug("DiscretePipeline - pull() completed"); }; - void reset(size_t) const override { + void reset() const override { // Reset the fence objects for (auto& fence : _fence) { fence.reset(); } }; - void updateCommandList(const TensorData&, uint32_t, size_t) override {} + void updateCommandList(const TensorData&, const uint32_t) override{}; private: const Config _config; @@ -166,23 +170,27 @@ struct IntegratedPipeline final : public Pipeline { std::shared_ptr npu_profiling, CommandQueue& command_queue, const uint32_t& group_ordinal, - std::unordered_map& tensors_data, - const size_t batch_size) + const std::vector>& inputTensorsData, + const std::vector>& outputTensorsData, + const size_t numberOfCommandLists) : _config(config), _executor(static_cast(executorPtr.get())), _command_queue{command_queue}, - _event_pool{device_handle, context, batch_size ? static_cast(batch_size) : 1, _config}, + _event_pool{device_handle, + context, + numberOfCommandLists ? 
static_cast(numberOfCommandLists) : 1, + _config}, _npu_profiling(std::move(npu_profiling)), _logger("IntegratedPipeline", _config.get()) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::IntegratedPipeline::IntegratedPipeline"); _logger.debug("IntegratedPipeline - initialize started"); - _command_lists.reserve(batch_size); - _events.reserve(batch_size); - _fences.reserve(batch_size); + _command_lists.reserve(numberOfCommandLists); + _events.reserve(numberOfCommandLists); + _fences.reserve(numberOfCommandLists); _logger.debug("IntegratedPipeline - emplace_back _event_pool and _command_queue"); - for (size_t i = 0; i < batch_size; i++) { + for (size_t i = 0; i < numberOfCommandLists; i++) { _command_lists.emplace_back(std::make_unique( device_handle, context, @@ -194,19 +202,21 @@ struct IntegratedPipeline final : public Pipeline { _fences.emplace_back(std::make_unique(_command_queue, _config)); } - for (size_t i = 0; i < batch_size; i++) { - for (const auto& desc : _executor->inputs_desc_map()) { - const TensorData& inputTensorData = tensors_data.at(desc.first); - _executor->setArgumentValue( - desc.second.idx, - static_cast(inputTensorData.mem) + (i * inputTensorData.size) / batch_size); + for (size_t i = 0; i < numberOfCommandLists; i++) { + size_t ioIndex = 0; + for (const auto& desc : _executor->get_input_descriptors()) { + _executor->setArgumentValue(desc.idx, + static_cast(inputTensorsData.at(ioIndex)->mem) + + (i * inputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + ++ioIndex; } - for (const auto& desc : _executor->outputs_desc_map()) { - const TensorData& outputTensorData = tensors_data.at(desc.first); - _executor->setArgumentValue( - desc.second.idx, - static_cast(outputTensorData.mem) + (i * outputTensorData.size) / batch_size); + ioIndex = 0; + for (const auto& desc : _executor->get_output_descriptors()) { + _executor->setArgumentValue(desc.idx, + static_cast(outputTensorsData.at(ioIndex)->mem) + + (i * outputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + ++ioIndex; } /// append timestamp command if feature was activated @@ -238,51 +248,65 @@ struct IntegratedPipeline final : public Pipeline { IntegratedPipeline& operator=(const IntegratedPipeline&) = delete; virtual ~IntegratedPipeline() = default; - void push(size_t batch_index) override { + void push() override { _logger.debug("IntegratedPipeline - push() started"); - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "IntegratedPipeline", "push"); - if (sync_output_with_fences_) { - _command_queue.executeCommandList(*_command_lists.at(batch_index), *_fences.at(batch_index)); - } else { - _command_queue.executeCommandList(*_command_lists.at(batch_index)); + + for (size_t i = 0; i < _command_lists.size(); ++i) { + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "IntegratedPipeline", "push"); + if (sync_output_with_fences_) { + _command_queue.executeCommandList(*_command_lists.at(i), *_fences.at(i)); + } else { + _command_queue.executeCommandList(*_command_lists.at(i)); + } } + _logger.debug("IntegratedPipeline - push() completed"); }; - void pull(size_t batch_index) override { + void pull() override { _logger.debug("IntegratedPipeline - pull() started"); OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, itt::domains::LevelZeroBackend, "IntegratedPipeline", "pull"); - if (sync_output_with_fences_) { - _fences.at(batch_index)->hostSynchronize(); - } else { - _events.at(batch_index)->hostSynchronize(); - } - /// sample npu timestamps if feature 
was activated - if (_npu_profiling != nullptr) { - _npu_profiling->sampleNpuTimestamps(); + + for (size_t i = 0; i < _command_lists.size(); ++i) { + if (sync_output_with_fences_) { + _fences.at(i)->hostSynchronize(); + } else { + _events.at(i)->hostSynchronize(); + } + /// sample npu timestamps if feature was activated + if (_npu_profiling != nullptr) { + _npu_profiling->sampleNpuTimestamps(); + } } + _logger.debug("IntegratedPipeline - pull() completed"); }; - void reset(size_t batch_index) const override { + void reset() const override { _logger.debug("IntegratedPipeline - rest() started"); - if (sync_output_with_fences_) { - _fences.at(batch_index)->reset(); - } else { - _events.at(batch_index)->reset(); + + for (size_t i = 0; i < _command_lists.size(); ++i) { + if (sync_output_with_fences_) { + _fences.at(i)->reset(); + } else { + _events.at(i)->reset(); + } } + _logger.debug("IntegratedPipeline - rest() completed"); }; - void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) override { + void updateCommandList(const TensorData& tensorsData, const uint32_t index) override { OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "IntegratedPipeline", "updateCommandList"); - for (size_t i = 0; i < batch_size; i++) { + const size_t numberOfCommandLists = _command_lists.size(); + + for (size_t i = 0; i < numberOfCommandLists; i++) { _command_lists.at(i)->updateMutableCommandList( index, - static_cast(tensors_data.mem) + (i * tensors_data.size) / batch_size); + static_cast(tensorsData.mem) + (i * tensorsData.size) / numberOfCommandLists); _command_lists.at(i)->close(); } }; @@ -305,8 +329,9 @@ std::unique_ptr makePipeline(const std::shared_ptr& e zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, - std::unordered_map& tensors_data, - const size_t batch_size) { + const std::vector>& inputTensorsData, + const std::vector>& outputTensorsData, + const size_t numberOfCommandLists) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Infer_request::makePipeline"); if (profiling_pool.create()) profiling_query.create(profiling_pool._handle); @@ -333,8 +358,9 @@ std::unique_ptr makePipeline(const std::shared_ptr& e npu_profiling, *command_queues[stage::EXECUTE], group_ordinal, - tensors_data, - batch_size); + inputTensorsData, + outputTensorsData, + numberOfCommandLists); } return std::make_unique(config, @@ -345,7 +371,8 @@ std::unique_ptr makePipeline(const std::shared_ptr& e profiling_query.getHandle(), command_queues, group_ordinal, - tensors_data); + inputTensorsData, + outputTensorsData); } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp index ffe022a1800ef5..18042250e46386 100644 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp @@ -35,8 +35,10 @@ using SerializedIR = std::pair>; (std::is_same::value || std::is_same::value || \ std::is_same::value) -// For ext version >= 1.6, originalShape is avaible -#define NotSupportOriginalShape(T) \ +// A bug inside the driver makes the "pfnGraphGetArgumentMetadata" call not safe for use prior to +// "ze_graph_dditable_ext_1_6_t". 
+// See: E#117498 +#define NotSupportArgumentMetadata(T) \ (std::is_same::value || std::is_same::value || \ std::is_same::value || std::is_same::value) @@ -79,16 +81,18 @@ class LevelZeroCompilerInDriver final : public ICompiler { /** * @brief Serialize input / output information to string format. * @details Format: - * --inputs_precisions=": [:]" - * --inputs_layouts=": [:]" - * --outputs_precisions=":" - * --outputs_layouts=":" + * --inputs_precisions="0: [1:]" + * --inputs_layouts="0: [1:]" + * --outputs_precisions="0:" + * --outputs_layouts="0:" + * + * For older compiler versions, the name of the inputs/outputs may be used instead of their indices. * * Since the layout information is no longer an important part of the metadata values when using the 2.0 OV * API, the layout fields shall be filled with default values in order to assure the backward compatibility * with the driver. */ - static std::string serializeIOInfo(const std::shared_ptr& model); + static std::string serializeIOInfo(const std::shared_ptr& model, const bool useIndices); private: NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; @@ -97,53 +101,19 @@ class LevelZeroCompilerInDriver final : public ICompiler { ze_graph_compiler_version_info_t compilerVersion) const; std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t& compilerVersion) const; - /** - * @brief Extracts the layout value or the state descriptor from the given Level Zero structure. - * @details Extracting the layout information is required only when using older driver versions which rely on - * this legacy attribute. Since this information is not found within the parameter/result nodes, we need to - * extract this value here. - * - * The state variables are also not found in the previously mentioned nodes, thus if the given Level Zero - * parameter corresponds to an input/output, we shall extract the layout value from it. Else it represents a - * state variable and the descriptor will be extracted and stored in an OpenVINO specific format. - * @param parameters Holds the already extracted input node descriptors. The transposed shape attribute of the - * corresponding entry may be updated according to the extracted layout value. - * @param results Holds the already extracted output node descriptors. The transposed shape attribute of the - * corresponding entry may be updated according to the extracted layout value. - * @param states The state descriptors shall be stored here in an OpenVINO specific format. - * @param stateNames The output location of the state variables' names in the order found within the compiled - * model. - * @param arg The Level Zero specific structure from which the layout value or state variable descriptor shall - * be extracted. 
- */ - template - void getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states, - std::vector& stateNames, - const T& arg) const; - - template = true> + template = true> void getMetadata(TableExtension* graphDdiTableExt, ze_graph_handle_t graphHandle, uint32_t index, - std::vector& inputNames, - std::vector& outputNames, - std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& state) const; - - template = true> + std::vector& inputs, + std::vector& outputs) const; + + template = true> void getMetadata(TableExtension* graphDdiTableExt, ze_graph_handle_t graphHandle, uint32_t index, - std::vector& inputNames, - std::vector& outputNames, - std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& state) const; + std::vector& inputs, + std::vector& outputs) const; template = true> ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp index e9fee3d9ee2f2a..6543b1199b7a4b 100644 --- a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp @@ -100,8 +100,9 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter() : _logger("LevelZeroCompile for (uint32_t i = 0; i < count; ++i) { auto& property = extProps[i]; - if (strncmp(property.name, ZE_GRAPH_EXT_NAME, strlen(ZE_GRAPH_EXT_NAME)) != 0) + if (strncmp(property.name, ZE_GRAPH_EXT_NAME, strlen(ZE_GRAPH_EXT_NAME)) != 0) { continue; + } // If the driver version is latest, will just use its name. if (property.version == ZE_GRAPH_EXT_VERSION_CURRENT) { diff --git a/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp b/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp index cc9655a38dd3ff..7259673191441a 100644 --- a/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp +++ b/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp @@ -47,7 +47,12 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh // precision/layout preprocessing requirement. We are setting this value to "true" since the API version is no // longer a cause for altering the metadata. This is due to the preprocessing performed in the OpenVINO framework's // implementaion, the "ov::Model" object is preprocessed before reaching the NPU plugin. - const auto new_api_key = "is_new_api"; + const auto newAPIKey = "is_new_api"; + + // Flag used for indicating an NPU plugin version which switched the I/O identification convention from names to + // indices. The flag is required in order to inform the driver-compiler adapter to expect indices when attempting to + // deserialize the I/O metadata. 
+ const auto useIndicesForIOMetadata = "use_indices_for_io_metadata"; // We modify the original model object here therefore a mutex is required static std::mutex rtInfoMutex; @@ -55,12 +60,14 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh { std::lock_guard lock(rtInfoMutex); - _model->set_rt_info(true, new_api_key); + _model->set_rt_info(true, newAPIKey); + _model->set_rt_info(true, useIndicesForIOMetadata); manager.run_passes(_model); auto& rtInfo = _model->get_rt_info(); - rtInfo.erase(new_api_key); + rtInfo.erase(newAPIKey); + rtInfo.erase(useIndicesForIOMetadata); } _logger.debug("serializeModelToStream end"); } diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp index 5f41e0dcd8a6aa..efe8d2e594f5b7 100644 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp @@ -174,62 +174,6 @@ std::string rankToLegacyLayoutString(const size_t rank) { } } -size_t zeLayoutToRank(const ze_graph_argument_layout_t layout) { - switch (layout) { - case ZE_GRAPH_ARGUMENT_LAYOUT_C: - return 1; - case ZE_GRAPH_ARGUMENT_LAYOUT_CN: - return 2; - case ZE_GRAPH_ARGUMENT_LAYOUT_HW: - return 2; - case ZE_GRAPH_ARGUMENT_LAYOUT_NC: - return 2; - case ZE_GRAPH_ARGUMENT_LAYOUT_CHW: - return 3; - case ZE_GRAPH_ARGUMENT_LAYOUT_NCHW: - return 4; - case ZE_GRAPH_ARGUMENT_LAYOUT_NHWC: - return 4; - case ZE_GRAPH_ARGUMENT_LAYOUT_NCDHW: - return 5; - case ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC: - return 5; - default: - // TODO #-30200 Extend to support all cases - return 0; - } -} - -/** - * @brief Transposes the original shape value according to given layout. - */ -std::vector reshapeByLayout(const std::vector& originalDimensions, - const ze_graph_argument_layout_t layout) { - std::vector order; - std::vector reshapedDimensions; - - switch (layout) { - case ZE_GRAPH_ARGUMENT_LAYOUT_CN: - order = NC_TO_CN_LAYOUT_DIMENSIONS_ORDER; - break; - case ZE_GRAPH_ARGUMENT_LAYOUT_NHWC: - order = NCHW_TO_NHWC_LAYOUT_DIMENSIONS_ORDER; - break; - case ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC: - order = NCDHW_TO_NDHWC_LAYOUT_DIMENSIONS_ORDER; - break; - default: - // TODO #-30200 Extend to support all cases - return originalDimensions; - } - - for (const size_t& orderElement : order) { - reshapedDimensions.push_back(originalDimensions[orderElement]); - } - - return reshapedDimensions; -} - } // namespace namespace intel_npu { @@ -311,7 +255,8 @@ SerializedIR LevelZeroCompilerInDriver::serializeIR( } template -std::string LevelZeroCompilerInDriver::serializeIOInfo(const std::shared_ptr& model) { +std::string LevelZeroCompilerInDriver::serializeIOInfo(const std::shared_ptr& model, + const bool useIndices) { const ov::ParameterVector& parameters = model->get_parameters(); const ov::ResultVector& results = model->get_results(); @@ -324,21 +269,32 @@ std::string LevelZeroCompilerInDriver::serializeIOInfo(const std inputsLayoutSS << INPUTS_LAYOUTS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; if (!parameters.empty()) { - const std::string& firstInputName = parameters.at(0)->get_friendly_name(); + size_t parameterIndex = 0; for (const std::shared_ptr& parameter : parameters) { - const std::string& name = parameter->get_friendly_name(); const ov::element::Type& precision = parameter->get_element_type(); const size_t rank = parameter->get_shape().size(); - if (name != firstInputName) { + if (parameterIndex != 0) { inputsPrecisionSS << 
VALUES_SEPARATOR; inputsLayoutSS << VALUES_SEPARATOR; } - inputsPrecisionSS << name << NAME_VALUE_SEPARATOR << ovPrecisionToLegacyPrecisionString(precision); - // Ticket: E-88902 - inputsLayoutSS << name << NAME_VALUE_SEPARATOR << rankToLegacyLayoutString(rank); + if (useIndices) { + inputsPrecisionSS << parameterIndex; + inputsLayoutSS << parameterIndex; + } else { + const std::string& name = parameter->get_friendly_name(); + + inputsPrecisionSS << name; + // Ticket: E-88902 + inputsLayoutSS << name; + } + + inputsPrecisionSS << NAME_VALUE_SEPARATOR << ovPrecisionToLegacyPrecisionString(precision); + inputsLayoutSS << NAME_VALUE_SEPARATOR << rankToLegacyLayoutString(rank); + + ++parameterIndex; } } @@ -348,20 +304,31 @@ std::string LevelZeroCompilerInDriver::serializeIOInfo(const std outputsPrecisionSS << OUTPUTS_PRECISIONS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; outputsLayoutSS << OUTPUTS_LAYOUTS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; - const std::string& firstOutputName = results.at(0)->get_input_node_ptr(0)->get_friendly_name(); + size_t resultIndex = 0; for (const std::shared_ptr& result : results) { - const std::string& name = result->get_input_node_ptr(0)->get_friendly_name(); const ov::element::Type_t precision = result->get_element_type(); const size_t rank = result->get_shape().size(); - if (name != firstOutputName) { + if (resultIndex != 0) { outputsPrecisionSS << VALUES_SEPARATOR; outputsLayoutSS << VALUES_SEPARATOR; } - outputsPrecisionSS << name << NAME_VALUE_SEPARATOR << ovPrecisionToLegacyPrecisionString(precision); - outputsLayoutSS << name << NAME_VALUE_SEPARATOR << rankToLegacyLayoutString(rank); + if (useIndices) { + outputsPrecisionSS << resultIndex; + outputsLayoutSS << resultIndex; + } else { + const std::string& name = result->get_input_node_ptr(0)->get_friendly_name(); + + outputsPrecisionSS << name; + outputsLayoutSS << name; + } + + outputsPrecisionSS << NAME_VALUE_SEPARATOR << ovPrecisionToLegacyPrecisionString(precision); + outputsLayoutSS << NAME_VALUE_SEPARATOR << rankToLegacyLayoutString(rank); + + ++resultIndex; } outputsPrecisionSS << VALUE_DELIMITER; @@ -808,8 +775,9 @@ ze_result_t LevelZeroCompilerInDriver::seriazlideIRModelAndCreat ze_graph_format_t format = ZE_GRAPH_FORMAT_NGRAPH_LITE; std::string buildFlags; + const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); - buildFlags += serializeIOInfo(model); + buildFlags += serializeIOInfo(model, useIndices); buildFlags += " "; buildFlags += serializeConfig(config, const_cast(compilerVersion)); @@ -977,112 +945,68 @@ uint32_t LevelZeroCompilerInDriver::getSupportedOpsetVersion() c return maxOpsetVersion; } -template -template -void LevelZeroCompilerInDriver::getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states, - std::vector& stateNames, - const T& arg) const { - std::string legacyName = arg.name; - - // The layout may differ from the default one only when using significantly older drivers. In order to accommodate - // this case, an extra attribute needs to be stored which holds the transposed shape. 
- const std::vector originalDimensions(arg.dims, arg.dims + zeLayoutToRank(arg.deviceLayout)); - const std::vector reshapedDimensions = reshapeByLayout(originalDimensions, arg.deviceLayout); - const ov::Shape shape = ov::Shape(reshapedDimensions); - - if (!isStateInputName(legacyName) && !isStateOutputName(legacyName)) { - if (arg.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { - _logger.info("getLayoutOrStateDescriptor Found input \"%s\"", legacyName.c_str()); - - parameters[legacyName].transposedShape = shape; - } - if (arg.type == ZE_GRAPH_ARGUMENT_TYPE_OUTPUT) { - _logger.info("getLayoutOrStateDescriptor Found output \"%s\"", legacyName.c_str()); - - results[legacyName].transposedShape = shape; - } - } else if (isStateInputName(legacyName)) { - // The inputs and outputs of the state nodes share the same metadata, thus we'll consider only the the inputs - // here - legacyName = legacyName.substr(READVALUE_PREFIX.length()); - _logger.info("getLayoutOrStateDescriptor Found state variable \"%s\"", legacyName.c_str()); - - const ov::element::Type_t precision = toOVElementType(arg.devicePrecision); - - stateNames.push_back(legacyName); - states[legacyName] = {legacyName, "", {}, precision, shape, shape}; - } -} - /** - * @brief Extracts the parameter/result (i.e. input/output) descriptors from Level Zero specific structures into - * OpenVINO specific ones. - * @param nodeDescriptors The map in which the result shall be stored. - * @param names The I/O identifiers shall be stored here in the order found within the compiled model. - * @param metadata The Level Zero structure fomr which the descriptors will be extracted. + * @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific ones. + * + * @param arg The main Level Zero structure from which most metadata will be extracted. + * @param metadata The secondary Level Zero structure from which metadata will be extracted. More specifically, the + * argument is used for populating "shapeFromIRModel". Not providing this argument will lead to an empty value for the + * referenced attribute. + * @returns A descriptor object containing the metadata converted in OpenVINO specific structures. 
*/ -static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors, - std::vector& names, - ze_graph_argument_properties_3_t& arg) { +static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg, + const std::optional& metadata) { ov::element::Type_t precision = toOVElementType(arg.devicePrecision); - ov::Shape shape; + ov::Shape shapeFromCompiler, shapeFromIRModel; std::unordered_set outputTensorNames; for (uint32_t id = 0; id < arg.associated_tensor_names_count; id++) { outputTensorNames.insert(arg.associated_tensor_names[id]); } - for (uint32_t id = 0; id < arg.dims_count; id++) { - shape.push_back(arg.dims[id]); + shapeFromCompiler.push_back(arg.dims[id]); } - - const std::string& legacyName = arg.name; - - names.push_back(arg.debug_friendly_name); - nodeDescriptors[arg.debug_friendly_name] = - {legacyName, arg.debug_friendly_name, std::move(outputTensorNames), precision, shape, shape}; -} - -static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors, - std::vector& names, - ze_graph_argument_properties_3_t& arg, - ze_graph_argument_metadata_t& metadata) { - ov::element::Type_t precision = toOVElementType(arg.devicePrecision); - ov::Shape transposedShape, originalShape; - std::unordered_set outputTensorNames; - - for (uint32_t id = 0; id < arg.associated_tensor_names_count; id++) { - outputTensorNames.insert(arg.associated_tensor_names[id]); - } - - for (uint32_t id = 0; id < arg.dims_count; id++) { - transposedShape.push_back(arg.dims[id]); + if (metadata.has_value()) { + for (uint32_t id = 0; id < metadata->shape_size; id++) { + shapeFromIRModel.push_back(metadata->shape[id]); + } } - for (uint32_t id = 0; id < metadata.shape_size; id++) { - originalShape.push_back(metadata.shape[id]); + // Flags will be used instead of indices for informing the type of the current entry + std::string nameFromCompiler = arg.name; + bool isStateInput = false; + bool isStateOutput = false; + bool isShapeTensor = false; + if (isStateInputName(nameFromCompiler)) { + nameFromCompiler = nameFromCompiler.substr(READVALUE_PREFIX.length()); + isStateInput = true; + } else if (isStateOutputName(nameFromCompiler)) { + nameFromCompiler = nameFromCompiler.substr(ASSIGN_PREFIX.length()); + isStateOutput = true; + } else if (isShapeTensorName(nameFromCompiler)) { + nameFromCompiler = nameFromCompiler.substr(SHAPE_TENSOR_PREFIX.length()); + isShapeTensor = true; } - const std::string& legacyName = arg.name; - - names.push_back(arg.debug_friendly_name); - nodeDescriptors[arg.debug_friendly_name] = - {legacyName, arg.debug_friendly_name, std::move(outputTensorNames), precision, originalShape, transposedShape}; + return {nameFromCompiler, + precision, + std::move(shapeFromCompiler), + isStateInput, + isStateOutput, + isShapeTensor, + std::nullopt, + arg.debug_friendly_name, + std::move(outputTensorNames), + metadata.has_value() ? 
std::optional(shapeFromIRModel) : std::nullopt}; } template -template > +template > void LevelZeroCompilerInDriver::getMetadata(TableExtension* graphDdiTableExt, ze_graph_handle_t graphHandle, uint32_t index, - std::vector& inputNames, - std::vector& outputNames, - std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states) const { + std::vector& inputs, + std::vector& outputs) const { ze_graph_argument_properties_3_t arg; auto result = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &arg); if (ZE_RESULT_SUCCESS != result) { @@ -1094,30 +1018,26 @@ void LevelZeroCompilerInDriver::getMetadata(TableExtension* grap uint64_t(result)); } - if (!isStateInputName(arg.name) && !isStateOutputName(arg.name)) { - if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg.type) { - getNodeDescriptor(parameters, inputNames, arg); - } - - if (ZE_GRAPH_ARGUMENT_TYPE_OUTPUT == arg.type) { - getNodeDescriptor(results, outputNames, arg); - } + switch (arg.type) { + case ZE_GRAPH_ARGUMENT_TYPE_INPUT: { + inputs.push_back(getIODescriptor(arg, std::nullopt)); + } break; + case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: { + outputs.push_back(getIODescriptor(arg, std::nullopt)); + } break; + default: { + OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ", arg.type); + } } - - getLayoutOrStateDescriptor(parameters, results, states, stateNames, arg); } template -template > +template > void LevelZeroCompilerInDriver::getMetadata(TableExtension* graphDdiTableExt, ze_graph_handle_t graphHandle, uint32_t index, - std::vector& inputNames, - std::vector& outputNames, - std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states) const { + std::vector& inputs, + std::vector& outputs) const { ze_graph_argument_properties_3_t arg; auto result = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &arg); if (ZE_RESULT_SUCCESS != result) { @@ -1129,7 +1049,9 @@ void LevelZeroCompilerInDriver::getMetadata(TableExtension* grap uint64_t(result)); } - if (!isStateInputName(arg.name) && !isStateOutputName(arg.name)) { + std::optional optionalMetadata = std::nullopt; + + if (!isStateInputName(arg.name) && !isStateOutputName(arg.name) && !isShapeTensorName(arg.name)) { ze_graph_argument_metadata_t metadata; result = graphDdiTableExt->pfnGraphGetArgumentMetadata(graphHandle, index, &metadata); if (ZE_RESULT_SUCCESS != result) { @@ -1141,16 +1063,20 @@ void LevelZeroCompilerInDriver::getMetadata(TableExtension* grap uint64_t(result)); } - if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg.type) { - getNodeDescriptor(parameters, inputNames, arg, metadata); - } - - if (ZE_GRAPH_ARGUMENT_TYPE_OUTPUT == arg.type) { - getNodeDescriptor(results, outputNames, arg, metadata); - } + optionalMetadata = std::optional(metadata); } - getLayoutOrStateDescriptor(parameters, results, states, stateNames, arg); + switch (arg.type) { + case ZE_GRAPH_ARGUMENT_TYPE_INPUT: { + inputs.push_back(getIODescriptor(arg, optionalMetadata)); + } break; + case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: { + outputs.push_back(getIODescriptor(arg, optionalMetadata)); + } break; + default: { + OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ", arg.type); + } + } } template @@ -1171,18 +1097,12 @@ NetworkMetadata LevelZeroCompilerInDriver::getNetworkMeta(ze_gra NetworkMetadata meta; for (uint32_t index = 0; index < graphProperties.numGraphArgs; ++index) { - 
getMetadata(_graphDdiTableExt, - graphHandle, - index, - meta.inputNames, - meta.outputNames, - meta.stateNames, - meta.parameters, - meta.results, - meta.states); + getMetadata(_graphDdiTableExt, graphHandle, index, meta.inputs, meta.outputs); } // TODO: support this information in CiD [track: E#33479] meta.numStreams = 1; + meta.bindRelatedDescriptors(); + return meta; } diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index f5d7153974ccd3..1155f313a3cd60 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -37,62 +37,52 @@ const char* NPU_PLUGIN_LIB_NAME = "openvino_intel_npu_plugin"; * Note that a stored compiled model does not hold the original IR model within it. The only related information * which may be extracted is the original model's "parameter"/"result" nodes. Thus, we need to build a dummy model * starting from these fields in order to satisfy the API. - * @param parameterDescriptors Describes the input nodes. - * @param resultDescriptors Describes the output nodes. - * @param inputNames The names of the inputs registered in the order given by the model. - * @param outputNames The names of the outputs registered in the order given by the model. - * @param isBatchingSupported Newer driver versions support batching mode on the plugin. + * + * @param inputDescriptors Describes the input nodes. + * @param outputDescriptors Describes the output nodes. + * @returns The dummy "ov::Model" composed of "parameter" and "result" nodes built using the given descriptors. */ -std::shared_ptr create_dummy_model(const IONodeDescriptorMap& parameterDescriptors, - const IONodeDescriptorMap& resultDescriptors, - const std::vector& inputNames, - const std::vector& outputNames, - bool isBatchingSupported) { +std::shared_ptr create_dummy_model(const std::vector& inputDescriptors, + const std::vector& outputDescriptors) { ov::ParameterVector parameters; ov::NodeVector results; - for (const std::string& inputName : inputNames) { - const IONodeDescriptor& parameterDescriptor = parameterDescriptors.at(inputName); + for (const IODescriptor& inputDescriptor : inputDescriptors) { + if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor) { + continue; + } - std::shared_ptr parameter = [&] { - if (isBatchingSupported) { - return std::make_shared(parameterDescriptor.precision, - parameterDescriptor.originalShape); - } - return std::make_shared(parameterDescriptor.precision, - parameterDescriptor.transposedShape); - }(); + std::shared_ptr parameter = std::make_shared( + inputDescriptor.precision, + inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel + : inputDescriptor.shapeFromCompiler); - parameter->set_friendly_name(parameterDescriptor.currentNodeName); - parameter->output(0).get_tensor().set_names(parameterDescriptor.outputTensorNames); + parameter->set_friendly_name(inputDescriptor.nodeFriendlyName); + parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames); parameters.push_back(parameter); } - // The "result" nodes require a parent node in order to satisfy the legacy API naming conventions as well (in - // the 1.0 API, the name of an output is given by the parent of the "result" node). Additionally, a dummy shape for + // The "result" nodes require a parent node in order to satisfy the API conventions. 
Additionally, a dummy shape for // the "Constant" node was required since the specific constructor does not accept "ov::PartialShape" values (a // constant can't have dynamic shape). The dummy tensor was also brought in order to register the correct, // potentially dynamic, output shape. - for (const std::string& outputName : outputNames) { - const IONodeDescriptor& resultDescriptor = resultDescriptors.at(outputName); + for (const IODescriptor& outputDescriptor : outputDescriptors) { + if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor) { + continue; + } + std::shared_ptr constantDummy = - std::make_shared(resultDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE); - constantDummy->set_friendly_name(resultDescriptor.legacyName); - - const std::shared_ptr& tensorDummy = [&] { - if (isBatchingSupported) { - return std::make_shared(resultDescriptor.precision, - resultDescriptor.originalShape, - resultDescriptor.outputTensorNames); - } - return std::make_shared(resultDescriptor.precision, - resultDescriptor.transposedShape, - resultDescriptor.outputTensorNames); - }(); + std::make_shared(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE); + + const std::shared_ptr& tensorDummy = std::make_shared( + outputDescriptor.precision, + outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel + : outputDescriptor.shapeFromCompiler, + outputDescriptor.outputTensorNames); std::shared_ptr result = std::make_shared(constantDummy); result->output(0).set_tensor_ptr(tensorDummy); - result->set_friendly_name(resultDescriptor.currentNodeName); + result->set_friendly_name(outputDescriptor.nodeFriendlyName); results.push_back(result); } @@ -756,11 +746,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c auto meta = compiler->parse(blob, localConfig); meta.name = "net" + std::to_string(_compiledModelLoadCounter++); - const std::shared_ptr modelDummy = create_dummy_model(meta.parameters, - meta.results, - meta.inputNames, - meta.outputNames, - _backends->isBatchingSupported()); + const std::shared_ptr modelDummy = create_dummy_model(meta.inputs, meta.outputs); bool profiling = localConfig.get(); diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp new file mode 100644 index 00000000000000..f029388ab9bb02 --- /dev/null +++ b/src/plugins/intel_npu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp" + +#include "common/npu_test_env_cfg.hpp" +#include "common/utils.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace ExecutionGraphTests; + +namespace { + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, + ExecGraphDuplicateInputsOutputsNames, + ::testing::Values(ov::test::utils::DEVICE_NPU), + ov::test::utils::appendPlatformTypeTestName); + +} // namespace diff --git a/src/tests/functional/plugin/shared/include/execution_graph_tests/duplicate_inputs_outputs_names.hpp b/src/tests/functional/plugin/shared/include/execution_graph_tests/duplicate_inputs_outputs_names.hpp new file mode 100644 index 00000000000000..b6a7f3fcab038b --- /dev/null +++ 
b/src/tests/functional/plugin/shared/include/execution_graph_tests/duplicate_inputs_outputs_names.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" + +namespace ExecutionGraphTests { + +class ExecGraphDuplicateInputsOutputsNames + : public testing::TestWithParam { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +}; + +} // namespace ExecutionGraphTests diff --git a/src/tests/functional/plugin/shared/src/execution_graph_tests/duplicate_inputs_outputs_names.cpp b/src/tests/functional/plugin/shared/src/execution_graph_tests/duplicate_inputs_outputs_names.cpp new file mode 100644 index 00000000000000..879675eeb3e201 --- /dev/null +++ b/src/tests/functional/plugin/shared/src/execution_graph_tests/duplicate_inputs_outputs_names.cpp @@ -0,0 +1,118 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp" + +#include "functional_test_utils/skip_tests_config.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/runtime/core.hpp" + +namespace { + +constexpr char DUMMY_NAME[] = "dummy_name"; + +} // namespace + +namespace ExecutionGraphTests { + +std::string ExecGraphDuplicateInputsOutputsNames::getTestCaseName(testing::TestParamInfo obj) { + std::string targetDevice = obj.param; + return "Dev=" + targetDevice; +} + +/** + * Checks whether running predictions on a model containing duplicate names within its inputs/outputs yields the same + * result as when using unique names for the same architecture. + */ +TEST_P(ExecGraphDuplicateInputsOutputsNames, CheckOutputsMatch) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + const std::string device_name = this->GetParam(); + const ov::element::Type precision = ov::element::f32; + const ov::Shape shape = {3, 2}; + float input_data1[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + float input_data2[] = {2.0, 2.0, 2.0, 2.0, 2.0, 2.0}; + const ov::Tensor input_tensor1{precision, shape, input_data1}; + const ov::Tensor input_tensor2{precision, shape, input_data2}; + + // A simple graph with 2 inputs and 2 outputs + auto input1 = std::make_shared(precision, shape); + auto input2 = std::make_shared(precision, shape); + auto sum = std::make_shared(input1, input2); + auto mul = std::make_shared(input1, input2); + auto output1 = std::make_shared(sum->get_default_output()); + auto output2 = std::make_shared(mul->get_default_output()); + + // Set the same name for all inputs/outputs + input1->set_friendly_name(DUMMY_NAME); + input2->set_friendly_name(DUMMY_NAME); + input1->get_output_tensor(0).set_names({DUMMY_NAME}); + input2->get_output_tensor(0).set_names({DUMMY_NAME}); + + output1->set_friendly_name(DUMMY_NAME); + output2->set_friendly_name(DUMMY_NAME); + output1->get_input_tensor(0).set_names({DUMMY_NAME}); + output2->get_input_tensor(0).set_names({DUMMY_NAME}); + + auto model = std::make_shared(ov::ResultVector{output1, output2}, + ov::ParameterVector{input1, input2}, + "SimpleNetwork1"); + + // Load the plugin, compile the model and run a single prediction + auto core = ov::Core(); + ov::CompiledModel compiled_model_duplicate_names = core.compile_model(model, device_name); + ov::InferRequest inference_request_duplicate_names = compiled_model_duplicate_names.create_infer_request(); + + inference_request_duplicate_names.set_tensor(compiled_model_duplicate_names.input(0), input_tensor1); 
+ inference_request_duplicate_names.set_tensor(compiled_model_duplicate_names.input(1), input_tensor2); + inference_request_duplicate_names.infer(); + + const ov::Tensor output_tensor1 = + inference_request_duplicate_names.get_tensor(compiled_model_duplicate_names.output(0)); + const ov::Tensor output_tensor2 = + inference_request_duplicate_names.get_tensor(compiled_model_duplicate_names.output(1)); + const float* output_buffer1 = output_tensor1.data(); + const float* output_buffer2 = output_tensor2.data(); + + // Rebuild the model using unique names for inputs/outputs + size_t name_index = 0; + input1->set_friendly_name(DUMMY_NAME + std::to_string(name_index++)); + input2->set_friendly_name(DUMMY_NAME + std::to_string(name_index++)); + input1->get_output_tensor(0).set_names({DUMMY_NAME + std::to_string(name_index++)}); + input2->get_output_tensor(0).set_names({DUMMY_NAME + std::to_string(name_index++)}); + + output1->set_friendly_name(DUMMY_NAME + std::to_string(name_index++)); + output2->set_friendly_name(DUMMY_NAME + std::to_string(name_index++)); + output1->get_input_tensor(0).set_names({DUMMY_NAME + std::to_string(name_index++)}); + output2->get_input_tensor(0).set_names({DUMMY_NAME + std::to_string(name_index)}); + + model = std::make_shared(ov::ResultVector{output1, output2}, + ov::ParameterVector{input1, input2}, + "SimpleNetwork2"); + + // Compile the new model and run a single prediction + ov::CompiledModel compiled_model_unique_names = core.compile_model(model, device_name); + ov::InferRequest inference_request_unique_names = compiled_model_unique_names.create_infer_request(); + + inference_request_unique_names.set_tensor(input1, input_tensor1); + inference_request_unique_names.set_tensor(input2, input_tensor2); + inference_request_unique_names.infer(); + + const ov::Tensor reference_tensor1 = + inference_request_unique_names.get_tensor(compiled_model_unique_names.output(0)); + const ov::Tensor reference_tensor2 = + inference_request_unique_names.get_tensor(compiled_model_unique_names.output(1)); + const float* reference_buffer1 = reference_tensor1.data(); + const float* reference_buffer2 = reference_tensor2.data(); + + // Both models are using the same architecture, thus the results should match + for (size_t element_index = 0; element_index < shape_size(shape); ++element_index) { + ASSERT_EQ(output_buffer1[element_index], reference_buffer1[element_index]); + ASSERT_EQ(output_buffer2[element_index], reference_buffer2[element_index]); + } +} + +} // namespace ExecutionGraphTests
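The refactor above replaces the name-keyed "IONodeDescriptorMap" containers with positional "std::vector<IODescriptor>" storage and index-based lookups, which is also why the new "ExecGraphDuplicateInputsOutputsNames" test can exercise models whose inputs and outputs share a friendly name. The sketch below is a minimal, self-contained illustration of two mechanisms the diff relies on: tying state inputs to state outputs through "relatedDescriptorIndex", and filling a shape tensor's buffer with the related tensor's dimensions in reversed order, as done in "ZeroInferRequest::infer_async()". "MiniDescriptor", "bindStatePairs" and "packShapeTensor" are illustrative stand-ins, not the plugin's actual types or functions.

// A minimal sketch (not the plugin's implementation) of the index-based I/O bookkeeping:
// descriptors live in positional vectors, state inputs/outputs are paired by name via
// "relatedDescriptorIndex", and a shape tensor's buffer receives the related tensor's
// dimensions innermost-first, mirroring the reverse-index loop in infer_async().
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct MiniDescriptor {
    std::string nameFromCompiler;
    bool isStateInput = false;
    bool isStateOutput = false;
    bool isShapeTensor = false;
    std::optional<size_t> relatedDescriptorIndex;
};

// Pair each state input with the state output bearing the same compiler-assigned name,
// and store the index of the counterpart on both sides.
void bindStatePairs(std::vector<MiniDescriptor>& inputs, std::vector<MiniDescriptor>& outputs) {
    for (size_t in = 0; in < inputs.size(); ++in) {
        if (!inputs[in].isStateInput) {
            continue;
        }
        for (size_t out = 0; out < outputs.size(); ++out) {
            if (outputs[out].isStateOutput && outputs[out].nameFromCompiler == inputs[in].nameFromCompiler) {
                inputs[in].relatedDescriptorIndex = out;
                outputs[out].relatedDescriptorIndex = in;
            }
        }
    }
}

// Fill a shape tensor's buffer with the related tensor's dimensions in reversed order
// (innermost dimension first); uint32_t is used here only for illustration.
void packShapeTensor(const std::vector<size_t>& relatedDims, std::vector<uint32_t>& shapeBuffer) {
    shapeBuffer.resize(relatedDims.size());
    for (size_t i = 0; i < shapeBuffer.size(); ++i) {
        const size_t reverseIdx = relatedDims.size() - 1 - i;
        shapeBuffer[i] = static_cast<uint32_t>(relatedDims[reverseIdx]);
    }
}

int main() {
    std::vector<MiniDescriptor> inputs{{"data"}, {"cell_state", true}};
    std::vector<MiniDescriptor> outputs{{"prob"}, {"cell_state", false, true}};
    bindStatePairs(inputs, outputs);
    std::cout << "state input 'cell_state' is bound to output index "
              << *inputs[1].relatedDescriptorIndex << "\n";

    std::vector<uint32_t> shapeBuffer;
    packShapeTensor({1, 3, 224, 224}, shapeBuffer);  // NCHW dims -> 224 224 3 1
    for (uint32_t value : shapeBuffer) {
        std::cout << value << " ";
    }
    std::cout << "\n";
    return 0;
}

Because every lookup in the sketch, as in the diff, goes through a positional index rather than a string key, two entries may carry identical names without colliding; the name is only consulted once, when the related pair is bound.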