From 1f8bef7fe5e99faa41e105ed9611f4ccd20d5743 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Mon, 11 Nov 2024 18:24:24 +0100
Subject: [PATCH 01/14] Decouple CompiledModel internals from InferRequest

---
 src/plugins/intel_cpu/src/compiled_model.cpp |  1 -
 src/plugins/intel_cpu/src/compiled_model.h   | 75 ++++++++++++++--
 src/plugins/intel_cpu/src/graph.h            |  8 +-
 src/plugins/intel_cpu/src/infer_request.cpp  | 92 ++++++++++----------
 src/plugins/intel_cpu/src/infer_request.h    | 17 ++--
 5 files changed, 122 insertions(+), 71 deletions(-)

diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index bbee5d937be5d5..604e426f6e6c4a 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -183,7 +183,6 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
 }
 
 std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request() const {
-    m_numRequests++;
     return std::make_shared<SyncInferRequest>(std::static_pointer_cast<const CompiledModel>(shared_from_this()));
 }
 
diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
index faedf1ae5a744c..aa98b275190daa 100644
--- a/src/plugins/intel_cpu/src/compiled_model.h
+++ b/src/plugins/intel_cpu/src/compiled_model.h
@@ -20,6 +20,15 @@ namespace ov {
 namespace intel_cpu {
 
 class CompiledModel : public ov::ICompiledModel {
+public:
+    struct GraphGuard : public Graph {
+        std::mutex _mutex;
+        struct Lock : public std::unique_lock<std::mutex> {
+            explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
+            GraphGuard& _graph;
+        };
+    };
+
 public:
     typedef std::shared_ptr<CompiledModel> Ptr;
 
@@ -51,9 +60,13 @@ class CompiledModel : public ov::ICompiledModel {
 
     void release_memory() override;
 
+    std::string name() const {
+        return m_name;
+    }
+
 private:
     std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
-    friend class SyncInferRequest;
+    friend class CompiledModelHandler;
 
     const std::shared_ptr<ov::Model> m_model;
     const std::shared_ptr<const ov::IPlugin> m_plugin;
@@ -66,13 +79,6 @@ class CompiledModel : public ov::ICompiledModel {
     Config m_cfg;
     mutable std::atomic_int m_numRequests = {0};
     std::string m_name;
-    struct GraphGuard : public Graph {
-        std::mutex _mutex;
-        struct Lock : public std::unique_lock<std::mutex> {
-            explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
-            GraphGuard& _graph;
-        };
-    };
 
     const bool m_loaded_from_cache;
 
     // WARNING: Do not use m_graphs directly.
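     // Use get_graph() instead: it returns a GraphGuard::Lock that keeps the graph
     // mutex held for as long as the lock object is alive. An illustrative sketch
     // (names follow the declarations above):
     //   CompiledModel::GraphGuard::Lock graphLock = get_graph();
     //   Graph& graph = graphLock._graph;  // safe to use while graphLock is in scope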
@@ -94,5 +100,58 @@ class CompiledModel : public ov::ICompiledModel {
     bool m_has_sub_compiled_models = false;
 };
 
+// This class provides safe access to the internal CompiledModel structures and helps to decouple SyncInferRequest and
+// the CompiledModel internal structures
+class CompiledModelHandler {
+public:
+    CompiledModelHandler(std::shared_ptr<const CompiledModel> compiled_model)
+        : m_compiled_model(std::move(compiled_model)) {
+        OPENVINO_ASSERT(!m_compiled_model->m_graphs.empty(),
+                        "No graph was found in the compiled model: ",
+                        m_compiled_model->name());
+        m_graph = &(m_compiled_model->get_graph()._graph);
+        OPENVINO_ASSERT(m_graph, "Graph ptr null check failed");
+        m_id = (m_compiled_model->m_numRequests)++;
+    }
+
+    ~CompiledModelHandler() {
+        --(m_compiled_model->m_numRequests);
+    }
+
+    CompiledModelHandler(const CompiledModelHandler&) = delete;
+    CompiledModelHandler& operator=(const CompiledModelHandler&) = delete;
+
+    CompiledModelHandler(CompiledModelHandler&&) = default;
+    CompiledModelHandler& operator=(CompiledModelHandler&&) = default;
+
+    const Graph& graph() const {
+        return *m_graph;
+    }
+
+    CompiledModel::GraphGuard::Lock lock() {
+        auto lock = m_compiled_model->get_graph();
+        m_graph = &(lock._graph);
+        OPENVINO_ASSERT(m_graph, "Graph ptr null check failed");
+        return lock;
+    }
+
+    std::string name() const {
+        return m_compiled_model->name();
+    }
+
+    std::shared_ptr<const CompiledModel> compiled_model() const {
+        return m_compiled_model;
+    }
+
+    int id() const {
+        return m_id;
+    }
+
+private:
+    std::shared_ptr<const CompiledModel> m_compiled_model;
+    const Graph* m_graph;
+    int m_id;
+};
+
 }  // namespace intel_cpu
 }  // namespace ov
diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h
index d50ccc152c9186..7e5814b3d72bda 100644
--- a/src/plugins/intel_cpu/src/graph.h
+++ b/src/plugins/intel_cpu/src/graph.h
@@ -89,22 +89,22 @@ class Graph {
         return _name;
     }
 
-    std::map<std::size_t, NodePtr>& GetInputNodesMap() {
+    const std::map<std::size_t, NodePtr>& GetInputNodesMap() const {
         return inputNodesMap;
     }
 
-    std::map<std::size_t, NodePtr>& GetOutputNodesMap() {
+    const std::map<std::size_t, NodePtr>& GetOutputNodesMap() const {
         return outputNodesMap;
     }
 
-    NodePtr getInputNodeByIndex(const std::size_t &index) {
+    NodeConstPtr getInputNodeByIndex(const std::size_t &index) const {
         auto input = inputNodesMap.find(index);
         if (input == inputNodesMap.end())
             OPENVINO_THROW("CPU execution graph doesn't contain input node with index: ", index);
         return input->second;
     }
 
-    NodePtr getOutputNodeByIndex(const std::size_t &index) {
+    NodeConstPtr getOutputNodeByIndex(const std::size_t &index) const {
         auto output = outputNodesMap.find(index);
         if (output == outputNodesMap.end())
             OPENVINO_THROW("CPU execution graph doesn't contain output node with index: ", index);
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index f0b817dcda859c..864f1553d74c74 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -5,7 +5,6 @@
 #include "infer_request.h"
 
 #include "async_infer_request.h"
-#include "compiled_model.h"
 #include "dnnl_extension_utils.h"
 #include "itt.h"
 #include "memory_state.h"
@@ -24,9 +23,9 @@ using OvString = ov::element_type_traits<ov::element::string>::value_type;
 namespace ov {
 namespace intel_cpu {
 
-SyncInferRequest::SyncInferRequest(std::shared_ptr<const CompiledModel> compiled_model)
-    : ov::ISyncInferRequest(compiled_model),
-      m_compiled_model(compiled_model) {
+SyncInferRequest::SyncInferRequest(CompiledModelHandler compiled_model)
+    : ov::ISyncInferRequest(compiled_model.compiled_model()),
+      m_compiled_model(std::move(compiled_model)) {
     const auto& inputs = get_inputs();
     for (std::size_t input_index = 0; input_index < inputs.size(); input_index++) {
         m_input_ports_map[input_index] = inputs[input_index];
@@ -40,13 +39,8 @@ SyncInferRequest::SyncInferRequest(CompiledModelHandler compiled_model)
 }
 
 void SyncInferRequest::create_infer_request() {
-    auto id = (m_compiled_model->m_numRequests)++;
-    m_profiling_task = openvino::itt::handle("INTEL_CPU_INFER_" + m_compiled_model->m_name + "_" + std::to_string(id));
-
-    if (m_compiled_model->m_graphs.size() == 0) {
-        OPENVINO_THROW("No graph was found");
-    }
-    m_graph = &(m_compiled_model->get_graph()._graph);
+    m_profiling_task = openvino::itt::handle("INTEL_CPU_INFER_" + m_compiled_model.name() + "_" +
+                                             std::to_string(m_compiled_model.id()));
 
     // Allocate memory for each tensor if the shape is static
     for (const auto& it : m_input_ports_map) {
@@ -57,18 +51,15 @@ void SyncInferRequest::create_infer_request() {
     }
 
     // create states according to the list of the MemoryStateNodes
-    for (auto&& node : m_graph->getInternalStateNodes()) {
+    auto&& graph = m_compiled_model.graph();
+    for (auto&& node : graph.getInternalStateNodes()) {
         m_memory_states.emplace_back(node.second->makeState());
     }
 }
 
-SyncInferRequest::~SyncInferRequest() {
-    --(m_compiled_model->m_numRequests);
-}
-
 // state -> storage
-void SyncInferRequest::assign_states() {
-    auto&& graph_internal_state_nodes = m_graph->getInternalStateNodes();
+void SyncInferRequest::assign_states(Graph& graph) {
+    auto&& graph_internal_state_nodes = graph.getInternalStateNodes();
     for (const auto& state : m_memory_states) {
         auto itr = graph_internal_state_nodes.find(state->get_name());
         if (itr != graph_internal_state_nodes.end()) {
@@ -77,8 +68,8 @@ void SyncInferRequest::assign_states() {
     }
 }
 
-void SyncInferRequest::redefine_memory_for_input_nodes() {
-    const auto cpuInputNodes = m_graph->GetInputNodesMap();
+void SyncInferRequest::redefine_memory_for_input_nodes(Graph& graph) {
+    const auto cpuInputNodes = graph.GetInputNodesMap();
     for (const auto& input_port : m_input_ports_map) {
         const auto inputNode = cpuInputNodes.find(input_port.first);
         if (inputNode == cpuInputNodes.end())
@@ -103,8 +94,8 @@ void SyncInferRequest::update_external_tensor_ptrs() {
 
 void SyncInferRequest::infer() {
     using namespace openvino::itt;
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, m_profiling_task);
-    auto graphLock = m_compiled_model->get_graph();
-    m_graph = &(graphLock._graph);
+    auto graphLock = m_compiled_model.lock();
+    auto&& graph = graphLock._graph;
 
     auto message = ov::threading::message_manager();
     throw_if_canceled();
@@ -120,40 +111,41 @@ void SyncInferRequest::infer() {
         update_external_tensor_ptrs();
     }
 
-    if (m_graph->hasDynamicInput()) {
-        redefine_memory_for_input_nodes();
+    if (graph.hasDynamicInput()) {
+        redefine_memory_for_input_nodes(graph);
     }
 
-    change_default_ptr();
+    change_default_ptr(graph);
 
     throw_if_canceled();
 
     // state -> node
     if (!m_memory_states.empty()) {
-        assign_states();
+        assign_states(graph);
     }
 
-    push_input_data();
+    push_input_data(graph);
 
-    m_graph->Infer(this);
+    graph.Infer(this);
 
     throw_if_canceled();
 
     // update output control blocks, if any, in order to refresh internal buffers
-    if (m_graph->IsDynamic()) {
+    if (graph.IsDynamic()) {
         for (auto&& item : m_outputControlBlocks) {
             item.second.update();
         }
     }
 
-    m_graph->PullOutputData(m_outputs);
+    graph.PullOutputData(m_outputs);
 }
 
 std::vector<ov::ProfilingInfo> SyncInferRequest::get_profiling_info() const {
-    if (!m_graph || !m_graph->IsReady())
+    auto&& graph = m_compiled_model.graph();
+    if (!graph.IsReady())
         OPENVINO_THROW("Graph is not ready!");
 
     std::vector<ov::ProfilingInfo> perfMap;
-    m_graph->GetPerfData(perfMap);
+    graph.GetPerfData(perfMap);
     return perfMap;
 }
 
@@ -172,13 +164,13 @@ static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr<ITensor>& tensor) {
     }
 }
 
-void SyncInferRequest::change_default_ptr() {
-    const auto& inputNodesMap = m_graph->GetInputNodesMap();
-    const auto& outputNodesMap = m_graph->GetOutputNodesMap();
+void SyncInferRequest::change_default_ptr(Graph& graph) {
+    const auto& inputNodesMap = graph.GetInputNodesMap();
+    const auto& outputNodesMap = graph.GetOutputNodesMap();
 
     std::unordered_set<const void*> inputPtrs;
    std::function<void(const EdgePtr&, ov::SoPtr<ITensor>& tensor)> changeInpPtr;
-    if (m_graph->IsDynamic()) {
+    if (graph.IsDynamic()) {
         changeInpPtr = [&inputPtrs](const EdgePtr &edge, ov::SoPtr<ITensor>& tensor) {
             change_edge_ptr(edge, tensor);
             inputPtrs.insert(tensor->data());
@@ -278,8 +270,8 @@ void SyncInferRequest::change_default_ptr() {
             change_edge_ptr(parentEdge, it.second);
     }
 
-    if (m_graph->IsDynamic()) {
-        const auto &outMemBlocksMap = m_graph->getOutputNodesMemBlocksMap();
+    if (graph.IsDynamic()) {
+        const auto &outMemBlocksMap = graph.getOutputNodesMemBlocksMap();
         for (auto&& item : outMemBlocksMap) {
             const auto& name = item.first;
 
@@ -301,7 +293,7 @@ void SyncInferRequest::change_default_ptr() {
                                           controlBlock.currentMemBlock();  // else reuse the existing buffer
 
                 outputMemBlock->setMemBlockResize(memBlock);
-                DEBUG_LOG("reset proxy ", outputMemBlock, ", actual ", controlBlock.currentMemBlock(), " graph ", m_graph, " inferrequest ", this);
+                DEBUG_LOG("reset proxy ", outputMemBlock, ", actual ", controlBlock.currentMemBlock(), " graph ", &graph, " infer request ", this);
                 DEBUG_LOG(name, ", tensor ", controlBlock.tensor());
             } else {
                 outputMemBlock->reset();  // switch to the internal memory since memory sharing is no longer possible
@@ -401,7 +393,9 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, const ov::SoPtr<ITensor>& in_tensor) {
                            " are different.");
         }
 
-        MemoryDescPtr actualDesc = m_graph->getInputNodeByIndex(input_index)->getBaseMemDescAtOutputPort(0);
+        auto&& graph = m_compiled_model.graph();
+
+        MemoryDescPtr actualDesc = graph.getInputNodeByIndex(input_index)->getBaseMemDescAtOutputPort(0);
         if (!actualDesc->isDefined()) {
             // we must define desc for dynamic case
             // otherwise we get an incorrect check on shape compatibility inside isCompatible
@@ -448,7 +442,9 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, const ov::SoPtr<ITensor>& in_tensor) {
                            " are different.");
         }
 
-        const auto& desc = m_graph->getOutputNodeByIndex(output_index)->getParentEdgeAt(0)->getMemory().getDesc();
+        auto&& graph = m_compiled_model.graph();
+
+        const auto& desc = graph.getOutputNodeByIndex(output_index)->getParentEdgeAt(0)->getMemory().getDesc();
         if (!isDynamic && mem_desc_ptr->isCompatible(desc)) {
             m_output_external_ptr[output_index] = tensor;
         } else if (m_output_external_ptr.find(output_index) != m_output_external_ptr.end()) {
@@ -471,12 +467,12 @@ void SyncInferRequest::set_tensors_impl(const ov::Output<const ov::Node> port, const std::vector<ov::SoPtr<ov::ITensor>>& tensors) {
 
 void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
     OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "init_tensor");
-    if (!m_graph || !m_graph->IsReady())
-        OPENVINO_THROW("Graph is not ready!");
+    auto&& graph = m_compiled_model.graph();
+    OPENVINO_ASSERT(graph.IsReady(), "Graph is not ready!");
 
     ov::SoPtr<ITensor> tensor;
     if (type == ov::ISyncInferRequest::FoundPort::Type::INPUT) {
-        OPENVINO_ASSERT(m_graph->GetInputNodesMap().find(port_index) != m_graph->GetInputNodesMap().end(),
+        OPENVINO_ASSERT(graph.GetInputNodesMap().find(port_index) != graph.GetInputNodesMap().end(),
                         "Tensor with index: ",
                         port_index,
                         " exists in CPU plugin graph, but absents in model inputs");
@@ -501,7 +497,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
             if (!isDynamic) {
                 auto mem_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
                 if (mem_desc_ptr->isCompatible(
-                        m_graph->getInputNodeByIndex(port_index)->getChildEdgeAt(0)->getMemory().getDesc())) {
+                        graph.getInputNodeByIndex(port_index)->getChildEdgeAt(0)->getMemory().getDesc())) {
                     m_input_external_ptr[port_index] = tensor;
                 }
             }
@@ -509,7 +505,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
     }
 
     if (type == ov::ISyncInferRequest::FoundPort::Type::OUTPUT) {
-        const auto& outMap = m_graph->GetOutputNodesMap();
+        const auto& outMap = graph.GetOutputNodesMap();
         auto output = outMap.find(port_index);
         OPENVINO_ASSERT(output != outMap.end(),
                         "Tensor with index: ",
                         port_index,
@@ -589,10 +585,10 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
     return;
 }
 
-void SyncInferRequest::push_input_data() {
+void SyncInferRequest::push_input_data(Graph& graph) {
     for (auto& input : m_input_ports_map) {
         auto tensor = get_tensor(input.second);
-        m_graph->PushInputData(input.first, tensor);
+        graph.PushInputData(input.first, tensor);
     }
 }
 
diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h
index a9def63d359744..77ceee33bcf39f 100644
--- a/src/plugins/intel_cpu/src/infer_request.h
+++ b/src/plugins/intel_cpu/src/infer_request.h
@@ -4,7 +4,7 @@
 
 #pragma once
 
-#include "graph.h"
+#include "compiled_model.h"
 #include "cpu_tensor.h"
 #include "openvino/runtime/iinfer_request.hpp"
 #include "openvino/runtime/isync_infer_request.hpp"
@@ -13,13 +13,11 @@
 namespace ov {
 namespace intel_cpu {
 
-class CompiledModel;
 class AsyncInferRequest;
 
 class SyncInferRequest : public ov::ISyncInferRequest {
 public:
-    SyncInferRequest(std::shared_ptr<const CompiledModel> compiled_model);
-    virtual ~SyncInferRequest();
+    SyncInferRequest(CompiledModelHandler compiled_model);
 
     void infer() override;
 
@@ -96,11 +94,11 @@ class SyncInferRequest : public ov::ISyncInferRequest {
     void create_infer_request();
     void init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type);
-    void push_input_data();
-    void redefine_memory_for_input_nodes();
-    void assign_states();
+    void push_input_data(Graph& graph);
+    void redefine_memory_for_input_nodes(Graph& graph);
+    void assign_states(Graph& graph);
     void update_external_tensor_ptrs();
-    void change_default_ptr();
+    void change_default_ptr(Graph& graph);
 
     const ov::Output<const ov::Node>& get_internal_port(const ov::Output<const ov::Node>& port) const;
 
@@ -109,14 +107,13 @@ class SyncInferRequest : public ov::ISyncInferRequest {
 
 private:
     std::unordered_map<std::size_t, OutputControlBlock> m_outputControlBlocks;
 
-    Graph* m_graph = nullptr;
     std::unordered_map<std::size_t, ov::SoPtr<ov::ITensor>> m_input_external_ptr;
     std::unordered_map<std::size_t, ov::SoPtr<ov::ITensor>> m_output_external_ptr;
 
-    std::shared_ptr<const CompiledModel> m_compiled_model;
     openvino::itt::handle_t m_profiling_task;
     std::vector<MemStatePtr> m_memory_states;
     AsyncInferRequest* m_asyncRequest = nullptr;
+    CompiledModelHandler m_compiled_model;
 
     std::unordered_map<std::size_t, ov::Output<const ov::Node>> m_input_ports_map;
     std::unordered_map<std::size_t, ov::Output<const ov::Node>> m_output_ports_map;

From eff9a600fd0bf5a13d82f4720e69f7ec4b357d37 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Tue, 12 Nov 2024 11:46:41 +0100
Subject: [PATCH 02/14] Throw when release_memory is called during inference
 and add tests

---
 src/plugins/intel_cpu/src/compiled_model.cpp |   7 +-
 src/plugins/intel_cpu/src/compiled_model.h   |   4 +-
 .../concurent_release_memory.cpp             | 177 ++++++++++++++++++
 3 files changed, 185 insertions(+), 3 deletions(-)
 create mode 100644 src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp

diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index 604e426f6e6c4a..af658a49e7acd2 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -343,8 +343,11 @@ void CompiledModel::export_model(std::ostream& modelStream) const {
 
 void CompiledModel::release_memory() {
     for (auto&& graph : m_graphs) {
-        GraphGuard::Lock graph_lock{graph};
-        auto ctx = graph_lock._graph.getGraphContext();
+        // try to lock mutex, since it may be already locked (e.g. by an infer request)
+        std::unique_lock<std::mutex> lock(graph._mutex, std::try_to_lock);
+        OPENVINO_ASSERT(lock.owns_lock(),
+                        "Attempt to call release_memory() on a graph locked by another thread");
+        auto ctx = graph.getGraphContext();
         ctx->getNetworkMemoryControl()->releaseMemory();
     }
 }
diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
index aa98b275190daa..814b1da0ec3d65 100644
--- a/src/plugins/intel_cpu/src/compiled_model.h
+++ b/src/plugins/intel_cpu/src/compiled_model.h
@@ -115,7 +115,9 @@ class CompiledModelHandler {
     }
 
     ~CompiledModelHandler() {
-        --(m_compiled_model->m_numRequests);
+        if (m_compiled_model) {
+            --(m_compiled_model->m_numRequests);
+        }
     }
 
     CompiledModelHandler(const CompiledModelHandler&) = delete;
diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
new file mode 100644
index 00000000000000..21ffd41e7f1cb2
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
@@ -0,0 +1,177 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace ov {
+namespace intel_cpu {
+namespace cpu_unit_test {
+// OpenVINO extension operation that sleeps for X us in its evaluate method
+
+class Sleep : public ov::op::Op {
+public:
+    OPENVINO_OP("Sleep");
+    Sleep() = default;
+    Sleep(const ov::OutputVector& args,
+          size_t sleep,
+          std::shared_ptr<std::mutex> mutex,
+          std::shared_ptr<std::condition_variable> cv,
+          std::shared_ptr<std::atomic<bool>> ready_flag)
+        : Op(args),
+          m_sleep(sleep),
+          m_mutex(mutex),
+          m_cv(cv),
+          m_ready_flag(ready_flag) {
+        constructor_validate_and_infer_types();
+    }
+
+    void validate_and_infer_types() override {
+        set_output_type(0, get_input_element_type(0), get_input_partial_shape(0));
+    }
+
+    std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
+        OPENVINO_ASSERT(new_args.size() == 1, "Incorrect number of new arguments");
+        auto new_op = std::make_shared<Sleep>(new_args, m_sleep, m_mutex, m_cv, m_ready_flag);
+        return new_op;
+    }
+
+    bool visit_attributes(ov::AttributeVisitor& visitor) override {
+        return true;
+    }
+
+    void revalidate_and_infer_types() override {
+        validate_and_infer_types();
+    }
+
+    bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override {
+        {
+            // this is required to start all the evaluate calls at the same time
+            std::unique_lock<std::mutex> lock(*m_mutex);
+            m_cv->wait(lock, [&] {
+                return m_ready_flag->load();
+            });
+        }
+        std::this_thread::sleep_for(std::chrono::microseconds(m_sleep));
+        return true;
+    }
+
+    bool evaluate(ov::TensorVector& output_values,
+                  const ov::TensorVector& input_values,
+                  const ov::EvaluationContext& evaluationContext) const override {
+        return evaluate(output_values, input_values);
+    }
+
+    bool has_evaluate() const override {
+        return true;
+    }
+
+private:
+    size_t m_sleep;  // sleep time in us
+    std::shared_ptr<std::mutex> m_mutex;
+    std::shared_ptr<std::condition_variable> m_cv;
+    std::shared_ptr<std::atomic<bool>> m_ready_flag;
+};
+}  // namespace cpu_unit_test
+}  // namespace intel_cpu
+}  // namespace ov
+
+class ReleaseMemoryMultiThreadTest : public ::testing::Test {
+protected:
+    void SetUp() override {
+        using namespace ov::intel_cpu::cpu_unit_test;
+
+        param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1});
+
+        constexpr size_t sleep_time = 5;  // us
+        mutex = std::make_shared<std::mutex>();
+        cv = std::make_shared<std::condition_variable>();
+        ready_flag = std::make_shared<std::atomic<bool>>(false);
+
+        auto sleep = std::make_shared<Sleep>(ov::OutputVector{param}, sleep_time, mutex, cv, ready_flag);
+        ov::ResultVector results{std::make_shared<ov::op::v0::Result>(sleep)};
+        ov::ParameterVector params{param};
+
+        auto model = std::make_shared<ov::Model>(results, params, "testModel");
+
+        compiled_model = core.compile_model(model, ov::test::utils::DEVICE_CPU, {{"NUM_STREAMS", num_streams}});
+    }
+
+protected:
+    const size_t num_streams = 4;
+    ov::Core core;
+    ov::CompiledModel compiled_model;
+    std::shared_ptr<ov::op::v0::Parameter> param;
+
+    std::shared_ptr<std::mutex> mutex;
+    std::shared_ptr<std::condition_variable> cv;
+    std::shared_ptr<std::atomic<bool>> ready_flag;
+};
+
+TEST_F(ReleaseMemoryMultiThreadTest, smoke_throwInferenceIsRunning) {
+    // Create and infer a few infer requests concurrently
+    std::vector<ov::InferRequest> inferRequests;
+    for (size_t i = 0; i < num_streams; i++) {
+        auto inferRequest = compiled_model.create_infer_request();
+        inferRequest.set_tensor(param, ov::Tensor(ov::element::f32, ov::Shape{1}));
+        inferRequests.push_back(std::move(inferRequest));
+    }
+    // run the infer requests asynchronously
+    for (auto& inferRequest : inferRequests) {
+        inferRequest.start_async();
+    }
+
+    // While the infer requests are waiting on the cv, call release_memory.
+    // We expect that the method will throw an exception when it is called while infer requests are running.
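+    // Note: release_memory() takes each GraphGuard mutex with std::try_to_lock, and every
+    // request above still holds its graph lock while it is parked on the cv inside evaluate,
+    // so the acquisition fails and an exception is expected instead of freeing in-use memory.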
+    EXPECT_THROW(compiled_model.release_memory(), ov::Exception);
+
+    {
+        // let's unlock the cv
+        std::lock_guard<std::mutex> lock(*mutex);
+        ready_flag->store(true);
+    }
+    cv->notify_all();
+
+    for (auto& inferRequest : inferRequests) {
+        inferRequest.wait();
+    }
+}
+
+TEST_F(ReleaseMemoryMultiThreadTest, smoke_noThrowInferenceIsNotRunning) {
+    // Create and infer a few infer requests concurrently
+    std::vector<ov::InferRequest> inferRequests;
+    for (size_t i = 0; i < num_streams; i++) {
+        auto inferRequest = compiled_model.create_infer_request();
+        inferRequest.set_tensor(param, ov::Tensor(ov::element::f32, ov::Shape{1}));
+        inferRequests.push_back(std::move(inferRequest));
+    }
+    // run the infer requests asynchronously
+    for (auto& inferRequest : inferRequests) {
+        inferRequest.start_async();
+    }
+
+    {
+        // let's unlock the cv
+        std::lock_guard<std::mutex> lock(*mutex);
+        ready_flag->store(true);
+    }
+    cv->notify_all();
+
+    for (auto& inferRequest : inferRequests) {
+        inferRequest.wait();
+    }
+
+    // Don't throw when the infer requests are finished
+    EXPECT_NO_THROW(compiled_model.release_memory());
+}
\ No newline at end of file

From ea804a06d076cfed27468a4258721e03878705a5 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Tue, 12 Nov 2024 12:01:33 +0100
Subject: [PATCH 03/14] Clean up includes

---
 .../ov_executable_network/concurent_release_memory.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
index 21ffd41e7f1cb2..9fbc9e01951ef0 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
@@ -2,9 +2,6 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include
-#include
-#include
 #include
 #include
 
From 0382c27dc139eac32fc74deabbb1708b2e83620d Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Tue, 12 Nov 2024 12:19:45 +0100
Subject: [PATCH 04/14] Clean up test

---
 .../concurent_release_memory.cpp | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
index 9fbc9e01951ef0..eb7c44acdf039f 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
@@ -6,22 +6,22 @@
 
 #include
 #include
-#include
 #include
 #include
 #include
+
 #include
+#include
 
 namespace ov {
-namespace intel_cpu {
-namespace cpu_unit_test {
+namespace test {
 // OpenVINO extension operation that sleeps for X us in its evaluate method
 
-class Sleep : public ov::op::Op {
+class SleepCustomOp : public ov::op::Op {
 public:
-    OPENVINO_OP("Sleep");
-    Sleep() = default;
-    Sleep(const ov::OutputVector& args,
+    OPENVINO_OP("SleepCustomOp");
+    SleepCustomOp() = default;
+    SleepCustomOp(const ov::OutputVector& args,
           size_t sleep,
           std::shared_ptr<std::mutex> mutex,
           std::shared_ptr<std::condition_variable> cv,
@@ -40,7 +40,7 @@ class Sleep : public ov::op::Op {
 
     std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
         OPENVINO_ASSERT(new_args.size() == 1, "Incorrect number of new arguments");
-        auto new_op = std::make_shared<Sleep>(new_args, m_sleep, m_mutex, m_cv, m_ready_flag);
+        auto new_op = std::make_shared<SleepCustomOp>(new_args, m_sleep, m_mutex, m_cv, m_ready_flag);
         return new_op;
     }
 
@@ -80,15 +80,10 @@ class SleepCustomOp : public ov::op::Op {
     std::shared_ptr<std::condition_variable> m_cv;
     std::shared_ptr<std::atomic<bool>> m_ready_flag;
 };
-}  // namespace cpu_unit_test
-}  // namespace intel_cpu
-}  // namespace ov
 
 class ReleaseMemoryMultiThreadTest : public ::testing::Test {
 protected:
     void SetUp() override {
-        using namespace ov::intel_cpu::cpu_unit_test;
-
         param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1});
 
         constexpr size_t sleep_time = 5;  // us
@@ -96,7 +91,7 @@ class ReleaseMemoryMultiThreadTest : public ::testing::Test {
         cv = std::make_shared<std::condition_variable>();
         ready_flag = std::make_shared<std::atomic<bool>>(false);
 
-        auto sleep = std::make_shared<Sleep>(ov::OutputVector{param}, sleep_time, mutex, cv, ready_flag);
+        auto sleep = std::make_shared<SleepCustomOp>(ov::OutputVector{param}, sleep_time, mutex, cv, ready_flag);
         ov::ResultVector results{std::make_shared<ov::op::v0::Result>(sleep)};
         ov::ParameterVector params{param};
 
@@ -115,6 +110,10 @@ class ReleaseMemoryMultiThreadTest : public ::testing::Test {
     std::shared_ptr<std::condition_variable> cv;
     std::shared_ptr<std::atomic<bool>> ready_flag;
 };
+}  // namespace test
+}  // namespace ov
+
+using namespace ov::test;
 
 TEST_F(ReleaseMemoryMultiThreadTest, smoke_throwInferenceIsRunning) {
     // Create and infer a few infer requests concurrently

From 74d6ff88ae355332f9c78b44574821345be200cb Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Tue, 12 Nov 2024 13:46:52 +0100
Subject: [PATCH 05/14] Add more synchronizations

---
 .../concurent_release_memory.cpp | 51 ++++++++++++++-----
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
index eb7c44acdf039f..4c8fb1945bb8fa 100644
--- a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
+++ b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_executable_network/concurent_release_memory.cpp
@@ -16,6 +16,9 @@
 namespace ov {
 namespace test {
 // OpenVINO extension operation that sleeps for X us in its evaluate method
+namespace {
+enum class TestSteps { INIT, ENTER_EVALUATE, RUN_EVALUATE };
+}  // namespace
 
 class SleepCustomOp : public ov::op::Op {
 public:
@@ -25,12 +28,12 @@ class SleepCustomOp : public ov::op::Op {
                   size_t sleep,
                   std::shared_ptr<std::mutex> mutex,
                   std::shared_ptr<std::condition_variable> cv,
-                  std::shared_ptr<std::atomic<bool>> ready_flag)
+                  std::shared_ptr<std::atomic<TestSteps>> test_step)
         : Op(args),
           m_sleep(sleep),
          m_mutex(mutex),
          m_cv(cv),
-          m_ready_flag(ready_flag) {
+          m_test_step(test_step) {
         constructor_validate_and_infer_types();
     }
 
@@ -40,7 +43,7 @@ class SleepCustomOp : public ov::op::Op {
 
     std::shared_ptr<ov::Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override {
         OPENVINO_ASSERT(new_args.size() == 1, "Incorrect number of new arguments");
-        auto new_op = std::make_shared<SleepCustomOp>(new_args, m_sleep, m_mutex, m_cv, m_ready_flag);
+        auto new_op = std::make_shared<SleepCustomOp>(new_args, m_sleep, m_mutex, m_cv, m_test_step);
         return new_op;
     }
 
@@ -53,11 +56,17 @@ class SleepCustomOp : public ov::op::Op {
     }
 
     bool evaluate(ov::TensorVector& outputs, const ov::TensorVector& inputs) const override {
+        // signal entering the evaluate method
+        {
+            std::lock_guard<std::mutex> lock(*m_mutex);
+            m_test_step->store(TestSteps::ENTER_EVALUATE);
+        }
+        m_cv->notify_all();
         {
             // this is required to start all the evaluate calls at the same time
             std::unique_lock<std::mutex> lock(*m_mutex);
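+            // park here until the test body advances the shared step to RUN_EVALUATE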
             m_cv->wait(lock, [&] {
-                return m_ready_flag->load();
+                return m_test_step->load() == TestSteps::RUN_EVALUATE;
             });
         }
         std::this_thread::sleep_for(std::chrono::microseconds(m_sleep));
         return true;
@@ -78,7 +87,7 @@ class SleepCustomOp : public ov::op::Op {
     size_t m_sleep;  // sleep time in us
     std::shared_ptr<std::mutex> m_mutex;
     std::shared_ptr<std::condition_variable> m_cv;
-    std::shared_ptr<std::atomic<bool>> m_ready_flag;
+    std::shared_ptr<std::atomic<TestSteps>> m_test_step;
 };
 
 class ReleaseMemoryMultiThreadTest : public ::testing::Test {
@@ -89,9 +98,9 @@ class ReleaseMemoryMultiThreadTest : public ::testing::Test {
         constexpr size_t sleep_time = 5;  // us
         mutex = std::make_shared<std::mutex>();
         cv = std::make_shared<std::condition_variable>();
-        ready_flag = std::make_shared<std::atomic<bool>>(false);
+        test_step = std::make_shared<std::atomic<TestSteps>>(TestSteps::INIT);
 
-        auto sleep = std::make_shared<SleepCustomOp>(ov::OutputVector{param}, sleep_time, mutex, cv, ready_flag);
+        auto sleep = std::make_shared<SleepCustomOp>(ov::OutputVector{param}, sleep_time, mutex, cv, test_step);
         ov::ResultVector results{std::make_shared<ov::op::v0::Result>(sleep)};
         ov::ParameterVector params{param};
 
@@ -101,14 +110,14 @@ class ReleaseMemoryMultiThreadTest : public ::testing::Test {
     }
 
 protected:
-    const size_t num_streams = 4;
+    const size_t num_streams = 1;  // use only one async stream to simplify invocation order synchronization
     ov::Core core;
     ov::CompiledModel compiled_model;
     std::shared_ptr<ov::op::v0::Parameter> param;
 
     std::shared_ptr<std::mutex> mutex;
     std::shared_ptr<std::condition_variable> cv;
-    std::shared_ptr<std::atomic<bool>> ready_flag;
+    std::shared_ptr<std::atomic<TestSteps>> test_step;
 };
 }  // namespace test
 }  // namespace ov
@@ -128,14 +137,22 @@ TEST_F(ReleaseMemoryMultiThreadTest, smoke_throwInferenceIsRunning) {
         inferRequest.start_async();
     }
 
+    // wait till the infer request enters evaluate
+    {
+        std::unique_lock<std::mutex> lock(*mutex);
+        cv->wait(lock, [&] {
+            return test_step->load() == TestSteps::ENTER_EVALUATE;
+        });
+    }
+
     // While the infer requests are waiting on the cv, call release_memory.
     // We expect that the method will throw an exception when it is called while infer requests are running.
     EXPECT_THROW(compiled_model.release_memory(), ov::Exception);
 
+    // let's unlock the cv
     {
-        // let's unlock the cv
         std::lock_guard<std::mutex> lock(*mutex);
-        ready_flag->store(true);
+        test_step->store(TestSteps::RUN_EVALUATE);
     }
     cv->notify_all();
 
@@ -157,10 +174,18 @@ TEST_F(ReleaseMemoryMultiThreadTest, smoke_noThrowInferenceIsNotRunning) {
         inferRequest.start_async();
     }
 
+    // wait till the infer request enters evaluate
+    {
+        std::unique_lock<std::mutex> lock(*mutex);
+        cv->wait(lock, [&] {
+            return test_step->load() == TestSteps::ENTER_EVALUATE;
+        });
+    }
+
+    // let's unlock the cv
     {
-        // let's unlock the cv
         std::lock_guard<std::mutex> lock(*mutex);
-        ready_flag->store(true);
+        test_step->store(TestSteps::RUN_EVALUATE);
     }
     cv->notify_all();
 
From 146bceabe09daa32dbb4808f96d04fc6e44fc44a Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Tue, 12 Nov 2024 16:46:40 +0100
Subject: [PATCH 06/14] Proper name for the CompiledModel holder

---
 src/plugins/intel_cpu/src/compiled_model.h  | 16 ++++++++--------
 src/plugins/intel_cpu/src/infer_request.cpp |  2 +-
 src/plugins/intel_cpu/src/infer_request.h   |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
index 814b1da0ec3d65..723da16791941b 100644
--- a/src/plugins/intel_cpu/src/compiled_model.h
+++ b/src/plugins/intel_cpu/src/compiled_model.h
@@ -66,7 +66,7 @@ class CompiledModel : public ov::ICompiledModel {
 
 private:
     std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
-    friend class CompiledModelHandler;
+    friend class CompiledModelHolder;
 
     const std::shared_ptr<ov::Model> m_model;
     const std::shared_ptr<const ov::IPlugin> m_plugin;
@@ -102,9 +102,9 @@ class CompiledModel : public ov::ICompiledModel {
 
 // This class provides safe access to the internal CompiledModel structures and helps to decouple SyncInferRequest and
 // the CompiledModel internal structures
-class CompiledModelHandler {
+class CompiledModelHolder {
 public:
-    CompiledModelHandler(std::shared_ptr<const CompiledModel> compiled_model)
+    CompiledModelHolder(std::shared_ptr<const CompiledModel> compiled_model)
         : m_compiled_model(std::move(compiled_model)) {
         OPENVINO_ASSERT(!m_compiled_model->m_graphs.empty(),
                         "No graph was found in the compiled model: ",
@@ -114,17 +114,17 @@ class CompiledModelHolder {
         m_id = (m_compiled_model->m_numRequests)++;
     }
 
-    ~CompiledModelHandler() {
+    ~CompiledModelHolder() {
         if (m_compiled_model) {
             --(m_compiled_model->m_numRequests);
         }
     }
 
-    CompiledModelHandler(const CompiledModelHandler&) = delete;
-    CompiledModelHandler& operator=(const CompiledModelHandler&) = delete;
+    CompiledModelHolder(const CompiledModelHolder&) = delete;
+    CompiledModelHolder& operator=(const CompiledModelHolder&) = delete;
 
-    CompiledModelHandler(CompiledModelHandler&&) = default;
-    CompiledModelHandler& operator=(CompiledModelHandler&&) = default;
+    CompiledModelHolder(CompiledModelHolder&&) = default;
+    CompiledModelHolder& operator=(CompiledModelHolder&&) = default;
 
     const Graph& graph() const {
         return *m_graph;
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index 864f1553d74c74..5319fde26eadd5 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -23,7 +23,7 @@ using OvString = ov::element_type_traits<ov::element::string>::value_type;
 namespace ov {
 namespace intel_cpu {
 
-SyncInferRequest::SyncInferRequest(CompiledModelHandler compiled_model)
+SyncInferRequest::SyncInferRequest(CompiledModelHolder compiled_model)
     : ov::ISyncInferRequest(compiled_model.compiled_model()),
       m_compiled_model(std::move(compiled_model)) {
     const auto& inputs = get_inputs();
diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h
index 77ceee33bcf39f..a3fa965cfef7d3 100644
--- a/src/plugins/intel_cpu/src/infer_request.h
+++ b/src/plugins/intel_cpu/src/infer_request.h
@@ -17,7 +17,7 @@ class AsyncInferRequest;
 
 class SyncInferRequest : public ov::ISyncInferRequest {
 public:
-    SyncInferRequest(CompiledModelHandler compiled_model);
+    SyncInferRequest(CompiledModelHolder compiled_model);
 
     void infer() override;
 
@@ -113,7 +113,7 @@ class SyncInferRequest : public ov::ISyncInferRequest {
     openvino::itt::handle_t m_profiling_task;
     std::vector<MemStatePtr> m_memory_states;
     AsyncInferRequest* m_asyncRequest = nullptr;
-    CompiledModelHandler m_compiled_model;
+    CompiledModelHolder m_compiled_model;
 
     std::unordered_map<std::size_t, ov::Output<const ov::Node>> m_input_ports_map;
     std::unordered_map<std::size_t, ov::Output<const ov::Node>> m_output_ports_map;

From e7ca5abd0070f7a28aea2aa7565702c8164064ee Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Wed, 4 Dec 2024 16:42:09 +0100
Subject: [PATCH 07/14] Apply review comments

---
 src/plugins/intel_cpu/src/compiled_model.cpp | 3 ++-
 src/plugins/intel_cpu/src/compiled_model.h   | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_cpu/src/compiled_model.cpp b/src/plugins/intel_cpu/src/compiled_model.cpp
index af658a49e7acd2..3d7fcb1c8952be 100644
--- a/src/plugins/intel_cpu/src/compiled_model.cpp
+++ b/src/plugins/intel_cpu/src/compiled_model.cpp
@@ -346,7 +346,8 @@ void CompiledModel::release_memory() {
         // try to lock mutex, since it may be already locked (e.g. by an infer request)
         std::unique_lock<std::mutex> lock(graph._mutex, std::try_to_lock);
         OPENVINO_ASSERT(lock.owns_lock(),
-                        "Attempt to call release_memory() on a graph locked by another thread");
+                        "Attempt to call release_memory() on a compiled model in a busy state. Please ensure that all "
+                        "infer requests are completed before releasing memory.");
         auto ctx = graph.getGraphContext();
         ctx->getNetworkMemoryControl()->releaseMemory();
     }
 }
diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
index 723da16791941b..be20179d181ce7 100644
--- a/src/plugins/intel_cpu/src/compiled_model.h
+++ b/src/plugins/intel_cpu/src/compiled_model.h
@@ -110,7 +110,6 @@ class CompiledModelHolder {
                         "No graph was found in the compiled model: ",
                         m_compiled_model->name());
         m_graph = &(m_compiled_model->get_graph()._graph);
-        OPENVINO_ASSERT(m_graph, "Graph ptr null check failed");
         m_id = (m_compiled_model->m_numRequests)++;
     }
 
From 46cfca7f718c290a7bdffd2ccdb41830c12490f3 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Fri, 13 Dec 2024 16:45:54 +0100
Subject: [PATCH 08/14] Fix code formatting

---
 src/plugins/intel_cpu/src/compiled_model.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_cpu/src/compiled_model.h b/src/plugins/intel_cpu/src/compiled_model.h
index be20179d181ce7..f7d2903b0526cf 100644
--- a/src/plugins/intel_cpu/src/compiled_model.h
+++ b/src/plugins/intel_cpu/src/compiled_model.h
@@ -154,5 +154,5 @@ class CompiledModelHolder {
     int m_id;
 };
 
-} // namespace intel_cpu
-} // namespace ov
+}  // namespace intel_cpu
+}  // namespace ov

From bdebeecaee04a78a0709f13e1a0b20092a4371c6 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Fri, 13 Dec 2024 18:10:27 +0100
Subject: [PATCH 09/14] Remove direct access to the Input and Output nodes
 maps

---
 src/plugins/intel_cpu/src/graph.h             | 30 +++++---
 src/plugins/intel_cpu/src/infer_request.cpp   | 69 +++++++++----------
 src/plugins/intel_cpu/src/nodes/composite.cpp |  4 +-
 src/plugins/intel_cpu/src/nodes/if.cpp        | 24 +++----
 src/plugins/intel_cpu/src/nodes/lora.cpp      |  4 +-
 .../intel_cpu/src/nodes/tensoriterator.cpp    | 12 ++--
 6 files changed, 72 insertions(+), 71 deletions(-)

diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h
index 3912031605d84f..6d931124f1255c 100644
--- a/src/plugins/intel_cpu/src/graph.h
+++ b/src/plugins/intel_cpu/src/graph.h
@@ -87,28 +87,42 @@ class Graph {
         return _name;
     }
 
-    const std::map<std::size_t, NodePtr>& GetInputNodesMap() const {
-        return inputNodesMap;
+    NodePtr getInputNodeByIndex(std::size_t index) {
+        auto input = inputNodesMap.find(index);
+        if (input == inputNodesMap.end())
+            return nullptr;
+        return input->second;
     }
 
-    const std::map<std::size_t, NodePtr>& GetOutputNodesMap() const {
-        return outputNodesMap;
+    NodePtr getOutputNodeByIndex(std::size_t index) {
+        auto output = outputNodesMap.find(index);
+        if (output == outputNodesMap.end())
+            return nullptr;
+        return output->second;
     }
 
-    NodeConstPtr getInputNodeByIndex(const std::size_t& index) const {
+    NodeConstPtr getInputNodeByIndex(std::size_t index) const {
         auto input = inputNodesMap.find(index);
         if (input == inputNodesMap.end())
-            OPENVINO_THROW("CPU execution graph doesn't contain input node with index: ", index);
+            return nullptr;
         return input->second;
     }
 
-    NodeConstPtr getOutputNodeByIndex(const std::size_t& index) const {
+    NodeConstPtr getOutputNodeByIndex(std::size_t index) const {
         auto output = outputNodesMap.find(index);
         if (output == outputNodesMap.end())
-            OPENVINO_THROW("CPU execution graph doesn't contain output node with index: ", index);
+            return nullptr;
         return output->second;
     }
 
+    size_t inputsNumber() const {
+        return inputNodesMap.size();
+    }
+
+    size_t outputsNumber() const {
+        return outputNodesMap.size();
+    }
+
     dnnl::engine getEngine() const {
         return m_context->getEngine();
     }
 
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index 25bca8efbe2c8f..e063a57a488db4 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -69,14 +69,12 @@ void SyncInferRequest::assign_states(Graph& graph) {
 }
 
 void SyncInferRequest::redefine_memory_for_input_nodes(Graph& graph) {
-    const auto cpuInputNodes = graph.GetInputNodesMap();
     for (const auto& input_port : m_input_ports_map) {
-        const auto inputNode = cpuInputNodes.find(input_port.first);
-        if (inputNode == cpuInputNodes.end())
-            OPENVINO_THROW("CPU execution graph doesn't contain input node with index: ", input_port.first);
-        if (inputNode->second->isDynamicNode()) {
+        auto inputNode = graph.getInputNodeByIndex(input_port.first);
+        OPENVINO_ASSERT(inputNode, "CPU execution graph doesn't contain input node with index: ", input_port.first);;
+        if (inputNode->isDynamicNode()) {
             auto tensor = get_tensor(input_port.second);
-            inputNode->second->redefineOutputMemory({tensor->get_shape()});
+            inputNode->redefineOutputMemory({tensor->get_shape()});
         }
     }
 }
@@ -165,9 +163,6 @@ static inline void change_edge_ptr(const EdgePtr& edge, ov::SoPtr<ITensor>& tensor) {
 }
 
 void SyncInferRequest::change_default_ptr(Graph& graph) {
-    const auto& inputNodesMap = graph.GetInputNodesMap();
-    const auto& outputNodesMap = graph.GetOutputNodesMap();
-
     std::unordered_set<const void*> inputPtrs;
     std::function<void(const EdgePtr&, ov::SoPtr<ITensor>& tensor)> changeInpPtr;
     if (graph.IsDynamic()) {
@@ -182,9 +177,8 @@ void SyncInferRequest::change_default_ptr(Graph& graph) {
     }
 
     for (auto& it : m_input_external_ptr) {
-        auto input = inputNodesMap.find(it.first);
-        OPENVINO_ASSERT(inputNodesMap.end() != input, "Cannot find input tensor with index: ", it.first);
-        NodePtr inputNodePtr = input->second;
+        auto inputNodePtr = graph.getInputNodeByIndex(it.first);
+        OPENVINO_ASSERT(inputNodePtr, "Cannot find input tensor with index: ", it.first);
         if (inputNodePtr->getDstDataAtPort(0) == static_cast<void*>(it.second->data()))
             continue;
         auto& childEdges = inputNodePtr->getChildEdges();
@@ -230,9 +224,9 @@ void SyncInferRequest::change_default_ptr(Graph& graph) {
     }
 
     for (auto& it : m_output_external_ptr) {
-        auto output = outputNodesMap.find(it.first);
-        OPENVINO_ASSERT(outputNodesMap.end() != output, "Cannot find output tensor with index: ", it.first);
-        auto parentEdge = output->second->getParentEdgeAt(0);
+        auto output = graph.getOutputNodeByIndex(it.first);
+        OPENVINO_ASSERT(output, "Cannot find output tensor with index: ", it.first);
+        auto parentEdge = output->getParentEdgeAt(0);
         void* const outputRawPtr = parentEdge->getMemory().getData();
         if (outputRawPtr == static_cast<void*>(it.second->data()))
             continue;
@@ -273,21 +267,21 @@ void SyncInferRequest::change_default_ptr(Graph& graph) {
     if (graph.IsDynamic()) {
         const auto& outMemBlocksMap = graph.getOutputNodesMemBlocksMap();
         for (auto&& item : outMemBlocksMap) {
-            const auto& name = item.first;
+            const auto index = item.first;
 
             // share intel_cpu::Tensor to Graph by injecting to corresponding ProxyMemoryBlock instance.
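             // (The proxy acts as an indirection point: redirecting it to the control block's
             // memory lets the graph write outputs straight into the user-visible tensor, while
             // reset() below switches back to graph-internal memory when sharing is impossible.)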
             auto outputMemBlock = item.second;
-            OPENVINO_ASSERT(outputMemBlock, "proxy mem block for output ", name, " is empty.");
+            OPENVINO_ASSERT(outputMemBlock, "proxy mem block for output ", index, " is empty.");
 
-            auto controlBlockItr = m_outputControlBlocks.find(name);
+            auto controlBlockItr = m_outputControlBlocks.find(index);
 
             if (controlBlockItr != m_outputControlBlocks.end()) {
-                auto output = outputNodesMap.find(name);
-                OPENVINO_ASSERT(outputNodesMap.end() != output,
-                                "Node with name: ",
-                                name,
+                auto output = graph.getOutputNodeByIndex(index);
+                OPENVINO_ASSERT(output,
+                                "Output with index: ",
+                                index,
                                 " is absent in the outputNodesMap");
-                auto parentEdge = output->second->getParentEdgeAt(0);
+                auto parentEdge = output->getParentEdgeAt(0);
                 // avoid cyclic memory use
                 auto&& controlBlock = controlBlockItr->second;
 
@@ -306,7 +300,7 @@ void SyncInferRequest::change_default_ptr(Graph& graph) {
                                           &graph,
                                           " infer request ",
                                           this);
-                DEBUG_LOG(name, ", tensor ", controlBlock.tensor());
+                DEBUG_LOG(index, ", tensor ", controlBlock.tensor());
             } else {
                 outputMemBlock->reset();  // switch to the internal memory since memory sharing is no longer possible
             }
@@ -407,7 +401,10 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, const ov::SoPtr<ITensor>& in_tensor) {
 
         auto&& graph = m_compiled_model.graph();
 
-        MemoryDescPtr actualDesc = graph.getInputNodeByIndex(input_index)->getBaseMemDescAtOutputPort(0);
+        auto inputNode = graph.getInputNodeByIndex(input_index);
+        OPENVINO_ASSERT(inputNode, "CPU execution graph doesn't contain input node with index: ", input_index);
+
+        MemoryDescPtr actualDesc = inputNode->getBaseMemDescAtOutputPort(0);
         if (!actualDesc->isDefined()) {
             // we must define desc for dynamic case
             // otherwise we get an incorrect check on shape compatibility inside isCompatible
@@ -456,7 +453,9 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& in_port, const ov::SoPtr<ITensor>& in_tensor) {
 
         auto&& graph = m_compiled_model.graph();
 
-        const auto& desc = graph.getOutputNodeByIndex(output_index)->getParentEdgeAt(0)->getMemory().getDesc();
+        auto outputNode = graph.getOutputNodeByIndex(output_index);
+        OPENVINO_ASSERT(outputNode, "CPU execution graph doesn't contain output node with index: ", output_index);
+        const auto& desc = outputNode->getParentEdgeAt(0)->getMemory().getDesc();
         if (!isDynamic && mem_desc_ptr->isCompatible(desc)) {
             m_output_external_ptr[output_index] = tensor;
         } else if (m_output_external_ptr.find(output_index) != m_output_external_ptr.end()) {
@@ -485,7 +484,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
 
     ov::SoPtr<ITensor> tensor;
     if (type == ov::ISyncInferRequest::FoundPort::Type::INPUT) {
-        OPENVINO_ASSERT(graph.GetInputNodesMap().find(port_index) != graph.GetInputNodesMap().end(),
+        OPENVINO_ASSERT(graph.getInputNodeByIndex(port_index) == nullptr,
                         "Tensor with index: ",
                         port_index,
                         " exists in CPU plugin graph, but absents in model inputs");
@@ -509,8 +508,9 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
 
         if (!isDynamic) {
             auto mem_desc_ptr = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
-            if (mem_desc_ptr->isCompatible(
-                    graph.getInputNodeByIndex(port_index)->getChildEdgeAt(0)->getMemory().getDesc())) {
+            auto inputNode = graph.getInputNodeByIndex(port_index);
+            OPENVINO_ASSERT(inputNode, "CPU execution graph doesn't contain input node with index: ", port_index);
+            if (mem_desc_ptr->isCompatible(inputNode->getChildEdgeAt(0)->getMemory().getDesc())) {
                 m_input_external_ptr[port_index] = tensor;
             }
         }
@@ -518,16 +518,15 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
     }
 
     if (type == ov::ISyncInferRequest::FoundPort::Type::OUTPUT) {
-        const auto& outMap = graph.GetOutputNodesMap();
-        auto output = outMap.find(port_index);
-        OPENVINO_ASSERT(output != outMap.end(),
+        auto output = graph.getOutputNodeByIndex(port_index);
+        OPENVINO_ASSERT(output,
                         "Tensor with index: ",
                         port_index,
                         " exists in CPU plugin graph, but absents in model outputs");
         if (m_outputs.find(port_index) == m_outputs.end()) {
             const auto& port = m_output_ports_map[port_index];
             const auto& port_shape = port.get_partial_shape();
-            const auto& graph_shape = output->second->getInputShapeAtPort(0);
+            const auto& graph_shape = output->getInputShapeAtPort(0);
 
             // WA, due to the transformations and constant folding, shape inference of the resulting model may
             // have static shapes, while they are dynamic in the initial representation
@@ -557,7 +556,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
                 tensor = std::make_shared<Tensor>(memory);
             } else {
                 const auto graph_prec =
-                    output->second->getParentEdgeAt(0)->getMemory().getDesc().getPrecision();
+                    output->getParentEdgeAt(0)->getMemory().getDesc().getPrecision();
                 OutputControlBlock control_block{model_prec, Shape{shape}};
 
                 DEBUG_LOG(port_index,
@@ -581,7 +580,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyncInferRequest::FoundPort::Type& type) {
             m_outputs[port_index] = tensor;
             if (!port_shape.is_dynamic() && !m_output_external_ptr.count(port_index)) {
                 auto desc = MemoryDescUtils::generateCpuBlockedMemoryDesc(tensor);
-                if (desc->isCompatible(output->second->getParentEdgeAt(0)->getMemory().getDesc())) {
+                if (desc->isCompatible(output->getParentEdgeAt(0)->getMemory().getDesc())) {
                     m_output_external_ptr[port_index] = tensor;
                 }
             }
diff --git a/src/plugins/intel_cpu/src/nodes/composite.cpp b/src/plugins/intel_cpu/src/nodes/composite.cpp
index 616d3df6950e9a..0d8b33d90fbd9c 100644
--- a/src/plugins/intel_cpu/src/nodes/composite.cpp
+++ b/src/plugins/intel_cpu/src/nodes/composite.cpp
@@ -75,7 +75,7 @@ void Composite::selectOptimalPrimitiveDescriptor() {
 
 // @todo add ascii diagram for memory mapping / reuse
 void Composite::createPrimitive() {
-    OPENVINO_ASSERT(getOriginalInputsNumber() == m_graph.GetInputNodesMap().size(),
+    OPENVINO_ASSERT(getOriginalInputsNumber() == m_graph.inputsNumber(),
                     "Number of node inputs must be equal to the number of inner graph's inputs");
 
     std::vector<MemoryPtr> inputMemory;
@@ -83,7 +83,7 @@ void Composite::createPrimitive() {
         inputMemory.emplace_back(getSrcMemoryAtPort(i));
     }
 
-    OPENVINO_ASSERT(getOriginalOutputsNumber() == m_graph.GetOutputNodesMap().size(),
+    OPENVINO_ASSERT(getOriginalOutputsNumber() == m_graph.outputsNumber(),
                     "Number of node outputs must be equal to the number of inner graph's outputs");
 
     std::vector<MemoryPtr> outputMemory;
diff --git a/src/plugins/intel_cpu/src/nodes/if.cpp b/src/plugins/intel_cpu/src/nodes/if.cpp
index 8de1cf14920d74..88e2c84970d874 100644
--- a/src/plugins/intel_cpu/src/nodes/if.cpp
+++ b/src/plugins/intel_cpu/src/nodes/if.cpp
@@ -86,11 +86,9 @@ void If::getSupportedDescriptors() {
     subGraphThen.CreateGraph(thenBody, context);
     subGraphElse.CreateGraph(elseBody, context);
 
-    const auto& inMapThen = subGraphThen.GetInputNodesMap();
     for (const auto& param : ifOp->get_then_body()->get_parameters()) {
-        auto inNode = inMapThen.find(ifOp->get_then_body()->get_parameter_index(param));
-        if (inNode != inMapThen.end()) {
-            inputMemThen.push_back(getToMemories(inNode->second.get(), 0));
+        if (auto inNode = subGraphThen.getInputNodeByIndex(ifOp->get_then_body()->get_parameter_index(param))) {
+            inputMemThen.push_back(getToMemories(inNode.get(), 0));
         } else {
             OPENVINO_THROW("Then body of node If with name ",
                            getName(),
@@ -99,11 +97,9 @@ void If::getSupportedDescriptors() {
         }
     }
 
-    const auto& inMapElse = subGraphElse.GetInputNodesMap();
     for (const auto& param : ifOp->get_else_body()->get_parameters()) {
-        auto inNode = inMapElse.find(ifOp->get_else_body()->get_parameter_index(param));
-        if (inNode != inMapElse.end()) {
-            inputMemElse.push_back(getToMemories(inNode->second.get(), 0));
+        if (auto inNode = subGraphElse.getInputNodeByIndex(ifOp->get_else_body()->get_parameter_index(param))) {
+            inputMemElse.push_back(getToMemories(inNode.get(), 0));
         } else {
             OPENVINO_THROW("Else body of node If with name ",
                            getName(),
@@ -112,11 +108,9 @@ void If::getSupportedDescriptors() {
         }
     }
 
-    const auto& outMapThen = subGraphThen.GetOutputNodesMap();
     for (const auto& out : ifOp->get_then_body()->get_results()) {
-        auto outNode = outMapThen.find(ifOp->get_then_body()->get_result_index(out));
-        if (outNode != outMapThen.end()) {
-            auto outMem = outNode->second->getSrcMemoryAtPort(0);
+        if (auto outNode = subGraphThen.getOutputNodeByIndex(ifOp->get_then_body()->get_result_index(out))) {
+            auto outMem = outNode->getSrcMemoryAtPort(0);
             outputMemThen.push_back(outMem);
         } else {
             OPENVINO_THROW("Then body of node If with name ",
@@ -126,11 +120,9 @@ void If::getSupportedDescriptors() {
         }
     }
 
-    const auto& outMapElse = subGraphElse.GetOutputNodesMap();
     for (const auto& out : ifOp->get_else_body()->get_results()) {
-        auto outNode = outMapElse.find(ifOp->get_else_body()->get_result_index(out));
-        if (outNode != outMapElse.end()) {
-            auto outMem = outNode->second->getSrcMemoryAtPort(0);
+        if (auto outNode = subGraphElse.getOutputNodeByIndex(ifOp->get_else_body()->get_result_index(out))) {
+            auto outMem = outNode->getSrcMemoryAtPort(0);
             outputMemElse.push_back(outMem);
         } else {
             OPENVINO_THROW("Else body of node If with name ",
diff --git a/src/plugins/intel_cpu/src/nodes/lora.cpp b/src/plugins/intel_cpu/src/nodes/lora.cpp
index 0dcb2e9ef2b9e5..c59a3a7fa37578 100644
--- a/src/plugins/intel_cpu/src/nodes/lora.cpp
+++ b/src/plugins/intel_cpu/src/nodes/lora.cpp
@@ -88,7 +88,7 @@ void LoRA::selectOptimalPrimitiveDescriptor() {
 
 // @todo add ascii diagram for memory mapping / reuse
 void LoRA::createPrimitive() {
-    CPU_NODE_ASSERT(getOriginalInputsNumber() == m_graph.GetInputNodesMap().size(),
+    CPU_NODE_ASSERT(getOriginalInputsNumber() == m_graph.inputsNumber(),
                     "Number of node inputs must be equal to the number of inner graph's inputs");
 
     std::vector<MemoryPtr> inputMemory;
@@ -99,7 +99,7 @@ void LoRA::createPrimitive() {
         inputMemory.emplace_back(std::move(mem));
     }
 
-    CPU_NODE_ASSERT(getOriginalOutputsNumber() == m_graph.GetOutputNodesMap().size(),
+    CPU_NODE_ASSERT(getOriginalOutputsNumber() == m_graph.outputsNumber(),
                     "Number of node outputs must be equal to the number of inner graph's outputs");
 
     std::vector<MemoryPtr> outputMemory{getDstMemoryAtPort(0)};
diff --git a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
index e2bd8ed6f25b5d..99281189b8196b 100644
--- a/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
+++ b/src/plugins/intel_cpu/src/nodes/tensoriterator.cpp
@@ -440,19 +440,15 @@ void TensorIterator::getSupportedDescriptors() {
     const std::shared_ptr<const ov::Model> body = tiOp->get_function();
     sub_graph.CreateGraph(body, context);
 
-    const auto& inMap = sub_graph.GetInputNodesMap();
     for (const auto& param : tiOp->get_function()->get_parameters()) {
-        auto inNode = inMap.find(tiOp->get_function()->get_parameter_index(param));
-        if (inNode != inMap.end()) {
-            input_mems.push_back(getToMemories(inNode->second.get(), 0));
+        if (auto inNode = sub_graph.getInputNodeByIndex(tiOp->get_function()->get_parameter_index(param))) {
+            input_mems.push_back(getToMemories(inNode.get(), 0));
         }
     }
 
-    const auto& outMap = sub_graph.GetOutputNodesMap();
     for (const auto& out : tiOp->get_function()->get_results()) {
-        auto outNode = outMap.find(tiOp->get_function()->get_result_index(out));
-        if (outNode != outMap.end()) {
-            auto outMem = outNode->second->getSrcMemoryAtPort(0);
+        if (auto outNode = sub_graph.getOutputNodeByIndex(tiOp->get_function()->get_result_index(out))) {
+            auto outMem = outNode->getSrcMemoryAtPort(0);
             output_mem.push_back(outMem);
         }
     }

From cbccf892fb68769529a42a6778f42d4606b91561 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Fri, 13 Dec 2024 19:05:59 +0100
Subject: [PATCH 10/14] Remove direct access to the memory nodes

---
 src/plugins/intel_cpu/src/graph.cpp         | 19 +++++++++++++++++--
 src/plugins/intel_cpu/src/graph.h           |  6 ++++--
 src/plugins/intel_cpu/src/infer_request.cpp | 19 ++-----------------
 src/plugins/intel_cpu/src/infer_request.h   |  1 -
 src/plugins/intel_cpu/src/nodes/memory.hpp  |  2 +-
 5 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp
index 7fb5f512227cf9..fd6721ce4c83ad 100644
--- a/src/plugins/intel_cpu/src/graph.cpp
+++ b/src/plugins/intel_cpu/src/graph.cpp
@@ -1940,8 +1940,23 @@ std::shared_ptr<ov::Model> Graph::dump() const {
     return dump_graph_as_ie_ngraph_net(*this);
 }
 
-const std::unordered_map<std::string, node::MemoryStateNode*>& Graph::getInternalStateNodes() const {
-    return m_context->getMemoryStatesRegister()->getMemoryStates();
+std::vector<MemStatePtr> Graph::memoryStates() const {
+    std::vector<MemStatePtr> resultVector;
+
+    for (auto&& item : m_context->getMemoryStatesRegister()->getMemoryStates()) {
+        resultVector.emplace_back(item.second->makeState());
+    }
+    return resultVector;
+}
+
+void Graph::assignStates(const std::vector<MemStatePtr>& states) {
+    auto&& inputStateNodes = m_context->getMemoryStatesRegister()->getMemoryStates();
+    for (const auto& state : states) {
+        auto itr = inputStateNodes.find(state->get_name());
+        if (itr != inputStateNodes.end()) {
+            itr->second->assignState(state);
+        }
+    }
 }
 
 }  // namespace intel_cpu
diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h
index 6d931124f1255c..e67a59c101d763 100644
--- a/src/plugins/intel_cpu/src/graph.h
+++ b/src/plugins/intel_cpu/src/graph.h
@@ -14,6 +14,7 @@
 #include "edge.h"
 #include "graph_context.h"
 #include "memory_control.hpp"
+#include "memory_state.h"
 #include "node.h"
 #include "nodes/input.h"
 #include "openvino/core/node_vector.hpp"
@@ -131,6 +132,9 @@ class Graph {
         return m_context;
     }
 
+    std::vector<MemStatePtr> memoryStates() const;
+    void assignStates(const std::vector<MemStatePtr>& state);
+
     void GetPerfData(std::vector<ov::ProfilingInfo>& perfMap) const;
 
     void CreateEdge(const NodePtr& parent, const NodePtr& child, int parentPort = 0, int childPort = 0);
@@ -216,8 +220,6 @@ class Graph {
         return graphHasDynamicInput;
     }
 
-    const std::unordered_map<std::string, node::MemoryStateNode*>& getInternalStateNodes() const;
-
    /**
     * Init graph using \p model, \p context, \p inputConfigs and \p outputConfigs
     */
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index e063a57a488db4..378e4ff10ea077 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -8,7 +8,6 @@
 #include "dnnl_extension_utils.h"
 #include "itt.h"
 #include "memory_desc/cpu_memory_desc_utils.h"
-#include "memory_state.h"
 #include "nodes/common/cpu_convert.h"
 #include "nodes/memory_state_base.h"
 #include "openvino/core/shape.hpp"
@@ -51,21 +50,7 @@ void SyncInferRequest::create_infer_request() {
     }
 
     // create states according to the list of the MemoryStateNodes
-    auto&& graph = m_compiled_model.graph();
-    for (auto&& node : graph.getInternalStateNodes()) {
-        m_memory_states.emplace_back(node.second->makeState());
-    }
-}
-
-// state -> storage
-void SyncInferRequest::assign_states(Graph& graph) {
-    auto&& graph_internal_state_nodes = graph.getInternalStateNodes();
-    for (const auto& state : m_memory_states) {
-        auto itr = graph_internal_state_nodes.find(state->get_name());
-        if (itr != graph_internal_state_nodes.end()) {
-            itr->second->assignState(state);
-        }
-    }
+    m_memory_states = m_compiled_model.graph().memoryStates();
 }
 
 void SyncInferRequest::redefine_memory_for_input_nodes(Graph& graph) {
@@ -119,7 +104,7 @@ void SyncInferRequest::infer() {
 
     // state -> node
     if (!m_memory_states.empty()) {
-        assign_states(graph);
+        graph.assignStates(m_memory_states);
     }
 
     push_input_data(graph);
diff --git a/src/plugins/intel_cpu/src/infer_request.h b/src/plugins/intel_cpu/src/infer_request.h
index 0361db70a6c354..daae553dff2ea4 100644
--- a/src/plugins/intel_cpu/src/infer_request.h
+++ b/src/plugins/intel_cpu/src/infer_request.h
@@ -98,7 +98,6 @@ class SyncInferRequest : public ov::ISyncInferRequest {
 
     void push_input_data(Graph& graph);
     void redefine_memory_for_input_nodes(Graph& graph);
-    void assign_states(Graph& graph);
     void update_external_tensor_ptrs();
     void change_default_ptr(Graph& graph);
 
diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp
index 1571e8fffa2231..9c0c9664ce8a27 100644
--- a/src/plugins/intel_cpu/src/nodes/memory.hpp
+++ b/src/plugins/intel_cpu/src/nodes/memory.hpp
@@ -29,7 +29,7 @@ class MemoryStatesRegister {
     void registerInput(MemoryInputBase* node);
     void remove(MemoryNode* node);
 
-    const InputNodesMap& getMemoryStates() const {
+    const InputNodesMap& getMemoryStates() {
         return memory_inputs;
     }

From 4080c1a56b870aa6f86a5a57972ceec360e0e8db Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Fri, 13 Dec 2024 19:22:58 +0100
Subject: [PATCH 11/14] Remove the const qualifier from the method that allows
 changing the graph state

---
 src/plugins/intel_cpu/src/graph.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h
index e67a59c101d763..66bdd3ad01b2ba 100644
--- a/src/plugins/intel_cpu/src/graph.h
+++ b/src/plugins/intel_cpu/src/graph.h
@@ -234,7 +234,7 @@ class Graph {
     void Activate(const std::vector<MemoryPtr>& externalInputMemory = {},
                   const std::vector<MemoryPtr>& externalOutputMemory = {});
 
-    const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() const {
+    const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() {
         return outputNodesMemBlocksMap;
     }

From 83a643d13c769fe53cda080bf2bdc1b7bebea206 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Fri, 13 Dec 2024 19:36:49 +0100
Subject: [PATCH 12/14] Code style

---
 src/plugins/intel_cpu/src/graph.h           |  4 ++--
 src/plugins/intel_cpu/src/infer_request.cpp | 10 +++-------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h
index 66bdd3ad01b2ba..5d5d5b335a36f2 100644
--- a/src/plugins/intel_cpu/src/graph.h
+++ b/src/plugins/intel_cpu/src/graph.h
@@ -91,7 +91,7 @@ class Graph {
     NodePtr getInputNodeByIndex(std::size_t index) {
         auto input = inputNodesMap.find(index);
         if (input == inputNodesMap.end())
-            return nullptr;
+            return nullptr;
         return input->second;
     }
 
@@ -119,7 +119,7 @@ class Graph {
     size_t inputsNumber() const {
         return inputNodesMap.size();
     }
-
+
     size_t outputsNumber() const {
         return outputNodesMap.size();
     }
diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index 378e4ff10ea077..b18bdba43d6a59 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -56,7 +56,7 @@ void SyncInferRequest::create_infer_request() {
 void SyncInferRequest::redefine_memory_for_input_nodes(Graph& graph) {
     for (const auto& input_port : m_input_ports_map) {
         auto inputNode = graph.getInputNodeByIndex(input_port.first);
-        OPENVINO_ASSERT(inputNode, "CPU execution graph doesn't contain output node with index: ", input_port.first);;
+        OPENVINO_ASSERT(inputNode, "CPU execution graph doesn't contain input node with index: ", input_port.first);
         if (inputNode->isDynamicNode()) {
             auto tensor = get_tensor(input_port.second);
             inputNode->redefineOutputMemory({tensor->get_shape()});
@@ -262,10 +262,7 @@ void SyncInferRequest::change_default_ptr(Graph& graph) {
 
         if (controlBlockItr != m_outputControlBlocks.end()) {
             auto output = graph.getOutputNodeByIndex(index);
-            OPENVINO_ASSERT(output,
-                            "Output with index: ",
-                            index,
-                            " is absent in the outputNodesMap");
+            OPENVINO_ASSERT(output, "Output with index: ", index, " is absent in the outputNodesMap");
             auto parentEdge = output->getParentEdgeAt(0);
             // avoid cyclic memory use
             auto&& controlBlock = controlBlockItr->second;
@@ -540,8 +537,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn
             tensor = std::make_shared<Tensor>(memory);
         } else {
-            const auto graph_prec =
-                output->getParentEdgeAt(0)->getMemory().getDesc().getPrecision();
+            const auto graph_prec = output->getParentEdgeAt(0)->getMemory().getDesc().getPrecision();
             OutputControlBlock control_block{model_prec, Shape{shape}};
 
             DEBUG_LOG(port_index,

From 5e24cde8bfa7aab3611bb071cf1f81c903e83ef7 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Mon, 16 Dec 2024 11:52:45 +0100
Subject: [PATCH 13/14] Fix input output node persistence check

---
 src/plugins/intel_cpu/src/infer_request.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index b18bdba43d6a59..505926ad4ed222 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -466,10 +466,10 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn
     ov::SoPtr<ov::ITensor> tensor;
 
     if (type == ov::ISyncInferRequest::FoundPort::Type::INPUT) {
-        OPENVINO_ASSERT(graph.getInputNodeByIndex(port_index) == nullptr,
+        OPENVINO_ASSERT(graph.getInputNodeByIndex(port_index),
                         "Tensor with index: ",
                         port_index,
-                        " exists in CPU plugin graph, but absents in model inputs");
+                        " absent in the plugin's graph inputs");
 
         const auto& port = m_input_ports_map[port_index];
         tensor = ov::ISyncInferRequest::get_tensor(port);
@@ -504,7 +504,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn
         OPENVINO_ASSERT(output,
                         "Tensor with index: ",
                         port_index,
-                        " exists in CPU plugin graph, but absents in model outputs");
+                        " absent in the plugin's graph outputs");
         if (m_outputs.find(port_index) == m_outputs.end()) {
             const auto& port = m_output_ports_map[port_index];
             const auto& port_shape = port.get_partial_shape();

From cabc589db355881828417c13505c6509d470e78b Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Mon, 16 Dec 2024 11:57:22 +0100
Subject: [PATCH 14/14] Code style

---
 src/plugins/intel_cpu/src/infer_request.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/plugins/intel_cpu/src/infer_request.cpp b/src/plugins/intel_cpu/src/infer_request.cpp
index 505926ad4ed222..44b9904bde202a 100644
--- a/src/plugins/intel_cpu/src/infer_request.cpp
+++ b/src/plugins/intel_cpu/src/infer_request.cpp
@@ -501,10 +501,7 @@ void SyncInferRequest::init_tensor(const std::size_t& port_index, const ov::ISyn
 
     if (type == ov::ISyncInferRequest::FoundPort::Type::OUTPUT) {
         auto output = graph.getOutputNodeByIndex(port_index);
-        OPENVINO_ASSERT(output,
-                        "Tensor with index: ",
-                        port_index,
-                        " absent in the plugin's graph outputs");
+        OPENVINO_ASSERT(output, "Tensor with index: ", port_index, " absent in the plugin's graph outputs");
         if (m_outputs.find(port_index) == m_outputs.end()) {
             const auto& port = m_output_ports_map[port_index];
             const auto& port_shape = port.get_partial_shape();
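
For readers of the series, a minimal sketch of the memory-state life cycle that patch 10/14 converges on. It assumes only the Graph interface visible in the hunks above (memoryStates(), assignStates(), MemStatePtr); the helper function below and its name are illustrative and are not code from the patches:

#include <vector>

#include "graph.h"         // Graph::memoryStates() / Graph::assignStates() from patch 10/14
#include "memory_state.h"  // MemStatePtr

namespace ov {
namespace intel_cpu {

// Illustrative only: mirrors what create_infer_request() and infer() do
// after this series, without touching the internal state nodes.
void run_with_states_sketch(Graph& graph, std::vector<MemStatePtr>& memory_states) {
    // Created once per request: the graph builds one state object per
    // internal memory-state node, so the caller never sees the nodes.
    if (memory_states.empty()) {
        memory_states = graph.memoryStates();  // may legitimately stay empty
    }

    // state -> node: hand the externally held state values back to the
    // graph right before execution, as infer() does in patch 10/14.
    if (!memory_states.empty()) {
        graph.assignStates(memory_states);
    }

    // ... push input data, execute the graph, pull output data ...
}

}  // namespace intel_cpu
}  // namespace ov

The design point this illustrates: the request side keeps only an opaque vector of states, while the mapping from state name to memory-state node stays private to Graph, so the state-node layout can change without touching SyncInferRequest again.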