Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU] Throw when release_memory is called during inference #27520

Merged
merged 17 commits into the base branch from the pull-request branch
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
}

std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request() const {
    // Factory for the CPU synchronous infer request bound to this compiled model.
    // Note: the active-request counter (m_numRequests) is maintained by
    // CompiledModelHolder (incremented on construction, decremented on
    // destruction), so it must NOT also be incremented here — doing so would
    // double-count requests and the counter would never return to zero.
    return std::make_shared<SyncInferRequest>(std::static_pointer_cast<const CompiledModel>(shared_from_this()));
}

Expand Down Expand Up @@ -344,8 +343,12 @@ void CompiledModel::export_model(std::ostream& modelStream) const {

void CompiledModel::release_memory() {
    // Release the network memory of every graph owned by this compiled model.
    // A graph that is currently executing an infer request holds its GraphGuard
    // mutex, so we only try-lock here: failing loudly is preferable to blocking
    // or racing with an in-flight inference.
    for (auto&& graph : m_graphs) {
        // try to lock mutex, since it may be already locked (e.g by an infer request)
        std::unique_lock<std::mutex> lock(graph._mutex, std::try_to_lock);
        OPENVINO_ASSERT(lock.owns_lock(),
                        "Attempt to call release_memory() on a compiled model in a busy state. Please ensure that all "
                        "infer requests are completed before releasing memory.");
        auto ctx = graph.getGraphContext();
        ctx->getNetworkMemoryControl()->releaseMemory();
    }
}
Expand Down
76 changes: 68 additions & 8 deletions src/plugins/intel_cpu/src/compiled_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ namespace ov {
namespace intel_cpu {

class CompiledModel : public ov::ICompiledModel {
public:
// A Graph extended with a mutex so that exclusive access to the graph can be
// enforced (e.g. an infer request may hold the lock for the duration of
// inference, and release_memory() try-locks it to detect a busy graph).
struct GraphGuard : public Graph {
std::mutex _mutex;
// RAII lock over the guard's mutex that also carries a reference to the
// guarded graph, so holders of the lock can reach the graph directly.
struct Lock : public std::unique_lock<std::mutex> {
explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
GraphGuard& _graph;
};
};

public:
typedef std::shared_ptr<CompiledModel> Ptr;

Expand Down Expand Up @@ -51,9 +60,13 @@ class CompiledModel : public ov::ICompiledModel {

void release_memory() override;

// Name of the compiled model (used e.g. in diagnostic messages).
std::string name() const {
return m_name;
}

private:
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
friend class SyncInferRequest;
friend class CompiledModelHolder;

const std::shared_ptr<ov::Model> m_model;
const std::shared_ptr<const ov::IPlugin> m_plugin;
Expand All @@ -66,13 +79,6 @@ class CompiledModel : public ov::ICompiledModel {
Config m_cfg;
mutable std::atomic_int m_numRequests = {0};
std::string m_name;
struct GraphGuard : public Graph {
std::mutex _mutex;
struct Lock : public std::unique_lock<std::mutex> {
explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
GraphGuard& _graph;
};
};

const bool m_loaded_from_cache;
// WARNING: Do not use m_graphs directly.
Expand All @@ -94,5 +100,59 @@ class CompiledModel : public ov::ICompiledModel {
bool m_has_sub_compiled_models = false;
};

// This class provides safe access to the internal CompiledModel structures and
// helps to decouple SyncInferRequest from the CompiledModel internals.
// Each live holder represents one active infer request: construction registers
// the request (incrementing m_numRequests), destruction unregisters it.
class CompiledModelHolder {
public:
    CompiledModelHolder(std::shared_ptr<const CompiledModel> compiled_model)
        : m_compiled_model(std::move(compiled_model)) {
        OPENVINO_ASSERT(!m_compiled_model->m_graphs.empty(),
                        "No graph was found in the compiled model: ",
                        m_compiled_model->name());
        // Cache a pointer to the current graph and reserve a unique request id.
        // The post-increment doubles as the active-request registration.
        m_graph = &(m_compiled_model->get_graph()._graph);
        m_id = (m_compiled_model->m_numRequests)++;
    }

    ~CompiledModelHolder() {
        // A moved-from holder has a null compiled model and must not decrement.
        if (m_compiled_model) {
            --(m_compiled_model->m_numRequests);
        }
    }

    // Copying would corrupt the active-request counter; only moves are allowed.
    CompiledModelHolder(const CompiledModelHolder&) = delete;
    CompiledModelHolder& operator=(const CompiledModelHolder&) = delete;

    CompiledModelHolder(CompiledModelHolder&&) = default;
    CompiledModelHolder& operator=(CompiledModelHolder&&) = default;

    const Graph& graph() const {
        return *m_graph;
    }

    // Acquire an exclusive lock on the graph and refresh the cached graph
    // pointer from the lock. NOTE(review): get_graph() appears to be able to
    // return a different GraphGuard than the one cached at construction —
    // confirm against the CompiledModel::get_graph() implementation.
    CompiledModel::GraphGuard::Lock lock() {
        auto lock = m_compiled_model->get_graph();
        m_graph = &(lock._graph);
        OPENVINO_ASSERT(m_graph, "Graph ptr null check failed");
        return lock;
    }

    std::string name() const {
        return m_compiled_model->name();
    }

    std::shared_ptr<const ov::ICompiledModel> compiled_model() const {
        return m_compiled_model;
    }

    // Unique id of this request within the compiled model.
    int id() const {
        return m_id;
    }

private:
    std::shared_ptr<const CompiledModel> m_compiled_model;
    const Graph* m_graph;
    int m_id;
};

} // namespace intel_cpu
} // namespace ov
19 changes: 17 additions & 2 deletions src/plugins/intel_cpu/src/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1940,8 +1940,23 @@ std::shared_ptr<ov::Model> Graph::dump() const {
return dump_graph_as_ie_ngraph_net(*this);
}

const std::unordered_map<std::string, node::MemoryStateNode*>& Graph::getInternalStateNodes() const {
return m_context->getMemoryStatesRegister()->getMemoryStates();
std::vector<MemStatePtr> Graph::memoryStates() const {
std::vector<MemStatePtr> resultVector;

for (auto&& item : m_context->getMemoryStatesRegister()->getMemoryStates()) {
resultVector.emplace_back(item.second->makeState());
}
return resultVector;
}

void Graph::assignStates(const std::vector<MemStatePtr>& states) {
    // Re-attach each provided state to the memory state node whose name
    // matches; states without a matching node are silently skipped.
    auto&& stateNodes = m_context->getMemoryStatesRegister()->getMemoryStates();

    for (const auto& memState : states) {
        const auto nodeIt = stateNodes.find(memState->get_name());
        if (nodeIt != stateNodes.end()) {
            nodeIt->second->assignState(memState);
        }
    }
}

} // namespace intel_cpu
Expand Down
38 changes: 27 additions & 11 deletions src/plugins/intel_cpu/src/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "edge.h"
#include "graph_context.h"
#include "memory_control.hpp"
#include "memory_state.h"
#include "node.h"
#include "nodes/input.h"
#include "openvino/core/node_vector.hpp"
Expand Down Expand Up @@ -87,28 +88,42 @@ class Graph {
return _name;
}

std::map<std::size_t, NodePtr>& GetInputNodesMap() {
return inputNodesMap;
NodePtr getInputNodeByIndex(std::size_t index) {
auto input = inputNodesMap.find(index);
if (input == inputNodesMap.end())
return nullptr;
return input->second;
}

std::map<std::size_t, NodePtr>& GetOutputNodesMap() {
return outputNodesMap;
NodePtr getOutputNodeByIndex(std::size_t index) {
auto output = outputNodesMap.find(index);
if (output == outputNodesMap.end())
return nullptr;
return output->second;
}

NodePtr getInputNodeByIndex(const std::size_t& index) {
NodeConstPtr getInputNodeByIndex(std::size_t index) const {
auto input = inputNodesMap.find(index);
if (input == inputNodesMap.end())
OPENVINO_THROW("CPU execution graph doesn't contain input node with index: ", index);
return nullptr;
return input->second;
}

NodePtr getOutputNodeByIndex(const std::size_t& index) {
NodeConstPtr getOutputNodeByIndex(std::size_t index) const {
auto output = outputNodesMap.find(index);
if (output == outputNodesMap.end())
OPENVINO_THROW("CPU execution graph doesn't contain output node with index: ", index);
return nullptr;
return output->second;
}

// Number of input nodes registered in the graph.
size_t inputsNumber() const {
return inputNodesMap.size();
}

// Number of output nodes registered in the graph.
size_t outputsNumber() const {
return outputNodesMap.size();
}

// Returns the dnnl engine held by the graph context.
dnnl::engine getEngine() const {
return m_context->getEngine();
}
Expand All @@ -117,6 +132,9 @@ class Graph {
return m_context;
}

std::vector<MemStatePtr> memoryStates() const;
void assignStates(const std::vector<MemStatePtr>& state);

void GetPerfData(std::vector<ov::ProfilingInfo>& perfMap) const;

void CreateEdge(const NodePtr& parent, const NodePtr& child, int parentPort = 0, int childPort = 0);
Expand Down Expand Up @@ -202,8 +220,6 @@ class Graph {
return graphHasDynamicInput;
}

const std::unordered_map<std::string, node::MemoryStateNode*>& getInternalStateNodes() const;

/**
* Init graph using \p model, \p context, \p inputConfigs and \p outputConfigs
*/
Expand All @@ -218,7 +234,7 @@ class Graph {
void Activate(const std::vector<MemoryPtr>& externalInputMemory = {},
const std::vector<MemoryPtr>& externalOutputMemory = {});

const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() const {
const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() {
return outputNodesMemBlocksMap;
}

Expand Down
Loading
Loading