Skip to content

Commit

Permalink
[CPU] Throw when release_memory is called during inference (#27520)
Browse files Browse the repository at this point in the history
### Details:
This PR changes the behavior of the `CompiledModel::release_memory()`
implementation in the CPU plugin for the situation when the method is
being called concurrently with the other graph state modifying methods
(e.g. graph initialization, inference, properties request). This is
necessary to ensure thread safety and to provide clearly defined behavior
when the method is called concurrently.
The PR also refactors the InferRequest implementation, decoupling it from
the compiled model internals and providing a safer interface that ensures
thread-safe access to the CPU graph structures.
  • Loading branch information
maxnick authored Dec 16, 2024
1 parent 3af0f7c commit 05a6f4f
Show file tree
Hide file tree
Showing 12 changed files with 406 additions and 156 deletions.
9 changes: 6 additions & 3 deletions src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
}

std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request() const {
    // NOTE: request counting is handled by CompiledModelHolder (incremented in its
    // ctor, decremented in its dtor), which the SyncInferRequest owns; incrementing
    // m_numRequests here as well would double-count live requests.
    return std::make_shared<SyncInferRequest>(std::static_pointer_cast<const CompiledModel>(shared_from_this()));
}

Expand Down Expand Up @@ -344,8 +343,12 @@ void CompiledModel::export_model(std::ostream& modelStream) const {

void CompiledModel::release_memory() {
    for (auto&& graph : m_graphs) {
        // try to lock mutex, since it may be already locked (e.g by an infer request);
        // throwing instead of blocking gives the caller clearly defined behavior for
        // the "graph is busy" case rather than silently waiting on inference.
        std::unique_lock<std::mutex> lock(graph._mutex, std::try_to_lock);
        OPENVINO_ASSERT(lock.owns_lock(),
                        "Attempt to call release_memory() on a compiled model in a busy state. Please ensure that all "
                        "infer requests are completed before releasing memory.");
        // Holding the lock guarantees no concurrent graph-state modification while
        // the network memory is released.
        auto ctx = graph.getGraphContext();
        ctx->getNetworkMemoryControl()->releaseMemory();
    }
}
Expand Down
76 changes: 68 additions & 8 deletions src/plugins/intel_cpu/src/compiled_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ namespace ov {
namespace intel_cpu {

class CompiledModel : public ov::ICompiledModel {
public:
// Couples a Graph with its own mutex so that graph-state-modifying operations
// (e.g. inference, release_memory) can be serialized per graph.
struct GraphGuard : public Graph {
std::mutex _mutex;
// RAII lock over the guard's mutex that also carries a reference to the guarded
// graph, so a lock holder can access exactly the graph it has locked.
struct Lock : public std::unique_lock<std::mutex> {
explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
GraphGuard& _graph;
};
};

public:
typedef std::shared_ptr<CompiledModel> Ptr;

Expand Down Expand Up @@ -51,9 +60,13 @@ class CompiledModel : public ov::ICompiledModel {

void release_memory() override;

// Returns the compiled model's name (m_name, set at compilation time).
std::string name() const {
return m_name;
}

private:
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
friend class SyncInferRequest;
friend class CompiledModelHolder;

const std::shared_ptr<ov::Model> m_model;
const std::shared_ptr<const ov::IPlugin> m_plugin;
Expand All @@ -66,13 +79,6 @@ class CompiledModel : public ov::ICompiledModel {
Config m_cfg;
mutable std::atomic_int m_numRequests = {0};
std::string m_name;
struct GraphGuard : public Graph {
std::mutex _mutex;
struct Lock : public std::unique_lock<std::mutex> {
explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
GraphGuard& _graph;
};
};

const bool m_loaded_from_cache;
// WARNING: Do not use m_graphs directly.
Expand All @@ -94,5 +100,59 @@ class CompiledModel : public ov::ICompiledModel {
bool m_has_sub_compiled_models = false;
};

// This class provides safe access to the internal CompiledModel structures and helps to decouple SyncInferRequest and
// the CompiledModel internal structures
class CompiledModelHolder {
public:
CompiledModelHolder(std::shared_ptr<const CompiledModel> compiled_model)
: m_compiled_model(std::move(compiled_model)) {
OPENVINO_ASSERT(!m_compiled_model->m_graphs.empty(),
"No graph was found in the compiled model: ",
m_compiled_model->name());
m_graph = &(m_compiled_model->get_graph()._graph);
m_id = (m_compiled_model->m_numRequests)++;
}

~CompiledModelHolder() {
if (m_compiled_model) {
--(m_compiled_model->m_numRequests);
}
}

CompiledModelHolder(const CompiledModelHolder&) = delete;
CompiledModelHolder& operator=(const CompiledModelHolder&) = delete;

CompiledModelHolder(CompiledModelHolder&&) = default;
CompiledModelHolder& operator=(CompiledModelHolder&&) = default;

const Graph& graph() const {
return *m_graph;
}

CompiledModel::GraphGuard::Lock lock() {
auto lock = m_compiled_model->get_graph();
m_graph = &(lock._graph);
OPENVINO_ASSERT(m_graph, "Graph ptr null check failed");
return lock;
}

std::string name() const {
return m_compiled_model->name();
}

std::shared_ptr<const ov::ICompiledModel> compiled_model() const {
return m_compiled_model;
}

int id() const {
return m_id;
}

private:
std::shared_ptr<const CompiledModel> m_compiled_model;
const Graph* m_graph;
int m_id;
};

} // namespace intel_cpu
} // namespace ov
19 changes: 17 additions & 2 deletions src/plugins/intel_cpu/src/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1940,8 +1940,23 @@ std::shared_ptr<ov::Model> Graph::dump() const {
return dump_graph_as_ie_ngraph_net(*this);
}

const std::unordered_map<std::string, node::MemoryStateNode*>& Graph::getInternalStateNodes() const {
return m_context->getMemoryStatesRegister()->getMemoryStates();
std::vector<MemStatePtr> Graph::memoryStates() const {
std::vector<MemStatePtr> resultVector;

for (auto&& item : m_context->getMemoryStatesRegister()->getMemoryStates()) {
resultVector.emplace_back(item.second->makeState());
}
return resultVector;
}

// Binds the given memory states to the graph's state nodes, matching by state name.
// States whose name has no corresponding node in this graph are skipped silently.
void Graph::assignStates(const std::vector<MemStatePtr>& states) {
    auto&& stateNodes = m_context->getMemoryStatesRegister()->getMemoryStates();
    for (auto&& newState : states) {
        auto nodeIt = stateNodes.find(newState->get_name());
        if (nodeIt == stateNodes.end()) {
            continue;  // no node with this name in the graph — nothing to assign
        }
        nodeIt->second->assignState(newState);
    }
}

} // namespace intel_cpu
Expand Down
38 changes: 27 additions & 11 deletions src/plugins/intel_cpu/src/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "edge.h"
#include "graph_context.h"
#include "memory_control.hpp"
#include "memory_state.h"
#include "node.h"
#include "nodes/input.h"
#include "openvino/core/node_vector.hpp"
Expand Down Expand Up @@ -87,28 +88,42 @@ class Graph {
return _name;
}

std::map<std::size_t, NodePtr>& GetInputNodesMap() {
return inputNodesMap;
NodePtr getInputNodeByIndex(std::size_t index) {
auto input = inputNodesMap.find(index);
if (input == inputNodesMap.end())
return nullptr;
return input->second;
}

std::map<std::size_t, NodePtr>& GetOutputNodesMap() {
return outputNodesMap;
NodePtr getOutputNodeByIndex(std::size_t index) {
auto output = outputNodesMap.find(index);
if (output == outputNodesMap.end())
return nullptr;
return output->second;
}

NodePtr getInputNodeByIndex(const std::size_t& index) {
NodeConstPtr getInputNodeByIndex(std::size_t index) const {
auto input = inputNodesMap.find(index);
if (input == inputNodesMap.end())
OPENVINO_THROW("CPU execution graph doesn't contain input node with index: ", index);
return nullptr;
return input->second;
}

NodePtr getOutputNodeByIndex(const std::size_t& index) {
NodeConstPtr getOutputNodeByIndex(std::size_t index) const {
auto output = outputNodesMap.find(index);
if (output == outputNodesMap.end())
OPENVINO_THROW("CPU execution graph doesn't contain output node with index: ", index);
return nullptr;
return output->second;
}

// Number of input nodes registered in the graph.
size_t inputsNumber() const {
return inputNodesMap.size();
}

// Number of output nodes registered in the graph.
size_t outputsNumber() const {
return outputNodesMap.size();
}

// Returns the oneDNN engine held by the graph's context (returned by value).
dnnl::engine getEngine() const {
return m_context->getEngine();
}
Expand All @@ -117,6 +132,9 @@ class Graph {
return m_context;
}

std::vector<MemStatePtr> memoryStates() const;
void assignStates(const std::vector<MemStatePtr>& state);

void GetPerfData(std::vector<ov::ProfilingInfo>& perfMap) const;

void CreateEdge(const NodePtr& parent, const NodePtr& child, int parentPort = 0, int childPort = 0);
Expand Down Expand Up @@ -202,8 +220,6 @@ class Graph {
return graphHasDynamicInput;
}

const std::unordered_map<std::string, node::MemoryStateNode*>& getInternalStateNodes() const;

/**
* Init graph using \p model, \p context, \p inputConfigs and \p outputConfigs
*/
Expand All @@ -218,7 +234,7 @@ class Graph {
void Activate(const std::vector<MemoryPtr>& externalInputMemory = {},
const std::vector<MemoryPtr>& externalOutputMemory = {});

const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() const {
const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() {
return outputNodesMemBlocksMap;
}

Expand Down
Loading

0 comments on commit 05a6f4f

Please sign in to comment.