Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CPU] Throw when release_memory is called during inference #27520

Merged
merged 17 commits into the base branch from the pull-request branch
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,6 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
}

std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request() const {
    // Factory for the CPU synchronous infer request bound to this compiled model.
    // Note: the active-request counter (m_numRequests) is maintained by
    // CompiledModelHolder (incremented on construction, decremented on
    // destruction), so it must NOT also be incremented here — doing so would
    // double-count requests and the counter would never return to zero.
    return std::make_shared<SyncInferRequest>(std::static_pointer_cast<const CompiledModel>(shared_from_this()));
}

Expand Down Expand Up @@ -344,8 +343,12 @@ void CompiledModel::export_model(std::ostream& modelStream) const {

void CompiledModel::release_memory() {
    // Release the network memory of every graph owned by this compiled model.
    // A graph that is currently executing an infer request holds its GraphGuard
    // mutex, so we only try-lock here: failing loudly is preferable to blocking
    // or racing with an in-flight inference.
    for (auto&& graph : m_graphs) {
        // try to lock mutex, since it may be already locked (e.g by an infer request)
        std::unique_lock<std::mutex> lock(graph._mutex, std::try_to_lock);
        OPENVINO_ASSERT(lock.owns_lock(),
                        "Attempt to call release_memory() on a compiled model in a busy state. Please ensure that all "
                        "infer requests are completed before releasing memory.");
        auto ctx = graph.getGraphContext();
        ctx->getNetworkMemoryControl()->releaseMemory();
    }
}
Expand Down
76 changes: 68 additions & 8 deletions src/plugins/intel_cpu/src/compiled_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ namespace ov {
namespace intel_cpu {

class CompiledModel : public ov::ICompiledModel {
public:
// A Graph extended with a mutex so that exclusive access to the graph can be
// enforced (e.g. an infer request may hold the lock for the duration of
// inference, and release_memory() try-locks it to detect a busy graph).
struct GraphGuard : public Graph {
std::mutex _mutex;
// RAII lock over the guard's mutex that also carries a reference to the
// guarded graph, so holders of the lock can reach the graph directly.
struct Lock : public std::unique_lock<std::mutex> {
explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
GraphGuard& _graph;
};
};

public:
typedef std::shared_ptr<CompiledModel> Ptr;

Expand Down Expand Up @@ -51,9 +60,13 @@ class CompiledModel : public ov::ICompiledModel {

void release_memory() override;

// Name of the compiled model (used e.g. in diagnostic messages).
std::string name() const {
return m_name;
}

private:
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
friend class SyncInferRequest;
friend class CompiledModelHolder;

const std::shared_ptr<ov::Model> m_model;
const std::shared_ptr<const ov::IPlugin> m_plugin;
Expand All @@ -66,13 +79,6 @@ class CompiledModel : public ov::ICompiledModel {
Config m_cfg;
mutable std::atomic_int m_numRequests = {0};
std::string m_name;
struct GraphGuard : public Graph {
std::mutex _mutex;
struct Lock : public std::unique_lock<std::mutex> {
explicit Lock(GraphGuard& graph) : std::unique_lock<std::mutex>(graph._mutex), _graph(graph) {}
GraphGuard& _graph;
};
};

const bool m_loaded_from_cache;
// WARNING: Do not use m_graphs directly.
Expand All @@ -94,5 +100,59 @@ class CompiledModel : public ov::ICompiledModel {
bool m_has_sub_compiled_models = false;
};

// This class provides safe access to the internal CompiledModel structures and
// helps to decouple SyncInferRequest from the CompiledModel internals.
// Each live holder represents one active infer request: construction registers
// the request (incrementing m_numRequests), destruction unregisters it.
class CompiledModelHolder {
public:
    CompiledModelHolder(std::shared_ptr<const CompiledModel> compiled_model)
        : m_compiled_model(std::move(compiled_model)) {
        OPENVINO_ASSERT(!m_compiled_model->m_graphs.empty(),
                        "No graph was found in the compiled model: ",
                        m_compiled_model->name());
        // Cache a pointer to the current graph and reserve a unique request id.
        // The post-increment doubles as the active-request registration.
        m_graph = &(m_compiled_model->get_graph()._graph);
        m_id = (m_compiled_model->m_numRequests)++;
    }

    ~CompiledModelHolder() {
        // A moved-from holder has a null compiled model and must not decrement.
        if (m_compiled_model) {
            --(m_compiled_model->m_numRequests);
        }
    }

    // Copying would corrupt the active-request counter; only moves are allowed.
    CompiledModelHolder(const CompiledModelHolder&) = delete;
    CompiledModelHolder& operator=(const CompiledModelHolder&) = delete;

    CompiledModelHolder(CompiledModelHolder&&) = default;
    CompiledModelHolder& operator=(CompiledModelHolder&&) = default;

    const Graph& graph() const {
        return *m_graph;
    }

    // Acquire an exclusive lock on the graph and refresh the cached graph
    // pointer from the lock. NOTE(review): get_graph() appears to be able to
    // return a different GraphGuard than the one cached at construction —
    // confirm against the CompiledModel::get_graph() implementation.
    CompiledModel::GraphGuard::Lock lock() {
        auto lock = m_compiled_model->get_graph();
        m_graph = &(lock._graph);
        OPENVINO_ASSERT(m_graph, "Graph ptr null check failed");
        return lock;
    }

    std::string name() const {
        return m_compiled_model->name();
    }

    std::shared_ptr<const ov::ICompiledModel> compiled_model() const {
        return m_compiled_model;
    }

    // Unique id of this request within the compiled model.
    int id() const {
        return m_id;
    }

private:
    std::shared_ptr<const CompiledModel> m_compiled_model;
    const Graph* m_graph;
    int m_id;
};

} // namespace intel_cpu
} // namespace ov
19 changes: 17 additions & 2 deletions src/plugins/intel_cpu/src/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1940,8 +1940,23 @@ std::shared_ptr<ov::Model> Graph::dump() const {
return dump_graph_as_ie_ngraph_net(*this);
}

const std::unordered_map<std::string, node::MemoryStateNode*>& Graph::getInternalStateNodes() const {
return m_context->getMemoryStatesRegister()->getMemoryStates();
std::vector<MemStatePtr> Graph::memoryStates() const {
std::vector<MemStatePtr> resultVector;

for (auto&& item : m_context->getMemoryStatesRegister()->getMemoryStates()) {
resultVector.emplace_back(item.second->makeState());
}
return resultVector;
}

void Graph::assignStates(const std::vector<MemStatePtr>& states) {
    // Re-attach each provided state to the memory state node whose name
    // matches; states without a matching node are silently skipped.
    auto&& stateNodes = m_context->getMemoryStatesRegister()->getMemoryStates();

    for (const auto& memState : states) {
        const auto nodeIt = stateNodes.find(memState->get_name());
        if (nodeIt != stateNodes.end()) {
            nodeIt->second->assignState(memState);
        }
    }
}

} // namespace intel_cpu
Expand Down
38 changes: 27 additions & 11 deletions src/plugins/intel_cpu/src/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "edge.h"
#include "graph_context.h"
#include "memory_control.hpp"
#include "memory_state.h"
#include "node.h"
#include "nodes/input.h"
#include "openvino/core/node_vector.hpp"
Expand Down Expand Up @@ -87,28 +88,42 @@ class Graph {
return _name;
}

std::map<std::size_t, NodePtr>& GetInputNodesMap() {
return inputNodesMap;
NodePtr getInputNodeByIndex(std::size_t index) {
auto input = inputNodesMap.find(index);
if (input == inputNodesMap.end())
return nullptr;
return input->second;
}

std::map<std::size_t, NodePtr>& GetOutputNodesMap() {
return outputNodesMap;
NodePtr getOutputNodeByIndex(std::size_t index) {
auto output = outputNodesMap.find(index);
if (output == outputNodesMap.end())
return nullptr;
return output->second;
}

NodePtr getInputNodeByIndex(const std::size_t& index) {
NodeConstPtr getInputNodeByIndex(std::size_t index) const {
auto input = inputNodesMap.find(index);
if (input == inputNodesMap.end())
OPENVINO_THROW("CPU execution graph doesn't contain input node with index: ", index);
return nullptr;
return input->second;
}

NodePtr getOutputNodeByIndex(const std::size_t& index) {
NodeConstPtr getOutputNodeByIndex(std::size_t index) const {
auto output = outputNodesMap.find(index);
if (output == outputNodesMap.end())
OPENVINO_THROW("CPU execution graph doesn't contain output node with index: ", index);
return nullptr;
return output->second;
}

// Number of input nodes registered in the graph.
size_t inputsNumber() const {
return inputNodesMap.size();
}

// Number of output nodes registered in the graph.
size_t outputsNumber() const {
return outputNodesMap.size();
}

// Returns the dnnl engine held by the graph context.
dnnl::engine getEngine() const {
return m_context->getEngine();
}
Expand All @@ -117,6 +132,9 @@ class Graph {
return m_context;
}

std::vector<MemStatePtr> memoryStates() const;
void assignStates(const std::vector<MemStatePtr>& state);

void GetPerfData(std::vector<ov::ProfilingInfo>& perfMap) const;

void CreateEdge(const NodePtr& parent, const NodePtr& child, int parentPort = 0, int childPort = 0);
Expand Down Expand Up @@ -202,8 +220,6 @@ class Graph {
return graphHasDynamicInput;
}

const std::unordered_map<std::string, node::MemoryStateNode*>& getInternalStateNodes() const;

/**
* Init graph using \p model, \p context, \p inputConfigs and \p outputConfigs
*/
Expand All @@ -218,7 +234,7 @@ class Graph {
void Activate(const std::vector<MemoryPtr>& externalInputMemory = {},
const std::vector<MemoryPtr>& externalOutputMemory = {});

const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() const {
const std::unordered_map<std::size_t, ProxyMemoryBlockPtr>& getOutputNodesMemBlocksMap() {
return outputNodesMemBlocksMap;
}

Expand Down
Loading
Loading