diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 2ff2ee2636ec02..424e9658775466 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -71,6 +71,7 @@ void Graph::CreateGraph(NET &net, const GraphContext::CPtr ctx) { ForgetGraphData(); context = ctx; + m_stream = dnnl::stream(getEngine()); Replicate(net); @@ -87,6 +88,7 @@ void Graph::CreateGraph(const std::vector& graphNodes, ForgetGraphData(); context = ctx; + m_stream = dnnl::stream(getEngine()); this->_name = std::move(name); this->reuse_io_tensors = false; @@ -439,8 +441,6 @@ void Graph::InitOptimalPrimitiveDescriptors() { void Graph::CreatePrimitivesAndExecConstants() const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "Graph::CreatePrimitivesAndExecConstants"); - dnnl::stream stream(getEngine()); - using shared_memory_ptr = WeightsSharing::SharedMemory::Ptr; auto acquireSharedOutputs = [this](const NodePtr & node) { @@ -480,13 +480,13 @@ void Graph::CreatePrimitivesAndExecConstants() const { auto sharedOutputs = acquireSharedOutputs(node); if (std::get<0>(sharedOutputs) || std::get<1>(sharedOutputs)) { - ExecuteNode(node, stream); + ExecuteNode(node, m_stream); for (auto & output : std::get<2>(sharedOutputs)) output->valid(true); } } else { - ExecuteNode(node, stream); + ExecuteNode(node, m_stream); } } } @@ -1123,15 +1123,13 @@ void Graph::PullOutputData(std::unordered_map>& } void Graph::InferStatic(SyncInferRequest* request) { - dnnl::stream stream(getEngine()); - for (const auto& node : m_executableGraphNodes) { VERBOSE(node, getConfig().debugCaps.verbose); PERF(node, getConfig().collectPerfCounters); if (request) request->throw_if_canceled(); - ExecuteNode(node, stream); + ExecuteNode(node, m_stream); } } @@ -1342,8 +1340,6 @@ class UpdateNodes : public UpdateNodesBase { void Graph::InferDynamic(SyncInferRequest* request) { - dnnl::stream stream(getEngine()); - std::unique_ptr updateNodes{}; if (parallel_get_max_threads() > 1) { updateNodes.reset(new UpdateNodes(m_executableGraphNodes)); @@ -1362,7 +1358,7 @@ void Graph::InferDynamic(SyncInferRequest* request) { if (request) request->throw_if_canceled(); try { - ExecuteNode(node, stream); + ExecuteNode(node, m_stream); } catch (const std::exception& exp) { OPENVINO_THROW(node, exp.what()); } diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 1a08446b59d9f6..2acdf40b9e664f 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -250,6 +250,7 @@ class Graph { std::vector m_executableSyncNodesInds; GraphContext::CPtr context; + dnnl::stream m_stream; void EnforceInferencePrecision(); void EnforceBF16();