From cc9882f95291f85e94db33177de79b49b183167a Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Wed, 15 Mar 2023 17:26:06 +0100
Subject: [PATCH 1/5] First step FC optimized

---
 .../src/memory_desc/dnnl_memory_desc.cpp    |  9 +++-
 .../src/memory_desc/dnnl_memory_desc.h      |  1 +
 .../src/nodes/common/dnnl_executor.cpp      | 21 ----------
 .../src/nodes/common/dnnl_executor.h        | 33 +++++++++++++--
 .../intel_cpu/src/nodes/fullyconnected.cpp  | 41 ++++++++++---------
 .../intel_cpu/src/nodes/fullyconnected.h    |  2 +
 6 files changed, 61 insertions(+), 46 deletions(-)

diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp
index 1f2a17189a31cc..0458f93836779d 100644
--- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp
+++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.cpp
@@ -36,13 +36,18 @@ MemoryDescPtr DnnlMemoryDesc::cloneWithNewPrecision(const InferenceEngine::Preci
 }
 
 bool DnnlMemoryDesc::isCompatible(const MemoryDesc &rhs) const {
-    if (MemoryDescType::Dnnl == rhs.getType()) {
-        return this->desc == rhs.as<DnnlMemoryDesc>()->desc;
+    if (MemoryDescType::Dnnl & rhs.getType()) {
+        auto* dnnMemDesc = rhs.as<DnnlMemoryDesc>();
+        return isCompatible(*dnnMemDesc);
     } else {
         return false;
     }
 }
 
+bool DnnlMemoryDesc::isCompatible(const DnnlMemoryDesc& rhs) const {
+    return this->desc == rhs.desc;
+}
+
 std::string DnnlMemoryDesc::serializeFormat() const {
     dnnl::impl::memory_desc_wrapper wrapped(desc.get());
     if (wrapped.is_wino_desc()) {
diff --git a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h
index c6a88794485c40..373e66679f8824 100644
--- a/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h
+++ b/src/plugins/intel_cpu/src/memory_desc/dnnl_memory_desc.h
@@ -26,6 +26,7 @@ class DnnlMemoryDesc : public virtual MemoryDesc {
     MemoryDescPtr cloneWithNewPrecision(const InferenceEngine::Precision prec) const override;
 
     bool isCompatible(const MemoryDesc& rhs) const override;
+    bool isCompatible(const DnnlMemoryDesc& rhs) const;
 
     bool hasLayoutType(LayoutType layoutType) const override { return false; }
 
diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp
index 3f055cc63fe039..ca09526e4a4c8b 100644
--- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp
+++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp
@@ -58,27 +58,6 @@ const_dnnl_primitive_desc_t DnnlExecutor::getPrimitiveDesc() const {
     return execPrim.get_primitive_desc();
 }
 
-dnnl::memory::desc DnnlExecutor::getSrcDesc() const {
-    auto pd = getPrimitiveDesc();
-    auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::src_md);
-
-    return md->getDnnlDesc();
-}
-
-dnnl::memory::desc DnnlExecutor::getWeightDesc() const {
-    auto pd = getPrimitiveDesc();
-    auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::weights_md);
-
-    return md->getDnnlDesc();
-}
-
-dnnl::memory::desc DnnlExecutor::getDstDesc() const {
-    auto pd = getPrimitiveDesc();
-    auto md = DnnlExtensionUtils::query_md(pd, dnnl::query::dst_md);
-
-    return md->getDnnlDesc();
-}
-
 impl_desc_type DnnlExecutor::getImplementationType() const {
     auto pd = getPrimitiveDesc();
     return parse_impl_name(DnnlExtensionUtils::query_impl_info_str(pd));
diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h
index f824fd8146ecb6..21a6d4f4634bbc 100644
--- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h
+++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h
@@ -31,17 +31,44 @@ class DnnlExecutor {
         virtual ~DnnlExecutor() = default;
         dnnl::primitive getExecPrim() const;
         const_dnnl_primitive_desc_t getPrimitiveDesc() const;
-        dnnl::memory::desc getSrcDesc() const;
-        dnnl::memory::desc getWeightDesc() const;
-        dnnl::memory::desc getDstDesc() const;
         impl_desc_type getImplementationType() const;
 
+        DnnlMemoryDescPtr getSrcDesc() const {
+            return src_md;
+        }
+        DnnlMemoryDescPtr getWeightDesc() const {
+            return wghts_md;
+        }
+        DnnlMemoryDescPtr getDstDesc() const {
+            return dst_md;
+        }
+        DnnlMemoryDescPtr getScratchPadDesc() const {
+            return scrch_md;
+        }
+
+        const dnnl::memory::desc& getDnnlSrcDesc() const {
+            return src_md->getDnnlDesc();
+        }
+        const dnnl::memory::desc& getDnnlWeightDesc() const {
+            return wghts_md->getDnnlDesc();
+        }
+        const dnnl::memory::desc& getDnnlDstDesc() const {
+            return dst_md->getDnnlDesc();
+        }
+        const dnnl::memory::desc& getDnnlScratchPadDesc() const {
+            return scrch_md->getDnnlDesc();
+        }
+
     protected:
         DnnlExecutor() = default;
         dnnl::primitive execPrim;
         // key is the port number for the primitive that needs memory reordering
        std::unordered_map<int, IntermReorder> inputReorders;
        std::unordered_map<int, IntermReorder> outputReorders;
+        DnnlMemoryDescPtr src_md;
+        DnnlMemoryDescPtr wghts_md;
+        DnnlMemoryDescPtr dst_md;
+        DnnlMemoryDescPtr scrch_md;
 };
 
 }   // namespace intel_cpu
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 23b6c5be7cee9f..63aa4a70d2fc9a 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -311,7 +311,7 @@ void FullyConnected::prepareParams() {
                    implementationTypeIP,
                    useConv1x1};
 
-    auto engine = getEngine();
+    auto& engine = getEngine();
 
     auto builder = [&engine](const FCKey& key) -> executorPtr {
         executorPtr execPtr = nullptr;
@@ -404,23 +404,23 @@ void FullyConnected::prepareParams() {
     execPtr = result.first;
 
     if (execPtr) {
-        // no executor yet or shapes changed
-        if (!prevExecPtr || prevExecPtr->getSrcDesc() != execPtr->getSrcDesc()) {
-            auto oldMem = srcMemPtr->GetPrimitive();
-            // fast path: wanted is same with parent node output, typical is static shape with inner product
-            if (execPtr->getSrcDesc() == inDesc->getDnnlDesc()) {
-                primArgs[DNNL_ARG_SRC] = std::move(oldMem);
-            } else {
-                primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getSrcDesc(), oldMem.get_engine(), oldMem.get_data_handle());
-            }
+        if (execPtr->getSrcDesc()->isCompatible(*inDesc)) {
+            primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive();
+        } else {
+            auto start = std::chrono::steady_clock::now();
+            primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getDnnlSrcDesc(), engine, srcMemPtr->GetData()); //385.681 [ms]
+            auto end = std::chrono::steady_clock::now();
+            g_counters[8] += std::chrono::duration_cast(end - start).count();
         }
-        if (!prevExecPtr || prevExecPtr->getDstDesc() != execPtr->getDstDesc()) {
-            auto oldMem = dstMemPtr->GetPrimitive();
-            if (execPtr->getDstDesc() == outDesc->getDnnlDesc()) {
-                primArgs[DNNL_ARG_DST] = std::move(oldMem);
-            } else {
-                primArgs[DNNL_ARG_DST] = dnnl::memory(execPtr->getDstDesc(), oldMem.get_engine(), oldMem.get_data_handle());
-            }
+
+        if (execPtr->getDstDesc()->isCompatible(*outDesc)) {
+            primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive();
+        } else {
+            primArgs[DNNL_ARG_DST] = dnnl::memory(execPtr->getDnnlDstDesc(), engine, dstMemPtr->GetData());
+        }
+
+        if (!prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) {
+            primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive();
         }
         if (!prevExecPtr || prevExecPtr->getWeightDesc() != execPtr->getWeightDesc()) {
             primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(DnnlExtensionUtils::makeDescriptor(execPtr->getWeightDesc()))->GetPrimitive();
         }
@@ -438,9 +438,10 @@ void FullyConnected::prepareParams() {
             primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive();
         }
 
-        auto pd = execPtr->getPrimitiveDesc();
-        auto scratchpadMem = getScratchPadMem(pd);
-        primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive();
+        if (!scratchPad || !scratchPad->getDesc().isCompatible(*(execPtr->getScratchPadDesc()))) {
+            scratchPad = context->getScratchPad()->createScratchPadMem(execPtr->getScratchPadDesc());
+        }
+        primArgs[DNNL_ARG_SCRATCHPAD] = scratchPad->GetPrimitive();
 #ifdef CPU_DEBUG_CAPS
         if (result.second == CacheEntryBase::LookUpStatus::Miss) {
             DEBUG_LOG("verbose##", getName(), "##", pd->info(), "\n");
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h
index 4de5dff882649d..30fbbcb38b9884 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h
@@ -116,6 +116,8 @@ class FullyConnected : public Node {
     float minSparseRate = 1.f;
     float weiSparseRate = 0.f;
     bool useSparseWeightsDecompression();
+
+    MemoryPtr scratchPad;
 };
 
 }   // namespace node

From 26573d694e00351acdde14c8e4057579a7193f57 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Wed, 15 Mar 2023 18:17:33 +0100
Subject: [PATCH 2/5] Code fix

---
 src/plugins/intel_cpu/src/nodes/fullyconnected.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 63aa4a70d2fc9a..e338809496c272 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -407,10 +407,7 @@ void FullyConnected::prepareParams() {
         if (execPtr->getSrcDesc()->isCompatible(*inDesc)) {
             primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive();
         } else {
-            auto start = std::chrono::steady_clock::now();
-            primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getDnnlSrcDesc(), engine, srcMemPtr->GetData()); //385.681 [ms]
-            auto end = std::chrono::steady_clock::now();
-            g_counters[8] += std::chrono::duration_cast(end - start).count();
+            primArgs[DNNL_ARG_SRC] = dnnl::memory(execPtr->getDnnlSrcDesc(), engine, srcMemPtr->GetData());
         }
 
         if (execPtr->getDstDesc()->isCompatible(*outDesc)) {
@@ -422,9 +419,6 @@ void FullyConnected::prepareParams() {
         if (!prevExecPtr || !execPtr->getWeightDesc()->isCompatible(*(prevExecPtr->getWeightDesc()))) {
             primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(execPtr->getWeightDesc())->GetPrimitive();
         }
-        if (!prevExecPtr || prevExecPtr->getWeightDesc() != execPtr->getWeightDesc()) {
-            primArgs[DNNL_ARG_WEIGHTS] = prepareWeightMemory(DnnlExtensionUtils::makeDescriptor(execPtr->getWeightDesc()))->GetPrimitive();
-        }
         // changed shapes may also cause the kernel type changed
         selected_pd->setImplementationType(execPtr->getImplementationType());
         // WA: We update implType to know whether weights decompression was used inside the kernel

From 2c1862b041677245100eb1cdec4781572f0015c8 Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Wed, 15 Mar 2023 18:59:49 +0100
Subject: [PATCH 3/5] Fix further

---
 src/plugins/intel_cpu/src/nodes/fullyconnected.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index e338809496c272..33141cb48e3073 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -915,10 +915,18 @@ bool FullyConnected::canBeExecutedInConv1x1() const {
 }
 
 FullyConnected::ExecutorInnerProduct::ExecutorInnerProduct(const dnnl::inner_product_forward::primitive_desc& pd) {
+    src_md = DnnlExtensionUtils::makeDescriptor(pd.src_desc());
+    dst_md = DnnlExtensionUtils::makeDescriptor(pd.dst_desc());
+    wghts_md = DnnlExtensionUtils::makeDescriptor(pd.weights_desc());
+    scrch_md = DnnlExtensionUtils::makeDescriptor(pd.scratchpad_desc());
     execPrim = dnnl::inner_product_forward(pd);
 }
 
 FullyConnected::ExecutorConv1x1::ExecutorConv1x1(const dnnl::convolution_forward::primitive_desc& pd) {
+    src_md = DnnlExtensionUtils::makeDescriptor(pd.src_desc());
+    dst_md = DnnlExtensionUtils::makeDescriptor(pd.dst_desc());
+    wghts_md = DnnlExtensionUtils::makeDescriptor(pd.weights_desc());
+    scrch_md = DnnlExtensionUtils::makeDescriptor(pd.scratchpad_desc());
     execPrim = dnnl::convolution_forward(pd);
 }
 

From 0ee9d3b1863665bcaae01b3239f01537c5aaa24e Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Fri, 17 Mar 2023 13:28:25 +0100
Subject: [PATCH 4/5] Move toward dnnl executor

---
 src/plugins/intel_cpu/src/node.cpp            |  6 -----
 src/plugins/intel_cpu/src/node.h              | 10 +++----
 .../src/nodes/common/dnnl_executor.cpp        | 18 ++++++++++++-
 .../src/nodes/common/dnnl_executor.h          |  7 +++--
 src/plugins/intel_cpu/src/nodes/concat.h      |  1 +
 src/plugins/intel_cpu/src/nodes/conv.cpp      |  7 ++---
 src/plugins/intel_cpu/src/nodes/deconv.cpp    | 11 +++-----
 .../intel_cpu/src/nodes/fullyconnected.cpp    | 26 +++---------------
 .../intel_cpu/src/nodes/fullyconnected.h      | 12 ---------
 src/plugins/intel_cpu/src/nodes/input.h       |  1 +
 src/plugins/intel_cpu/src/nodes/interaction.h |  1 +
 src/plugins/intel_cpu/src/nodes/lrn.cpp       | 27 ++++++++++++-------
 src/plugins/intel_cpu/src/nodes/lrn.h         |  4 +++
 src/plugins/intel_cpu/src/nodes/matmul.cpp    | 22 +++++++++------
 src/plugins/intel_cpu/src/nodes/matmul.h      |  4 +++
 src/plugins/intel_cpu/src/nodes/pooling.cpp   | 27 ++++++++++++-------
 src/plugins/intel_cpu/src/nodes/pooling.h     |  5 ++++
 src/plugins/intel_cpu/src/nodes/reorder.cpp   |  6 ++++-
 src/plugins/intel_cpu/src/nodes/reorder.h     |  1 +
 src/plugins/intel_cpu/src/nodes/rnn.cpp       | 19 +++++++------
 src/plugins/intel_cpu/src/nodes/rnn.h         |  5 ++++
 src/plugins/intel_cpu/src/nodes/softmax.cpp   | 26 +++++++++++-------
 src/plugins/intel_cpu/src/nodes/softmax.h     |  5 ++++
 src/plugins/intel_cpu/src/nodes/transpose.h   |  1 +
 24 files changed, 142 insertions(+), 110 deletions(-)

diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp
index 03529f39d1c003..64752ea8692fdd 100644
--- a/src/plugins/intel_cpu/src/node.cpp
+++ b/src/plugins/intel_cpu/src/node.cpp
@@ -550,12 +550,6 @@ std::vector Node::getAvailableFormatsForDims(const Shape &di
     return {memory::format_tag::any};
 }
 
-void Node::execute(dnnl::stream strm) {
-    if (prim) {
-        prim.execute(strm, primArgs);
-    }
-}
-
 void Node::updateShapes() {
     IE_ASSERT(isDynamicNode()) << "Node::updateShapes() is called to a static shape node of type: " << getTypeStr() << " with name: " << getName();
     if (needShapeInfer()) {
diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h
index 0d15441972af92..dd78bfd0159b85 100644
--- a/src/plugins/intel_cpu/src/node.h
+++ b/src/plugins/intel_cpu/src/node.h
@@ -334,7 +334,7 @@ class Node {
     void resolveInPlaceEdges();
 
-    virtual void execute(dnnl::stream strm);
+    virtual void execute(dnnl::stream strm) = 0;
     void updateShapes();
     void updateDynamicParams();
     void executeDynamic(dnnl::stream strm);
@@ -578,7 +578,6 @@ class Node {
     std::vector supportedPrimitiveDescriptors;
     std::unordered_map primArgs;
     std::unordered_map postOpsArgs;
-    dnnl::primitive prim;
     std::vector descs;
 
     const GraphContext::CPtr context;
@@ -649,9 +648,10 @@ class Node {
         IE_THROW(NotImplemented) << "[DS] prapareParams not implemented for node with type " << NameFromType(getType());
     }
 
-    MemoryPtr getScratchPadMem(const const_dnnl_primitive_desc_t& pd) {
-        auto scratchpadMemoryDesc = DnnlExtensionUtils::query_md(pd, dnnl::query::scratchpad_md);
-        scratchpadMem = context->getScratchPad()->createScratchPadMem(scratchpadMemoryDesc);
+    MemoryPtr getScratchPadMem(const DnnlMemoryDescPtr& desc) {
+        if (!scratchpadMem || !scratchpadMem->getDesc().isCompatible(*desc)) {
+            scratchpadMem = context->getScratchPad()->createScratchPadMem(desc);
+        }
         return scratchpadMem;
     }
 
diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp
index ca09526e4a4c8b..7d337457494de9 100644
--- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp
+++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.cpp
@@ -9,6 +9,14 @@ using namespace dnnl;
 namespace ov {
 namespace intel_cpu {
 
+DnnlExecutor::DnnlExecutor(const dnnl::primitive_desc& pd) {
+    execPrim = dnnl::primitive(pd);
+    src_md = DnnlExtensionUtils::makeDescriptor(pd.src_desc());
+    dst_md = DnnlExtensionUtils::makeDescriptor(pd.dst_desc());
+    wghts_md = DnnlExtensionUtils::makeDescriptor(pd.weights_desc());
+    scrch_md = DnnlExtensionUtils::makeDescriptor(pd.scratchpad_desc());
+}
+
 DnnlExecutor::IntermReorder::IntermReorder(const dnnl::memory::desc& descSrc,
                                            const dnnl::memory::desc& descDst,
                                            const dnnl::engine& engine) : m_descSrc(descSrc), m_descDst(descDst) {
@@ -20,7 +28,15 @@ void DnnlExecutor::IntermReorder::exec(dnnl::memory& memSrc, dnnl::memory& memDs
     m_reorder.execute(strm, memSrc, memDst);
 }
 
-void DnnlExecutor::exec(std::unordered_map<int, dnnl::memory> primArgs, dnnl::stream strm) {
+void DnnlExecutor::exec(const std::unordered_map<int, dnnl::memory>& primArgs, dnnl::stream strm) {
+    if (inputReorders.empty() && outputReorders.empty()) {
+        execPrim.execute(strm, primArgs);
+    } else {
+        reorder_exec(primArgs, strm);
+    }
+}
+
+void DnnlExecutor::reorder_exec(std::unordered_map<int, dnnl::memory> primArgs, dnnl::stream strm) {
     for (auto &inReorder : inputReorders) {
         if (primArgs.count(inReorder.first)) {
             dnnl::memory memDst(inReorder.second.getDstDesc(), strm.get_engine());
diff --git a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h
index 21a6d4f4634bbc..0f3eff13797eef 100644
--- a/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h
+++ b/src/plugins/intel_cpu/src/nodes/common/dnnl_executor.h
@@ -26,7 +26,8 @@ class DnnlExecutor {
         };
 
     public:
-        void exec(std::unordered_map<int, dnnl::memory> primArgs, dnnl::stream strm);
+        explicit DnnlExecutor(const dnnl::primitive_desc& pd);
+        void exec(const std::unordered_map<int, dnnl::memory>& primArgs, dnnl::stream strm);
         bool needReordering() const;
         virtual ~DnnlExecutor() = default;
         dnnl::primitive getExecPrim() const;
         const_dnnl_primitive_desc_t getPrimitiveDesc() const;
         impl_desc_type getImplementationType() const;
@@ -60,7 +61,9 @@ class DnnlExecutor {
         }
 
     protected:
-        DnnlExecutor() = default;
+        void reorder_exec(std::unordered_map<int, dnnl::memory> primArgs, dnnl::stream strm);
+
+    protected:
         dnnl::primitive execPrim;
         // key is the port number for the primitive that needs memory reordering
         std::unordered_map<int, IntermReorder> inputReorders;
diff --git a/src/plugins/intel_cpu/src/nodes/concat.h b/src/plugins/intel_cpu/src/nodes/concat.h
index 9a0a8a66274321..32831bcede332a 100644
--- a/src/plugins/intel_cpu/src/nodes/concat.h
+++ b/src/plugins/intel_cpu/src/nodes/concat.h
@@ -52,6 +52,7 @@ class Concat : public Node {
     InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32;
     bool canExecRef = false;
     static constexpr size_t MAX_RANK_REF = 6;
+    dnnl::primitive prim;
 };
 
 }   // namespace node
diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp
index 3ce92de2169f6c..002af7c98b24f4 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -1495,8 +1495,7 @@ void Convolution::prepareParams() {
 
         Node::appendPostOpArgs(*pAttrLocal, primArgs, convPostOpsArgs[preferLegacyPostOps]);
 
-        auto pd = execPtr->getPrimitiveDesc();
-        auto scratchpadMem = getScratchPadMem(pd);
+        auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc());
         primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive();
 
 #ifdef CPU_DEBUG_CAPS
@@ -1513,9 +1512,7 @@ Convolution::ConvolutionExecutor::ConvolutionExecutor(const dnnl::convolution_fo
                                                       const dnnl::memory::desc& inMemDesc,
                                                       const dnnl::memory::desc& weightMemDesc,
                                                       const dnnl::memory::desc& outMemDesc,
-                                                      const dnnl::engine& engine) {
-    execPrim = dnnl::convolution_forward(pd);
-
+                                                      const dnnl::engine& engine) : DnnlExecutor(pd) {
     if (inMemDesc != pd.src_desc()) {
         inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
     }
diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp
index db013ced146e6d..a8dbae4d00a471 100644
--- a/src/plugins/intel_cpu/src/nodes/deconv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp
@@ -991,8 +991,7 @@ void Deconvolution::prepareParams() {
     }
     Node::appendPostOpArgs(*pAttrLocal, primArgs, postOpsArgs);
 
-    auto pd = execPtr->getPrimitiveDesc();
-    auto scratchpadMem = getScratchPadMem(pd);
+    auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc());
     primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive();
 #ifdef CPU_DEBUG_CAPS
     if (result.second == CacheEntryBase::LookUpStatus::Miss) {
@@ -1094,9 +1093,7 @@ Deconvolution::DeconvExecutorDefault::DeconvExecutorDefault(const dnnl::convolut
                                                             const dnnl::memory::desc& inMemDesc,
                                                             const dnnl::memory::desc& weightMemDesc,
                                                             const dnnl::memory::desc& outMemDesc,
-                                                            const dnnl::engine& engine) {
-    execPrim = dnnl::convolution_backward_data(pd);
-
+                                                            const dnnl::engine& engine) : DnnlExecutor(pd) {
     if (inMemDesc != pd.diff_dst_desc()) {
         inputReorders.insert({DNNL_ARG_DIFF_DST, IntermReorder(inMemDesc, pd.diff_dst_desc(), engine)});
     }
@@ -1114,9 +1111,7 @@ Deconvolution::DeconvExecutorInt8::DeconvExecutorInt8(const dnnl::deconvolution_
                                                       const dnnl::memory::desc& inMemDesc,
                                                       const dnnl::memory::desc& weightMemDesc,
                                                       const dnnl::memory::desc& outMemDesc,
-                                                      const dnnl::engine& engine) {
-    execPrim = dnnl::deconvolution_forward(pd);
-
+                                                      const dnnl::engine& engine) : DnnlExecutor(pd) {
     if (inMemDesc != pd.src_desc()) {
         inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
     }
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 33141cb48e3073..18e2353d40395a 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -333,7 +333,7 @@ void FullyConnected::prepareParams() {
             }
 
             if (prim_desc) {
-                execPtr = std::make_shared<ExecutorConv1x1>(prim_desc);
+                execPtr = std::make_shared<DnnlExecutor>(prim_desc);
             }
         }
         // fallback
@@ -388,7 +388,7 @@ void FullyConnected::prepareParams() {
             }
         }
 
-        execPtr = std::make_shared<ExecutorInnerProduct>(prim_desc);
+        execPtr = std::make_shared<DnnlExecutor>(prim_desc);
     }
     return execPtr;
 };
@@ -432,10 +432,8 @@ void FullyConnected::prepareParams() {
             primArgs[DNNL_ARG_BIAS] = biasMemPtr->GetPrimitive();
         }
 
-        if (!scratchPad || !scratchPad->getDesc().isCompatible(*(execPtr->getScratchPadDesc()))) {
-            scratchPad = context->getScratchPad()->createScratchPadMem(execPtr->getScratchPadDesc());
-        }
-        primArgs[DNNL_ARG_SCRATCHPAD] = scratchPad->GetPrimitive();
+        auto schratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc());
+        primArgs[DNNL_ARG_SCRATCHPAD] = schratchpadMem->GetPrimitive();
 #ifdef CPU_DEBUG_CAPS
         if (result.second == CacheEntryBase::LookUpStatus::Miss) {
             DEBUG_LOG("verbose##", getName(), "##", pd->info(), "\n");
@@ -914,22 +912,6 @@ bool FullyConnected::canBeExecutedInConv1x1() const {
     return retVal;
 }
 
-FullyConnected::ExecutorInnerProduct::ExecutorInnerProduct(const dnnl::inner_product_forward::primitive_desc& pd) {
-    src_md = DnnlExtensionUtils::makeDescriptor(pd.src_desc());
-    dst_md = DnnlExtensionUtils::makeDescriptor(pd.dst_desc());
-    wghts_md = DnnlExtensionUtils::makeDescriptor(pd.weights_desc());
-    scrch_md = DnnlExtensionUtils::makeDescriptor(pd.scratchpad_desc());
-    execPrim = dnnl::inner_product_forward(pd);
-}
-
-FullyConnected::ExecutorConv1x1::ExecutorConv1x1(const dnnl::convolution_forward::primitive_desc& pd) {
-    src_md = DnnlExtensionUtils::makeDescriptor(pd.src_desc());
-    dst_md = DnnlExtensionUtils::makeDescriptor(pd.dst_desc());
-    wghts_md = DnnlExtensionUtils::makeDescriptor(pd.weights_desc());
-    scrch_md = DnnlExtensionUtils::makeDescriptor(pd.scratchpad_desc());
-    execPrim = dnnl::convolution_forward(pd);
-}
-
 MemoryPtr FullyConnected::prepareWeightMemory(DnnlMemoryDescPtr weightDesc) {
     if (!getParentEdgeAt(1)->getParent()->isConstant())
         IE_THROW() << "Weight input is not const for node " << getName() << ".";
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h
index 30fbbcb38b9884..3f0983f2fc2a77 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h
@@ -90,16 +90,6 @@ class FullyConnected : public Node {
     std::unordered_map privateWeightCache;
     dnnl::primitive_attr attr;
 
-    class ExecutorInnerProduct : public DnnlExecutor {
-    public:
-        ExecutorInnerProduct(const dnnl::inner_product_forward::primitive_desc& pd);
-    };
-
-    class ExecutorConv1x1 : public DnnlExecutor {
-    public:
-        ExecutorConv1x1(const dnnl::convolution_forward::primitive_desc& pd);
-    };
-
     static dnnl::convolution_forward::primitive_desc
     createDescriptorInternalForConv(DnnlMemoryDescCPtr inputDescPtr,
                                     DnnlMemoryDescCPtr weightDescPtr,
@@ -116,8 +106,6 @@ class FullyConnected : public Node {
     float minSparseRate = 1.f;
     float weiSparseRate = 0.f;
     bool useSparseWeightsDecompression();
-
-    MemoryPtr scratchPad;
 };
 
 }   // namespace node
diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h
index d3c05b721da6f0..71ae6b91e7660c 100644
--- a/src/plugins/intel_cpu/src/nodes/input.h
+++ b/src/plugins/intel_cpu/src/nodes/input.h
@@ -31,6 +31,7 @@ class Input : public Node {
     void withMeanImage();
     MemoryCPtr getMemoryPtr() const;
 
+    void execute(dnnl::stream strm) override {}
     void executeDynamicImpl(dnnl::stream strm) override {}
     bool isExecutable() const override {
         return false;
diff --git a/src/plugins/intel_cpu/src/nodes/interaction.h b/src/plugins/intel_cpu/src/nodes/interaction.h
index 661cfc22de8b88..122ae3b2addc8c 100644
--- a/src/plugins/intel_cpu/src/nodes/interaction.h
+++ b/src/plugins/intel_cpu/src/nodes/interaction.h
@@ -60,6 +60,7 @@ class Interaction : public Node {
 
 private:
     void execRef(dnnl::stream strm);
+    dnnl::primitive prim;
     size_t batchSize = 0;
     size_t featureSize = 0;
     size_t inputSizes = 0;
diff --git a/src/plugins/intel_cpu/src/nodes/lrn.cpp b/src/plugins/intel_cpu/src/nodes/lrn.cpp
index 5cc0dce6230eae..f5f8995626d3e4 100644
--- a/src/plugins/intel_cpu/src/nodes/lrn.cpp
+++ b/src/plugins/intel_cpu/src/nodes/lrn.cpp
@@ -182,7 +182,7 @@ void Lrn::prepareParams() {
     LrnKey key = {inpDesc, selected_pd->getImplementationType(), alg, size, k, alpha, beta, attr};
     auto engine = getEngine();
 
-    auto builder = [&engine](const LrnKey& key) -> dnnl::primitive {
+    auto builder = [&engine](const LrnKey& key) -> executorPtr {
         auto desc = std::make_shared(
             engine,
             dnnl::prop_kind::forward_inference,
@@ -205,25 +205,24 @@ void Lrn::prepareParams() {
                 break;
             }
             if (!itpd.next_impl())
-                return dnnl::lrn_forward();
+                return nullptr;
         }
-        return dnnl::lrn_forward(prim_desc);
+        return std::make_shared<DnnlExecutor>(prim_desc);
     };
 
     auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(key, builder);
 
-    if (!result.first) {
+    execPtr = result.first;
+    if (!execPtr) {
         IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
     }
 
-    prim = result.first;
-
-    auto pd = prim.get_primitive_desc();
-    auto scratchpadMem = getScratchPadMem(pd);
+    auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc());
 
-    auto src = srcMemPtr->GetPrimitive();
-    auto dst = dstMemPtr->GetPrimitive();
-    primArgs = { {DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()} };
+    primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive();
+    primArgs[DNNL_ARG_SRC] = srcMemPtr->GetPrimitive();
+    primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive();
 }
 
 bool Lrn::created() const {
@@ -250,6 +249,14 @@ void Lrn::createDescriptor(const std::vector &inputDesc,
     descs.push_back(desc);
 }
 
+void Lrn::execute(dnnl::stream strm) {
+    if (execPtr) {
+        execPtr->exec(primArgs, strm);
+    } else {
+        IE_THROW() << errorPrefix << " doesn't have an initialized executor";
+    }
+}
+
 void Lrn::executeDynamicImpl(dnnl::stream strm) {
     execute(strm);
 }
diff --git a/src/plugins/intel_cpu/src/nodes/lrn.h b/src/plugins/intel_cpu/src/nodes/lrn.h
index b821fa8b70e521..c1635261f70faf 100644
--- a/src/plugins/intel_cpu/src/nodes/lrn.h
+++ b/src/plugins/intel_cpu/src/nodes/lrn.h
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include "common/dnnl_executor.h"
 
 namespace ov {
 namespace intel_cpu {
@@ -31,11 +32,14 @@ class Lrn : public Node {
     }
 
     void prepareParams() override;
+    void execute(dnnl::stream strm) override;
     void executeDynamicImpl(dnnl::stream strm) override;
 
     static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
 
 private:
+    using executorPtr = std::shared_ptr<DnnlExecutor>;
+    executorPtr execPtr = nullptr;
     dnnl::algorithm alg;
     size_t size = 1;
     int k = 1;
diff --git a/src/plugins/intel_cpu/src/nodes/matmul.cpp b/src/plugins/intel_cpu/src/nodes/matmul.cpp
index 83e4bd1a179294..5bfe42500adb15 100644
--- a/src/plugins/intel_cpu/src/nodes/matmul.cpp
+++ b/src/plugins/intel_cpu/src/nodes/matmul.cpp
@@ -613,7 +613,7 @@ void MatMul::prepareParams() {
 
     auto engine = getEngine();
 
-    auto builder = [&engine](const MatMulKey& key) -> dnnl::primitive {
+    auto builder = [&engine](const MatMulKey& key) -> executorPtr {
         dnnl::matmul::primitive_desc matmul_desc;
 
         if (key.bias) {
@@ -653,22 +653,20 @@ void MatMul::prepareParams() {
                 break;
             }
         }
-        return matmul(prim_desc);
+        return std::make_shared<DnnlExecutor>(prim_desc);
     };
 
     auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(key, builder);
 
-    if (!result.first) {
+    execPtr = result.first;
+    if (!execPtr) {
         IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
     }
 
-    prim = result.first;
+    auto schratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc());
 
-    auto pd = prim.get_primitive_desc();
-    auto scratchpadMem = getScratchPadMem(pd);
-
-    primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive();
+    primArgs[DNNL_ARG_SCRATCHPAD] = schratchpadMem->GetPrimitive();
     primArgs[DNNL_ARG_SRC_0] = src0MemPtr->GetPrimitive();
     primArgs[DNNL_ARG_WEIGHTS_0] = src1MemPtr->GetPrimitive();
     primArgs[DNNL_ARG_DST] = dstMemPtr->GetPrimitive();
@@ -678,6 +676,14 @@ void MatMul::prepareParams() {
     appendPostOpArgs(*attr, primArgs, postOpsArgs);
 }
 
+void MatMul::execute(dnnl::stream strm) {
+    if (execPtr) {
+        execPtr->exec(primArgs, strm);
+    } else {
+        IE_THROW() << errorPrefix << " doesn't have an initialized executor";
+    }
+}
+
 void MatMul::executeDynamicImpl(dnnl::stream strm) {
     execute(strm);
 }
diff --git a/src/plugins/intel_cpu/src/nodes/matmul.h b/src/plugins/intel_cpu/src/nodes/matmul.h
index 5c8902483972b8..16d2140cbe5eee 100644
--- a/src/plugins/intel_cpu/src/nodes/matmul.h
+++ b/src/plugins/intel_cpu/src/nodes/matmul.h
@@ -10,6 +10,7 @@
 #include
 #include
 #include "memory_desc/dnnl_blocked_memory_desc.h"
+#include "common/dnnl_executor.h"
 
 namespace ov {
 namespace intel_cpu {
@@ -38,6 +39,7 @@ class MatMul : public Node {
     }
 
     void prepareParams() override;
+    void execute(dnnl::stream strm) override;
    void executeDynamicImpl(dnnl::stream strm) override;
 
     static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
@@ -48,6 +50,8 @@ class MatMul : public Node {
     AttrPtr initPrimitiveAttr(const VectorDims& dims);
 
 private:
+    using executorPtr = std::shared_ptr<DnnlExecutor>;
+    executorPtr execPtr = nullptr;
     dnnl::memory::desc getBiasDescFrom(const DnnlMemoryDescCPtr outMemDesc);
     std::pair makeDummyInputShapes(const Shape& in0, const Shape& in1) const;
 
diff --git a/src/plugins/intel_cpu/src/nodes/pooling.cpp b/src/plugins/intel_cpu/src/nodes/pooling.cpp
index fc56f8d812ce54..b31c358911904a 100644
--- a/src/plugins/intel_cpu/src/nodes/pooling.cpp
+++ b/src/plugins/intel_cpu/src/nodes/pooling.cpp
@@ -369,7 +369,7 @@ void Pooling::prepareParams() {
                       alg,
                       selected_pd->getImplementationType()};
     auto engine = getEngine();
 
-    auto builder = [&engine](const PoolingKey& key) -> dnnl::primitive {
+    auto builder = [&engine](const PoolingKey& key) -> executorPtr {
         primitive_desc_iterator itpd = createDescriptorHelper(engine,
                                                               key.inp->getDnnlDesc(),
                                                               key.out->getDnnlDesc(),
@@ -393,27 +393,34 @@ void Pooling::prepareParams() {
             break;
         }
 
-        return pooling_forward(prim_desc);
+        return std::make_shared<DnnlExecutor>(prim_desc);
     };
 
     auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(key, builder);
 
-    if (!result.first) {
+    execPtr = result.first;
+
+    if (!execPtr) {
         IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
     }
 
-    prim = result.first;
-
-    auto pd = prim.get_primitive_desc();
-    auto scratchpadMem = getScratchPadMem(pd);
-    auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
-    auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
-    primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()}};
+    auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc());
+    primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive();
+    primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
+    primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
 
     Node::appendPostOpArgs(*attr, primArgs, postOpsArgs);
 }
 
+void Pooling::execute(dnnl::stream strm) {
+    if (execPtr) {
+        execPtr->exec(primArgs, strm);
+    } else {
+        IE_THROW() << "Pooling node with name '" << getName() << "' doesn't have an initialized executor";
+    }
+}
+
 void Pooling::executeDynamicImpl(dnnl::stream strm) {
     execute(strm);
 }
diff --git a/src/plugins/intel_cpu/src/nodes/pooling.h b/src/plugins/intel_cpu/src/nodes/pooling.h
index 2daaa3f9a528e8..6d76e3d48980a2 100644
--- a/src/plugins/intel_cpu/src/nodes/pooling.h
+++ b/src/plugins/intel_cpu/src/nodes/pooling.h
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include "common/dnnl_executor.h"
 
 namespace ov {
 namespace intel_cpu {
@@ -30,6 +31,7 @@ class Pooling : public Node {
     }
 
     void prepareParams() override;
+    void execute(dnnl::stream strm) override;
     void executeDynamicImpl(dnnl::stream strm) override;
 
     static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
@@ -38,6 +40,9 @@ class Pooling : public Node {
     AttrPtr initPrimitiveAttr() override;
 
 private:
+    using executorPtr = std::shared_ptr<DnnlExecutor>;
+    executorPtr execPtr = nullptr;
+
     void setPostOps(dnnl::primitive_attr &attr);
 
     void initEffectiveAttributes(const Shape &inDims, const Shape &outDims);
diff --git a/src/plugins/intel_cpu/src/nodes/reorder.cpp b/src/plugins/intel_cpu/src/nodes/reorder.cpp
index 2efcd0e44b6e69..5dd5674abd9814 100644
--- a/src/plugins/intel_cpu/src/nodes/reorder.cpp
+++ b/src/plugins/intel_cpu/src/nodes/reorder.cpp
@@ -336,7 +336,11 @@ void Reorder::execute(dnnl::stream strm) {
         src_blocked->setDataHandle(getParentEdgeAt(0)->getMemory().GetData());
         dst_blocked->setDataHandle(getChildEdgeAt(0)->getMemory().GetData());
 
-        Node::execute(strm);
+        if (prim) {
+            prim.execute(strm, primArgs);
+        } else {
+            IE_THROW() << "Reorder node with name " << getName() << " doesn't have an initialized primitive";
+        }
     }
 }
 
diff --git a/src/plugins/intel_cpu/src/nodes/reorder.h b/src/plugins/intel_cpu/src/nodes/reorder.h
index f6091a6c91bd43..4bd3fa8fc3211b 100644
--- a/src/plugins/intel_cpu/src/nodes/reorder.h
+++ b/src/plugins/intel_cpu/src/nodes/reorder.h
@@ -66,6 +66,7 @@ class Reorder : public Node {
     static void reorderData(const Memory &input, const Memory &output, MultiCachePtr cache = nullptr);
 
 private:
+    dnnl::reorder::primitive prim;
     std::shared_ptr input;
     std::shared_ptr output;
 
diff --git a/src/plugins/intel_cpu/src/nodes/rnn.cpp b/src/plugins/intel_cpu/src/nodes/rnn.cpp
index e7b97b9355d214..4ed7ed7a4e5550 100644
--- a/src/plugins/intel_cpu/src/nodes/rnn.cpp
+++ b/src/plugins/intel_cpu/src/nodes/rnn.cpp
@@ -1062,7 +1062,7 @@ void RNN::prepareParams() {
     RNNKey key = { inDataDescs, outDataDescs, wDescs, cell_type, cell_act, direction, *attr };
 
     auto engine = getEngine();
 
-    auto builder = [&engine](const RNNKey& key) -> dnnl::primitive {
+    auto builder = [&engine](const RNNKey& key) -> executorPtr {
         const auto descPtr = createPrimitiveDescriptor(engine,
                                                        key.cellType,
                                                        key.cellAct,
@@ -1072,23 +1072,22 @@ void RNN::prepareParams() {
                                                        key.wDescs,
                                                        key.attr);
 
-        return dnnl::primitive(descPtr);
+        return std::make_shared<DnnlExecutor>(descPtr);
     };
 
     auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(key, builder);
 
-    if (!result.first) {
+    execPtr = result.first;
+
+    if (!execPtr) {
         IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
     }
 
-    prim = result.first;
-
-    auto pd = prim.get_primitive_desc();
-    scratchpadMem = getScratchPadMem(pd);
+    scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc());
 
     if (!wasMemoryPrepared || wFormatWasChanged) {
-        auto pd = prim.get_primitive_desc();
+        auto pd = execPtr->getPrimitiveDesc();
         auto query_weights_md = [&](int idx = 0) -> dnnl::memory::desc {
             auto what = dnnl::convert_to_c(dnnl::query::weights_md);
             const_dnnl_memory_desc_t cdesc = dnnl_primitive_desc_query_md(pd, what, idx);
@@ -1118,7 +1117,7 @@ std::shared_ptr RNN::getDstMemDesc(dnnl::primitive_desc_iterator& pr
 }
 
 void RNN::execute(dnnl::stream strm) {
-    if (!prim)
+    if (!execPtr)
         THROW_ERROR << "does not have initialized primitive to execute.";
 
     const auto src_data_mem = getParentEdgeAt(0)->getMemoryPtr();
@@ -1160,7 +1159,7 @@ void RNN::execute(dnnl::stream strm) {
         }
     }
 
-    prim.execute(strm, args);
+    execPtr->exec(args, strm);
 }
 
 void RNN::executeDynamicImpl(dnnl::stream strm) {
diff --git a/src/plugins/intel_cpu/src/nodes/rnn.h b/src/plugins/intel_cpu/src/nodes/rnn.h
index b94d026adcf75c..dbe4f9769d14b7 100644
--- a/src/plugins/intel_cpu/src/nodes/rnn.h
+++ b/src/plugins/intel_cpu/src/nodes/rnn.h
@@ -11,6 +11,8 @@
 #include
 #include
 
+#include "common/dnnl_executor.h"
+
 namespace ov {
 namespace intel_cpu {
 namespace node {
@@ -66,6 +68,9 @@ class RNN : public Node {
 
     void copyWeightsData();
 
+    using executorPtr = std::shared_ptr<DnnlExecutor>;
+    executorPtr execPtr = nullptr;
+
     /** Specify mode Cell or Seq. true - Cell, false - Seq */
     bool is_cell = false;
diff --git a/src/plugins/intel_cpu/src/nodes/softmax.cpp b/src/plugins/intel_cpu/src/nodes/softmax.cpp
index 7f3d3c337e5792..65176e4a7c7907 100644
--- a/src/plugins/intel_cpu/src/nodes/softmax.cpp
+++ b/src/plugins/intel_cpu/src/nodes/softmax.cpp
@@ -170,7 +170,7 @@ void SoftMax::prepareParams() {
     SoftmaxKey key = {inpDesc, selected_pd->getImplementationType(), axis, *attr};
     auto engine = getEngine();
 
-    auto builder = [&engine](const SoftmaxKey& key) -> dnnl::primitive {
+    auto builder = [&engine](const SoftmaxKey& key) -> executorPtr {
         softmax_forward::primitive_desc prim_desc;
         auto desc = std::make_shared(
             engine,
@@ -196,26 +196,32 @@ void SoftMax::prepareParams() {
                 break;
             }
             if (!itpd.next_impl())
-                return softmax_forward();
+                return nullptr;
         }
-        return softmax_forward(prim_desc);
+        return std::make_shared<DnnlExecutor>(prim_desc);
     };
 
     auto cache = context->getParamsCache();
     auto result = cache->getOrCreate(key, builder);
 
-    if (!result.first) {
+    execPtr = result.first;
+    if (!execPtr) {
         IE_THROW() << "Primitive descriptor was not found for node " << getName() << ".";
     }
 
-    prim = result.first;
+    auto scratchpadMem = getScratchPadMem(execPtr->getScratchPadDesc());
 
-    auto pd = prim.get_primitive_desc();
-    auto scratchpadMem = getScratchPadMem(pd);
+    primArgs[DNNL_ARG_SCRATCHPAD] = scratchpadMem->GetPrimitive();
+    primArgs[DNNL_ARG_SRC] = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
+    primArgs[DNNL_ARG_DST] = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
+}
 
-    auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
-    auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive();
-    primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_DST, dst}, {DNNL_ARG_SCRATCHPAD, scratchpadMem->GetPrimitive()}};
+void SoftMax::execute(dnnl::stream strm) {
+    if (execPtr) {
+        execPtr->exec(primArgs, strm);
+    } else {
+        IE_THROW() << "Softmax node with name '" << getName() << "' doesn't have an initialized executor";
+    }
 }
 
 void SoftMax::executeDynamicImpl(dnnl::stream strm) {
diff --git a/src/plugins/intel_cpu/src/nodes/softmax.h b/src/plugins/intel_cpu/src/nodes/softmax.h
index 78fc51115a18d7..1a472075168406 100644
--- a/src/plugins/intel_cpu/src/nodes/softmax.h
+++ b/src/plugins/intel_cpu/src/nodes/softmax.h
@@ -11,6 +11,8 @@
 #include
 #include
 
+#include "common/dnnl_executor.h"
+
 namespace ov {
 namespace intel_cpu {
 namespace node {
@@ -26,11 +28,14 @@ class SoftMax : public Node {
     bool created() const override;
     AttrPtr initPrimitiveAttr() override;
     void prepareParams() override;
+    void execute(dnnl::stream strm) override;
     void executeDynamicImpl(dnnl::stream strm) override;
 
     static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
 
 private:
+    using executorPtr = std::shared_ptr<DnnlExecutor>;
+    executorPtr execPtr = nullptr;
     size_t axis = 0;
 };
 
diff --git a/src/plugins/intel_cpu/src/nodes/transpose.h b/src/plugins/intel_cpu/src/nodes/transpose.h
index b13bc1a0a745ab..03988d24fe8367 100644
--- a/src/plugins/intel_cpu/src/nodes/transpose.h
+++ b/src/plugins/intel_cpu/src/nodes/transpose.h
@@ -48,6 +48,7 @@ class Transpose : public Node {
     };
     using executorPtr = std::shared_ptr<TransposeExecutor>;
     executorPtr execPtr = nullptr;
+    dnnl::primitive prim;
 
     struct TransposeJitExecutor : public TransposeExecutor {
         TransposeJitExecutor(const PermuteParams& params);

From 765f5e1a0044ebb52b056202df6571db1aa51fcb Mon Sep 17 00:00:00 2001
From: Maksim Kutakov
Date: Tue, 21 Mar 2023 18:41:08 +0100
Subject: [PATCH 5/5] Use stored md for interim reorders

---
 src/plugins/intel_cpu/src/nodes/conv.cpp   | 12 ++++++------
 src/plugins/intel_cpu/src/nodes/deconv.cpp | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp
index 002af7c98b24f4..25655838aa960c 100644
--- a/src/plugins/intel_cpu/src/nodes/conv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -1513,16 +1513,16 @@ Convolution::ConvolutionExecutor::ConvolutionExecutor(const dnnl::convolution_fo
                                                       const dnnl::memory::desc& weightMemDesc,
                                                       const dnnl::memory::desc& outMemDesc,
                                                       const dnnl::engine& engine) : DnnlExecutor(pd) {
-    if (inMemDesc != pd.src_desc()) {
-        inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
+    if (inMemDesc != getDnnlSrcDesc()) {
+        inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, getDnnlSrcDesc(), engine)});
     }
 
-    if (weightMemDesc != pd.weights_desc()) {
-        inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
+    if (weightMemDesc != getDnnlWeightDesc()) {
+        inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, getDnnlWeightDesc(), engine)});
     }
 
-    if (outMemDesc != pd.dst_desc()) {
-        outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)});
+    if (outMemDesc != getDnnlDstDesc()) {
+        outputReorders.insert({DNNL_ARG_DST, IntermReorder(getDnnlDstDesc(), outMemDesc, engine)});
     }
 }
 
diff --git a/src/plugins/intel_cpu/src/nodes/deconv.cpp b/src/plugins/intel_cpu/src/nodes/deconv.cpp
index a8dbae4d00a471..2395a4a6af2a8d 100644
--- a/src/plugins/intel_cpu/src/nodes/deconv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/deconv.cpp
@@ -1112,16 +1112,16 @@ Deconvolution::DeconvExecutorInt8::DeconvExecutorInt8(const dnnl::deconvolution_
                                                       const dnnl::memory::desc& weightMemDesc,
                                                       const dnnl::memory::desc& outMemDesc,
                                                       const dnnl::engine& engine) : DnnlExecutor(pd) {
-    if (inMemDesc != pd.src_desc()) {
-        inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, pd.src_desc(), engine)});
+    if (inMemDesc != getDnnlSrcDesc()) {
+        inputReorders.insert({DNNL_ARG_SRC, IntermReorder(inMemDesc, getDnnlSrcDesc(), engine)});
     }
 
-    if (weightMemDesc != pd.weights_desc()) {
-        inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, pd.weights_desc(), engine)});
+    if (weightMemDesc != getDnnlWeightDesc()) {
+        inputReorders.insert({DNNL_ARG_WEIGHTS, IntermReorder(weightMemDesc, getDnnlWeightDesc(), engine)});
     }
 
-    if (outMemDesc != pd.dst_desc()) {
-        outputReorders.insert({DNNL_ARG_DST, IntermReorder(pd.dst_desc(), outMemDesc, engine)});
+    if (outMemDesc != getDnnlDstDesc()) {
+        outputReorders.insert({DNNL_ARG_DST, IntermReorder(getDnnlDstDesc(), outMemDesc, engine)});
     }
 }