From 1ec5f8cb0b2c248a57d11c84a02fa1242314dfe2 Mon Sep 17 00:00:00 2001 From: Gorokhov Dmitriy <dmitry.gorokhov@intel.com> Date: Wed, 10 Mar 2021 09:42:17 +0300 Subject: [PATCH] Enabled Convolution + post ops fusing (#20) --- .../src/mkldnn_plugin/CMakeLists.txt | 4 - .../src/mkldnn_plugin/mkldnn_graph.cpp | 30 - .../src/mkldnn_plugin/mkldnn_graph.h | 1 - .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 595 ++++++++---------- .../mkldnn_plugin/mkldnn_graph_optimizer.h | 6 +- .../src/mkldnn_plugin/mkldnn_node.cpp | 35 +- .../src/mkldnn_plugin/mkldnn_node.h | 69 +- .../src/mkldnn_plugin/nodes/gather.cpp | 6 +- .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp | 108 ++-- .../mkldnn_plugin/nodes/mkldnn_conv_node.h | 3 +- .../nodes/mkldnn_eltwise_node.cpp | 87 +-- .../mkldnn_plugin/nodes/mkldnn_eltwise_node.h | 1 + .../mkldnn_plugin/nodes/mkldnn_gemm_node.cpp | 4 +- .../mkldnn_plugin/nodes/mkldnn_input_node.cpp | 4 +- .../mkldnn_plugin/nodes/mkldnn_input_node.h | 4 + .../mkldnn_plugin/nodes/mkldnn_mvn_node.cpp | 6 +- .../nodes/mkldnn_normalize_node.cpp | 16 +- .../mkldnn_plugin/nodes/mkldnn_pad_node.cpp | 2 +- .../nodes/mkldnn_pooling_node.cpp | 6 +- .../nodes/mkldnn_reshape_node.cpp | 4 +- .../nodes/mkldnn_scatter_update_node.cpp | 6 +- .../nodes/mkldnn_softmax_node.cpp | 2 +- .../nodes/mkldnn_transpose_node.cpp | 4 +- .../src/mkldnn_plugin/utils/ngraph_utils.hpp | 12 + .../skip_tests_config.cpp | 5 +- .../cpu/single_layer_tests/normalize.cpp | 4 +- .../cpu/test_utils/fusing_test_utils.hpp | 8 +- 27 files changed, 513 insertions(+), 519 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index 1cd7941eb61d53..29f24e82cbab3a 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -99,10 +99,6 @@ set(LAYERS # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unique.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unsqueeze.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/softmax.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/emitter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_eltwise_emitters.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_mkldnn_emitters.cpp -# # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax_imp.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 1e6423a48ca62c..cda74fbf103794 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -265,11 +265,6 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana op2node[op] = node; - const auto& rtInfo = op->get_rt_info(); - if (rtInfo.count("originalLayersNames")) { - node->originalLayers = getRTInfoValue(rtInfo, "originalLayersNames"); - } - for (size_t port = 0; port < op->get_input_size(); port++) { auto parentOp = op->get_input_node_shared_ptr(port); @@ -349,7 +344,6 @@ void MKLDNNGraph::InitGraph() { CreatePrimitives(); - SetOriginalLayerNames(); // // if (!config.dumpToDot.empty()) // dumpToDotFile(config.dumpToDot + "_init.dot"); @@ -381,30 +375,6 @@ void MKLDNNGraph::InitGraph() { ExecuteConstantNodesOnly(); } -void MKLDNNGraph::SetOriginalLayerNames() { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames"); - // Do it before cleanup. 
Because it will lose original layers information - for (auto &graphNode : graphNodes) { - auto nodeType = graphNode->getType(); - if (nodeType == Reorder || nodeType == Output) continue; - - if (graphNode->getOriginalLayers().empty()) { - graphNode->addOriginalLayer(graphNode->getOriginalName()); - } - - if (!graphNode->getFusedWith().empty() || !graphNode->getMergeWith().empty()) { - // Original layer names - std::vector<MKLDNNNodePtr> internal = graphNode->getFusedWith(); - auto &merged = graphNode->getMergeWith(); - internal.insert(internal.end(), merged.begin(), merged.end()); - - for (auto &sub_node : internal) { - graphNode->addOriginalLayer(sub_node->getOriginalName()); - } - } - } -} - void MKLDNNGraph::InitNodes() { OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes"); for (auto &node : graphNodes) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 47c1bdc35ecc2f..2383221ff6325e 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -200,7 +200,6 @@ class MKLDNNGraph { void AllocateWithReuse(); void CreatePrimitives(); void ExecuteConstantNodesOnly(); - void SetOriginalLayerNames(); void do_before(const std::string &dir, const MKLDNNNodePtr &node); void do_after(const std::string &dir, const MKLDNNNodePtr &node); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index ea4cc191ecbbaf..9d6183991f2870 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -54,6 +54,12 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { // MergeTwoEqualScaleShifts(graph); // graph.RemoveDroppedNodes(); + FuseConvolutionAndBias(graph); + graph.RemoveDroppedNodes(); + + FuseMultiplyAndAdd(graph); + graph.RemoveDroppedNodes(); + FuseBroadcastAndEltwise(graph); graph.RemoveDroppedNodes(); @@ -67,36 +73,17 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { // FuseScaleShiftAndQuantize(graph); // graph.RemoveDroppedNodes(); -// TODO [NM]: do we still have networks that requires this optimizations? Preferable should be removed. -// MergeGroupConvolution(graph); -// graph.RemoveDroppedNodes(); - // TODO [NM]: transformation should be implemented w/o using of CNNLayer // FuseConvolutionAndZeroPoints(graph); // graph.RemoveDroppedNodes(); -// TODO [NM]: transformation should be implemented w/o using of CNNLayer -// FuseConvolutionAndDepthwise(graph); -// graph.RemoveDroppedNodes(); - -// TODO [NM]: transformation should be implemented w/o using of CNNLayer -// FuseConvolutionAndActivation(graph); -// graph.RemoveDroppedNodes(); - -// TODO [NM]: transformation should be implemented w/o using of CNNLayer -// FuseConvolutionAndDepthwise(graph); -// graph.RemoveDroppedNodes(); - - FuseConvolutionAndQuantize(graph); +// TODO [NM]: While fusing simple operation into any node (except Eltwise) we need to check that other inputs are Constant nodes. 
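// A minimal sketch of that check, kept next to the TODO. It assumes only node APIs
// already used elsewhere in this patch (getParentEdges, getParentEdgesAtPort, getType,
// isConstant); the helper name is hypothetical and is not part of this change:
//   static bool secondaryInputsAreConstant(const MKLDNNNodePtr &child) {
//       for (size_t port = 1; port < child->getParentEdges().size(); port++) {
//           const auto producer = child->getParentEdgesAtPort(port)[0]->getParent();
//           if (producer->getType() != Input || !producer->isConstant())
//               return false;  // non-constant secondary input -> fusing must be rejected
//       }
//       return true;
//   }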
+ FuseConvolutionAndSimpleOperation(graph); graph.RemoveDroppedNodes(); graph.SortTopologically(); graph.RemoveDroppedEdges(); -// TODO [NM]: transformation should be implemented w/o using of CNNLayer -// FuseConvolutionAndDepthwise(graph); -// graph.RemoveDroppedNodes(); - FusePoolingAndQuantize(graph); graph.RemoveDroppedNodes(); @@ -116,9 +103,8 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { FuseConvolutionSumAndConvolutionSumActivation(graph); graph.RemoveDroppedNodes(); -// TODO [NM]: transformation should be implemented w/o using of CNNLayer -// FuseConvolutionAndSimpleOperation(graph); -// graph.RemoveDroppedNodes(); + FuseConvolutionAndSimpleOperation(graph); + graph.RemoveDroppedNodes(); // TODO [NM]: transformation should be implemented w/o using of CNNLayer // FuseFullyConnectedAndSimpleOperation(graph); @@ -158,6 +144,229 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap graph.RemoveDroppedEdges(); } +void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { + auto& graphNodes = graph.GetNodes(); + + auto isSutableParentNode = [](MKLDNNNodePtr node) { + return node->getType() == Convolution && + node->getChildEdges().size() == 1 && + node->getFusedWith().empty(); + }; + + auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) + return false; + + auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent(); + if (biasNode->getChildEdges().size() != 1) + return false; + + auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getDims(); + auto biasDims = biasNode->getChildEdgesAtPort(0)[0]->getDims(); + // TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasing) and per-channel cases. + // Most of the real models contain per-channel bias, so we need to reavaluate the need to support per-tensor variant. 
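// The per-channel contract enforced by the checks below, restated as a sketch with
// concrete shapes (helper name illustrative; MKLDNNDims API as used in this function):
//   static bool isPerChannelBias(const MKLDNNDims &convOutDims, const MKLDNNDims &biasDims) {
//       if (biasDims.ndims() != convOutDims.ndims() || biasDims.ndims() < 2)
//           return false;
//       if (biasDims[0] != 1 || biasDims[1] != convOutDims[1])
//           return false;                      // second dim must match the channel count
//       for (int i = 2; i < biasDims.ndims(); i++)
//           if (biasDims[i] != 1)
//               return false;
//       return true;
//   }
// e.g. a {1, 64, 1, 1} bias fuses into a {1, 64, 56, 56} conv output, while the
// per-tensor shape {1, 1, 1, 1} is rejected unless the output has a single channel.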
+ if (convOutDims.ndims() != biasDims.ndims() || biasDims.ndims() < 2) + return false; + + if (biasDims[0] != 1 || biasDims[1] != convOutDims[1]) + return false; + + for (int i = 2; i < biasDims.ndims(); i++) { + if (biasDims[i] != 1) + return false; + } + + return true; + }; + + auto parent = graphNodes.begin(); + while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSutableParentNode(parentNode)) { + parent++; + continue; + } + + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!isSutableChildNode(parentNode, childNode)) { + parent++; + continue; + } + + auto childs = childNode->childEdges; + auto parents = childNode->parentEdges; + + for (size_t i = 0; i < parents.size(); i++) { + auto p_edge = parents[i].lock(); + if (!p_edge) continue; + auto parent = p_edge->getParent(); + if (!parent) continue; + + if (parent == parentNode) { + for (size_t j = 0; j < childs.size(); j++) { + if (!childs[j].lock()) + continue; + auto child = childs[j].lock()->getChild(); + if (!child) + continue; + + MKLDNNEdgePtr &remEdge = p_edge; + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + remEdge = childs[j].lock(); + int outNum = 0; + if (remEdge) { + outNum = remEdge->getOutputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + auto &graphEdges = graph.GetEdges(); + graphEdges.push_back(newEdge); + parent->addEdge(newEdge); + } + } else { + MKLDNNEdgePtr &remEdge = p_edge; + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + + auto parentEltwise = parentNode; + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); + auto &graphEdges = graph.GetEdges(); + graphEdges.push_back(newEdge); + parent->addEdge(newEdge); + + auto newBiasDim = parent->outDims[inNum][1]; + parent->outDims[inNum] = MKLDNNDims({newBiasDim}); + parentEltwise->inDims.push_back(parent->outDims[0]); + } + } + + graph.DropNode(childNode); + + parentNode->addOriginalInputPrecision(childNode->getOriginalInputPrecisionAtPort(1)); + } +} + +void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { + auto& graphNodes = graph.GetNodes(); + + auto isSutableSecondInput = [](MKLDNNNodePtr node, MKLDNNDims dataDims) { + auto secondInputDims = node->outDims[0]; + if (secondInputDims.ndims() != dataDims.ndims() || secondInputDims.ndims() < 2) + return false; + + if (secondInputDims[0] != 1 || secondInputDims[1] != dataDims[1]) + return false; + + for (size_t i = 2; i < secondInputDims.ndims(); i++) { + if (secondInputDims[i] != 1) + return false; + } + + return true; + }; + + auto isSutableParentNode = [&](MKLDNNNodePtr node) { + if (node->getAlgorithm() != EltwiseMultiply || !node->getFusedWith().empty() || + node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) + return false; + + return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getDims()); + }; + + auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) + return false; + + return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getDims()); + }; + + auto parent = graphNodes.begin(); + 
while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSutableParentNode(parentNode)) { + parent++; + continue; + } + + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!isSutableChildNode(parentNode, childNode)) { + parent++; + continue; + } + + auto childs = childNode->childEdges; + auto parents = childNode->parentEdges; + + for (size_t i = 0; i < parents.size(); i++) { + auto p_edge = parents[i].lock(); + if (!p_edge) continue; + auto parent = p_edge->getParent(); + if (!parent) continue; + + if (parent == parentNode) { + for (size_t j = 0; j < childs.size(); j++) { + if (!childs[j].lock()) + continue; + auto child = childs[j].lock()->getChild(); + if (!child) + continue; + + MKLDNNEdgePtr &remEdge = p_edge; + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + remEdge = childs[j].lock(); + int outNum = 0; + if (remEdge) { + outNum = remEdge->getOutputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); + auto &graphEdges = graph.GetEdges(); + graphEdges.push_back(newEdge); + parent->addEdge(newEdge); + } + } else { + MKLDNNEdgePtr &remEdge = p_edge; + int inNum = 0; + if (remEdge) { + inNum = remEdge->getInputNum(); + remEdge->drop(); + removeEdge(graph, remEdge); + } + + auto parentEltwise = parentNode; + MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); + auto &graphEdges = graph.GetEdges(); + graphEdges.push_back(newEdge); + parent->addEdge(newEdge); + + parentEltwise->inDims.push_back(parent->outDims[0]); + } + } + + parentNode->addOriginalInputPrecision(childNode->getOriginalInputPrecisionAtPort(1)); + parentNode->setAlgorithm(EltwiseMulAdd); + parentNode->addOriginalLayer(childNode->getOriginalLayers()); + + graph.DropNode(childNode); + } +} + void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { // auto& graphNodes = graph.GetNodes(); // @@ -383,63 +592,6 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { // } } -void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) { - for (auto node : graph.GetNodes()) { - // Split with at least 2 Convolutions - if (!IsOneOf(node->getType(), {Split}) || node->getChildEdges().size() < 2 || - !IsOneOf(node->getChildEdgeAt(0)->getChild()->getType(), {Convolution})) { - continue; - } - bool canBeMerged = true; - - auto& split = node; - - auto convInEdge = split->getChildEdgeAt(0); - auto conv = convInEdge->getChild(); - auto convOutEdge = conv->getChildEdgeAt(0); - - auto convType = conv->getType(); - auto convInDims = convInEdge->getDims(); - auto convOutDims = convOutEdge->getDims(); - - // Convolutions of same the type with Concat as a child - for (size_t i = 1; i < split->getChildEdges().size(); i++) { - auto childEdge = split->getChildEdgeAt(i); - auto child = childEdge->getChild(); - Type type = child->getType(); - - if (convType != type || child->getChildEdgeAt(0)->getChild()->getType() != Concatenation || - convOutDims != child->getChildEdgeAt(0)->getDims() || child->getChildEdges().size() != 1 || - convInDims != childEdge->getDims()) { - canBeMerged = false; - break; - } - } - - if (!canBeMerged) continue; - - // TODO: Rewrite topology optimizer at all. 
it should be clean and understandable - auto concat = conv->getChildEdgeAt(0)->getChild(); - // Merge and remove Convolution - while (split->getChildEdges().size() > 1) { - auto peerInEdge = split->getChildEdgeAt(1); - auto peer = peerInEdge->getChild(); - conv->mergeWith(peer); - convInDims[1] += (peerInEdge->getDims())[1]; - convOutDims[1] += (peer->getChildEdgeAt(0)->getDims())[1]; - peer->remove(); - } - conv->inDims[0] = convInDims; - conv->outDims[0] = convOutDims; - - conv->fuseWith(split); - conv->fuseWith(concat); - - graph.DropNode(split); - graph.DropNode(concat); - } -} - // WA: We need it until LP transformations will not optimize this pattern inside void MKLDNNGraphOptimizer::MergeTwoEqualScaleShifts(MKLDNNGraph& graph) { // auto& graphNodes = graph.GetNodes(); @@ -579,74 +731,6 @@ void MKLDNNGraphOptimizer::FuseBatchNormWithScale(MKLDNNGraph &graph) { // } } -void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) { -// auto& graphNodes = graph.GetNodes(); -// -// auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) { -// auto* binConv = dynamic_cast<MKLDNNBinaryConvolutionNode *>(conv.get()); -// if (binConv) { -// if (!binConv->canFuse(activation)) -// return false; -// } -// -// if (!activation->getCnnLayer()) -// return false; -// -// auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(activation.get()); -// -// return eltwiseNode && -// (eltwiseNode->getOpType() == Relu || -// (conv->getCnnLayer()->precision == Precision::FP32 && -// IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid, -// Round}))); -// }; -// -// for (int i = 0; i < graphNodes.size(); i++) { -// if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) { -// auto conv = graphNodes[i]; -// -// auto fuse = [&] (MKLDNNNodePtr relu) { -// conv->fuseWith(relu); -// }; -// -// if (conv->getChildEdges().size() == 1) { -// auto ch1 = conv->getChildEdgeAt(0)->getChild(); -// -// if (isFusingSupported(conv, ch1)) { -// fuse(ch1); -// -// if (ch1->getChildEdges().size() == 1) { -// auto ch2 = ch1->getChildEdgeAt(0)->getChild(); -// -// if (isFusingSupported(conv, ch2)) { -// fuse(ch2); -// graph.DropNode(ch2); -// } -// } -// graph.DropNode(ch1); -// } else { -// if (ch1->type == Pooling) { -// auto pool = ch1; -// -// auto* pLayer = dynamic_cast<PoolingLayer *>(pool->getCnnLayer().get()); -// if (pLayer == nullptr) -// IE_THROW() << "Cannot get pooling layer " << pool->getName(); -// bool is_max_pool = pLayer->_type == PoolingLayer::PoolType::MAX; -// -// if (is_max_pool && pool->getChildEdges().size() == 1) { -// auto ch2 = pool->getChildEdgeAt(0)->getChild(); -// if (isFusingSupported(conv, ch2)) { -// fuse(ch2); -// graph.DropNode(ch2); -// } -// } -// } -// } -// } -// } -// } -} - void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) { // auto& graphNodes = graph.GetNodes(); // @@ -754,77 +838,6 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra // } } -void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) { -// auto& graphNodes = graph.GetNodes(); -// -// auto isSutableParentNode = [](MKLDNNNodePtr node) { -// bool isSutableConv = (node->getType() == Convolution) && -// node->getCnnLayer()->precision == Precision::FP32; -// bool isSutableBinConv = node->getType() == BinaryConvolution; -// return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1; -// }; -// -// auto 
isSutableChildNode = [](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { -// if (childNode->getType() != Eltwise) -// return false; -// -// if (!childNode->getCnnLayer()) -// return false; -// -// auto* binConv = dynamic_cast<MKLDNNBinaryConvolutionNode *>(parentNode.get()); -// if (binConv) { -// if (!binConv->canFuse(childNode)) -// return false; -// } -// -// auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(childNode.get()); -// if (eltwiseNode == nullptr) -// IE_THROW() << "Cannot get eltwise node " << childNode->getName(); -// return ((eltwiseNode->getOpType() == MulAdd && childNode->getCnnLayer()->blobs.size() == 2) || -// (eltwiseNode->getOpType() == Prelu)); -// }; -// -// for (int i = 0; i < graphNodes.size(); i++) { -// auto conv = graphNodes[i]; -// if (!isSutableParentNode(conv)) continue; -// -// auto depthwise0 = conv->getChildEdgeAt(0)->getChild(); -// if (!isSutableChildNode(conv, depthwise0)) continue; -// -// conv->fuseWith(depthwise0); -// -// if (depthwise0->getChildEdges().size() == 1) { -// auto depthwise1 = depthwise0->getChildEdgeAt(0)->getChild(); -// -// if (isSutableChildNode(conv, depthwise1)) { -// conv->fuseWith(depthwise1); -// -// auto parents = depthwise1->parentEdges; -// for (size_t j = 0; j < parents.size(); j++) { -// auto p_edge = parents[j].lock(); -// if (p_edge->getParent()->getType() == Eltwise) -// continue; -// -// removeEdge(graph, p_edge); -// } -// -// graph.DropNode(depthwise1); -// } -// } -// -// auto parents = depthwise0->parentEdges; -// for (size_t j = 0; j < parents.size(); j++) { -// auto p_edge = parents[j].lock(); -// if (p_edge->getParent()->getType() == Convolution || p_edge->getParent()->getType() == BinaryConvolution) -// continue; -// -// removeEdge(graph, p_edge); -// } -// -// graph.DropNode(depthwise0); -// } -} - void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { // auto& graphNodes = graph.GetNodes(); // @@ -927,116 +940,60 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { // } } -void MKLDNNGraphOptimizer::FuseConvolutionAndQuantize(MKLDNNGraph &graph) { +void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutableBinConv = node->getType() == Convolution; - - if (isSutableBinConv) { - return isSutableBinConv && node->getChildEdges().size() == 1; - } else { - return false; - } + return node->getType() == Convolution && + node->getChildEdges().size() == 1; }; - auto isSutableChildNode = [](MKLDNNNodePtr node) { - if (node->getType() != Quantize) - return false; + auto isSutableChildNode = [&](MKLDNNNodePtr node) { + if (node->getType() == Quantize) { + auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get()); + if (quantizeNode == nullptr) + IE_THROW() << "Cannot get quantize layer " << node->getName(); - auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); + return !quantizeNode->isBinarization(); + } else if (node->getType() == Eltwise) { + return one_of(node->getAlgorithm(), EltwiseMulAdd, EltwisePrelu, EltwiseRelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseSwish, EltwiseHswish, + EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero); + } - return !quantizeNode->isBinarization(); + return false; }; - for (int i = 0; i < graphNodes.size(); i++) { - auto parent = 
graphNodes[i]; - if (!isSutableParentNode(parent)) continue; + auto parent = graphNodes.begin(); + while (parent != graphNodes.end()) { + auto parentNode = *parent; + if (!isSutableParentNode(parentNode)) { + parent++; + continue; + } - auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(child)) continue; + auto childNode = parentNode->getChildEdgeAt(0)->getChild(); + if (!isSutableChildNode(childNode)) { + parent++; + continue; + } - parent->fuseWith(child); + childNode->fuseInto(parentNode); - auto parents = child->parentEdges; - for (size_t j = 0; j < parents.size(); j++) { - auto p_edge = parents[j].lock(); - if (p_edge->getParent()->getType() == Convolution) - continue; + if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { + auto parentEdges = childNode->parentEdges; + for (auto &parentEdge : parentEdges) { + auto p_edge = parentEdge.lock(); + if (p_edge->getParent()->getType() == Convolution) + continue; - removeEdge(graph, p_edge); + removeEdge(graph, p_edge); + } } - graph.DropNode(child); + graph.DropNode(childNode); } } -void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) { -// auto& graphNodes = graph.GetNodes(); -// -// auto isSutableParentNode = [](MKLDNNNodePtr node) { -// return node->getType() == Convolution && -// node->getChildEdges().size() == 1 && -// node->getCnnLayer()->precision == Precision::FP32; -// }; -// -// auto isSutableChildNode = [&](MKLDNNNodePtr node) { -// if (!node->getCnnLayer()) -// return false; -// -// if (node->getType() == Quantize) { -// auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get()); -// if (quantizeNode == nullptr) -// IE_THROW() << "Cannot get quantize layer " << node->getName(); -// -// return !quantizeNode->isBinarization(); -// } else if (node->getType() == Eltwise) { -// auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get()); -// if (eltwiseNode == nullptr) -// IE_THROW() << "Cannot get eltwise node " << node->getName(); -// -// return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) || -// (eltwiseNode->getOpType() == Prelu) || -// IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, -// Hsigmoid, Round})); -// } -// -// return false; -// }; -// -// auto parent = graphNodes.begin(); -// while (parent != graphNodes.end()) { -// auto parentNode = *parent; -// if (!isSutableParentNode(parentNode)) { -// parent++; -// continue; -// } -// -// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); -// if (!isSutableChildNode(childNode)) { -// parent++; -// continue; -// } -// -// parentNode->fuseWith(childNode); -// -// if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { -// auto parentEdges = childNode->parentEdges; -// for (auto &parentEdge : parentEdges) { -// auto p_edge = parentEdge.lock(); -// if (p_edge->getParent()->getType() == Convolution) -// continue; -// -// removeEdge(graph, p_edge); -// } -// } -// -// graph.DropNode(childNode); -// } -} - void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); @@ -1064,7 +1021,7 @@ void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) auto child = parent->getChildEdgeAt(0)->getChild(); if (!isSutableChildNode(parent, child)) continue; - parent->fuseWith(child); + child->fuseInto(parent); auto parents = child->parentEdges; for (size_t i = 0; i < parents.size(); i++) { @@ -1286,10 +1243,10 @@ void 
MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) { auto relu_shared = graphNode->getChildEdgeAt(0)->getChild(); lastNode = relu_shared; - mergedConv->fuseWith(sum); + sum->fuseInto(mergedConv); } - mergedConv->fuseWith(lastNode); + lastNode->fuseInto(mergedConv); if (mergedConv->fusedWith.size() > 0 && (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) { @@ -1393,7 +1350,7 @@ void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; @@ -1450,7 +1407,7 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; @@ -1492,7 +1449,7 @@ void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph) continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; @@ -1551,7 +1508,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); if (childNode->getType() == Quantize) { auto parentEdges = childNode->parentEdges; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h index f19dbc1743439e..1f9fa8cd97385d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h @@ -19,14 +19,12 @@ class MKLDNNGraphOptimizer { void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph); private: - void MergeGroupConvolution(MKLDNNGraph& graph); + void FuseConvolutionAndBias(MKLDNNGraph &graph); + void FuseMultiplyAndAdd(MKLDNNGraph &graph); void MergeTwoEqualScaleShifts(MKLDNNGraph& graph); - void FuseConvolutionAndActivation(MKLDNNGraph &graph); void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph); - void FuseConvolutionAndDepthwise(MKLDNNGraph &graph); void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph); void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph); - void FuseConvolutionAndQuantize(MKLDNNGraph &graph); void FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph); void FusePoolingAndQuantize(MKLDNNGraph &graph); void FuseBatchNormWithScale(MKLDNNGraph& graph); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 938b99a430639d..8701ad831fb983 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -211,8 +211,6 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en algorithm = Algorithm::Undefined; fusingPort = -1; - originalName = name; - originalInputsNumber = op->get_input_size(); for (size_t i = 0; i < op->get_input_size(); i++) { inDims.emplace_back(op->get_input_shape(i)); originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i))); @@ -236,19 +234,28 @@ 
MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en // } } + const auto& rtInfo = op->get_rt_info(); + if (rtInfo.count("originalLayersNames")) { + originalLayers = getRTInfoValue(rtInfo, "originalLayersNames"); + } -// if (op->params.find("PrimitivesPriority") != layer->params.end()) { -// std::istringstream stream(layer->params["PrimitivesPriority"]); -// std::string str; -// while (getline(stream, str, ',')) { -// if (str.substr(0, 4) != "cpu:") -// continue; -// implPriorities.push_back(parse_impl_name(str)); -// if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown && -// str != "cpu:unknown") -// IE_THROW() << "Unsupported CPU implementation " << str << " for node " << getName(); -// } -// } + if (originalLayers.empty()) { + addOriginalLayer(name); + } + + auto primitivesPriority = getPrimitivesPriorityValue(op); + if (!primitivesPriority.empty()) { + std::istringstream stream(primitivesPriority); + std::string str; + while (getline(stream, str, ',')) { + if (str.substr(0, 4) != "cpu:") + continue; + implPriorities.push_back(parse_impl_name(str)); + if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown && + str != "cpu:unknown") + IE_THROW() << "Unsupported CPU implementation " << str << " for node " << getName(); + } + } if (op != nullptr) { std::string inputMemoryFormats = ngraph::getMLKDNNInputMemoryFormats(op); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 64224975b5d675..a36cb2abb7e968 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -27,6 +27,7 @@ #include "utils/ngraph_utils.hpp" #include <ngraph/ops.hpp> #include <ngraph/node.hpp> +#include <ie_precision.hpp> namespace MKLDNNPlugin { @@ -400,15 +401,35 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { bool isFusedWith(Type type) const; - void fuseWith(const MKLDNNNodePtr &fusingNode) { + void addFusedNode(const MKLDNNNodePtr &fusingNode) { fusedWith.push_back(fusingNode); + } - for (int i = 0; i< inDims.size(); i++) { - if (fusingNode->getParentEdgesAtPort(i)[0]->getParent().get() == this) { + virtual void fuseInto(MKLDNNNodePtr& parentNode) { + // The graph supports fusing only of consecutive nodes and some graph logic requires to know through which input port a node was fused into parent one. 
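// The rule implemented below, as a free-function sketch (name illustrative): the fusing
// port is the child's input port whose producer is the parent node itself or, failing
// that, the last node already fused into the parent.
//   static int findFusingPort(const MKLDNNNodePtr &child, const MKLDNNNodePtr &parent) {
//       for (size_t i = 0; i < child->getParentEdges().size(); i++) {
//           if (child->getParentEdgesAtPort(i)[0]->getParent().get() == parent.get())
//               return static_cast<int>(i);
//       }
//       return -1;  // caller falls back to the parent's last fused node, then throws
//   }
// e.g. for Conv -> Add, where Add reads Conv's output on port 0 and a constant on
// port 1, the Add node records fusingPort = 0, and port 1 stays addressable as the
// fused constant operand (see fusedEltwisePrecision() in mkldnn_conv_node.cpp).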
+ for (int i = 0; i < getParentEdges().size(); i++) { + if (getParentEdgesAtPort(i)[0]->getParent().get() == parentNode.get()) { setFusingPort(i); break; } } + + auto parentFusedNodes = parentNode->getFusedWith(); + if (getFusingPort() < 0 && !parentFusedNodes.empty()) { + for (int i = 0; i < getParentEdges().size(); i++) { + if (getParentEdgesAtPort(i)[0]->getParent().get() == parentFusedNodes[parentFusedNodes.size() - 1].get()) { + setFusingPort(i); + break; + } + } + } + + if (getFusingPort() == -1) { + THROW_IE_EXCEPTION << "Cannot determine fusing port between nodes: " << parentNode->getName() << " and " << getName(); + } + + parentNode->addFusedNode(getParentEdgesAtPort(getFusingPort())[0]->getChild()); + parentNode->addOriginalLayer(getOriginalLayers()); } void clearFusedWith() { @@ -419,8 +440,6 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { mergedWith.push_back(merge); } - void addOriginalLayer(const std::string& layerName); - const std::vector <MKLDNNNodePtr> &getMergeWith() { return mergedWith; } @@ -441,6 +460,8 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { return name; } + void addOriginalLayer(const std::string& layerName); + const std::string getOriginalLayers() const { return originalLayers; } @@ -449,10 +470,6 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { return type; } -// const InferenceEngine::CNNLayerPtr &getCnnLayer() const { -// return cnnLayer; -// } - const std::vector<PrimitiveDescInfo>& getSupportedPrimitiveDescriptors() const { return supportedPrimitiveDescriptors; } @@ -602,18 +619,42 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { return originalOutputPrecisions; } - size_t getOriginalInputsNumber() const { - return originalInputsNumber; + InferenceEngine::Precision getOriginalInputPrecisionAtPort(size_t port) const { + if (originalInputPrecisions.size() <= port) { + THROW_IE_EXCEPTION << "Incorrect input port number for node " << getName(); + } + return originalInputPrecisions[port]; + } + InferenceEngine::Precision getOriginalOutputPrecisionAtPort(size_t port) const { + if (originalOutputPrecisions.size() <= port) { + THROW_IE_EXCEPTION << "Incorrect output port number for node " << getName(); + } + return originalOutputPrecisions[port]; } - std::string getOriginalName() const { - return originalName; + void setOriginalInputPrecisionAtPort(size_t port, InferenceEngine::Precision precision) { + if (originalInputPrecisions.size() <= port) { + THROW_IE_EXCEPTION << "Incorrect input port number for node " << getName(); + } + originalInputPrecisions[port] = precision; + } + + void addOriginalInputPrecision(InferenceEngine::Precision precision) { + originalInputPrecisions.push_back(precision); + } + + size_t getOriginalInputsNumber() const { + return originalInputPrecisions.size(); } Algorithm getAlgorithm() const { return algorithm; } + void setAlgorithm(Algorithm alg) { + algorithm = alg; + } + virtual bool canFuse(const MKLDNNNodePtr& node) const { return false; } @@ -713,8 +754,6 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { std::vector<MKLDNNEdgeWeakPtr> parentEdges; std::vector<MKLDNNEdgeWeakPtr> childEdges; - std::string originalName; - size_t originalInputsNumber; std::vector<InferenceEngine::Precision> originalInputPrecisions; std::vector<InferenceEngine::Precision> originalOutputPrecisions; diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp index 543ab5a0e5fc1a..159570c67a808b 100644 --- 
a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp @@ -26,9 +26,9 @@ using MKLDNNPlugin::TensorDescCreatorTypes; class GatherImpl: public ExtLayerBase { public: - static bool isSupportedOperation(const ngraph::Node& op, std::string& errorMessage) noexcept { + static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept { try { - auto gatherOp = ngraph::as_type<const ngraph::op::v1::Gather>(&op); + auto gatherOp = ngraph::as_type_ptr<const ngraph::op::v1::Gather>(op); if (!gatherOp) { errorMessage = "Only opset1 Gather operation is supported"; return false; @@ -51,7 +51,7 @@ class GatherImpl: public ExtLayerBase { errorPrefix_ = std::string("Layer Gather with name '") + op->get_friendly_name() + "' "; std::string errorMessage; - if (!isSupportedOperation(*op, errorMessage)) { + if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index 80bac32d359cec..001916b4c32b5e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -20,12 +20,32 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; +bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept { + try { + if (!ngraph::is_type<ngraph::op::v1::Convolution>(op) && !ngraph::is_type<ngraph::op::v1::GroupConvolution>(op)) { + errorMessage = "Only opset1 Convolution and GroupConvolution operations are supported"; + return false; + } + size_t ndims = op->get_input_shape(0).size(); + if ((ndims < 4) || (ndims > 5)) { + IE_THROW() << "Only 4D and 5D blobs are supported as input"; + } + } catch (...) { + return false; + } + + return true; +} + MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false), isDW(false), isMerged(false), + : MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false), isGrouped(false), /* dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef), */ groupNum(1lu), eltwisePrecision(Precision::FP32) { - // TODO [NM]: do we still have networks that requires this optimizations? Preferable should be removed. 
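// Usage sketch for the static isSupportedOperation() hook added above (hypothetical
// caller, e.g. a node factory probing support before construction; it mirrors the
// constructor guard added just below):
//   std::string reason;
//   if (!MKLDNNConvolutionNode::isSupportedOperation(op, reason)) {
//       IE_THROW(NotImplemented) << reason;  // defer to another implementation
//   }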
- isMerged = false; // (!getMergeWith().empty()); // grouped convolution was constructed from split->concat subgraph + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + isPrimitivesPriorityDefined = op->get_rt_info().count("PrimitivesPriority") != 0; auto convolutionOp = ngraph::as_type_ptr<ngraph::op::v1::Convolution>(op); @@ -43,13 +63,6 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node> groupIC = IC; groupOC = weightDims[0]; - isDW = groupNum == groupOC && groupNum == groupIC; - - if (isMerged) { - groupNum = getMergeWith().size() + 1; - } - - withBiases = getOriginalInputsNumber() == 3; biasesDims = { groupOC }; for (int i = 0; i < convolutionOp->get_strides().size(); i++) { @@ -61,46 +74,36 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node> paddingL = convolutionOp->get_pads_begin(); paddingR = convolutionOp->get_pads_end(); } else if (groupConvolutionOp) { - algorithm = ConvolutionGrouped; - - groupNum = groupConvolutionOp->input_value(1).get_shape()[0]; - isGrouped = true; + algorithm = ConvolutionGrouped; - weightDims = groupConvolutionOp->input_value(1).get_shape(); + groupNum = groupConvolutionOp->input_value(1).get_shape()[0]; + isGrouped = true; - IC = weightDims[2]; - groupIC = IC; - groupOC = weightDims[1]; + weightDims = groupConvolutionOp->input_value(1).get_shape(); - isDW = groupNum == groupOC && groupNum == groupIC; + groupIC = weightDims[2]; + IC = groupIC * groupNum; + groupOC = weightDims[1]; - if (isMerged) { - groupNum = getMergeWith().size() + 1; - } + biasesDims = {groupOC * groupNum}; - withBiases = getOriginalInputsNumber() == 3; - biasesDims = {groupOC}; - - for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) { - stride.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_strides()[i])); - } - for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) { - dilation.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_dilations()[i]) - 1); - } - paddingL = groupConvolutionOp->get_pads_begin(); - paddingR = groupConvolutionOp->get_pads_end(); - } else { - IE_THROW(NotImplemented) - << "CPU Convolution node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); + for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) { + stride.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_strides()[i])); + } + for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) { + dilation.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_dilations()[i]) - 1); + } + paddingL = groupConvolutionOp->get_pads_begin(); + paddingR = groupConvolutionOp->get_pads_end(); } } bool MKLDNNConvolutionNode::canBeExecutedInInt8() { - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[0]); + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; - auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[1]); + auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1)); if (!weightsZeroPoints.empty()) weightsDataType = memory::data_type::s8; @@ -112,9 +115,9 @@ InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(const MK int fusingPort = fusingNode->getFusingPort(); if (fusingPort == 0) { - 
eltwisePrecision = fusingNode->getOriginalInputPrecisions()[1]; + eltwisePrecision = fusingNode->getOriginalInputPrecisionAtPort(1); } else if (fusingPort == 1) { - eltwisePrecision = fusingNode->getOriginalInputPrecisions()[0]; + eltwisePrecision = fusingNode->getOriginalInputPrecisionAtPort(0); } else { IE_THROW() << "Cannot determine Eltwise post op precision for Convolution node with name '" << getName() << "'"; } @@ -126,6 +129,8 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { if (!descs.empty()) return; + withBiases = getOriginalInputsNumber() == 3; + withSum = false; int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber()); for (int i = 0; i < fusedWith.size(); i++) { @@ -139,14 +144,14 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { } } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[0]); + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0)); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisions()[0]); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0)); eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); if (!fusedWith.empty()) { - outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalInputPrecisions()[0]); + outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalInputPrecisionAtPort(0)); eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); } @@ -171,13 +176,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { IE_THROW() << "Incorrect number of output edges for layer " << getName(); int ndims = getParentEdgesAtPort(0)[0]->getDims().ndims(); - if ((ndims < 4) || (ndims > 5)) { - IE_THROW() << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input."; - } - - if (isMerged && isGrouped) - IE_THROW() << "Convolution initialization. Group splitted mode are used together with direct group specification."; - MKLDNNDims weightsDims = MKLDNNDims(weightDims); withDWConv = isFusedWith(Convolution); @@ -228,9 +226,9 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { : memory::format_tag::nhwc); createDescriptor({in_candidate}, {out_candidate}); } else { - inputDataType = (getOriginalInputPrecisions()[0] == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16 + inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32; - outputDataType = (getOriginalOutputPrecisions()[0] == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16 + outputDataType = (getOriginalOutputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? 
memory::data_type::bf16 : memory::data_type::f32; eltwisePrecision = Precision::FP32; for (int i = 0; i < fusedWith.size(); i++) { @@ -411,7 +409,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { dataConfig.constant = false; dataConfig.desc = getDstMemDesc(itpd, i); - if (!(isGrouped || isMerged)) + if (!isGrouped) dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc); config.outConfs.push_back(dataConfig); @@ -475,10 +473,6 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine:: MKLDNNMemoryDesc in_candidate(inDesc); MKLDNNMemoryDesc out_candidate(outDesc); - // grouping and autoblocking is not compatible - if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) - return; - MKLDNNDims blocked_weightDims(weightDims); MKLDNNDims blocked_biasesDims(biasesDims); MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::format_tag::any}; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 2dde482ee89fb7..5749e76301bc8f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -19,6 +19,7 @@ class MKLDNNConvolutionNode : public MKLDNNNode { MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNConvolutionNode() override = default; + static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc, const std::vector<InferenceEngine::TensorDesc>& outputDesc) override; @@ -58,8 +59,6 @@ class MKLDNNConvolutionNode : public MKLDNNNode { bool withBiases; bool withSum; bool withDWConv; - bool isDW; - bool isMerged; bool isGrouped; bool isPrimitivesPriorityDefined; std::vector<ptrdiff_t> stride; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 90f2e3a21c6cea..6252a9e88d5bd7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -14,6 +14,8 @@ #include "mkldnn_extension_utils.h" #include "mkldnn_quantize_node.h" #include "mkldnn_pooling_node.h" +#include "mkldnn_input_node.h" +#include "common/cpu_convert.h" #include "emitters/jit_emitter.hpp" #include "emitters/jit_eltwise_emitters.hpp" @@ -916,28 +918,12 @@ std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_pt {ngraph::op::v0::PRelu::type_info, [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) { node.algorithm = EltwisePrelu; }}, - // TODO [NM]: we need to introduce custom MulAdd operation -// {ngraph::op::v0::MulAdd::type_info, [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) { -// node.algorithm = EltwiseMish; -// node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_mish; -// }}, }; MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { if (initializers.find(op->get_type_info()) != initializers.end()) { initializers[op->get_type_info()](op, *this); - - std::shared_ptr<const ngraph::opset1::Constant> secondIn; - const auto 
isConstantBroadcastbleSecondInput = [&](const std::shared_ptr<ngraph::Node>& op) { - secondIn = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1)); - return secondIn != nullptr && MKLDNNExtensionUtils::isPerTensorOrPerChannelBroadcastable(op->get_input_shape(0), op->get_input_shape(1)); - }; - if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu) && isConstantBroadcastbleSecondInput(op)) { - scales = secondIn->cast_vector<float>(); - } else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract) && isConstantBroadcastbleSecondInput(op)) { - shifts = secondIn->cast_vector<float>(); - } } else { IE_THROW(NotImplemented) << "CPU Eltwise node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); @@ -1018,8 +1004,8 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { for (auto& fusedNode : fusedWith) { if (fusedNode->getType() == Eltwise) { - for (int i = 1; i < fusedNode->getOriginalInputPrecisions().size(); i++) { - inputPrecisions.push_back(fusedNode->getOriginalInputPrecisions()[i]); + for (int i = 1; i < fusedNode->getOriginalInputsNumber(); i++) { + inputPrecisions.push_back(fusedNode->getOriginalInputPrecisionAtPort(i)); } } } @@ -1027,9 +1013,9 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { if (inputPrecisions.size() != getParentEdges().size()) IE_THROW() << "Eltwise node with name `" << getName() << "` has invalid input precisions configuration."; - InferenceEngine::Precision outputPrecision = getOriginalOutputPrecisions()[0]; + InferenceEngine::Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); if (!fusedWith.empty()) { - outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisions()[0]; + outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0); } if (!mayiuse(avx512_core)) { @@ -1681,6 +1667,31 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { } void MKLDNNEltwiseNode::fillScalesAndShifts() { + std::shared_ptr<const ngraph::opset1::Constant> secondIn; + const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) { + if (getParentEdgeAt(1)->getParent()->getType() != Input || + !getParentEdgeAt(1)->getParent()->isConstant() || + !MKLDNNExtensionUtils::isPerTensorOrPerChannelBroadcastable(getParentEdgesAtPort(0)[0]->getDims().ToSizeVector(), + constInput->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector())) { + IE_THROW() << "Fusing Eltwise node with name '" + getName() + "' " << "as post operation is not supported"; + } + + auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get()); + auto constBlob = constInputNode->getConstBlob(); + auto srtPtr = constBlob->cbuffer().as<int8_t *>(); + buffer.resize(constBlob->size()); + cpu_convert(srtPtr, &buffer[0], constBlob->getTensorDesc().getPrecision(), Precision::FP32, constBlob->size()); + }; + + if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) { + fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales); + } else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) { + fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), shifts); + } else if (one_of(getAlgorithm(), EltwiseMulAdd)) { + fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales); + fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts); + } + const size_t bufferSize = static_cast<size_t>(outDims[0][outDims[0].size() > 1 ? 
1 : 0]); const size_t bufferSizeAligned = rnd_up(bufferSize, 16); @@ -1723,6 +1734,16 @@ void MKLDNNEltwiseNode::fillScalesAndShifts() { } } +void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) { + // Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API. + bool isSpecialConvolutionAddFusing = parentNode->getType() == Convolution && getAlgorithm() == EltwiseAdd && + getParentEdgesAtPort(0)[0]->getDims().ToSizeVector() == getParentEdgesAtPort(1)[0]->getDims().ToSizeVector(); + if (!isSpecialConvolutionAddFusing && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract, EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePrelu)) { + fillScalesAndShifts(); + } + MKLDNNNode::fuseInto(parentNode); +} + void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) { const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' "; if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { @@ -1747,31 +1768,27 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) { case mkldnn::algorithm::eltwise_round_half_to_even: case mkldnn::algorithm::eltwise_round_half_away_from_zero: ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta()); - return; - case mkldnn::algorithm::depthwise_scale_shift: - IE_THROW() << "[NM] Not implemented"; - return; + break; default: IE_THROW() << errorPrefix << "as post operation is not supported"; } } else { switch (getAlgorithm()) { case EltwiseAdd: case EltwiseSubtract: - if (shifts.empty()) IE_THROW() << errorPrefix << "has empty shifts"; - break; case EltwiseMultiply: case EltwiseDivide: + case EltwiseMulAdd: + if (scales.empty() || shifts.empty()) + IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; + ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scales[0], &shifts[0]); + break; case EltwisePrelu: - if (scales.empty()) IE_THROW() << errorPrefix << "has empty scales"; + if (scales.empty()) + IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated"; + ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scales[0], nullptr); break; default: IE_THROW() << errorPrefix << "as post operation is not supported"; } - fillScalesAndShifts(); - if (getAlgorithm() == EltwisePrelu) { - ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scales[0], nullptr); - } else { - ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scales[0], &shifts[0]); - } } } @@ -1814,8 +1831,8 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { // Limitation: inputs precision definition inside Eltwise node assumes fusing is applied for 0-th port, // otherwise we need identical precision on all inputs of fused node - for (int i = 1; i < getOriginalInputPrecisions().size(); i++) { - if (getOriginalInputPrecisions()[0] != getOriginalInputPrecisions()[i]) { + for (int i = 1; i < getOriginalInputsNumber(); i++) { + if (getOriginalInputPrecisionAtPort(0) != getOriginalInputPrecisionAtPort(i)) { return false; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h index a69d5eb31f40c2..4e81586087015b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h @@ -84,6 +84,7 @@ class MKLDNNEltwiseNode : public MKLDNNNode { void appendPostOps(mkldnn::post_ops& ops) override; InferenceEngine::Precision getRuntimePrecision() const override; + 
void fuseInto(MKLDNNNodePtr& parentNode) override; private: mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp index fcb84e0c040271..12abf1474bb6ed 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp @@ -120,8 +120,8 @@ void MKLDNNGemmNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto inPrec0 = getOriginalInputPrecisions()[0]; - auto inPrec1 = getOriginalInputPrecisions()[1]; + auto inPrec0 = getOriginalInputPrecisionAtPort(0); + auto inPrec1 = getOriginalInputPrecisionAtPort(1); if ((inPrec0 != Precision::U8 && inPrec0 != Precision::I8) || inPrec1 != Precision::I8 || isThreeInputs) { if (inPrec0 == Precision::BF16 || inPrec1 == Precision::BF16) { inPrec0 = Precision::BF16; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp index b5c74959aae7f7..889756a9b2d2e8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp @@ -71,7 +71,7 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { LayerConfig config; config.dynBatchSupport = true; if (getType() == Input || getType() == MemoryInput) { - precision = getOriginalOutputPrecisions()[0]; + precision = getOriginalOutputPrecisionAtPort(0); if (precision == Precision::U16 || isMeanImage) { precision = Precision::FP32; } @@ -84,7 +84,7 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { dataConfig.desc = mem_tdesc; config.outConfs.push_back(dataConfig); } else if (getType() == Output) { - precision = getOriginalInputPrecisions()[0]; + precision = getOriginalInputPrecisionAtPort(0); if (precision == Precision::U16) precision = Precision::FP32; DataConfig dataConfig; dataConfig.inPlace = -1; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h index b1dc432f31b662..6761f9e0ed6cad 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h @@ -25,6 +25,10 @@ class MKLDNNInputNode : public MKLDNNNode { isMeanImage = true; } + const InferenceEngine::Blob::CPtr getConstBlob() const { + return constBlob; + } + private: InferenceEngine::Precision precision; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index 33ade1e6120870..de37cb8637f7c5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -701,21 +701,21 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { setPostOps(attr, true); - Precision inputPrecision = getOriginalInputPrecisions()[0]; + Precision inputPrecision = getOriginalInputPrecisionAtPort(0); if (getParentEdgeAt(0)->getDims().ndims() < 3 || getParentEdgeAt(0)->getDims().ndims() > 5 || acrossChannels_ || !normalizeVariance_) { if (!isFloatCompatible(inputPrecision)) { inputPrecision = Precision::FP32; } } - Precision outputPrecision = getOriginalOutputPrecisions()[0]; + Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); if (!mayiuse(avx512_core)) { if (outputPrecision == Precision::BF16) 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
index c004b28d218519..738b86ff99a7f7 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
@@ -720,11 +720,11 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() {
 
     setPostOps(attr, true);
 
-    Precision inputPrecision = getOriginalInputPrecisions()[DATA];
-    Precision outputPrecision = getOriginalOutputPrecisions()[DATA];
+    Precision inputPrecision = getOriginalInputPrecisionAtPort(DATA);
+    Precision outputPrecision = getOriginalOutputPrecisionAtPort(DATA);
 
     if (!fusedWith.empty()) {
-        outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisions()[0];
+        outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
     }
 
     if (inputPrecision == Precision::BF16 || outputPrecision == Precision::BF16) {
@@ -781,9 +781,9 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() {
 }
 
 bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const {
-    auto isConvertedToScaleShift = [](MKLDNNNodePtr node) {
+    auto isConvertibleToScaleShift = [](MKLDNNNodePtr node) {
         return one_of(node->getAlgorithm(), EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu) &&
-               node->getParentEdgeAt(1)->getParent()->isConstant() &&
+               node->getParentEdgeAt(1)->getParent()->getType() == Input && node->getParentEdgeAt(1)->getParent()->isConstant() &&
                MKLDNNExtensionUtils::isPerTensorOrPerChannelBroadcastable(node->getParentEdgeAt(0)->getDims().ToSizeVector(),
                                                                           node->getParentEdgeAt(1)->getDims().ToSizeVector());
     };
@@ -796,10 +796,8 @@ bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const {
     } else if (node->getType() == Eltwise) {
         return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseBoundedRelu, EltwiseClamp,
                       EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
-                      EltwiseRoundHalfAwayFromZero, EltwiseLinear, EltwiseAbs, EltwiseSquare, EltwiseSqrt) ||
-               isConvertedToScaleShift(node);
-        // TODO [NM]: implemented after enabling MulAdd operation
-        // ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2)
+                      EltwiseRoundHalfAwayFromZero, EltwiseLinear, EltwiseAbs, EltwiseSquare, EltwiseSqrt, EltwiseMulAdd) ||
+               isConvertibleToScaleShift(node);
     }
 
     return false;
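canFuse() above only treats a binary Eltwise as a scale/shift candidate when its second input is a constant Input whose shape is per-tensor or per-channel broadcastable against the data. A self-contained re-implementation of that shape check, for illustration only (the real helper lives in MKLDNNExtensionUtils and its exact corner cases may differ):

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

using SizeVector = std::vector<size_t>;

// The second input is foldable into scale/shift buffers if it is a single
// value (per-tensor) or a vector over the channel axis (axis 1 in NCHW)
// with all other dims equal to 1 (per-channel).
bool isPerTensorOrPerChannelBroadcastable(const SizeVector& dataDims, const SizeVector& weiDims) {
    const size_t elementsCount = std::accumulate(weiDims.begin(), weiDims.end(),
                                                 size_t{1}, std::multiplies<size_t>());
    if (elementsCount == 1)
        return true;  // per-tensor scalar
    if (weiDims.size() != dataDims.size() || dataDims.size() < 2)
        return false;
    for (size_t i = 0; i < weiDims.size(); ++i) {
        if (weiDims[i] != (i == 1 ? dataDims[1] : 1))
            return false;
    }
    return true;  // per-channel vector
}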
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
index ba3007e0d0ede8..2aeeb297a11a6c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
@@ -110,7 +110,7 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() {
     std::vector<InferenceEngine::Precision> supportedPrecisions = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::I32,
                                                                    InferenceEngine::Precision::BF16, InferenceEngine::Precision::I8,
                                                                    InferenceEngine::Precision::U8};
-    InferenceEngine::Precision precision = getOriginalInputPrecisions()[DATA_ID];
+    InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(DATA_ID);
     if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), precision) == supportedPrecisions.end())
         precision = precision.is_float() ? InferenceEngine::Precision::FP32 : InferenceEngine::Precision::I32;
     auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
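For clarity, the Pad fallback rule above can be read as a small free function (an illustrative restatement, not part of the patch): unsupported float-family precisions degrade to FP32 and unsupported integer ones to I32, so the node keeps working on exotic input precisions instead of rejecting them.

#include <algorithm>
#include <vector>
#include <ie_precision.hpp>

using InferenceEngine::Precision;

Precision normalizePadPrecision(Precision precision) {
    static const std::vector<Precision> supported = {
        Precision::FP32, Precision::I32, Precision::BF16, Precision::I8, Precision::U8};
    // Fall back along the same numeric family when the precision is unsupported.
    if (std::find(supported.begin(), supported.end(), precision) == supported.end())
        precision = precision.is_float() ? Precision::FP32 : Precision::I32;
    return precision;
}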
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
index 794815544ed236..c345fa92451a09 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
@@ -85,8 +85,8 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
     if (getChildEdges().empty())
         IE_THROW() << "Incorrect number of output edges for layer " << getName();
 
-    inputPrecision = getOriginalInputPrecisions()[0];
-    outputPrecision = getOriginalOutputPrecisions()[0];
+    inputPrecision = getOriginalInputPrecisionAtPort(0);
+    outputPrecision = getOriginalOutputPrecisionAtPort(0);
 
     // MKLDNN supports only equal precisions for input and output
     if (one_of(inputPrecision, Precision::FP32, Precision::BF16)) {
@@ -94,7 +94,7 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
     }
 
     if (!fusedWith.empty()) {
-        outputPrecision = fusedWith.back()->getOriginalOutputPrecisions()[0];
+        outputPrecision = fusedWith.back()->getOriginalOutputPrecisionAtPort(0);
     }
 
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
index d0c4e5f0ed53f4..6c935ebadb008d 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
@@ -25,9 +25,9 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    InferenceEngine::Precision precision = getOriginalInputPrecisions()[0];
+    InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0);
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
-    precision = getOriginalOutputPrecisions()[0];
+    precision = getOriginalOutputPrecisionAtPort(0);
     auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
 
     // Current reshape implementation is simple memory reinterpret,
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp
index 408ecb54fb5958..5b9692fc562903 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp
@@ -147,7 +147,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() {
         }
     }
 
-    indicesPrec = getOriginalInputPrecisions()[INDICES_ID];
+    indicesPrec = getOriginalInputPrecisionAtPort(INDICES_ID);
     auto indicesType = MKLDNNExtensionUtils::IEPrecisionToDataType(indicesPrec);
     indicesSize = MKLDNNExtensionUtils::sizeOfDataType(indicesType);
     if (indicesSize >= 8) {
@@ -160,7 +160,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() {
     indicesType = MKLDNNExtensionUtils::IEPrecisionToDataType(indicesPrec);
 
     if (axisRelaxed) {
-        axisPrec = getOriginalInputPrecisions()[AXIS_ID];
+        axisPrec = getOriginalInputPrecisionAtPort(AXIS_ID);
         auto axisType = MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec);
         axisSize = MKLDNNExtensionUtils::sizeOfDataType(axisType);
         if (axisSize >= 8) {
@@ -172,7 +172,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() {
         }
     }
 
-    dataPrec = getOriginalInputPrecisions()[DATA_ID];
+    dataPrec = getOriginalInputPrecisionAtPort(DATA_ID);
     auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(dataPrec);
     dataSize = MKLDNNExtensionUtils::sizeOfDataType(dataType);
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
index 42cd241f1a9e92..6d4c9a27dc4d8b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
@@ -27,7 +27,7 @@ void MKLDNNSoftMaxNode::getSupportedDescriptors() {
     if (descs.size())
         return;
 
-    InferenceEngine::Precision precision = getOriginalInputPrecisions()[0];
+    InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0);
     if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16)
         precision = InferenceEngine::Precision::FP32;
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp
index 4cb8bc06a90808..5819617bfea26a 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp
@@ -181,10 +181,10 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
     //
-    prec = getOriginalInputPrecisions()[0];
+    prec = getOriginalInputPrecisionAtPort(0);
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec);
     auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec);
-    auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[1]);
+    auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1));
 
     InferenceEngine::LayerConfig config;
     config.dynBatchSupport = true;
diff --git a/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp
index 05ce831fb11616..62420cfca6c33a 100644
--- a/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp
+++ b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp
@@ -6,6 +6,7 @@
 #include <cassert>
 
 #include <ngraph/variant.hpp>
+#include "transformations/rt_info/primitives_priority_attribute.hpp"
 
 namespace MKLDNNPlugin {
 
@@ -19,6 +20,17 @@ inline std::string getRTInfoValue(const std::map<std::string, std::shared_ptr<ng
     }
 };
 
+inline std::string getPrimitivesPriorityValue(const std::shared_ptr<ngraph::Node> &node) {
+    const auto &rtInfo = node->get_rt_info();
+    using PrimitivesPriorityWrapper = ngraph::VariantWrapper<ngraph::PrimitivesPriority>;
+
+    if (!rtInfo.count(PrimitivesPriorityWrapper::type_info.name)) return "";
+
+    const auto &attr = rtInfo.at(PrimitivesPriorityWrapper::type_info.name);
+    ngraph::PrimitivesPriority pp = ngraph::as_type_ptr<PrimitivesPriorityWrapper>(attr)->get();
+    return pp.getPrimitivesPriority();
+}
+
 template <typename T>
 inline const std::shared_ptr<T> getNgraphOpAs(const std::shared_ptr<ngraph::Node>& op) {
     auto typedOp = ngraph::as_type_ptr<T>(op);
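A short usage sketch for the new getPrimitivesPriorityValue() helper (illustrative; parseImplPriorities() is a hypothetical consumer, and the priority string shown is only an example of the expected comma-separated format):

#include <memory>
#include <string>
#include <ngraph/node.hpp>

void seedImplPriorities(const std::shared_ptr<ngraph::Node>& node) {
    // Read the primitives-priority hint a transformation may have attached to the node.
    const std::string priorities = MKLDNNPlugin::getPrimitivesPriorityValue(node);
    if (!priorities.empty()) {
        // e.g. "cpu:jit_avx512,cpu:ref" -- a comma-separated list of implementation names.
        parseImplPriorities(priorities);  // hypothetical consumer that seeds the preferred impl list
    }
}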
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
index 837d758269e0ee..d2ee3bae38eb30 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
@@ -59,11 +59,12 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*BinaryConvolutionLayerTest.*)"
     };
 
-    if (!InferenceEngine::with_cpu_x86_avx512_core()) {
+// TODO [NM]: Disabled until the BF16 transformer is migrated to the CPU graph representation.
+//    if (!InferenceEngine::with_cpu_x86_avx512_core()) {
         // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
         // tests are useless on such platforms
         retVector.emplace_back(R"(.*BF16.*)");
-    }
+//    }
 
     return retVector;
 }
diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp
index 2ce34a6555c21a..74b265d1935dcf 100755
--- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp
@@ -85,7 +85,9 @@ std::vector<fusingSpecificParams> fusingParamsSet {
         fusingDividePerChannel,
         fusingPReluPerChannel,
         fusingPReluPerTensor,
-        fusingRelu
+        fusingRelu,
+        fusingGelu,
+        fusingReluScaleShift
 };
 
 const float epsilon = 1e-4f;
diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp
index 9ce8004e58a55b..2a56c02bce0a55 100644
--- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp
+++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp
@@ -163,7 +163,7 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared<postNode
                 IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
             ngraph::Shape newShape(shape.size(), 1);
             newShape[1] = shape[1];
-            auto constNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, newShape, {}, true);
+            auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
             return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
         }, "Multiply(PerChannel)"},
         {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
@@ -172,7 +172,7 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared<postNode
                 IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
             ngraph::Shape newShape(shape.size(), 1);
             newShape[1] = shape[1];
-            auto constNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, newShape, {}, true);
+            auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
             return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
         }, "Add(PerChannel)"}}), {"Relu", "Add"}};
 
@@ -183,7 +183,7 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared<postNodesMg
                 IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
             ngraph::Shape newShape(shape.size(), 1);
             newShape[1] = shape[1];
-            auto constNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, newShape, {}, true);
+            auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
             return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
         }, "Multiply(PerChannel)"},
         {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
@@ -192,7 +192,7 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared<postNodesMg
                 IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
             ngraph::Shape newShape(shape.size(), 1);
             newShape[1] = shape[1];
-            auto constNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, newShape, {}, true);
+            auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
             return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
         }, "Add(PerChannel)"}}), {"Add"} };
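All four fusing builders above now construct their per-channel constants with the precision under test instead of hard-coded f32, so the reference subgraph matches what actually gets fused on CPU (e.g. for bf16 runs). The pattern in isolation (illustrative; inpNode and ngPrc stand for whatever node and element type are in scope):

// Per-channel constant: rank matches the input, all dims are 1 except C.
ngraph::Shape newShape(inpNode->get_shape().size(), 1);
newShape[1] = inpNode->get_shape()[1];
// Empty data plus random=true yields a randomly filled constant of type ngPrc.
auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
auto scaled = std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);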