From 1ec5f8cb0b2c248a57d11c84a02fa1242314dfe2 Mon Sep 17 00:00:00 2001
From: Gorokhov Dmitriy <dmitry.gorokhov@intel.com>
Date: Wed, 10 Mar 2021 09:42:17 +0300
Subject: [PATCH] Enabled Convolution + post ops fusing (#20)
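
Port the Convolution post-ops fusing pipeline to the new ngraph-based
node infrastructure:

* Add the FuseConvolutionAndBias and FuseMultiplyAndAdd graph passes and
  re-enable FuseConvolutionAndSimpleOperation, which also replaces the
  former FuseConvolutionAndQuantize pass.
* Remove the legacy CNNLayer-based passes: MergeGroupConvolution,
  FuseConvolutionAndActivation and FuseConvolutionAndDepthwise.
* Replace MKLDNNNode::fuseWith() with fuseInto(), which resolves the
  fusing port on the fused node and records the original layer names on
  the parent node.
* Move originalLayersNames and PrimitivesPriority parsing from
  MKLDNNGraph into the MKLDNNNode constructor, and add per-port
  precision accessors (getOriginalInputPrecisionAtPort and friends).
* Drop the legacy isDW/isMerged logic from MKLDNNConvolutionNode, add an
  isSupportedOperation() check, and move Eltwise scale/shift extraction
  from the constructor into fillScalesAndShifts().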

---
 .../src/mkldnn_plugin/CMakeLists.txt          |   4 -
 .../src/mkldnn_plugin/mkldnn_graph.cpp        |  30 -
 .../src/mkldnn_plugin/mkldnn_graph.h          |   1 -
 .../mkldnn_plugin/mkldnn_graph_optimizer.cpp  | 595 ++++++++----------
 .../mkldnn_plugin/mkldnn_graph_optimizer.h    |   6 +-
 .../src/mkldnn_plugin/mkldnn_node.cpp         |  35 +-
 .../src/mkldnn_plugin/mkldnn_node.h           |  69 +-
 .../src/mkldnn_plugin/nodes/gather.cpp        |   6 +-
 .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp  | 108 ++--
 .../mkldnn_plugin/nodes/mkldnn_conv_node.h    |   3 +-
 .../nodes/mkldnn_eltwise_node.cpp             |  87 +--
 .../mkldnn_plugin/nodes/mkldnn_eltwise_node.h |   1 +
 .../mkldnn_plugin/nodes/mkldnn_gemm_node.cpp  |   4 +-
 .../mkldnn_plugin/nodes/mkldnn_input_node.cpp |   4 +-
 .../mkldnn_plugin/nodes/mkldnn_input_node.h   |   4 +
 .../mkldnn_plugin/nodes/mkldnn_mvn_node.cpp   |   6 +-
 .../nodes/mkldnn_normalize_node.cpp           |  16 +-
 .../mkldnn_plugin/nodes/mkldnn_pad_node.cpp   |   2 +-
 .../nodes/mkldnn_pooling_node.cpp             |   6 +-
 .../nodes/mkldnn_reshape_node.cpp             |   4 +-
 .../nodes/mkldnn_scatter_update_node.cpp      |   6 +-
 .../nodes/mkldnn_softmax_node.cpp             |   2 +-
 .../nodes/mkldnn_transpose_node.cpp           |   4 +-
 .../src/mkldnn_plugin/utils/ngraph_utils.hpp  |  12 +
 .../skip_tests_config.cpp                     |   5 +-
 .../cpu/single_layer_tests/normalize.cpp      |   4 +-
 .../cpu/test_utils/fusing_test_utils.hpp      |   8 +-
 27 files changed, 513 insertions(+), 519 deletions(-)
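
For reference, a minimal ngraph sketch of the subgraph that the new
FuseConvolutionAndBias pass targets (the names, shapes and values below
are illustrative only; the pass matches any constant second input whose
rank equals the Convolution output rank and whose dims are {1, C, 1, ..., 1}):

    #include <ngraph/ngraph.hpp>
    #include <ngraph/opsets/opset1.hpp>
    #include <vector>

    using namespace ngraph;

    // NCHW activations and OIHW weights; 64 output channels.
    auto data = std::make_shared<opset1::Parameter>(
            element::f32, Shape{1, 3, 224, 224});
    auto weights = opset1::Constant::create(
            element::f32, Shape{64, 3, 3, 3},
            std::vector<float>(64 * 3 * 3 * 3, 0.01f));
    auto conv = std::make_shared<opset1::Convolution>(
            data, weights, Strides{1, 1}, CoordinateDiff{1, 1},
            CoordinateDiff{1, 1}, Strides{1, 1});
    // Per-channel bias: same rank as the conv output, dim 1 equal to the
    // channel count, every other dim equal to 1 -- exactly what
    // isSutableChildNode() in FuseConvolutionAndBias checks.
    auto bias = opset1::Constant::create(
            element::f32, Shape{1, 64, 1, 1}, std::vector<float>(64, 0.0f));
    auto add = std::make_shared<opset1::Add>(conv, bias);

Once the pass runs, the Add node is dropped and the bias constant is
flattened to a 1D {64} tensor attached to the Convolution as a third
input; FuseMultiplyAndAdd handles the analogous Multiply + Add pair by
rewriting the Multiply into a single EltwiseMulAdd node.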

diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
index 1cd7941eb61d53..29f24e82cbab3a 100644
--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt
+++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -99,10 +99,6 @@ set(LAYERS
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unique.cpp
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unsqueeze.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/softmax.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/emitter.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_eltwise_emitters.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_mkldnn_emitters.cpp
-#
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax.cpp
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax_imp.cpp
 #    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
index 1e6423a48ca62c..cda74fbf103794 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -265,11 +265,6 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
 
         op2node[op] = node;
 
-        const auto& rtInfo = op->get_rt_info();
-        if (rtInfo.count("originalLayersNames")) {
-            node->originalLayers = getRTInfoValue(rtInfo, "originalLayersNames");
-        }
-
         for (size_t port = 0; port < op->get_input_size(); port++) {
             auto parentOp = op->get_input_node_shared_ptr(port);
 
@@ -349,7 +344,6 @@ void MKLDNNGraph::InitGraph() {
 
     CreatePrimitives();
 
-    SetOriginalLayerNames();
 //
 //    if (!config.dumpToDot.empty())
 //        dumpToDotFile(config.dumpToDot + "_init.dot");
@@ -381,30 +375,6 @@ void MKLDNNGraph::InitGraph() {
     ExecuteConstantNodesOnly();
 }
 
-void MKLDNNGraph::SetOriginalLayerNames() {
-    OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames");
-    // Do it before cleanup. Because it will lose original layers information
-    for (auto &graphNode : graphNodes) {
-        auto nodeType = graphNode->getType();
-        if (nodeType == Reorder || nodeType == Output) continue;
-
-        if (graphNode->getOriginalLayers().empty()) {
-            graphNode->addOriginalLayer(graphNode->getOriginalName());
-        }
-
-        if (!graphNode->getFusedWith().empty() || !graphNode->getMergeWith().empty()) {
-            // Original layer names
-            std::vector<MKLDNNNodePtr> internal = graphNode->getFusedWith();
-            auto &merged = graphNode->getMergeWith();
-            internal.insert(internal.end(), merged.begin(), merged.end());
-
-            for (auto &sub_node : internal) {
-                graphNode->addOriginalLayer(sub_node->getOriginalName());
-            }
-        }
-    }
-}
-
 void MKLDNNGraph::InitNodes() {
     OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::InitNodes");
     for (auto &node : graphNodes) {
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
index 47c1bdc35ecc2f..2383221ff6325e 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
@@ -200,7 +200,6 @@ class MKLDNNGraph {
     void AllocateWithReuse();
     void CreatePrimitives();
     void ExecuteConstantNodesOnly();
-    void SetOriginalLayerNames();
 
     void do_before(const std::string &dir, const MKLDNNNodePtr &node);
     void do_after(const std::string &dir, const MKLDNNNodePtr &node);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
index ea4cc191ecbbaf..9d6183991f2870 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -54,6 +54,12 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
 //    MergeTwoEqualScaleShifts(graph);
 //    graph.RemoveDroppedNodes();
 
+    FuseConvolutionAndBias(graph);
+    graph.RemoveDroppedNodes();
+
+    FuseMultiplyAndAdd(graph);
+    graph.RemoveDroppedNodes();
+
     FuseBroadcastAndEltwise(graph);
     graph.RemoveDroppedNodes();
 
@@ -67,36 +73,17 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
 //    FuseScaleShiftAndQuantize(graph);
 //    graph.RemoveDroppedNodes();
 
-// TODO [NM]: do we still have networks that requires this optimizations? Preferable should be removed.
-//    MergeGroupConvolution(graph);
-//    graph.RemoveDroppedNodes();
-
 // TODO [NM]: transformation should be implemented w/o using of CNNLayer
 //    FuseConvolutionAndZeroPoints(graph);
 //    graph.RemoveDroppedNodes();
 
-// TODO [NM]: transformation should be implemented w/o using of CNNLayer
-//    FuseConvolutionAndDepthwise(graph);
-//    graph.RemoveDroppedNodes();
-
-// TODO [NM]: transformation should be implemented w/o using of CNNLayer
-//    FuseConvolutionAndActivation(graph);
-//    graph.RemoveDroppedNodes();
-
-// TODO [NM]: transformation should be implemented w/o using of CNNLayer
-//    FuseConvolutionAndDepthwise(graph);
-//    graph.RemoveDroppedNodes();
-
-    FuseConvolutionAndQuantize(graph);
+// TODO [NM]: While fusing a simple operation into any node (except Eltwise), we need to check that the other inputs are Constant nodes.
+    FuseConvolutionAndSimpleOperation(graph);
     graph.RemoveDroppedNodes();
 
     graph.SortTopologically();
     graph.RemoveDroppedEdges();
 
-// TODO [NM]: transformation should be implemented w/o using of CNNLayer
-//    FuseConvolutionAndDepthwise(graph);
-//    graph.RemoveDroppedNodes();
-
     FusePoolingAndQuantize(graph);
     graph.RemoveDroppedNodes();
 
@@ -116,9 +103,8 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
     FuseConvolutionSumAndConvolutionSumActivation(graph);
     graph.RemoveDroppedNodes();
 
-// TODO [NM]: transformation should be implemented w/o using of CNNLayer
-//    FuseConvolutionAndSimpleOperation(graph);
-//    graph.RemoveDroppedNodes();
+    FuseConvolutionAndSimpleOperation(graph);
+    graph.RemoveDroppedNodes();
 
 // TODO [NM]: transformation should be implemented w/o using of CNNLayer
 //    FuseFullyConnectedAndSimpleOperation(graph);
@@ -158,6 +144,232 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
     graph.RemoveDroppedEdges();
 }
 
+void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) {
+    auto& graphNodes = graph.GetNodes();
+
+    auto isSutableParentNode = [](MKLDNNNodePtr node) {
+        return node->getType() == Convolution &&
+               node->getChildEdges().size() == 1 &&
+               node->getFusedWith().empty();
+    };
+
+    auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+        if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
+            return false;
+
+        auto biasNode = childNode->getParentEdgesAtPort(1)[0]->getParent();
+        if (biasNode->getChildEdges().size() != 1)
+            return false;
+
+        auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getDims();
+        auto biasDims = biasNode->getChildEdgesAtPort(0)[0]->getDims();
+        // TODO [NM]: The legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasting) and per-channel cases.
+        // Most real models use a per-channel bias, so we need to re-evaluate whether the per-tensor variant is still worth supporting.
+        if (convOutDims.ndims() != biasDims.ndims() || biasDims.ndims() < 2)
+            return false;
+
+        if (biasDims[0] != 1 || biasDims[1] != convOutDims[1])
+            return false;
+
+        for (int i = 2; i < biasDims.ndims(); i++) {
+            if (biasDims[i] != 1)
+                return false;
+        }
+
+        return true;
+    };
+
+    auto parent = graphNodes.begin();
+    while (parent != graphNodes.end()) {
+        auto parentNode = *parent;
+        if (!isSutableParentNode(parentNode)) {
+            parent++;
+            continue;
+        }
+
+        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
+        if (!isSutableChildNode(parentNode, childNode)) {
+            parent++;
+            continue;
+        }
+
+        auto childs = childNode->childEdges;
+        auto parents = childNode->parentEdges;
+
+        for (size_t i = 0; i < parents.size(); i++) {
+            auto p_edge = parents[i].lock();
+            if (!p_edge) continue;
+            auto parent = p_edge->getParent();
+            if (!parent) continue;
+
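+            // The edge from the Convolution itself: re-wire the Convolution directly to all of the Add's consumers.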
+            if (parent == parentNode) {
+                for (size_t j = 0; j < childs.size(); j++) {
+                    if (!childs[j].lock())
+                        continue;
+                    auto child = childs[j].lock()->getChild();
+                    if (!child)
+                        continue;
+
+                    MKLDNNEdgePtr &remEdge = p_edge;
+                    int inNum = 0;
+                    if (remEdge) {
+                        inNum = remEdge->getInputNum();
+                        remEdge->drop();
+                        removeEdge(graph, remEdge);
+                    }
+                    remEdge = childs[j].lock();
+                    int outNum = 0;
+                    if (remEdge) {
+                        outNum = remEdge->getOutputNum();
+                        remEdge->drop();
+                        removeEdge(graph, remEdge);
+                    }
+                    MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
+                    auto &graphEdges = graph.GetEdges();
+                    graphEdges.push_back(newEdge);
+                    parent->addEdge(newEdge);
+                }
+            } else {
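+                // The bias producer: re-attach it to the Convolution as an extra input and flatten it to a 1D {C} tensor below.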
+                MKLDNNEdgePtr &remEdge = p_edge;
+                int inNum = 0;
+                if (remEdge) {
+                    inNum = remEdge->getInputNum();
+                    remEdge->drop();
+                    removeEdge(graph, remEdge);
+                }
+
+                auto parentEltwise = parentNode;
+                MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size()));
+                auto &graphEdges = graph.GetEdges();
+                graphEdges.push_back(newEdge);
+                parent->addEdge(newEdge);
+
+                auto newBiasDim = parent->outDims[inNum][1];
+                parent->outDims[inNum] = MKLDNNDims({newBiasDim});
+                parentEltwise->inDims.push_back(parent->outDims[0]);
+            }
+        }
+
+        graph.DropNode(childNode);
+
+        parentNode->addOriginalInputPrecision(childNode->getOriginalInputPrecisionAtPort(1));
+    }
+}
+
+void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) {
+    auto& graphNodes = graph.GetNodes();
+
+    auto isSutableSecondInput = [](MKLDNNNodePtr node, MKLDNNDims dataDims) {
+        auto secondInputDims = node->outDims[0];
+        if (secondInputDims.ndims() != dataDims.ndims() || secondInputDims.ndims() < 2)
+            return false;
+
+        if (secondInputDims[0] != 1 || secondInputDims[1] != dataDims[1])
+            return false;
+
+        for (size_t i = 2; i < secondInputDims.ndims(); i++) {
+            if (secondInputDims[i] != 1)
+                return false;
+        }
+
+        return true;
+    };
+
+    auto isSutableParentNode = [&](MKLDNNNodePtr node) {
+        if (node->getAlgorithm() != EltwiseMultiply || !node->getFusedWith().empty() ||
+            node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1)
+            return false;
+
+        return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getDims());
+    };
+
+    auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
+        if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2)
+            return false;
+
+        return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getDims());
+    };
+
+    auto parent = graphNodes.begin();
+    while (parent != graphNodes.end()) {
+        auto parentNode = *parent;
+        if (!isSutableParentNode(parentNode)) {
+            parent++;
+            continue;
+        }
+
+        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
+        if (!isSutableChildNode(parentNode, childNode)) {
+            parent++;
+            continue;
+        }
+
+        auto childs = childNode->childEdges;
+        auto parents = childNode->parentEdges;
+
+        for (size_t i = 0; i < parents.size(); i++) {
+            auto p_edge = parents[i].lock();
+            if (!p_edge) continue;
+            auto parent = p_edge->getParent();
+            if (!parent) continue;
+
+            if (parent == parentNode) {
+                for (size_t j = 0; j < childs.size(); j++) {
+                    if (!childs[j].lock())
+                        continue;
+                    auto child = childs[j].lock()->getChild();
+                    if (!child)
+                        continue;
+
+                    MKLDNNEdgePtr &remEdge = p_edge;
+                    int inNum = 0;
+                    if (remEdge) {
+                        inNum = remEdge->getInputNum();
+                        remEdge->drop();
+                        removeEdge(graph, remEdge);
+                    }
+                    remEdge = childs[j].lock();
+                    int outNum = 0;
+                    if (remEdge) {
+                        outNum = remEdge->getOutputNum();
+                        remEdge->drop();
+                        removeEdge(graph, remEdge);
+                    }
+                    MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum));
+                    auto &graphEdges = graph.GetEdges();
+                    graphEdges.push_back(newEdge);
+                    parent->addEdge(newEdge);
+                }
+            } else {
+                MKLDNNEdgePtr &remEdge = p_edge;
+                int inNum = 0;
+                if (remEdge) {
+                    inNum = remEdge->getInputNum();
+                    remEdge->drop();
+                    removeEdge(graph, remEdge);
+                }
+
+                auto parentEltwise = parentNode;
+                MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size()));
+                auto &graphEdges = graph.GetEdges();
+                graphEdges.push_back(newEdge);
+                parent->addEdge(newEdge);
+
+                parentEltwise->inDims.push_back(parent->outDims[0]);
+            }
+        }
+
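+        // Turn the Multiply node into a fused MulAdd: inherit the Add's second input precision and original layer names.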
+        parentNode->addOriginalInputPrecision(childNode->getOriginalInputPrecisionAtPort(1));
+        parentNode->setAlgorithm(EltwiseMulAdd);
+        parentNode->addOriginalLayer(childNode->getOriginalLayers());
+
+        graph.DropNode(childNode);
+    }
+}
+
 void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
 //    auto& graphNodes = graph.GetNodes();
 //
@@ -383,63 +592,6 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) {
 //    }
 }
 
-void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) {
-    for (auto node : graph.GetNodes()) {
-        // Split with at least 2 Convolutions
-        if (!IsOneOf(node->getType(), {Split}) || node->getChildEdges().size() < 2 ||
-                !IsOneOf(node->getChildEdgeAt(0)->getChild()->getType(), {Convolution})) {
-            continue;
-        }
-        bool canBeMerged = true;
-
-        auto& split = node;
-
-        auto convInEdge = split->getChildEdgeAt(0);
-        auto conv = convInEdge->getChild();
-        auto convOutEdge = conv->getChildEdgeAt(0);
-
-        auto convType = conv->getType();
-        auto convInDims = convInEdge->getDims();
-        auto convOutDims = convOutEdge->getDims();
-
-        // Convolutions of same the type with Concat as a child
-        for (size_t i = 1; i < split->getChildEdges().size(); i++) {
-            auto childEdge = split->getChildEdgeAt(i);
-            auto child = childEdge->getChild();
-            Type type = child->getType();
-
-            if (convType != type || child->getChildEdgeAt(0)->getChild()->getType() != Concatenation ||
-                    convOutDims != child->getChildEdgeAt(0)->getDims() || child->getChildEdges().size() != 1 ||
-                    convInDims != childEdge->getDims()) {
-                canBeMerged = false;
-                break;
-            }
-        }
-
-        if (!canBeMerged) continue;
-
-        // TODO: Rewrite topology optimizer at all. it should be clean and understandable
-        auto concat = conv->getChildEdgeAt(0)->getChild();
-        // Merge and remove Convolution
-        while (split->getChildEdges().size() > 1) {
-            auto peerInEdge = split->getChildEdgeAt(1);
-            auto peer = peerInEdge->getChild();
-            conv->mergeWith(peer);
-            convInDims[1] += (peerInEdge->getDims())[1];
-            convOutDims[1] += (peer->getChildEdgeAt(0)->getDims())[1];
-            peer->remove();
-        }
-        conv->inDims[0] = convInDims;
-        conv->outDims[0] = convOutDims;
-
-        conv->fuseWith(split);
-        conv->fuseWith(concat);
-
-        graph.DropNode(split);
-        graph.DropNode(concat);
-    }
-}
-
 //  WA: We need it until LP transformations will not optimize this pattern inside
 void MKLDNNGraphOptimizer::MergeTwoEqualScaleShifts(MKLDNNGraph& graph) {
 //    auto& graphNodes = graph.GetNodes();
@@ -579,74 +731,6 @@ void MKLDNNGraphOptimizer::FuseBatchNormWithScale(MKLDNNGraph &graph) {
 //    }
 }
 
-void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) {
-//    auto& graphNodes = graph.GetNodes();
-//
-//    auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) {
-//        auto* binConv = dynamic_cast<MKLDNNBinaryConvolutionNode *>(conv.get());
-//        if (binConv) {
-//            if (!binConv->canFuse(activation))
-//                return false;
-//        }
-//
-//        if (!activation->getCnnLayer())
-//            return false;
-//
-//        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(activation.get());
-//
-//        return eltwiseNode &&
-//            (eltwiseNode->getOpType() == Relu ||
-//            (conv->getCnnLayer()->precision == Precision::FP32 &&
-//            IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid,
-//                                               Round})));
-//    };
-//
-//    for (int i = 0; i < graphNodes.size(); i++) {
-//        if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) {
-//            auto conv = graphNodes[i];
-//
-//            auto fuse = [&] (MKLDNNNodePtr relu) {
-//                conv->fuseWith(relu);
-//            };
-//
-//            if (conv->getChildEdges().size() == 1) {
-//                auto ch1 = conv->getChildEdgeAt(0)->getChild();
-//
-//                if (isFusingSupported(conv, ch1)) {
-//                    fuse(ch1);
-//
-//                    if (ch1->getChildEdges().size() == 1) {
-//                        auto ch2 = ch1->getChildEdgeAt(0)->getChild();
-//
-//                        if (isFusingSupported(conv, ch2)) {
-//                            fuse(ch2);
-//                            graph.DropNode(ch2);
-//                        }
-//                    }
-//                    graph.DropNode(ch1);
-//                } else {
-//                    if (ch1->type == Pooling) {
-//                        auto pool = ch1;
-//
-//                        auto* pLayer = dynamic_cast<PoolingLayer *>(pool->getCnnLayer().get());
-//                        if (pLayer == nullptr)
-//                            IE_THROW() << "Cannot get pooling layer " << pool->getName();
-//                        bool is_max_pool = pLayer->_type == PoolingLayer::PoolType::MAX;
-//
-//                        if (is_max_pool && pool->getChildEdges().size() == 1) {
-//                            auto ch2 = pool->getChildEdgeAt(0)->getChild();
-//                            if (isFusingSupported(conv, ch2)) {
-//                                fuse(ch2);
-//                                graph.DropNode(ch2);
-//                            }
-//                        }
-//                    }
-//                }
-//            }
-//        }
-//    }
-}
-
 void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) {
 //    auto& graphNodes = graph.GetNodes();
 //
@@ -754,77 +838,6 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
 //    }
 }
 
-void MKLDNNGraphOptimizer::FuseConvolutionAndDepthwise(MKLDNNGraph &graph) {
-//    auto& graphNodes = graph.GetNodes();
-//
-//    auto isSutableParentNode = [](MKLDNNNodePtr node) {
-//        bool isSutableConv = (node->getType() == Convolution) &&
-//                             node->getCnnLayer()->precision == Precision::FP32;
-//        bool isSutableBinConv = node->getType() == BinaryConvolution;
-//        return (isSutableConv || isSutableBinConv) && node->getChildEdges().size() == 1;
-//    };
-//
-//    auto isSutableChildNode = [](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
-//        if (childNode->getType() != Eltwise)
-//            return false;
-//
-//        if (!childNode->getCnnLayer())
-//            return false;
-//
-//        auto* binConv = dynamic_cast<MKLDNNBinaryConvolutionNode *>(parentNode.get());
-//        if (binConv) {
-//            if (!binConv->canFuse(childNode))
-//                return false;
-//        }
-//
-//        auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(childNode.get());
-//        if (eltwiseNode == nullptr)
-//            IE_THROW() << "Cannot get eltwise node " << childNode->getName();
-//        return ((eltwiseNode->getOpType() == MulAdd && childNode->getCnnLayer()->blobs.size() == 2) ||
-//                (eltwiseNode->getOpType() == Prelu));
-//    };
-//
-//    for (int i = 0; i < graphNodes.size(); i++) {
-//        auto conv = graphNodes[i];
-//        if (!isSutableParentNode(conv)) continue;
-//
-//        auto depthwise0 = conv->getChildEdgeAt(0)->getChild();
-//        if (!isSutableChildNode(conv, depthwise0)) continue;
-//
-//        conv->fuseWith(depthwise0);
-//
-//        if (depthwise0->getChildEdges().size() == 1) {
-//            auto depthwise1 = depthwise0->getChildEdgeAt(0)->getChild();
-//
-//            if (isSutableChildNode(conv, depthwise1)) {
-//                conv->fuseWith(depthwise1);
-//
-//                auto parents = depthwise1->parentEdges;
-//                for (size_t j = 0; j < parents.size(); j++) {
-//                    auto p_edge = parents[j].lock();
-//                    if (p_edge->getParent()->getType() == Eltwise)
-//                        continue;
-//
-//                    removeEdge(graph, p_edge);
-//                }
-//
-//                graph.DropNode(depthwise1);
-//            }
-//        }
-//
-//        auto parents = depthwise0->parentEdges;
-//        for (size_t j = 0; j < parents.size(); j++) {
-//            auto p_edge = parents[j].lock();
-//            if (p_edge->getParent()->getType() == Convolution || p_edge->getParent()->getType() == BinaryConvolution)
-//                continue;
-//
-//            removeEdge(graph, p_edge);
-//        }
-//
-//        graph.DropNode(depthwise0);
-//    }
-}
-
 void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
 //    auto& graphNodes = graph.GetNodes();
 //
@@ -927,116 +940,60 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) {
 //    }
 }
 
-void MKLDNNGraphOptimizer::FuseConvolutionAndQuantize(MKLDNNGraph &graph) {
+void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) {
     auto& graphNodes = graph.GetNodes();
 
     auto isSutableParentNode = [](MKLDNNNodePtr node) {
-        bool isSutableBinConv = node->getType() == Convolution;
-
-        if (isSutableBinConv) {
-            return isSutableBinConv && node->getChildEdges().size() == 1;
-        } else {
-            return false;
-        }
+        return node->getType() == Convolution &&
+               node->getChildEdges().size() == 1;
     };
 
-    auto isSutableChildNode = [](MKLDNNNodePtr node) {
-        if (node->getType() != Quantize)
-            return false;
+    auto isSutableChildNode = [&](MKLDNNNodePtr node) {
+        if (node->getType() == Quantize) {
+            auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
+            if (quantizeNode == nullptr)
+                IE_THROW() << "Cannot get quantize layer " << node->getName();
 
-        auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
-        if (quantizeNode == nullptr)
-            IE_THROW() << "Cannot get quantize layer " << node->getName();
+            return !quantizeNode->isBinarization();
+        } else if (node->getType() == Eltwise) {
+            return one_of(node->getAlgorithm(), EltwiseMulAdd, EltwisePrelu, EltwiseRelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseSwish, EltwiseHswish,
+                                                EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero);
+        }
 
-        return !quantizeNode->isBinarization();
+        return false;
     };
 
-    for (int i = 0; i < graphNodes.size(); i++) {
-        auto parent = graphNodes[i];
-        if (!isSutableParentNode(parent)) continue;
+    auto parent = graphNodes.begin();
+    while (parent != graphNodes.end()) {
+        auto parentNode = *parent;
+        if (!isSutableParentNode(parentNode)) {
+            parent++;
+            continue;
+        }
 
-        auto child = parent->getChildEdgeAt(0)->getChild();
-        if (!isSutableChildNode(child)) continue;
+        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
+        if (!isSutableChildNode(childNode)) {
+            parent++;
+            continue;
+        }
 
-        parent->fuseWith(child);
+        childNode->fuseInto(parentNode);
 
-        auto parents = child->parentEdges;
-        for (size_t j = 0; j < parents.size(); j++) {
-            auto p_edge = parents[j].lock();
-            if (p_edge->getParent()->getType() == Convolution)
-                continue;
+        if (childNode->getType() == Quantize || childNode->getType() == Eltwise) {
+            auto parentEdges = childNode->parentEdges;
+            for (auto &parentEdge : parentEdges) {
+                auto p_edge = parentEdge.lock();
+                if (p_edge->getParent()->getType() == Convolution)
+                    continue;
 
-            removeEdge(graph, p_edge);
+                removeEdge(graph, p_edge);
+            }
         }
 
-        graph.DropNode(child);
+        graph.DropNode(childNode);
     }
 }
 
-void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) {
-//    auto& graphNodes = graph.GetNodes();
-//
-//    auto isSutableParentNode = [](MKLDNNNodePtr node) {
-//        return node->getType() == Convolution &&
-//               node->getChildEdges().size() == 1 &&
-//               node->getCnnLayer()->precision == Precision::FP32;
-//    };
-//
-//    auto isSutableChildNode = [&](MKLDNNNodePtr node) {
-//        if (!node->getCnnLayer())
-//            return false;
-//
-//        if (node->getType() == Quantize) {
-//            auto* quantizeNode = dynamic_cast<MKLDNNQuantizeNode*>(node.get());
-//            if (quantizeNode == nullptr)
-//                IE_THROW() << "Cannot get quantize layer " << node->getName();
-//
-//            return !quantizeNode->isBinarization();
-//        } else if (node->getType() == Eltwise) {
-//            auto* eltwiseNode = dynamic_cast<MKLDNNEltwiseNode *>(node.get());
-//            if (eltwiseNode == nullptr)
-//                IE_THROW() << "Cannot get eltwise node " << node->getName();
-//
-//            return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) ||
-//                    (eltwiseNode->getOpType() == Prelu) ||
-//                    IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish,
-//                                                       Hsigmoid, Round}));
-//        }
-//
-//        return false;
-//    };
-//
-//    auto parent = graphNodes.begin();
-//    while (parent != graphNodes.end()) {
-//        auto parentNode = *parent;
-//        if (!isSutableParentNode(parentNode)) {
-//            parent++;
-//            continue;
-//        }
-//
-//        auto childNode = parentNode->getChildEdgeAt(0)->getChild();
-//        if (!isSutableChildNode(childNode)) {
-//            parent++;
-//            continue;
-//        }
-//
-//        parentNode->fuseWith(childNode);
-//
-//        if (childNode->getType() == Quantize || childNode->getType() == Eltwise) {
-//            auto parentEdges = childNode->parentEdges;
-//            for (auto &parentEdge : parentEdges) {
-//                auto p_edge = parentEdge.lock();
-//                if (p_edge->getParent()->getType() == Convolution)
-//                    continue;
-//
-//                removeEdge(graph, p_edge);
-//            }
-//        }
-//
-//        graph.DropNode(childNode);
-//    }
-}
-
 void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) {
     auto& graphNodes = graph.GetNodes();
 
@@ -1064,7 +1021,7 @@ void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph)
         auto child = parent->getChildEdgeAt(0)->getChild();
         if (!isSutableChildNode(parent, child)) continue;
 
-        parent->fuseWith(child);
+        child->fuseInto(parent);
 
         auto parents = child->parentEdges;
         for (size_t i = 0; i < parents.size(); i++) {
@@ -1286,10 +1243,10 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
                 isFusingSupported(graphNode, graphNode->getChildEdgeAt(0)->getChild())) {
             auto relu_shared = graphNode->getChildEdgeAt(0)->getChild();
             lastNode = relu_shared;
-            mergedConv->fuseWith(sum);
+            sum->fuseInto(mergedConv);
         }
 
-        mergedConv->fuseWith(lastNode);
+        lastNode->fuseInto(mergedConv);
 
         if (mergedConv->fusedWith.size() > 0 &&
            (mergedConv->fusedWith[0]->getType() == Convolution || mergedConv->fusedWith[0]->getType() == BinaryConvolution)) {
@@ -1393,7 +1350,7 @@ void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) {
             continue;
         }
 
-        parentNode->fuseWith(childNode);
+        childNode->fuseInto(parentNode);
 
         if (childNode->getType() == Quantize || childNode->getType() == Eltwise) {
             auto parentEdges = childNode->parentEdges;
@@ -1450,7 +1407,7 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph)
             continue;
         }
 
-        parentNode->fuseWith(childNode);
+        childNode->fuseInto(parentNode);
 
         if (childNode->getType() == Quantize || childNode->getType() == Eltwise) {
             auto parentEdges = childNode->parentEdges;
@@ -1492,7 +1449,7 @@ void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph)
             continue;
         }
 
-        parentNode->fuseWith(childNode);
+        childNode->fuseInto(parentNode);
 
         if (childNode->getType() == Quantize || childNode->getType() == Eltwise) {
             auto parentEdges = childNode->parentEdges;
@@ -1551,7 +1508,7 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) {
             continue;
         }
 
-        parentNode->fuseWith(childNode);
+        childNode->fuseInto(parentNode);
 
         if (childNode->getType() == Quantize) {
             auto parentEdges = childNode->parentEdges;
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
index f19dbc1743439e..1f9fa8cd97385d 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
@@ -19,14 +19,12 @@ class MKLDNNGraphOptimizer {
     void ApplyImplSpecificGraphOptimizations(MKLDNNGraph& graph);
 
 private:
-    void MergeGroupConvolution(MKLDNNGraph& graph);
+    void FuseConvolutionAndBias(MKLDNNGraph &graph);
+    void FuseMultiplyAndAdd(MKLDNNGraph &graph);
     void MergeTwoEqualScaleShifts(MKLDNNGraph& graph);
-    void FuseConvolutionAndActivation(MKLDNNGraph &graph);
     void FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph);
-    void FuseConvolutionAndDepthwise(MKLDNNGraph &graph);
     void FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph);
     void FuseConvolutionAndDWConvolution(MKLDNNGraph &graph);
-    void FuseConvolutionAndQuantize(MKLDNNGraph &graph);
     void FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph);
     void FusePoolingAndQuantize(MKLDNNGraph &graph);
     void FuseBatchNormWithScale(MKLDNNGraph& graph);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
index 938b99a430639d..8701ad831fb983 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp
@@ -211,8 +211,6 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
     algorithm = Algorithm::Undefined;
     fusingPort = -1;
 
-    originalName = name;
-    originalInputsNumber = op->get_input_size();
     for (size_t i = 0; i < op->get_input_size(); i++) {
         inDims.emplace_back(op->get_input_shape(i));
         originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i)));
@@ -236,19 +234,28 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::en
 //        }
     }
 
+    const auto& rtInfo = op->get_rt_info();
+    if (rtInfo.count("originalLayersNames")) {
+        originalLayers = getRTInfoValue(rtInfo, "originalLayersNames");
+    }
 
-//    if (op->params.find("PrimitivesPriority") != layer->params.end()) {
-//        std::istringstream stream(layer->params["PrimitivesPriority"]);
-//        std::string str;
-//        while (getline(stream, str, ',')) {
-//            if (str.substr(0, 4) != "cpu:")
-//                continue;
-//            implPriorities.push_back(parse_impl_name(str));
-//            if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown &&
-//                    str != "cpu:unknown")
-//                IE_THROW() << "Unsupported CPU implementation " << str << " for node " << getName();
-//        }
-//    }
+    if (originalLayers.empty()) {
+        addOriginalLayer(name);
+    }
+
+    auto primitivesPriority = getPrimitivesPriorityValue(op);
+    if (!primitivesPriority.empty()) {
+        std::istringstream stream(primitivesPriority);
+        std::string str;
+        while (getline(stream, str, ',')) {
+            if (str.substr(0, 4) != "cpu:")
+                continue;
+            implPriorities.push_back(parse_impl_name(str));
+            if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown &&
+                str != "cpu:unknown")
+                IE_THROW() << "Unsupported CPU implementation " << str << " for node " << getName();
+        }
+    }
 
     if (op != nullptr) {
         std::string inputMemoryFormats = ngraph::getMLKDNNInputMemoryFormats(op);
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
index 64224975b5d675..a36cb2abb7e968 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h
@@ -27,6 +27,7 @@
 #include "utils/ngraph_utils.hpp"
 #include <ngraph/ops.hpp>
 #include <ngraph/node.hpp>
+#include <ie_precision.hpp>
 
 namespace MKLDNNPlugin {
 
@@ -400,15 +401,36 @@ class MKLDNNNode : public InferenceEngine::details::no_copy {
 
     bool isFusedWith(Type type) const;
 
-    void fuseWith(const MKLDNNNodePtr &fusingNode) {
+    void addFusedNode(const MKLDNNNodePtr &fusingNode) {
         fusedWith.push_back(fusingNode);
+    }
 
-        for (int i = 0; i< inDims.size(); i++) {
-            if (fusingNode->getParentEdgesAtPort(i)[0]->getParent().get() == this) {
+    virtual void fuseInto(MKLDNNNodePtr& parentNode) {
+        // The graph supports fusing only consecutive nodes, and some of the graph logic needs to know through which input port a node was fused into its parent.
+        for (int i = 0; i < getParentEdges().size(); i++) {
+            if (getParentEdgesAtPort(i)[0]->getParent().get() == parentNode.get()) {
                 setFusingPort(i);
                 break;
             }
         }
+
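+        // Not found among the direct parents: the node may be fed through the last node already fused into this parent.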
+        auto parentFusedNodes = parentNode->getFusedWith();
+        if (getFusingPort() < 0 && !parentFusedNodes.empty()) {
+            for (int i = 0; i < getParentEdges().size(); i++) {
+                if (getParentEdgesAtPort(i)[0]->getParent().get() == parentFusedNodes[parentFusedNodes.size() - 1].get()) {
+                    setFusingPort(i);
+                    break;
+                }
+            }
+        }
+
+        if (getFusingPort() == -1) {
+            IE_THROW() << "Cannot determine fusing port between nodes: " << parentNode->getName() << " and " << getName();
+        }
+
+        parentNode->addFusedNode(getParentEdgesAtPort(getFusingPort())[0]->getChild());
+        parentNode->addOriginalLayer(getOriginalLayers());
     }
 
     void clearFusedWith() {
@@ -419,8 +440,6 @@ class MKLDNNNode : public InferenceEngine::details::no_copy {
         mergedWith.push_back(merge);
     }
 
-    void addOriginalLayer(const std::string& layerName);
-
     const std::vector <MKLDNNNodePtr> &getMergeWith() {
         return mergedWith;
     }
@@ -441,6 +460,8 @@ class MKLDNNNode : public InferenceEngine::details::no_copy {
         return name;
     }
 
+    void addOriginalLayer(const std::string& layerName);
+
     const std::string getOriginalLayers() const {
         return originalLayers;
     }
@@ -449,10 +470,6 @@ class MKLDNNNode : public InferenceEngine::details::no_copy {
         return type;
     }
 
-//    const InferenceEngine::CNNLayerPtr &getCnnLayer() const {
-//        return cnnLayer;
-//    }
-
     const std::vector<PrimitiveDescInfo>& getSupportedPrimitiveDescriptors() const {
         return supportedPrimitiveDescriptors;
     }
@@ -602,18 +619,42 @@ class MKLDNNNode : public InferenceEngine::details::no_copy {
         return originalOutputPrecisions;
     }
 
-    size_t getOriginalInputsNumber() const {
-        return originalInputsNumber;
+    InferenceEngine::Precision getOriginalInputPrecisionAtPort(size_t port) const {
+        if (originalInputPrecisions.size() <= port) {
+            IE_THROW() << "Incorrect input port number for node " << getName();
+        }
+        return originalInputPrecisions[port];
+    }
+    InferenceEngine::Precision getOriginalOutputPrecisionAtPort(size_t port) const {
+        if (originalOutputPrecisions.size() <= port) {
+            IE_THROW() << "Incorrect output port number for node " << getName();
+        }
+        return originalOutputPrecisions[port];
     }
 
-    std::string getOriginalName() const {
-        return originalName;
+    void setOriginalInputPrecisionAtPort(size_t port, InferenceEngine::Precision precision) {
+        if (originalInputPrecisions.size() <= port) {
+            IE_THROW() << "Incorrect input port number for node " << getName();
+        }
+        originalInputPrecisions[port] = precision;
+    }
+
+    void addOriginalInputPrecision(InferenceEngine::Precision precision) {
+        originalInputPrecisions.push_back(precision);
+    }
+
+    size_t getOriginalInputsNumber() const {
+        return originalInputPrecisions.size();
     }
 
     Algorithm getAlgorithm() const {
         return algorithm;
     }
 
+    void setAlgorithm(Algorithm alg) {
+        algorithm = alg;
+    }
+
     virtual bool canFuse(const MKLDNNNodePtr& node) const {
         return false;
     }
@@ -713,8 +754,6 @@ class MKLDNNNode : public InferenceEngine::details::no_copy {
     std::vector<MKLDNNEdgeWeakPtr> parentEdges;
     std::vector<MKLDNNEdgeWeakPtr> childEdges;
 
-    std::string originalName;
-    size_t originalInputsNumber;
     std::vector<InferenceEngine::Precision> originalInputPrecisions;
     std::vector<InferenceEngine::Precision> originalOutputPrecisions;
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
index 543ab5a0e5fc1a..159570c67a808b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp
@@ -26,9 +26,9 @@ using MKLDNNPlugin::TensorDescCreatorTypes;
 
 class GatherImpl: public ExtLayerBase {
 public:
-    static bool isSupportedOperation(const ngraph::Node& op, std::string& errorMessage) noexcept {
+    static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
         try {
-            auto gatherOp = ngraph::as_type<const ngraph::op::v1::Gather>(&op);
+            auto gatherOp = ngraph::as_type_ptr<const ngraph::op::v1::Gather>(op);
             if (!gatherOp) {
                 errorMessage = "Only opset1 Gather operation is supported";
                 return false;
@@ -51,7 +51,7 @@ class GatherImpl: public ExtLayerBase {
             errorPrefix_ = std::string("Layer Gather with name '") + op->get_friendly_name() + "' ";
 
             std::string errorMessage;
-            if (!isSupportedOperation(*op, errorMessage)) {
+            if (!isSupportedOperation(op, errorMessage)) {
                 IE_THROW(NotImplemented) << errorMessage;
             }
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
index 80bac32d359cec..001916b4c32b5e 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp
@@ -20,12 +20,33 @@ using namespace mkldnn;
 using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
 
+bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept {
+    try {
+        if (!ngraph::is_type<ngraph::op::v1::Convolution>(op) && !ngraph::is_type<ngraph::op::v1::GroupConvolution>(op)) {
+            errorMessage = "Only opset1 Convolution and GroupConvolution operations are supported";
+            return false;
+        }
+        size_t ndims = op->get_input_shape(0).size();
+        if ((ndims < 4) || (ndims > 5)) {
+            IE_THROW() << "Only 4D and 5D blobs are supported as input";
+        }
+    } catch (...) {
+        return false;
+    }
+
+    return true;
+}
+
 MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache)
-        : MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false), isDW(false), isMerged(false),
+        : MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false),
           isGrouped(false), /* dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef), */
           groupNum(1lu), eltwisePrecision(Precision::FP32) {
-    // TODO [NM]: do we still have networks that requires this optimizations? Preferable should be removed.
-    isMerged = false; // (!getMergeWith().empty());  // grouped convolution was constructed from split->concat subgraph
+    std::string errorMessage;
+    if (!isSupportedOperation(op, errorMessage)) {
+        IE_THROW(NotImplemented) << errorMessage;
+    }
+
     isPrimitivesPriorityDefined = op->get_rt_info().count("PrimitivesPriority") != 0;
 
     auto convolutionOp = ngraph::as_type_ptr<ngraph::op::v1::Convolution>(op);
@@ -43,13 +63,6 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>
         groupIC = IC;
         groupOC = weightDims[0];
 
-        isDW = groupNum == groupOC && groupNum == groupIC;
-
-        if (isMerged) {
-            groupNum = getMergeWith().size() + 1;
-        }
-
-        withBiases = getOriginalInputsNumber() == 3;
         biasesDims = { groupOC };
 
         for (int i = 0; i < convolutionOp->get_strides().size(); i++) {
@@ -61,46 +74,36 @@ MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>
         paddingL = convolutionOp->get_pads_begin();
         paddingR = convolutionOp->get_pads_end();
     } else if (groupConvolutionOp) {
-            algorithm = ConvolutionGrouped;
-
-            groupNum = groupConvolutionOp->input_value(1).get_shape()[0];
-            isGrouped = true;
+        algorithm = ConvolutionGrouped;
 
-            weightDims = groupConvolutionOp->input_value(1).get_shape();
+        groupNum = groupConvolutionOp->input_value(1).get_shape()[0];
+        isGrouped = true;
 
-            IC = weightDims[2];
-            groupIC = IC;
-            groupOC = weightDims[1];
+        weightDims = groupConvolutionOp->input_value(1).get_shape();
 
-            isDW = groupNum == groupOC && groupNum == groupIC;
+        groupIC = weightDims[2];
+        IC = groupIC * groupNum;
+        groupOC = weightDims[1];
 
-            if (isMerged) {
-                groupNum = getMergeWith().size() + 1;
-            }
+        biasesDims = {groupOC * groupNum};
 
-            withBiases = getOriginalInputsNumber() == 3;
-            biasesDims = {groupOC};
-
-            for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) {
-                stride.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_strides()[i]));
-            }
-            for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) {
-                dilation.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_dilations()[i]) - 1);
-            }
-            paddingL = groupConvolutionOp->get_pads_begin();
-            paddingR = groupConvolutionOp->get_pads_end();
-    } else {
-        IE_THROW(NotImplemented)
-                << "CPU Convolution node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name();
+        for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) {
+            stride.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_strides()[i]));
+        }
+        for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) {
+            dilation.push_back(static_cast<ptrdiff_t>(groupConvolutionOp->get_dilations()[i]) - 1);
+        }
+        paddingL = groupConvolutionOp->get_pads_begin();
+        paddingR = groupConvolutionOp->get_pads_end();
     }
 }
 
 bool MKLDNNConvolutionNode::canBeExecutedInInt8() {
-    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[0]);
+    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
     if (!inputZeroPoints.empty())
         inputDataType = memory::data_type::u8;
 
-    auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[1]);
+    auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1));
     if (!weightsZeroPoints.empty())
         weightsDataType = memory::data_type::s8;
 
@@ -112,9 +115,9 @@ InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(const MK
 
     int fusingPort = fusingNode->getFusingPort();
     if (fusingPort == 0) {
-        eltwisePrecision = fusingNode->getOriginalInputPrecisions()[1];
+        eltwisePrecision = fusingNode->getOriginalInputPrecisionAtPort(1);
     } else if (fusingPort == 1) {
-        eltwisePrecision = fusingNode->getOriginalInputPrecisions()[0];
+        eltwisePrecision = fusingNode->getOriginalInputPrecisionAtPort(0);
     } else {
         IE_THROW() << "Cannot determine Eltwise post op precision for Convolution node with name '" << getName() << "'";
     }
@@ -126,6 +129,8 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
     if (!descs.empty())
         return;
 
+    withBiases = getOriginalInputsNumber() == 3;
+
     withSum = false;
     int expectedInputEdgesNum = static_cast<int>(getOriginalInputsNumber());
     for (int i = 0; i < fusedWith.size(); i++) {
@@ -139,14 +144,14 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
         }
     }
 
-    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[0]);
+    auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(0));
     if (!inputZeroPoints.empty())
         inputDataType = memory::data_type::u8;
 
-    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisions()[0]);
+    auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisionAtPort(0));
     eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
     if (!fusedWith.empty()) {
-        outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalInputPrecisions()[0]);
+        outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalInputPrecisionAtPort(0));
         eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType);
     }
 
@@ -171,13 +176,6 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
         IE_THROW() << "Incorrect number of output edges for layer " << getName();
 
     int ndims = getParentEdgesAtPort(0)[0]->getDims().ndims();
-    if ((ndims < 4) || (ndims > 5)) {
-        IE_THROW() << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input.";
-    }
-
-    if (isMerged && isGrouped)
-        IE_THROW() << "Convolution initialization. Group splitted mode are used together with direct group specification.";
-
     MKLDNNDims weightsDims = MKLDNNDims(weightDims);
 
     withDWConv = isFusedWith(Convolution);
@@ -228,9 +226,9 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() {
                                                                                                   : memory::format_tag::nhwc);
         createDescriptor({in_candidate}, {out_candidate});
     } else {
-        inputDataType = (getOriginalInputPrecisions()[0] == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16
+        inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16
                                                                                                            : memory::data_type::f32;
-        outputDataType = (getOriginalOutputPrecisions()[0] == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16
+        outputDataType = (getOriginalOutputPrecisionAtPort(0) == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16
                                                                                                              : memory::data_type::f32;
         eltwisePrecision = Precision::FP32;
         for (int i = 0; i < fusedWith.size(); i++) {
@@ -411,7 +409,7 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() {
 
                 dataConfig.constant = false;
                 dataConfig.desc = getDstMemDesc(itpd, i);
-                if (!(isGrouped || isMerged))
+                if (!isGrouped)
                     dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(dataConfig.desc);
                 config.outConfs.push_back(dataConfig);
 
@@ -475,10 +473,6 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector<InferenceEngine::
     MKLDNNMemoryDesc in_candidate(inDesc);
     MKLDNNMemoryDesc out_candidate(outDesc);
 
-    // grouping and autoblocking is not compatible
-    if (((isGrouped && !isDW) || isMerged) && (in_candidate.blocksExtended() || out_candidate.blocksExtended()))
-        return;
-
     MKLDNNDims blocked_weightDims(weightDims);
     MKLDNNDims blocked_biasesDims(biasesDims);
     MKLDNNMemoryDesc wgh_candidate{blocked_weightDims, wdt, memory::format_tag::any};
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h
index 2dde482ee89fb7..5749e76301bc8f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h
@@ -19,6 +19,7 @@ class MKLDNNConvolutionNode : public MKLDNNNode {
     MKLDNNConvolutionNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
     ~MKLDNNConvolutionNode() override = default;
 
+    static bool isSupportedOperation(const std::shared_ptr<ngraph::Node>& op, std::string& errorMessage) noexcept;
     void getSupportedDescriptors() override;
     void createDescriptor(const std::vector<InferenceEngine::TensorDesc>& inputDesc,
                           const std::vector<InferenceEngine::TensorDesc>& outputDesc) override;
@@ -58,8 +59,6 @@ class MKLDNNConvolutionNode : public MKLDNNNode {
     bool withBiases;
     bool withSum;
     bool withDWConv;
-    bool isDW;
-    bool isMerged;
     bool isGrouped;
     bool isPrimitivesPriorityDefined;
     std::vector<ptrdiff_t> stride;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
index 90f2e3a21c6cea..6252a9e88d5bd7 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp
@@ -14,6 +14,8 @@
 #include "mkldnn_extension_utils.h"
 #include "mkldnn_quantize_node.h"
 #include "mkldnn_pooling_node.h"
+#include "mkldnn_input_node.h"
+#include "common/cpu_convert.h"
 
 #include "emitters/jit_emitter.hpp"
 #include "emitters/jit_eltwise_emitters.hpp"
@@ -916,28 +918,12 @@ std::map<const ngraph::DiscreteTypeInfo, std::function<void(const std::shared_pt
     {ngraph::op::v0::PRelu::type_info, [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
         node.algorithm = EltwisePrelu;
     }},
-    // TODO [NM]: we need to introduce custom MulAdd operation
-//    {ngraph::op::v0::MulAdd::type_info, [](const std::shared_ptr<ngraph::Node>& op, MKLDNNEltwiseNode& node) {
-//        node.algorithm = EltwiseMish;
-//        node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_mish;
-//    }},
 };
 
 MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) :
         MKLDNNNode(op, eng, cache) {
     if (initializers.find(op->get_type_info()) != initializers.end()) {
         initializers[op->get_type_info()](op, *this);
-
-        std::shared_ptr<const ngraph::opset1::Constant> secondIn;
-        const auto isConstantBroadcastbleSecondInput = [&](const std::shared_ptr<ngraph::Node>& op) {
-            secondIn = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(op->get_input_node_shared_ptr(1));
-            return secondIn != nullptr && MKLDNNExtensionUtils::isPerTensorOrPerChannelBroadcastable(op->get_input_shape(0), op->get_input_shape(1));
-        };
-        if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu) && isConstantBroadcastbleSecondInput(op)) {
-            scales = secondIn->cast_vector<float>();
-        } else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract) && isConstantBroadcastbleSecondInput(op)) {
-            shifts = secondIn->cast_vector<float>();
-        }
     } else {
         IE_THROW(NotImplemented)
             << "CPU Eltwise node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name();
@@ -1018,8 +1004,8 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
 
     for (auto& fusedNode : fusedWith) {
         if (fusedNode->getType() == Eltwise) {
-            for (int i = 1; i < fusedNode->getOriginalInputPrecisions().size(); i++) {
-                inputPrecisions.push_back(fusedNode->getOriginalInputPrecisions()[i]);
+            for (int i = 1; i < fusedNode->getOriginalInputsNumber(); i++) {
+                inputPrecisions.push_back(fusedNode->getOriginalInputPrecisionAtPort(i));
             }
         }
     }
@@ -1027,9 +1013,9 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() {
     if (inputPrecisions.size() != getParentEdges().size())
         IE_THROW() << "Eltwise node with name `" << getName() << "` has invalid input precisions configuration.";
 
-    InferenceEngine::Precision outputPrecision = getOriginalOutputPrecisions()[0];
+    InferenceEngine::Precision outputPrecision = getOriginalOutputPrecisionAtPort(0);
     if (!fusedWith.empty()) {
-        outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisions()[0];
+        outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
     }
 
     if (!mayiuse(avx512_core)) {
@@ -1681,6 +1667,31 @@ bool MKLDNNEltwiseNode::canBeInPlace() const {
 }
 
 void MKLDNNEltwiseNode::fillScalesAndShifts() {
+    const auto fillValuesFrom = [&](const MKLDNNNodePtr& constInput, std::vector<float>& buffer) {
+        if (constInput->getType() != Input ||
+            !constInput->isConstant() ||
+            !MKLDNNExtensionUtils::isPerTensorOrPerChannelBroadcastable(getParentEdgesAtPort(0)[0]->getDims().ToSizeVector(),
+                                                                        constInput->getChildEdgesAtPort(0)[0]->getDims().ToSizeVector())) {
+            IE_THROW() << "Fusing Eltwise node with name '" << getName() << "' as post operation is not supported";
+        }
+
+        auto *constInputNode = dynamic_cast<MKLDNNInputNode *>(constInput.get());
+        auto constBlob = constInputNode->getConstBlob();
+        auto srcPtr = constBlob->cbuffer().as<int8_t *>();
+        buffer.resize(constBlob->size());
+        cpu_convert(srcPtr, &buffer[0], constBlob->getTensorDesc().getPrecision(), Precision::FP32, constBlob->size());
+    };
+
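+    // Multiply/Divide/PRelu constants are interpreted as per-channel scales, Add/Subtract constants as shifts;
+    // MulAdd contributes both: scales from port 1 and shifts from port 2.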
+    if (one_of(getAlgorithm(), EltwiseMultiply, EltwiseDivide, EltwisePrelu)) {
+        fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
+    } else if (one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract)) {
+        fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), shifts);
+    } else if (one_of(getAlgorithm(), EltwiseMulAdd)) {
+        fillValuesFrom(getParentEdgesAtPort(1)[0]->getParent(), scales);
+        fillValuesFrom(getParentEdgesAtPort(2)[0]->getParent(), shifts);
+    }
+
     const size_t bufferSize = static_cast<size_t>(outDims[0][outDims[0].size() > 1 ? 1 : 0]);
     const size_t bufferSizeAligned = rnd_up(bufferSize, 16);
 
@@ -1723,6 +1734,16 @@ void MKLDNNEltwiseNode::fillScalesAndShifts() {
     }
 }
 
+void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) {
+    // Handle the special Convolution + Add fusing case, which is processed via the dnnl append_sum() API.
+    bool isSpecialConvolutionAddFusing = parentNode->getType() == Convolution && getAlgorithm() == EltwiseAdd &&
+            getParentEdgesAtPort(0)[0]->getDims().ToSizeVector() == getParentEdgesAtPort(1)[0]->getDims().ToSizeVector();
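+    // For scale-shift-like algorithms, convert the constant inputs into scales/shifts buffers up front,
+    // while the constant parent nodes are still attached to this node.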
+    if (!isSpecialConvolutionAddFusing && one_of(getAlgorithm(), EltwiseAdd, EltwiseSubtract, EltwiseMultiply, EltwiseDivide, EltwiseMulAdd, EltwisePrelu)) {
+        fillScalesAndShifts();
+    }
+    MKLDNNNode::fuseInto(parentNode);
+}
+
 void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) {
     const std::string errorPrefix = "Appending Eltwise node with name '" + getName() + "' ";
     if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) {
@@ -1747,31 +1768,27 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) {
             case mkldnn::algorithm::eltwise_round_half_to_even:
             case mkldnn::algorithm::eltwise_round_half_away_from_zero:
                 ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta());
-                return;
-            case mkldnn::algorithm::depthwise_scale_shift:
-                IE_THROW() << "[NM] Not implemented";
-                return;
+                break;
             default: IE_THROW() << errorPrefix << "as post operation is not supported";
         }
     } else {
         switch (getAlgorithm()) {
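+            // Scale-shift-like algorithms are expressed via the mkldnn depthwise post-op,
+            // using the scales/shifts buffers filled by fillScalesAndShifts() at fusing time.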
             case EltwiseAdd:
             case EltwiseSubtract:
-                if (shifts.empty()) IE_THROW() << errorPrefix << "has empty shifts";
-                break;
             case EltwiseMultiply:
             case EltwiseDivide:
+            case EltwiseMulAdd:
+                if (scales.empty() || shifts.empty())
+                    IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
+                ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scales[0], &shifts[0]);
+                break;
             case EltwisePrelu:
-                if (scales.empty()) IE_THROW() << errorPrefix << "has empty scales";
+                if (scales.empty())
+                    IE_THROW() << errorPrefix << "cannot be performed since buffers are not allocated";
+                ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scales[0], nullptr);
                 break;
             default: IE_THROW() << errorPrefix << "as post operation is not supported";
         }
-        fillScalesAndShifts();
-        if (getAlgorithm() == EltwisePrelu) {
-            ops.append_depthwise(mkldnn::algorithm::depthwise_prelu, &scales[0], nullptr);
-        } else {
-            ops.append_depthwise(mkldnn::algorithm::depthwise_scale_shift, &scales[0], &shifts[0]);
-        }
     }
 }
 
@@ -1814,8 +1831,8 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const {
 
             // Limitation: inputs precision definition inside Eltwise node assumes fusing is applied for 0-th port,
             // otherwise we need identical precision on all inputs of fused node
-            for (int i = 1; i < getOriginalInputPrecisions().size(); i++) {
-                if (getOriginalInputPrecisions()[0] != getOriginalInputPrecisions()[i]) {
+            for (int i = 1; i < getOriginalInputsNumber(); i++) {
+                if (getOriginalInputPrecisionAtPort(0) != getOriginalInputPrecisionAtPort(i)) {
                     return false;
                 }
             }
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h
index a69d5eb31f40c2..4e81586087015b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h
@@ -84,6 +84,7 @@ class MKLDNNEltwiseNode : public MKLDNNNode {
     void appendPostOps(mkldnn::post_ops& ops) override;
 
     InferenceEngine::Precision getRuntimePrecision() const override;
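+    // Overridden to pre-fill the scales/shifts buffers for scale-shift-like post ops before fusing.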
+    void fuseInto(MKLDNNNodePtr& parentNode) override;
 
 private:
     mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp
index fcb84e0c040271..12abf1474bb6ed 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp
@@ -120,8 +120,8 @@ void MKLDNNGemmNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    auto inPrec0 = getOriginalInputPrecisions()[0];
-    auto inPrec1 = getOriginalInputPrecisions()[1];
+    auto inPrec0 = getOriginalInputPrecisionAtPort(0);
+    auto inPrec1 = getOriginalInputPrecisionAtPort(1);
     if ((inPrec0 != Precision::U8 && inPrec0 != Precision::I8) || inPrec1 != Precision::I8 || isThreeInputs) {
         if (inPrec0 == Precision::BF16 || inPrec1 == Precision::BF16) {
             inPrec0 = Precision::BF16;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
index b5c74959aae7f7..889756a9b2d2e8 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp
@@ -71,7 +71,7 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() {
     LayerConfig config;
     config.dynBatchSupport = true;
     if (getType() == Input || getType() == MemoryInput) {
-        precision = getOriginalOutputPrecisions()[0];
+        precision = getOriginalOutputPrecisionAtPort(0);
         if (precision == Precision::U16 || isMeanImage) {
             precision = Precision::FP32;
         }
@@ -84,7 +84,7 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() {
         dataConfig.desc = mem_tdesc;
         config.outConfs.push_back(dataConfig);
     } else if (getType() == Output) {
-        precision = getOriginalInputPrecisions()[0];
+        precision = getOriginalInputPrecisionAtPort(0);
         if (precision == Precision::U16) precision = Precision::FP32;
         DataConfig dataConfig;
         dataConfig.inPlace = -1;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h
index b1dc432f31b662..6761f9e0ed6cad 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h
@@ -25,6 +25,10 @@ class MKLDNNInputNode : public MKLDNNNode {
         isMeanImage = true;
     }
 
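+    // Exposes the constant blob so fused consumers (e.g. Eltwise scales/shifts extraction) can read its values.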
+    const InferenceEngine::Blob::CPtr getConstBlob() const {
+        return constBlob;
+    }
+
 private:
     InferenceEngine::Precision precision;
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp
index 33ade1e6120870..de37cb8637f7c5 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp
@@ -701,21 +701,21 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
 
     setPostOps(attr, true);
 
-    Precision inputPrecision = getOriginalInputPrecisions()[0];
+    Precision inputPrecision = getOriginalInputPrecisionAtPort(0);
     if (getParentEdgeAt(0)->getDims().ndims() < 3 || getParentEdgeAt(0)->getDims().ndims() > 5
             || acrossChannels_ || !normalizeVariance_) {
         if (!isFloatCompatible(inputPrecision)) {
             inputPrecision = Precision::FP32;
         }
     }
-    Precision outputPrecision = getOriginalOutputPrecisions()[0];
+    Precision outputPrecision = getOriginalOutputPrecisionAtPort(0);
     if (!mayiuse(avx512_core)) {
         if (outputPrecision == Precision::BF16)
             outputPrecision = Precision::FP32;
     }
 
     if (!fusedWith.empty()) {
-        outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisions()[0];
+        outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
     }
 
     // ref with float planar and no fusion
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
index c004b28d218519..738b86ff99a7f7 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp
@@ -720,11 +720,11 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() {
 
     setPostOps(attr, true);
 
-    Precision inputPrecision = getOriginalInputPrecisions()[DATA];
-    Precision outputPrecision = getOriginalOutputPrecisions()[DATA];
+    Precision inputPrecision = getOriginalInputPrecisionAtPort(DATA);
+    Precision outputPrecision = getOriginalOutputPrecisionAtPort(DATA);
 
     if (!fusedWith.empty()) {
-        outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisions()[0];
+        outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0);
     }
 
     if (inputPrecision == Precision::BF16 || outputPrecision == Precision::BF16) {
@@ -781,9 +781,9 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() {
 }
 
 bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const {
-    auto isConvertedToScaleShift = [](MKLDNNNodePtr node) {
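+    // An eltwise can be converted to scale-shift when its second input is a constant Input node
+    // that is per-tensor or per-channel broadcastable to the first input.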
+    auto isConvertibleToScaleShift = [](MKLDNNNodePtr node) {
         return one_of(node->getAlgorithm(), EltwiseAdd, EltwiseMultiply, EltwiseSubtract, EltwiseDivide, EltwisePrelu) &&
-               node->getParentEdgeAt(1)->getParent()->isConstant() &&
+               node->getParentEdgeAt(1)->getParent()->getType() == Input && node->getParentEdgeAt(1)->getParent()->isConstant() &&
                MKLDNNExtensionUtils::isPerTensorOrPerChannelBroadcastable(node->getParentEdgeAt(0)->getDims().ToSizeVector(),
                                                                           node->getParentEdgeAt(1)->getDims().ToSizeVector());
     };
@@ -796,10 +796,8 @@ bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const {
     } else if (node->getType() == Eltwise) {
         return one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseBoundedRelu, EltwiseClamp, EltwiseTanh,
                                             EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
-                                            EltwiseRoundHalfAwayFromZero, EltwiseLinear, EltwiseAbs, EltwiseSquare, EltwiseSqrt) ||
-                isConvertedToScaleShift(node);
-                // TODO [NM]: implemented after enabling MulAdd operation
-                // ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2)
+                                            EltwiseRoundHalfAwayFromZero, EltwiseLinear, EltwiseAbs, EltwiseSquare, EltwiseSqrt, EltwiseMulAdd) ||
+               isConvertibleToScaleShift(node);
     }
 
     return false;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
index ba3007e0d0ede8..2aeeb297a11a6c 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp
@@ -110,7 +110,7 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() {
     std::vector<InferenceEngine::Precision> supportedPrecisions = {InferenceEngine::Precision::FP32, InferenceEngine::Precision::I32,
                                                                    InferenceEngine::Precision::BF16, InferenceEngine::Precision::I8,
                                                                    InferenceEngine::Precision::U8};
-    InferenceEngine::Precision precision = getOriginalInputPrecisions()[DATA_ID];
+    InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(DATA_ID);
     if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), precision) == supportedPrecisions.end())
         precision = precision.is_float() ? InferenceEngine::Precision::FP32 : InferenceEngine::Precision::I32;
     auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
index 794815544ed236..c345fa92451a09 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp
@@ -85,8 +85,8 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
     if (getChildEdges().empty())
         IE_THROW() << "Incorrect number of output edges for layer " << getName();
 
-    inputPrecision = getOriginalInputPrecisions()[0];
-    outputPrecision = getOriginalOutputPrecisions()[0];
+    inputPrecision = getOriginalInputPrecisionAtPort(0);
+    outputPrecision = getOriginalOutputPrecisionAtPort(0);
 
     // MKLDNN supports only equal precisions for input and output
     if (one_of(inputPrecision, Precision::FP32, Precision::BF16)) {
@@ -94,7 +94,7 @@ void MKLDNNPoolingNode::getSupportedDescriptors() {
     }
 
     if (!fusedWith.empty()) {
-        outputPrecision = fusedWith.back()->getOriginalOutputPrecisions()[0];
+        outputPrecision = fusedWith.back()->getOriginalOutputPrecisionAtPort(0);
     }
 
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
index d0c4e5f0ed53f4..6c935ebadb008d 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp
@@ -25,9 +25,9 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 
-    InferenceEngine::Precision precision = getOriginalInputPrecisions()[0];
+    InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0);
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
-    precision = getOriginalOutputPrecisions()[0];
+    precision = getOriginalOutputPrecisionAtPort(0);
     auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
 
     // Current reshape implementation is simple memory reinterpret,
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp
index 408ecb54fb5958..5b9692fc562903 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp
@@ -147,7 +147,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() {
         }
     }
 
-    indicesPrec = getOriginalInputPrecisions()[INDICES_ID];
+    indicesPrec = getOriginalInputPrecisionAtPort(INDICES_ID);
     auto indicesType = MKLDNNExtensionUtils::IEPrecisionToDataType(indicesPrec);
     indicesSize = MKLDNNExtensionUtils::sizeOfDataType(indicesType);
     if (indicesSize >= 8) {
@@ -160,7 +160,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() {
     indicesType = MKLDNNExtensionUtils::IEPrecisionToDataType(indicesPrec);
 
     if (axisRelaxed) {
-        axisPrec = getOriginalInputPrecisions()[AXIS_ID];
+        axisPrec = getOriginalInputPrecisionAtPort(AXIS_ID);
         auto axisType = MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec);
         axisSize = MKLDNNExtensionUtils::sizeOfDataType(axisType);
         if (axisSize >= 8) {
@@ -172,7 +172,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() {
         }
     }
 
-    dataPrec = getOriginalInputPrecisions()[DATA_ID];
+    dataPrec = getOriginalInputPrecisionAtPort(DATA_ID);
     auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(dataPrec);
     dataSize = MKLDNNExtensionUtils::sizeOfDataType(dataType);
 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
index 42cd241f1a9e92..6d4c9a27dc4d8b 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp
@@ -27,7 +27,7 @@ void MKLDNNSoftMaxNode::getSupportedDescriptors() {
     if (descs.size())
         return;
 
-    InferenceEngine::Precision precision = getOriginalInputPrecisions()[0];
+    InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0);
     if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16)
         precision = InferenceEngine::Precision::FP32;
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp
index 4cb8bc06a90808..5819617bfea26a 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp
@@ -181,10 +181,10 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() {
     if (!supportedPrimitiveDescriptors.empty())
         return;
 //
-    prec = getOriginalInputPrecisions()[0];
+    prec = getOriginalInputPrecisionAtPort(0);
     auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec);
     auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec);
-    auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[1]);
+    auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(1));
 
     InferenceEngine::LayerConfig config;
     config.dynBatchSupport = true;
diff --git a/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp
index 05ce831fb11616..62420cfca6c33a 100644
--- a/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp
+++ b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp
@@ -6,6 +6,7 @@
 
 #include <cassert>
 #include <ngraph/variant.hpp>
+#include "transformations/rt_info/primitives_priority_attribute.hpp"
 
 namespace MKLDNNPlugin {
 
@@ -19,6 +20,17 @@ inline std::string getRTInfoValue(const std::map<std::string, std::shared_ptr<ng
     }
 };
 
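+// Extracts the PrimitivesPriority attribute from the node's runtime info;
+// returns an empty string when the attribute is not set.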
+inline std::string getPrimitivesPriorityValue(const std::shared_ptr<ngraph::Node> &node) {
+    const auto &rtInfo = node->get_rt_info();
+    using PrimitivesPriorityWrapper = ngraph::VariantWrapper<ngraph::PrimitivesPriority>;
+
+    if (!rtInfo.count(PrimitivesPriorityWrapper::type_info.name)) return "";
+
+    const auto &attr = rtInfo.at(PrimitivesPriorityWrapper::type_info.name);
+    ngraph::PrimitivesPriority pp = ngraph::as_type_ptr<PrimitivesPriorityWrapper>(attr)->get();
+    return pp.getPrimitivesPriority();
+}
+
 template <typename T>
 inline const std::shared_ptr<T> getNgraphOpAs(const std::shared_ptr<ngraph::Node>& op) {
     auto typedOp = ngraph::as_type_ptr<T>(op);
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
index 837d758269e0ee..d2ee3bae38eb30 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
@@ -59,11 +59,12 @@ std::vector<std::string> disabledTestPatterns() {
         R"(.*BinaryConvolutionLayerTest.*)"
     };
 
-    if (!InferenceEngine::with_cpu_x86_avx512_core()) {
+// TODO [NM]: Disabled until the BF16 transformer is migrated to the CPU graph representation.
+//    if (!InferenceEngine::with_cpu_x86_avx512_core()) {
         // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives,
         // tests are useless on such platforms
        retVector.emplace_back(R"(.*BF16.*)");
-    }
+//    }
 
     return retVector;
 }
diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp
index 2ce34a6555c21a..74b265d1935dcf 100755
--- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/normalize.cpp
@@ -85,7 +85,9 @@ std::vector<fusingSpecificParams> fusingParamsSet {
         fusingDividePerChannel,
         fusingPReluPerChannel,
         fusingPReluPerTensor,
-        fusingRelu
+        fusingRelu,
+        fusingGelu,
+        fusingReluScaleShift
 };
 
 const float epsilon = 1e-4f;
diff --git a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp
index 9ce8004e58a55b..2a56c02bce0a55 100644
--- a/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp
+++ b/inference-engine/tests/functional/plugin/cpu/test_utils/fusing_test_utils.hpp
@@ -163,7 +163,7 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared<postNode
                      IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
                  ngraph::Shape newShape(shape.size(), 1);
                  newShape[1] = shape[1];
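+                 // Build the random per-channel constant in the network precision (ngPrc) rather than hard-coded f32.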
-                 auto constNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, newShape, {}, true);
+                 auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
                  return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
             }, "Multiply(PerChannel)"},
             {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params){
@@ -172,7 +172,7 @@ const auto fusingReluScaleShift = fusingSpecificParams{std::make_shared<postNode
                  IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
                 ngraph::Shape newShape(shape.size(), 1);
                 newShape[1] = shape[1];
-                auto constNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, newShape, {}, true);
+                auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
                 return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
             }, "Add(PerChannel)"}}), {"Relu", "Add"}};
 
@@ -183,7 +183,7 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared<postNodesMg
                      IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
                  ngraph::Shape newShape(shape.size(), 1);
                  newShape[1] = shape[1];
-                 auto constNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, newShape, {}, true);
+                 auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
                  return std::make_shared<ngraph::opset1::Multiply>(inpNode, constNode);
             }, "Multiply(PerChannel)"},
             {[](std::shared_ptr<ngraph::Node> inpNode, const ngraph::element::Type& ngPrc, ngraph::ParameterVector& params) {
@@ -192,7 +192,7 @@ const auto fusingScaleShift = fusingSpecificParams{ std::make_shared<postNodesMg
                  IE_THROW() << "If shape.size() == 1 then Granularity can be PerTensor only";
                 ngraph::Shape newShape(shape.size(), 1);
                 newShape[1] = shape[1];
-                auto constNode = ngraph::builder::makeConstant<float>(ngraph::element::f32, newShape, {}, true);
+                auto constNode = ngraph::builder::makeConstant(ngPrc, newShape, std::vector<float>{}, true);
                 return std::make_shared<ngraph::opset1::Add>(inpNode, constNode);
             }, "Add(PerChannel)"}}), {"Add"} };