From 01af7f2591c48e658069d482b26339e47cc37c07 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Thu, 29 Apr 2021 10:38:35 +0300 Subject: [PATCH] [CPU] {GRM, LSTM, RNN} Cells and Sequences migration on nGraph. (#55) --- .../src/mkldnn_plugin/CMakeLists.txt | 4 +- .../src/mkldnn_plugin/mkldnn_graph.cpp | 181 ++++++------ .../src/mkldnn_plugin/mkldnn_graph.h | 4 +- .../src/mkldnn_plugin/mkldnn_node.cpp | 57 ++-- .../src/mkldnn_plugin/mkldnn_node.h | 1 - .../convert_to_cpu_specific_opset.hpp | 4 + .../rnn_sequences_optimization.cpp | 144 +++++++++ .../rnn_sequences_optimization.hpp | 29 ++ .../src/mkldnn_plugin/nodes/mkldnn_rnn.cpp | 68 +++-- .../src/mkldnn_plugin/nodes/mkldnn_rnn.h | 4 +- .../nodes/mkldnn_tensoriterator_node.cpp | 274 ++++++++++++++---- .../nodes/mkldnn_tensoriterator_node.h | 28 +- .../skip_tests_config.cpp | 14 +- 13 files changed, 579 insertions(+), 233 deletions(-) create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index ec2d9ff2f9cdfc..9945746d1a3620 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -34,12 +34,12 @@ set(LAYERS ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_fake_quantize_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reorder_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reshape_node.cpp -# ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_rnn.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_rnn.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roi_align_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roi_pooling_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_softmax_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_split_node.cpp -# ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tensoriterator_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tensoriterator_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tile_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_mvn_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 94cb2ad7634151..3a4ce660779e69 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -91,107 +91,92 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg status = Ready; } -//template void MKLDNNGraph::CreateGraph(const TensorIterator::Body&, -// const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); +template void MKLDNNGraph::CreateGraph(const std::shared_ptr&, + const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); template void MKLDNNGraph::CreateGraph(const CNNNetwork&, const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); -//template void MKLDNNGraph::CreateGraph(CNNNetwork&, -// const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); -//void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { -// this->_name = "subgraph"; -// this->reuse_io_tensors = false; -// -// // Map data object onto producer layer(node) -// std::unordered_map> data2node; -// -// // nodes which has no consumers (output or just unused). But doesn't marked as graph output. 
-// // Will be stored as fake output separately. -// std::unordered_set unused_data; -// -// // Step 1. Replicate input nodes -// for (const auto &input : subgraph.inputs) { -// if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder -// -// auto creator = getCreatorLayer(input).lock(); -// if (creator == nullptr) { -// creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()})); -// creator->outData.push_back(input); -// } -// -// const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache)); -// data2node[input.get()] = {node, 0}; -// -// graphNodes.push_back(node); -// inputNodesMap[input->getName()] = node; -// -// if (getInputTo(input).empty()) { -// unused_data.insert(input); -// } -// } -// -// // Step 2. Replicate all internal nodes. -// for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) { -// const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; -// graphNodes.push_back(node); -// -// for (int port = 0; port < layer->insData.size(); port++) { -// auto data = layer->insData[port].lock(); -// -// auto port_info = data2node[data.get()]; -// auto parent_node = port_info.first; -// auto parent_port_idx = port_info.second; -// -// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port)); -// node->addEdge(edge); -// graphEdges.push_back(edge); -// } -// int out_port_idx = 0; -// for (auto &out_data : layer->outData) { -// data2node[out_data.get()] = {node, out_port_idx++}; -// if (getInputTo(out_data).empty()) { -// unused_data.insert(out_data); -// } -// } -// } -// -// // Step 3. Add output nodes and output stubs for unused data objects. -// for (const auto &output : subgraph.outputs) { -// auto port_info = data2node[output.get()]; -// auto parent_node = port_info.first; -// auto parent_port_idx = port_info.second; -// -// CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()})); -// layer->insData.push_back(output); -// -// const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; -// -// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); -// node->addEdge(edge); -// graphEdges.push_back(edge); -// graphNodes.push_back(node); -// outputNodesMap.push_back(node); -// -// unused_data.erase(output); -// } -// -// // Add stub output node for unused data -// for (auto to_stub_data : unused_data) { -// auto port_info = data2node[to_stub_data.get()]; -// auto parent_node = port_info.first; -// auto parent_port_idx = port_info.second; -// -// CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()})); -// layer->insData.push_back(to_stub_data); -// -// const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); -// -// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); -// node->addEdge(edge); -// graphEdges.push_back(edge); -// graphNodes.push_back(node); -// } -//} +void MKLDNNGraph::Replicate(const std::shared_ptr &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { + this->_name = "subgraph"; + this->reuse_io_tensors = false; + + // Map data object onto producer node + std::map, std::pair> op2node; + + // nodes which has no consumers (output or just unused). But doesn't marked as graph output. + // Will be stored as fake output separately. 
+ std::deque> unusedOutputs; + + auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, + const size_t childInputPort) -> int { + for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) { + if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) { + return static_cast(parentPort); + } + } + + return -1; + }; + + for (const auto op : subgraph->get_ordered_ops()) { + const MKLDNNNodePtr node {MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)}; + graphNodes.push_back(node); + + if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) { + inputNodesMap[node->getName()] = node; + } + + if (op->get_type_info() == ngraph::op::v0::Result::type_info) { + auto prev = op->get_input_node_shared_ptr(0); + std::string inputID; + inputID = prev->get_friendly_name(); + if (prev->get_output_size() > 1) { + inputID += "." + std::to_string(op->get_input_source_output(0).get_index()); + } + + outputNodesMap[inputID] = node; + } + + for (size_t port = 0; port < op->get_input_size(); port++) { + auto parentOp = op->get_input_node_shared_ptr(port); + + auto portInfo = op2node[parentOp]; + auto parentNode = portInfo.first; + + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), port)); + node->addEdge(edge); + graphEdges.push_back(edge); + } + + if (!MKLDNNPlugin::one_of(op->get_type_info(), + ngraph::op::v0::Result::type_info, + ngraph::op::v3::Assign::type_info, + ngraph::op::v6::Assign::type_info)) { + int outPortIdx = 0; + for (int oi = 0; oi < op->get_output_size(); oi++) { + op2node[op->output(oi).get_node_shared_ptr()] = {node, outPortIdx++}; + if (op->get_output_target_inputs(oi).empty()) { + unusedOutputs.push_back(op->output(oi)); + } + } + } + } + + // Add stub output node for unused data + for (auto unusedOutput : unusedOutputs) { + auto portInfo = op2node[unusedOutput.get_node_shared_ptr()]; + auto parentNode = portInfo.first; + auto port = portInfo.second; + const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + parentNode->getOriginalOutputPrecisionAtPort(port), + nodeName, "Result", getEngine(), weightsCache); + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); + outNode->addEdge(edge); + graphEdges.push_back(edge); + graphNodes.push_back(outNode); + } +} void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "MKLDNNGraph::Replicate", "CNNNetwork"); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 2383221ff6325e..6a9085cffcfe59 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -4,14 +4,12 @@ #pragma once -#include "ie_parallel.hpp" #include "cpp/ie_cnn_network.h" #include "config.h" #include "mkldnn_memory.h" #include "mean_image.h" #include "mkldnn_node.h" #include "mkldnn_edge.h" -#include "threading/ie_thread_local.hpp" #include #include #include @@ -190,7 +188,7 @@ class MKLDNNGraph { static mkldnn::engine eng; void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr); -// void Replicate(const InferenceEngine::TensorIterator::Body &subgraph, const 
MKLDNNExtensionManager::Ptr& extMgr); + void Replicate(const std::shared_ptr &subgraph, const MKLDNNExtensionManager::Ptr& extMgr); void InitGraph(); void InitNodes(); void InitDescriptors(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 5895a60ad0c6d6..0ca8fb195cd334 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -161,17 +161,17 @@ static const InferenceEngine::details::caseless_unordered_map { "Pad", Pad }, { "Transpose", Transpose }, // { "Copy", Copy }, -// { "LSTMCell", RNNCell }, -// { "GRUCell", RNNCell }, -// { "RNNCell", RNNCell }, -// { "LSTMSequence", RNNSeq }, -// { "GRUSequence", RNNSeq }, -// { "RNNSequence", RNNSeq }, + { "LSTMCell", RNNCell }, + { "GRUCell", RNNCell }, + { "RNNCell", RNNCell }, + { "LSTMSequence", RNNSeq }, + { "GRUSequence", RNNSeq }, + { "RNNSequence", RNNSeq }, { "FakeQuantize", FakeQuantize }, { "BinaryConvolution", BinaryConvolution }, { "DeformableConvolution", DeformableConvolution }, -// { "TensorIterator", TensorIterator }, -// { "Loop", TensorIterator }, + { "TensorIterator", TensorIterator }, + { "Loop", TensorIterator }, { "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used { "Assign", MemoryOutput }, // for construction from layer ctor { "Convert", Convert }, @@ -1293,27 +1293,26 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr } // TODO [NM]: enable after all nodes will be migrated on ngraph - // if (newNode == nullptr) { - // try { - // std::unique_ptr ol(new MKLDNNReferenceNode(op, eng, w_cache, errorMessage)); - // if (ol != nullptr && ol->created(extMgr)) - // newNode = ol.release(); - // } catch (const InferenceEngine::Exception& ex) { - // if (ex.getStatus() != NOT_IMPLEMENTED) { - // throw; - // } else { - // errorMessage += getExceptionDescWithoutStatus(ex); - // } - // } - // } - -// TODO [NM]: Not implemented -// // WA-start : TI node requires all attributes to construct internal subgpath -// // including extManager, socket and mkldnn::eng. -// MKLDNNTensorIteratorNode *ti = dynamic_cast(newNode); -// if (ti != nullptr) -// ti->setExtManager(extMgr); -// // WA-end +// if (newNode == nullptr) { +// try { +// std::unique_ptr ol(new MKLDNNReferenceNode(op, eng, w_cache, errorMessage)); +// if (ol != nullptr && ol->created(extMgr)) +// newNode = ol.release(); +// } catch (const InferenceEngine::Exception& ex) { +// if (ex.getStatus() != NOT_IMPLEMENTED) { +// throw; +// } else { +// errorMessage += getExceptionDescWithoutStatus(ex); +// } +// } +// } + + // WA-start : TI node requires all attributes to construct internal subgpath + // including extManager, socket and mkldnn::eng. 
+ MKLDNNTensorIteratorNode *ti = dynamic_cast(newNode); + if (ti != nullptr) + ti->setExtManager(extMgr); + // WA-end if (!newNode) { std::string errorDetails; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 5a47a3b1a3b457..632d3253ee6be7 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -11,7 +11,6 @@ #include #include #include -#include #include "mkldnn_dims.h" #include "mkldnn_memory.h" #include "mkldnn_edge.h" diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp index f9bef52913bed3..8496558e61481f 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp @@ -14,6 +14,7 @@ #include "convert_to_leaky_relu.hpp" #include "convert_to_swish_cpu.hpp" #include "reshape_prelu.hpp" +#include "rnn_sequences_optimization.hpp" namespace MKLDNNPlugin { @@ -34,6 +35,9 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr &nGraphF manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); if (!ngraph::op::util::has_op_with_type(nGraphFunc)) { manager.register_pass(); } diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp new file mode 100644 index 00000000000000..74d52c5aaf1129 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp @@ -0,0 +1,144 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rnn_sequences_optimization.hpp" +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeGRUSequenceTransposes, "OptimizeGRUSequenceTransposes", 0); +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeLSTMSequenceTransposes, "OptimizeLSTMSequenceTransposes", 0); +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeRNNSequenceTransposes, "OptimizeRNNSequenceTransposes", 0); + +namespace { + int64_t getSeqAxis(const std::shared_ptr& sequenceOp) { + // Optimization. + // Plug-ins support seqAxis attribute (value 1 or 0) for Seq ops, but according to the spec we don't + // support this attribute and should insert Transpose layer before and after Seq op in TI to Sequences + // transformation. Additional Transpose layers affect the performance, so we try to detect pattern + // Transpose(axis_order={1,0,2}) -> Seq -> Transpose(axis_order={2,1,0,3} + // and replace unnecessary Transpose ops with SeqIE (seqAxis = 0) to transfer value + // of the attribute to plug-ins. + // todo: specify seqAxis attribute for Sequence ops. 
+ int64_t seqAxis = 1; // default + const auto& target_inputs = sequenceOp->output(0).get_target_inputs(); + if (target_inputs.size() == 1) { + const auto& transpose_before = std::dynamic_pointer_cast(sequenceOp->input_value(0).get_node_shared_ptr()); + const auto& transpose_after = std::dynamic_pointer_cast(target_inputs.begin()->get_node()->shared_from_this()); + if (transpose_after != nullptr && transpose_before != nullptr) { + auto order_before = std::dynamic_pointer_cast( + transpose_before->input_value(1).get_node_shared_ptr()); + auto order_after = std::dynamic_pointer_cast( + transpose_after->input_value(1).get_node_shared_ptr()); + if (order_before != nullptr && order_after != nullptr) { + auto order_before_values = order_before->cast_vector(); + auto order_after_values = order_after->cast_vector(); + std::vector order_ref_before = {1, 0, 2}; + std::vector order_ref_after = {2, 1, 0, 3}; + if (order_before_values == order_ref_before && order_after_values == order_ref_after) { + seqAxis = 0; + } + } + } + } + return seqAxis; + } + + bool transform(const std::shared_ptr& sequenceOp) { + // Detect pattern: Transpose_before -> Seq -> Transpose_after + auto seqAxis = getSeqAxis(sequenceOp); + if (seqAxis == 0) { + ngraph::Output in_0 = sequenceOp->get_input_source_output(0).get_node_shared_ptr()->get_input_source_output(0); + + auto newInShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, sequenceOp->get_input_shape(0)); + auto reshape1 = std::make_shared(in_0, newInShape, false); + ngraph::replace_node(sequenceOp->get_input_node_shared_ptr(0), {reshape1->output(0)}); + + const auto &gruTargetInputs = sequenceOp->output(0).get_target_inputs(); + if (gruTargetInputs.empty()) + return false; + auto transposeAfter = gruTargetInputs.begin()->get_node()->shared_from_this(); + + auto newOutShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{4}, transposeAfter->get_output_shape(0)); + auto reshape2 = std::make_shared(sequenceOp->output(0), newOutShape, false); + ngraph::replace_node(transposeAfter, {reshape2->output(0)}); + } else { + auto originShape = sequenceOp->get_output_shape(0); + auto seqOut = sequenceOp->get_output_target_inputs(0).begin()->get_node()->shared_from_this(); + + auto tncShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, {originShape[2], originShape[0], originShape[3]}); + auto reshape1 = std::make_shared(sequenceOp->output(0), tncShape, false); + + auto order = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, {1, 0, 2}); + auto transpose = std::make_shared(reshape1->output(0), order); + + auto ndtcShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{4}, originShape); + auto reshape2 = std::make_shared(transpose->output(0), ndtcShape, false); + reshape2->set_friendly_name(sequenceOp->get_friendly_name()+".0"); + + ngraph::insert_new_node_between(sequenceOp, seqOut, reshape2); + } + return true; + } +} // namespace + +MKLDNNPlugin::OptimizeGRUSequenceTransposes::OptimizeGRUSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto gruSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!gruSequence) { + return false; + } + // Bidirectional cases are not supported + if (gruSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(gruSequence); + }; + + auto gruSequenceNgraph = ngraph::pattern::wrap_type(); + + auto m = 
std::make_shared(gruSequenceNgraph, "OptimizeGRUSequenceTransposes"); + this->register_matcher(m, callback); +} + +MKLDNNPlugin::OptimizeRNNSequenceTransposes::OptimizeRNNSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto rnnSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!rnnSequence) { + return false; + } + // Bidirectional cases are not supported + if (rnnSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(rnnSequence); + }; + + auto rnnSequenceNgraph = ngraph::pattern::wrap_type(); + + auto m = std::make_shared(rnnSequenceNgraph, "OptimizeRNNSequenceTransposes"); + this->register_matcher(m, callback); +} + +MKLDNNPlugin::OptimizeLSTMSequenceTransposes::OptimizeLSTMSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto lstmSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!lstmSequence) { + return false; + } + // Bidirectional cases are not supported + if (lstmSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(lstmSequence); + }; + + auto lstmSequenceNgraph_0 = ngraph::pattern::wrap_type(); + auto lstmSequenceNgraph_5 = ngraph::pattern::wrap_type(); + const auto lstmSeqInputs = std::make_shared(ngraph::OutputVector{lstmSequenceNgraph_0, lstmSequenceNgraph_5}); + + auto m = std::make_shared(lstmSeqInputs, "OptimizeLSTMSequenceTransposes"); + + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp new file mode 100644 index 00000000000000..14cf1a585af720 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class OptimizeGRUSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeGRUSequenceTransposes(); +}; + +class OptimizeLSTMSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeLSTMSequenceTransposes(); +}; + +class OptimizeRNNSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeRNNSequenceTransposes(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp index 9b220b0a9a6d1d..b38964f3b6cc1b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp @@ -11,6 +11,8 @@ #include "utils/bfloat16.hpp" #include "nodes/common/cpu_convert.h" +#include + #include #include @@ -21,16 +23,24 @@ using namespace InferenceEngine; namespace MKLDNNPlugin { -using _RNN = RNNSequenceLayer; // alias - -static rnn_direction ie2mkl(_RNN::Direction &direction) { - return direction == _RNN::FWD ? rnn_direction::unidirectional_left2right - : direction == _RNN::BWD ? rnn_direction::unidirectional_right2left - : direction == _RNN::BDR ? 
rnn_direction::bidirectional_concat +static rnn_direction ieDirection2dnnl(const std::shared_ptr& op) { + ngraph::op::RecurrentSequenceDirection direction = ngraph::op::RecurrentSequenceDirection::FORWARD; + if (op->get_type_info() == ngraph::op::v5::GRUSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v0::LSTMSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v5::LSTMSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v5::RNNSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } + return direction == ngraph::op::RecurrentSequenceDirection::FORWARD ? rnn_direction::unidirectional_left2right + : direction == ngraph::op::RecurrentSequenceDirection::REVERSE ? rnn_direction::unidirectional_right2left + : direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? rnn_direction::bidirectional_concat : rnn_direction::unidirectional; } -static algorithm ie2mkl(std::string act_type) { +static algorithm ie2dnnl(std::string act_type) { return act_type == "sigmoid" ? algorithm::eltwise_logistic : act_type == "tanh" ? algorithm::eltwise_tanh : act_type == "relu" ? algorithm::eltwise_relu @@ -128,9 +138,9 @@ void MKLDNNRNN::fillCellDesc() { if (!one_of(outs.size(), 2, 1)) THROW_ERROR << "Incorrect number of output ports for layer " << getName(); - auto in_data_dims = getParentEdgeAt(0)->getDims(); - auto in_h_state_dims = getParentEdgeAt(1)->getDims(); - auto out_h_state_dims = getChildEdgeAt(0)->getDims(); + auto in_data_dims = op->get_input_shape(0); + auto in_h_state_dims = op->get_input_shape(1); + auto out_h_state_dims = op->get_output_shape(0); if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2) THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); @@ -145,7 +155,7 @@ void MKLDNNRNN::fillCellDesc() { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? 
G : G + 1; // Expected shapes - MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; + SizeVector D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; if (in_data_dims != D_shape || in_h_state_dims != S_shape @@ -153,8 +163,8 @@ void MKLDNNRNN::fillCellDesc() { THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); if (S == 2) { - auto in_c_state_dims = getParentEdgeAt(2)->getDims(); - auto out_c_state_dims = getChildEdgeAt(1)->getDims(); + auto in_c_state_dims = op->get_input_shape(2); + auto out_c_state_dims = op->get_output_shape(1); if (in_c_state_dims != S_shape || out_c_state_dims != S_shape) @@ -196,9 +206,15 @@ void MKLDNNRNN::fillCellDesc() { w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; - if (bias) - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + // Add 5th input + w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + copyWeightsData(op); +} + +void MKLDNNRNN::fillCellDesc() { + // Expected shapes + MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; std::vector in_candidate, out_candidate; in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc}); in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); @@ -215,6 +231,11 @@ void MKLDNNRNN::fillCellDesc() { if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32) convertWeightsBlobToBF16(); } + if (one_of(cell_type, algorithm::vanilla_rnn, algorithm::vanilla_gru, algorithm::lbr_gru, algorithm::vanilla_lstm)) { + in_candidate.emplace_back(MKLDNNMemoryDesc {WShape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(MKLDNNMemoryDesc {RShape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(MKLDNNMemoryDesc {BShape, memory::data_type::f32, memory::format_tag::x}); + } createDescriptor(in_candidate, out_candidate); } @@ -229,10 +250,10 @@ void MKLDNNRNN::fillSeqDesc() { if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN)) THROW_ERROR << "RNN layer supports only LSTM/GRU/RNN cell"; - cell_type = ie2mkl(rnnLayer->cellType); + cell_type = ie2dnnl(op); cell_act = algorithm::undef; - if (!rnnLayer->activations.empty()) - cell_act = ie2mkl(rnnLayer->activations[0]); // Works only for RNN with one gate + if (!rnnCellBase->get_activations().empty()) + cell_act = ie2dnnl(rnnCellBase->get_activations()[0]); // Works only for RNN with one gate // TODO [oneDNN]: No more supported if (rnnLayer->clip != 0.0f) { @@ -256,12 +277,16 @@ void MKLDNNRNN::fillSeqDesc() { if (!one_of(outs.size(), 3, 2, 1)) THROW_ERROR << "Incorrect number of output ports for layer " << getName(); - auto in_data_dims = getParentEdgeAt(0)->getDims(); - auto out_data_dims = getChildEdgeAt(0)->getDims(); + auto in_data_dims = op->get_input_shape(0); + auto out_data_dims = op->get_output_shape(0); if (in_data_dims.ndims() != 3 || out_data_dims.ndims() != 3) THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); + N = op->get_input_shape(1)[0]; + nativeOrder = N == in_data_dims[1]; + out_data_dims.erase(out_data_dims.begin() + 1); + if (!nativeOrder) { std::swap(in_data_dims[0], in_data_dims[1]); std::swap(out_data_dims[0], out_data_dims[1]); @@ -270,9 +295,8 @@ void MKLDNNRNN::fillSeqDesc() { G = gatesCount(cell_type); S = 
statesCount(cell_type); T = in_data_dims[0]; - N = in_data_dims[1]; DC = in_data_dims[2]; - SC = out_data_dims[2]; + SC = rnnCellBase->get_hidden_size(); Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index edf462f7cb0e89..df279c4b3c58a4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -17,6 +16,7 @@ class MKLDNNRNN : public MKLDNNNode { MKLDNNRNN(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNRNN() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; @@ -26,6 +26,8 @@ class MKLDNNRNN : public MKLDNNNode { void execute(mkldnn::stream strm) override; private: + void initCell(const std::shared_ptr& op); + void initSeq(const std::shared_ptr& op); void fillCellDesc(); void fillSeqDesc(); bool verifyWeightsPrecision(const InferenceEngine::Precision& layerPrec, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index 67e8c09dfeca3a..c9a53c79e07865 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -4,13 +4,12 @@ #include "mkldnn_tensoriterator_node.h" -#include -#include #include #include #include -#include #include +#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -18,28 +17,24 @@ using namespace InferenceEngine::details; namespace MKLDNNPlugin { -static InferenceEngine::LayerConfig make_plain_config(const InferenceEngine::CNNLayerPtr &layer) { - using namespace InferenceEngine; +static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptr& op) { + InferenceEngine::LayerConfig config; - LayerConfig config; + for (size_t i = 0; i < op->get_input_size(); i++) { + const auto& dims = op->get_input_shape(i); + const auto prec = InferenceEngine::details::convertPrecision(op->get_input_element_type(i)); - for (const auto &in_w : layer->insData) { - const auto in = in_w.lock(); - - const auto dims = in->getDims(); - const auto prec = in->getPrecision(); - - DataConfig data_conf {}; - data_conf.desc = TensorDesc { prec, dims, TensorDesc::getLayoutByDims(dims) }; + InferenceEngine::DataConfig data_conf {}; + data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; config.inConfs.push_back(data_conf); } - for (const auto &out : layer->outData) { - const auto dims = out->getDims(); - const auto prec = out->getPrecision(); + for (size_t i = 0; i < op->get_output_size(); i++) { + const auto& dims = op->get_output_shape(i); + const auto prec = InferenceEngine::details::convertPrecision(op->get_output_element_type(i)); - DataConfig data_conf {}; - data_conf.desc = TensorDesc { prec, dims, TensorDesc::getLayoutByDims(dims) }; + InferenceEngine::DataConfig data_conf {}; + data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; config.outConfs.push_back(data_conf); } @@ -50,7 +45,7 @@ static InferenceEngine::LayerConfig make_plain_config(const 
InferenceEngine::CNN class PortIteratorHelper : public PortMapHelper { public: PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src, - const InferenceEngine::TensorIterator::PortMap &slice_rule, const mkldnn::engine& eng) + const PortMap &slice_rule, const mkldnn::engine& eng) : sliced_src(sliced_src) { const auto &full_blob = sliced_src ? from : to; const auto &part_blob = !sliced_src ? from : to; @@ -186,52 +181,214 @@ class staticValueCheck : public PortChecker { } // namespace MKLDNNPlugin -MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +int getNumIteration(const std::shared_ptr& op, const std::vector& inputPortMap, const std::vector& outputPortMap) { + const auto isIterable = [](const PortMap& rule) { return rule.axis != -1; }; + + const auto getNumIterations = [](const PortMap& rule, const std::vector& dimensions) -> int { + const auto axis = rule.axis; + if (axis < 0 || static_cast(axis) >= dimensions.size()) { + IE_THROW() << R"(: Invalid "axis" value in an iteration component: )" + << rule.axis << ", dimensions number = " << dimensions.size() << " (out of range)"; + } + const auto space = dimensions[axis]; + const int start = static_cast((rule.start < 0 ? (space + 1) : 0) + rule.start); + const int end = static_cast((rule.end < 0 ? (space + 1) : 0) + rule.end); + + const auto stride = rule.stride; + if (stride == 0) { + IE_THROW() << R"(: Invalid "stride" value in an iteration component: )" << rule.stride << " (infinite loop)"; + } + const auto step = std::abs(stride); + + const auto src = stride < 0 ? end : start; + const auto dst = stride < 0 ? start : end; + const auto length = dst - src; + if (src < 0 || src >= dst || dst > static_cast(space) || length < step) { + IE_THROW() << R"(: Invalid "start"/"stride"/"end" values in an iteration component)" + << ": \"start\" = " << rule.start << ", \"stride\" = " << rule.stride << ", \"end\" = " << rule.end; + } + + if (length % step != 0) { + IE_THROW() << ": Each iteration must be the same size: length (" << length << ") is not divisible by step (" << step << ")"; + } + + return static_cast(length / step); + }; + + + int numIterations = 1; + bool isDefault = true; + for (const auto& rule : inputPortMap) { + if (!isIterable(rule)) { + continue; + } + + if (rule.from < 0 || rule.from >= static_cast(op->get_input_size())) { + IE_THROW() << R"(: Invalid "from" value: "from" = )" << rule.from + << " inputs number = " << op->get_input_size() << " (out of range)"; + } + + const auto currentNumIterations = getNumIterations(rule, op->get_input_shape(rule.from)); + if (isDefault) { + isDefault = false; + numIterations = currentNumIterations; + } else if (numIterations != currentNumIterations) { + IE_THROW() << ": There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations; + } + } + + for (const auto& rule : outputPortMap) { + if (!isIterable(rule)) { + continue; + } + + if (rule.from < 0 || rule.from >= static_cast(op->get_output_size())) { + IE_THROW() << R"(: Invalid "from" value: "from" = )" << rule.from + << " inputs number = " << op->get_output_size() << " (out of range)"; + } + + const auto currentNumIterations = getNumIterations(rule, op->get_output_shape(rule.from)); + if (isDefault) { + isDefault = false; + numIterations = currentNumIterations; + } else if (numIterations != currentNumIterations) { + IE_THROW() << ": 
There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations; + } + } + + return numIterations; +} + +bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!one_of(op->get_type_info(), + ngraph::op::v0::TensorIterator::type_info, + ngraph::op::v5::Loop::type_info)) { + errorMessage = "Only opset1 TensorIterator or opset5 Loop operations are supported."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache), ngraphOp(op) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNTensorIteratorNode::getSupportedDescriptors() { - auto *ti = dynamic_cast(getCnnLayer().get()); - if (ti == nullptr) - IE_THROW() << "Cannot convert to TensorIterator layer."; + auto tiOp = std::dynamic_pointer_cast(ngraphOp); + const std::shared_ptr body = tiOp->get_function(); + sub_graph.CreateGraph(body, ext_mng, weightCache); + + const auto &inMap = sub_graph.GetInputNodesMap(); + for (const auto ¶m : tiOp->get_function()->get_parameters()) { + auto inNode = inMap.find(param->get_friendly_name()); + if (inNode != inMap.end()) { + auto inMem = inNode->second->getChildEdgeAt(0)->getMemoryPtr(); + input_mem.push_back(inMem); + } + } - n_iter = getNumIteration(*ti); - sub_graph.CreateGraph(ti->body, ext_mng, weightCache); + const auto &outMap = sub_graph.GetOutputNodesMap(); + for (const auto &out : tiOp->get_function()->get_results()) { + auto prev = out->get_input_node_shared_ptr(0); + std::string inputID = prev->get_friendly_name(); + if (prev->get_output_size() > 1) { + inputID += "." 
+ std::to_string(out->get_input_source_output(0).get_index()); + } + auto outNode = outMap.find(inputID); + if (outNode != outMap.end()) { + auto outMem = outNode->second->getParentEdgeAt(0)->getMemoryPtr(); + output_mem.push_back(outMem); + } + } - // Try to detect inputs and outputs by indexes - const auto &in_map = sub_graph.GetInputNodes(); - for (const auto &in_data : ti->body.inputs) { - if (in_data->getName() == "const_holder") continue; + // Port map: outputs + for (const auto& desc : tiOp->get_output_descriptions()) { + auto body_output_idx = desc->m_body_value_index; + + std::string type_name = desc->get_type_info().name; + if (type_name == "ConcatOutputDescription") { + auto output_desc = ::ngraph::as_type_ptr(desc); + IE_ASSERT(output_desc != nullptr); + + outputPortMap.emplace_back(PortMap { + static_cast(output_desc->m_output_index), static_cast(body_output_idx), + static_cast(output_desc->m_axis), static_cast(output_desc->m_stride), + static_cast(output_desc->m_start), static_cast(output_desc->m_end), + static_cast(output_desc->m_part_size)}); + } else if (type_name == "BodyOutputDescription") { + auto output_desc = ::ngraph::as_type_ptr(desc); + IE_ASSERT(output_desc != nullptr); + + outputPortMap.emplace_back(PortMap { + static_cast(output_desc->m_output_index), static_cast(body_output_idx), -1, 1, 0, -1, 1}); + } else { + IE_THROW() << "Incorrect type of the output description."; + } + } - auto &in_node = in_map.at(in_data->getName()); - auto in_mem = in_node->getChildEdgeAt(0)->getMemoryPtr(); - input_mem.push_back(in_mem); + // Port map : inputs and back edges + for (const auto& desc : tiOp->get_input_descriptions()) { + auto body_input_index = desc->m_body_parameter_index; + + if (const auto slice_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(slice_desc->m_input_index), static_cast(body_input_index), + static_cast(slice_desc->m_axis), static_cast(slice_desc->m_stride), + static_cast(slice_desc->m_start), static_cast(slice_desc->m_end), + static_cast(slice_desc->m_part_size)}); + } else if (const auto merge_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(merge_desc->m_input_index), static_cast(body_input_index), -1, 1, 0, -1, 1}); + + auto body_output_idx = merge_desc->m_body_value_index; + + backEdges.emplace_back(PortMap { + static_cast(body_output_idx), static_cast(body_input_index), -1, 1, 0, -1, 1}); + } else if (const auto inv_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(inv_desc->m_input_index), static_cast(body_input_index), -1, 1, 0, -1, 1}); + } else { + IE_THROW() << "Incorrect type of the input description."; + } } - // Assume that order of outputs in original TI and produces sub_graph is same - const auto &out_vec = sub_graph.GetOutputNodes(); - for (size_t i = 0; i < out_vec.size(); i++) { - auto out_mem = out_vec[i]->getParentEdgeAt(0)->getMemoryPtr(); - output_mem.push_back(out_mem); + n_iter = getNumIteration(ngraphOp, inputPortMap, outputPortMap); + + if (const auto loopOp = std::dynamic_pointer_cast(ngraphOp)) { + auto spec_port = loopOp->get_special_body_ports(); + if (spec_port.current_iteration_input_idx != -1) { + loopBodyCurrentIterationIdx.push_back(spec_port.current_iteration_input_idx); + } + if (spec_port.body_condition_output_idx != -1) { + loopBodyConditionOutputIdx = spec_port.body_condition_output_idx; + } + loopTripCountIdx = 0; + loopExecutionConditionIdx = 1; } + + config = 
make_plain_config(ngraphOp); } void MKLDNNTensorIteratorNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto config = make_plain_config(getCnnLayer()); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } void MKLDNNTensorIteratorNode::createPrimitive() { - auto ti = dynamic_cast(getCnnLayer().get()); - if (ti == nullptr) - IE_THROW() << "Cannot convert to TensorIterator layer."; - const auto &eng = getEngine(); - for (auto map_rule : ti->input_port_map) { + for (auto map_rule : inputPortMap) { auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &to_mem = input_mem[map_rule.to]; @@ -241,7 +398,7 @@ void MKLDNNTensorIteratorNode::createPrimitive() { before_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, true, map_rule, eng)); } - for (auto map_rule : ti->output_port_map) { + for (auto map_rule : outputPortMap) { auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &from_mem = output_mem[map_rule.to]; @@ -251,7 +408,7 @@ void MKLDNNTensorIteratorNode::createPrimitive() { after_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, false, map_rule, eng)); } - for (auto map_rule : ti->back_edges) { + for (auto map_rule : backEdges) { auto from_mem = output_mem[map_rule.from]; auto to_mem = input_mem[map_rule.to]; @@ -259,38 +416,29 @@ void MKLDNNTensorIteratorNode::createPrimitive() { } // special purpose ports - constexpr auto key_cur_iter_port = "loop_body_current_iteration_idx"; - constexpr auto key_cond_port = "loop_body_condition_output_idx"; - constexpr auto key_trip_count_port = "loop_trip_count_idx"; - constexpr auto key_init_cond_port = "loop_execution_condition_idx"; - - auto iter_idx_ports = ti->GetParamAsInts(key_cur_iter_port, {}); - for (auto idx : iter_idx_ports) { + for (auto idx : loopBodyCurrentIterationIdx) { auto to_mem = input_mem[idx]; before_mappers.emplace_back(new IterCountPortHelper(to_mem, eng)); } - auto condition_port_idx = ti->GetParamAsInt(key_cond_port, -1); - if (condition_port_idx == -1) { + if (loopBodyConditionOutputIdx == -1) { continue_cond_check.reset(new staticValueCheck(true)); // always true } else { - auto mem = output_mem[condition_port_idx]; + auto mem = output_mem[loopBodyConditionOutputIdx]; continue_cond_check.reset(new asBoolCheck(mem)); } - auto trip_count_port_idx = ti->GetParamAsInt(key_trip_count_port, -1); - if (trip_count_port_idx == -1) { + if (loopTripCountIdx == -1) { trip_count_check.reset(new staticValueCheck(n_iter)); // use statically calculated num of iteration } else { - auto mem = getParentEdgesAtPort(trip_count_port_idx)[0]->getMemoryPtr(); + auto mem = getParentEdgesAtPort(loopTripCountIdx)[0]->getMemoryPtr(); trip_count_check.reset(new asIntCheck(mem)); } - auto init_cond_port_idx = ti->GetParamAsInt(key_init_cond_port, -1); - if (init_cond_port_idx == -1) { + if (loopExecutionConditionIdx == -1) { initial_cond_check.reset(new staticValueCheck(true)); } else { - auto mem = getParentEdgesAtPort(init_cond_port_idx)[0]->getMemoryPtr(); + auto mem = getParentEdgesAtPort(loopExecutionConditionIdx)[0]->getMemoryPtr(); initial_cond_check.reset(new asBoolCheck(mem)); } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h index 34821531659798..3f3dd96e6f1d7c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h
@@ -4,7 +4,6 @@
 #pragma once
 
-#include 
 #include 
 #include 
 #include 
@@ -13,6 +12,19 @@
 
 namespace MKLDNNPlugin {
 
+struct PortMap {
+    // Data map rule
+    int from; /**< Index of external data from ins/outs fields of node */
+    int to;   /**< Index of internal data in iterator body */
+
+    // Iteration rule
+    int axis;      /**< Axis to iterate through */
+    int stride;    /**< Stride to iterate through */
+    int start;     /**< Start index of iteration range */
+    int end;       /**< Last index of iteration range */
+    int part_size; /**< Part size which will be transferred to body subnetwork */
+};
+
 /**
  * Functor interface to perform some action with pointed tensors (captured in constructor)
  * Generally it's read, write or move data from specified tensors.
@@ -48,6 +60,7 @@ class MKLDNNTensorIteratorNode : public MKLDNNNode {
     MKLDNNTensorIteratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
     ~MKLDNNTensorIteratorNode() override = default;
 
+    static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
     void initSupportedPrimitiveDescriptors() override;
     void getSupportedDescriptors() override;
     void createPrimitive() override;
@@ -73,6 +86,19 @@ class MKLDNNTensorIteratorNode : public MKLDNNNode {
         trip_count_check,      /// < Perform check of trip count value. value >= -1
         initial_cond_check,    /// < Perform check of initial continue condition value. value [0, 1]
         continue_cond_check;   /// < Perform check of continue condition value of body. value [0, 1]
+
+    std::vector inputPortMap;  //!< Input ports map
+    std::vector outputPortMap; //!< Output ports map
+    std::vector backEdges;     //!< Back edges map
+
+    std::vector loopBodyCurrentIterationIdx;
+    int loopBodyConditionOutputIdx = -1;
+    int loopTripCountIdx = -1;
+    int loopExecutionConditionIdx = -1;
+
+    InferenceEngine::LayerConfig config;
+
+    const std::shared_ptr ngraphOp;
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
index 9657f512a75e97..270839955cd3ac 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
@@ -64,19 +64,7 @@ std::vector disabledTestPatterns() {
         /* ********************************************************** TEMPORARILY DISABLED TESTS ********************************************************** */
         // shared SLT test
-        R"(.*GRUCellTest.*)",
-        R"(.*GRUSequenceTest.*)",
-        R"(.*StaticShapeLoopTest.*)",
-        R"(.*TrivialLoopTest.*)",
-        R"(.*LoopTest.*)",
-        R"(.*LSTMCellTest.*)",
-        R"(.*LSTMSequenceTest.*)",
-        R"(.*RNNCellTest.*)",
-        R"(.*RNNSequenceTest.*)",
-        R"(.*TensorIteratorTest.*)",
-
-        // shared subgraph test
-        R"(.*MultipleLSTMCellTest.*)",
+        R"(.*TensorIteratorCommonClip/TensorIteratorTest.*)"
     };
 
     // TODO [NM]: Disabled until BF16 transformer is migrated on CPU graph representation.
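
Note on the new Optimize*SequenceTransposes passes: they look for the Transpose(order={1,0,2}) -> Sequence -> Transpose(order={2,1,0,3}) pair that the TI-to-Sequences transformation inserts and, when the pattern matches, replace both Transposes with Reshapes so the sequence can execute with seq_axis = 0. The sketch below is illustrative only (it is not part of this patch): it builds such a pattern for the LSTM variant and runs the pass on it. The shapes, constant values, helper names and the standalone pass-manager wiring are assumptions made for the example.

// Illustrative sketch, assuming ngraph opset5 and the plugin's include path for
// ngraph_transformations/rnn_sequences_optimization.hpp.
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/pass/manager.hpp>
#include "ngraph_transformations/rnn_sequences_optimization.hpp"

static std::shared_ptr<ngraph::Function> makeLstmSequencePattern() {
    using namespace ngraph;
    const size_t T = 10, N = 2, DC = 16, SC = 32, D = 1;  // seq_len, batch, input size, hidden size, num directions

    auto X  = std::make_shared<opset5::Parameter>(element::f32, Shape{T, N, DC});  // time-major input
    auto H0 = std::make_shared<opset5::Parameter>(element::f32, Shape{N, D, SC});
    auto C0 = std::make_shared<opset5::Parameter>(element::f32, Shape{N, D, SC});
    auto len = opset5::Constant::create(element::i32, Shape{N}, std::vector<int>(N, T));
    auto W = opset5::Constant::create(element::f32, Shape{D, 4 * SC, DC}, {0.f});
    auto R = opset5::Constant::create(element::f32, Shape{D, 4 * SC, SC}, {0.f});
    auto B = opset5::Constant::create(element::f32, Shape{D, 4 * SC}, {0.f});

    // Transpose in front of the sequence: [T, N, DC] -> [N, T, DC], order {1, 0, 2}
    auto orderIn = opset5::Constant::create(element::i32, Shape{3}, {1, 0, 2});
    auto transposeIn = std::make_shared<opset5::Transpose>(X, orderIn);

    auto lstm = std::make_shared<opset5::LSTMSequence>(transposeIn, H0, C0, len, W, R, B, SC,
                                                       op::RecurrentSequenceDirection::FORWARD);

    // Transpose behind the sequence output: [N, D, T, SC] -> [T, D, N, SC], order {2, 1, 0, 3}
    auto orderOut = opset5::Constant::create(element::i32, Shape{4}, {2, 1, 0, 3});
    auto transposeOut = std::make_shared<opset5::Transpose>(lstm->output(0), orderOut);

    return std::make_shared<Function>(OutputVector{transposeOut}, ParameterVector{X, H0, C0});
}

// With the pattern above, getSeqAxis() detects seqAxis == 0 and transform() is expected to
// replace both Transpose nodes with Reshape nodes around the LSTMSequence.
void optimizeSequenceTransposes(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    manager.register_pass<MKLDNNPlugin::OptimizeLSTMSequenceTransposes>();
    manager.run_passes(f);
}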