From 01af7f2591c48e658069d482b26339e47cc37c07 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Thu, 29 Apr 2021 10:38:35 +0300 Subject: [PATCH] [CPU] {GRM, LSTM, RNN} Cells and Sequences migration on nGraph. (#55) --- .../src/mkldnn_plugin/CMakeLists.txt | 4 +- .../src/mkldnn_plugin/mkldnn_graph.cpp | 181 ++++++------ .../src/mkldnn_plugin/mkldnn_graph.h | 4 +- .../src/mkldnn_plugin/mkldnn_node.cpp | 57 ++-- .../src/mkldnn_plugin/mkldnn_node.h | 1 - .../convert_to_cpu_specific_opset.hpp | 4 + .../rnn_sequences_optimization.cpp | 144 +++++++++ .../rnn_sequences_optimization.hpp | 29 ++ .../src/mkldnn_plugin/nodes/mkldnn_rnn.cpp | 68 +++-- .../src/mkldnn_plugin/nodes/mkldnn_rnn.h | 4 +- .../nodes/mkldnn_tensoriterator_node.cpp | 274 ++++++++++++++---- .../nodes/mkldnn_tensoriterator_node.h | 28 +- .../skip_tests_config.cpp | 14 +- 13 files changed, 579 insertions(+), 233 deletions(-) create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp create mode 100644 inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index ec2d9ff2f9cdfc..9945746d1a3620 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -34,12 +34,12 @@ set(LAYERS ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_fake_quantize_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reorder_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reshape_node.cpp -# ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_rnn.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_rnn.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roi_align_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roi_pooling_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_softmax_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_split_node.cpp -# ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tensoriterator_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tensoriterator_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tile_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_mvn_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 94cb2ad7634151..3a4ce660779e69 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -91,107 +91,92 @@ void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMg status = Ready; } -//template void MKLDNNGraph::CreateGraph(const TensorIterator::Body&, -// const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); +template void MKLDNNGraph::CreateGraph(const std::shared_ptr&, + const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); template void MKLDNNGraph::CreateGraph(const CNNNetwork&, const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); -//template void MKLDNNGraph::CreateGraph(CNNNetwork&, -// const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); -//void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { -// this->_name = "subgraph"; -// this->reuse_io_tensors = false; -// -// // Map data object onto producer layer(node) -// std::unordered_map> data2node; -// -// // nodes which has no consumers (output or just unused). But doesn't marked as graph output. 
-// // Will be stored as fake output separately. -// std::unordered_set unused_data; -// -// // Step 1. Replicate input nodes -// for (const auto &input : subgraph.inputs) { -// if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder -// -// auto creator = getCreatorLayer(input).lock(); -// if (creator == nullptr) { -// creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()})); -// creator->outData.push_back(input); -// } -// -// const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache)); -// data2node[input.get()] = {node, 0}; -// -// graphNodes.push_back(node); -// inputNodesMap[input->getName()] = node; -// -// if (getInputTo(input).empty()) { -// unused_data.insert(input); -// } -// } -// -// // Step 2. Replicate all internal nodes. -// for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) { -// const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; -// graphNodes.push_back(node); -// -// for (int port = 0; port < layer->insData.size(); port++) { -// auto data = layer->insData[port].lock(); -// -// auto port_info = data2node[data.get()]; -// auto parent_node = port_info.first; -// auto parent_port_idx = port_info.second; -// -// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port)); -// node->addEdge(edge); -// graphEdges.push_back(edge); -// } -// int out_port_idx = 0; -// for (auto &out_data : layer->outData) { -// data2node[out_data.get()] = {node, out_port_idx++}; -// if (getInputTo(out_data).empty()) { -// unused_data.insert(out_data); -// } -// } -// } -// -// // Step 3. Add output nodes and output stubs for unused data objects. -// for (const auto &output : subgraph.outputs) { -// auto port_info = data2node[output.get()]; -// auto parent_node = port_info.first; -// auto parent_port_idx = port_info.second; -// -// CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()})); -// layer->insData.push_back(output); -// -// const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; -// -// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); -// node->addEdge(edge); -// graphEdges.push_back(edge); -// graphNodes.push_back(node); -// outputNodesMap.push_back(node); -// -// unused_data.erase(output); -// } -// -// // Add stub output node for unused data -// for (auto to_stub_data : unused_data) { -// auto port_info = data2node[to_stub_data.get()]; -// auto parent_node = port_info.first; -// auto parent_port_idx = port_info.second; -// -// CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()})); -// layer->insData.push_back(to_stub_data); -// -// const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); -// -// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); -// node->addEdge(edge); -// graphEdges.push_back(edge); -// graphNodes.push_back(node); -// } -//} +void MKLDNNGraph::Replicate(const std::shared_ptr &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { + this->_name = "subgraph"; + this->reuse_io_tensors = false; + + // Map data object onto producer node + std::map, std::pair> op2node; + + // nodes which has no consumers (output or just unused). But doesn't marked as graph output. + // Will be stored as fake output separately. 
+ std::deque> unusedOutputs; + + auto getParentOutputPort = [](const std::shared_ptr childOp, const std::shared_ptr parentOp, + const size_t childInputPort) -> int { + for (size_t parentPort = 0; parentPort < parentOp->get_output_size(); parentPort++) { + if (childOp->input(childInputPort).get_tensor_ptr() == parentOp->output(parentPort).get_tensor_ptr()) { + return static_cast(parentPort); + } + } + + return -1; + }; + + for (const auto op : subgraph->get_ordered_ops()) { + const MKLDNNNodePtr node {MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)}; + graphNodes.push_back(node); + + if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) { + inputNodesMap[node->getName()] = node; + } + + if (op->get_type_info() == ngraph::op::v0::Result::type_info) { + auto prev = op->get_input_node_shared_ptr(0); + std::string inputID; + inputID = prev->get_friendly_name(); + if (prev->get_output_size() > 1) { + inputID += "." + std::to_string(op->get_input_source_output(0).get_index()); + } + + outputNodesMap[inputID] = node; + } + + for (size_t port = 0; port < op->get_input_size(); port++) { + auto parentOp = op->get_input_node_shared_ptr(port); + + auto portInfo = op2node[parentOp]; + auto parentNode = portInfo.first; + + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentOutputPort(op, parentOp, port), port)); + node->addEdge(edge); + graphEdges.push_back(edge); + } + + if (!MKLDNNPlugin::one_of(op->get_type_info(), + ngraph::op::v0::Result::type_info, + ngraph::op::v3::Assign::type_info, + ngraph::op::v6::Assign::type_info)) { + int outPortIdx = 0; + for (int oi = 0; oi < op->get_output_size(); oi++) { + op2node[op->output(oi).get_node_shared_ptr()] = {node, outPortIdx++}; + if (op->get_output_target_inputs(oi).empty()) { + unusedOutputs.push_back(op->output(oi)); + } + } + } + } + + // Add stub output node for unused data + for (auto unusedOutput : unusedOutputs) { + auto portInfo = op2node[unusedOutput.get_node_shared_ptr()]; + auto parentNode = portInfo.first; + auto port = portInfo.second; + const auto nodeName = std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName(); + const MKLDNNNodePtr outNode = std::make_shared(parentNode->outDims[port].ToSizeVector(), + parentNode->getOriginalOutputPrecisionAtPort(port), + nodeName, "Result", getEngine(), weightsCache); + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0)); + outNode->addEdge(edge); + graphEdges.push_back(edge); + graphNodes.push_back(outNode); + } +} void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { OV_ITT_SCOPE_CHAIN(FIRST_INFERENCE, taskChain, itt::domains::MKLDNN_LT, "MKLDNNGraph::Replicate", "CNNNetwork"); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 2383221ff6325e..6a9085cffcfe59 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -4,14 +4,12 @@ #pragma once -#include "ie_parallel.hpp" #include "cpp/ie_cnn_network.h" #include "config.h" #include "mkldnn_memory.h" #include "mean_image.h" #include "mkldnn_node.h" #include "mkldnn_edge.h" -#include "threading/ie_thread_local.hpp" #include #include #include @@ -190,7 +188,7 @@ class MKLDNNGraph { static mkldnn::engine eng; void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr); -// void Replicate(const InferenceEngine::TensorIterator::Body &subgraph, const 
MKLDNNExtensionManager::Ptr& extMgr); + void Replicate(const std::shared_ptr &subgraph, const MKLDNNExtensionManager::Ptr& extMgr); void InitGraph(); void InitNodes(); void InitDescriptors(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 5895a60ad0c6d6..0ca8fb195cd334 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -161,17 +161,17 @@ static const InferenceEngine::details::caseless_unordered_map { "Pad", Pad }, { "Transpose", Transpose }, // { "Copy", Copy }, -// { "LSTMCell", RNNCell }, -// { "GRUCell", RNNCell }, -// { "RNNCell", RNNCell }, -// { "LSTMSequence", RNNSeq }, -// { "GRUSequence", RNNSeq }, -// { "RNNSequence", RNNSeq }, + { "LSTMCell", RNNCell }, + { "GRUCell", RNNCell }, + { "RNNCell", RNNCell }, + { "LSTMSequence", RNNSeq }, + { "GRUSequence", RNNSeq }, + { "RNNSequence", RNNSeq }, { "FakeQuantize", FakeQuantize }, { "BinaryConvolution", BinaryConvolution }, { "DeformableConvolution", DeformableConvolution }, -// { "TensorIterator", TensorIterator }, -// { "Loop", TensorIterator }, + { "TensorIterator", TensorIterator }, + { "Loop", TensorIterator }, { "ReadValue", MemoryInput}, // for construction from name ctor, arbitrary name is used { "Assign", MemoryOutput }, // for construction from layer ctor { "Convert", Convert }, @@ -1293,27 +1293,26 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr } // TODO [NM]: enable after all nodes will be migrated on ngraph - // if (newNode == nullptr) { - // try { - // std::unique_ptr ol(new MKLDNNReferenceNode(op, eng, w_cache, errorMessage)); - // if (ol != nullptr && ol->created(extMgr)) - // newNode = ol.release(); - // } catch (const InferenceEngine::Exception& ex) { - // if (ex.getStatus() != NOT_IMPLEMENTED) { - // throw; - // } else { - // errorMessage += getExceptionDescWithoutStatus(ex); - // } - // } - // } - -// TODO [NM]: Not implemented -// // WA-start : TI node requires all attributes to construct internal subgpath -// // including extManager, socket and mkldnn::eng. -// MKLDNNTensorIteratorNode *ti = dynamic_cast(newNode); -// if (ti != nullptr) -// ti->setExtManager(extMgr); -// // WA-end +// if (newNode == nullptr) { +// try { +// std::unique_ptr ol(new MKLDNNReferenceNode(op, eng, w_cache, errorMessage)); +// if (ol != nullptr && ol->created(extMgr)) +// newNode = ol.release(); +// } catch (const InferenceEngine::Exception& ex) { +// if (ex.getStatus() != NOT_IMPLEMENTED) { +// throw; +// } else { +// errorMessage += getExceptionDescWithoutStatus(ex); +// } +// } +// } + + // WA-start : TI node requires all attributes to construct internal subgpath + // including extManager, socket and mkldnn::eng. 
+ MKLDNNTensorIteratorNode *ti = dynamic_cast(newNode); + if (ti != nullptr) + ti->setExtManager(extMgr); + // WA-end if (!newNode) { std::string errorDetails; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 5a47a3b1a3b457..632d3253ee6be7 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -11,7 +11,6 @@ #include #include #include -#include #include "mkldnn_dims.h" #include "mkldnn_memory.h" #include "mkldnn_edge.h" diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp index f9bef52913bed3..8496558e61481f 100644 --- a/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/convert_to_cpu_specific_opset.hpp @@ -14,6 +14,7 @@ #include "convert_to_leaky_relu.hpp" #include "convert_to_swish_cpu.hpp" #include "reshape_prelu.hpp" +#include "rnn_sequences_optimization.hpp" namespace MKLDNNPlugin { @@ -34,6 +35,9 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr &nGraphF manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); + manager.register_pass(); + manager.register_pass(); if (!ngraph::op::util::has_op_with_type(nGraphFunc)) { manager.register_pass(); } diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp new file mode 100644 index 00000000000000..74d52c5aaf1129 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.cpp @@ -0,0 +1,144 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "rnn_sequences_optimization.hpp" +#include +#include +#include + +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeGRUSequenceTransposes, "OptimizeGRUSequenceTransposes", 0); +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeLSTMSequenceTransposes, "OptimizeLSTMSequenceTransposes", 0); +NGRAPH_RTTI_DEFINITION(MKLDNNPlugin::OptimizeRNNSequenceTransposes, "OptimizeRNNSequenceTransposes", 0); + +namespace { + int64_t getSeqAxis(const std::shared_ptr& sequenceOp) { + // Optimization. + // Plug-ins support seqAxis attribute (value 1 or 0) for Seq ops, but according to the spec we don't + // support this attribute and should insert Transpose layer before and after Seq op in TI to Sequences + // transformation. Additional Transpose layers affect the performance, so we try to detect pattern + // Transpose(axis_order={1,0,2}) -> Seq -> Transpose(axis_order={2,1,0,3} + // and replace unnecessary Transpose ops with SeqIE (seqAxis = 0) to transfer value + // of the attribute to plug-ins. + // todo: specify seqAxis attribute for Sequence ops. 
+ int64_t seqAxis = 1; // default + const auto& target_inputs = sequenceOp->output(0).get_target_inputs(); + if (target_inputs.size() == 1) { + const auto& transpose_before = std::dynamic_pointer_cast(sequenceOp->input_value(0).get_node_shared_ptr()); + const auto& transpose_after = std::dynamic_pointer_cast(target_inputs.begin()->get_node()->shared_from_this()); + if (transpose_after != nullptr && transpose_before != nullptr) { + auto order_before = std::dynamic_pointer_cast( + transpose_before->input_value(1).get_node_shared_ptr()); + auto order_after = std::dynamic_pointer_cast( + transpose_after->input_value(1).get_node_shared_ptr()); + if (order_before != nullptr && order_after != nullptr) { + auto order_before_values = order_before->cast_vector(); + auto order_after_values = order_after->cast_vector(); + std::vector order_ref_before = {1, 0, 2}; + std::vector order_ref_after = {2, 1, 0, 3}; + if (order_before_values == order_ref_before && order_after_values == order_ref_after) { + seqAxis = 0; + } + } + } + } + return seqAxis; + } + + bool transform(const std::shared_ptr& sequenceOp) { + // Detect pattern: Transpose_before -> Seq -> Transpose_after + auto seqAxis = getSeqAxis(sequenceOp); + if (seqAxis == 0) { + ngraph::Output in_0 = sequenceOp->get_input_source_output(0).get_node_shared_ptr()->get_input_source_output(0); + + auto newInShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, sequenceOp->get_input_shape(0)); + auto reshape1 = std::make_shared(in_0, newInShape, false); + ngraph::replace_node(sequenceOp->get_input_node_shared_ptr(0), {reshape1->output(0)}); + + const auto &gruTargetInputs = sequenceOp->output(0).get_target_inputs(); + if (gruTargetInputs.empty()) + return false; + auto transposeAfter = gruTargetInputs.begin()->get_node()->shared_from_this(); + + auto newOutShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{4}, transposeAfter->get_output_shape(0)); + auto reshape2 = std::make_shared(sequenceOp->output(0), newOutShape, false); + ngraph::replace_node(transposeAfter, {reshape2->output(0)}); + } else { + auto originShape = sequenceOp->get_output_shape(0); + auto seqOut = sequenceOp->get_output_target_inputs(0).begin()->get_node()->shared_from_this(); + + auto tncShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, {originShape[2], originShape[0], originShape[3]}); + auto reshape1 = std::make_shared(sequenceOp->output(0), tncShape, false); + + auto order = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{3}, {1, 0, 2}); + auto transpose = std::make_shared(reshape1->output(0), order); + + auto ndtcShape = ngraph::op::v0::Constant::create(ngraph::element::i32, ngraph::Shape{4}, originShape); + auto reshape2 = std::make_shared(transpose->output(0), ndtcShape, false); + reshape2->set_friendly_name(sequenceOp->get_friendly_name()+".0"); + + ngraph::insert_new_node_between(sequenceOp, seqOut, reshape2); + } + return true; + } +} // namespace + +MKLDNNPlugin::OptimizeGRUSequenceTransposes::OptimizeGRUSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto gruSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!gruSequence) { + return false; + } + // Bidirectional cases are not supported + if (gruSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(gruSequence); + }; + + auto gruSequenceNgraph = ngraph::pattern::wrap_type(); + + auto m = 
std::make_shared(gruSequenceNgraph, "OptimizeGRUSequenceTransposes"); + this->register_matcher(m, callback); +} + +MKLDNNPlugin::OptimizeRNNSequenceTransposes::OptimizeRNNSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto rnnSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!rnnSequence) { + return false; + } + // Bidirectional cases are not supported + if (rnnSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(rnnSequence); + }; + + auto rnnSequenceNgraph = ngraph::pattern::wrap_type(); + + auto m = std::make_shared(rnnSequenceNgraph, "OptimizeRNNSequenceTransposes"); + this->register_matcher(m, callback); +} + +MKLDNNPlugin::OptimizeLSTMSequenceTransposes::OptimizeLSTMSequenceTransposes() { + ngraph::matcher_pass_callback callback = [](ngraph::pattern::Matcher &m) { + auto lstmSequence = std::dynamic_pointer_cast(m.get_match_root()); + if (!lstmSequence) { + return false; + } + // Bidirectional cases are not supported + if (lstmSequence->get_direction() == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL) + return false; + + return transform(lstmSequence); + }; + + auto lstmSequenceNgraph_0 = ngraph::pattern::wrap_type(); + auto lstmSequenceNgraph_5 = ngraph::pattern::wrap_type(); + const auto lstmSeqInputs = std::make_shared(ngraph::OutputVector{lstmSequenceNgraph_0, lstmSequenceNgraph_5}); + + auto m = std::make_shared(lstmSeqInputs, "OptimizeLSTMSequenceTransposes"); + + this->register_matcher(m, callback); +} diff --git a/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp new file mode 100644 index 00000000000000..14cf1a585af720 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/ngraph_transformations/rnn_sequences_optimization.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +namespace MKLDNNPlugin { + +class OptimizeGRUSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeGRUSequenceTransposes(); +}; + +class OptimizeLSTMSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeLSTMSequenceTransposes(); +}; + +class OptimizeRNNSequenceTransposes : public ngraph::pass::MatcherPass { +public: + NGRAPH_RTTI_DECLARATION; + OptimizeRNNSequenceTransposes(); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp index 9b220b0a9a6d1d..b38964f3b6cc1b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp @@ -11,6 +11,8 @@ #include "utils/bfloat16.hpp" #include "nodes/common/cpu_convert.h" +#include + #include #include @@ -21,16 +23,24 @@ using namespace InferenceEngine; namespace MKLDNNPlugin { -using _RNN = RNNSequenceLayer; // alias - -static rnn_direction ie2mkl(_RNN::Direction &direction) { - return direction == _RNN::FWD ? rnn_direction::unidirectional_left2right - : direction == _RNN::BWD ? rnn_direction::unidirectional_right2left - : direction == _RNN::BDR ? 
rnn_direction::bidirectional_concat +static rnn_direction ieDirection2dnnl(const std::shared_ptr& op) { + ngraph::op::RecurrentSequenceDirection direction = ngraph::op::RecurrentSequenceDirection::FORWARD; + if (op->get_type_info() == ngraph::op::v5::GRUSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v0::LSTMSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v5::LSTMSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } else if (op->get_type_info() == ngraph::op::v5::RNNSequence::type_info) { + direction = ngraph::as_type_ptr(op)->get_direction(); + } + return direction == ngraph::op::RecurrentSequenceDirection::FORWARD ? rnn_direction::unidirectional_left2right + : direction == ngraph::op::RecurrentSequenceDirection::REVERSE ? rnn_direction::unidirectional_right2left + : direction == ngraph::op::RecurrentSequenceDirection::BIDIRECTIONAL ? rnn_direction::bidirectional_concat : rnn_direction::unidirectional; } -static algorithm ie2mkl(std::string act_type) { +static algorithm ie2dnnl(std::string act_type) { return act_type == "sigmoid" ? algorithm::eltwise_logistic : act_type == "tanh" ? algorithm::eltwise_tanh : act_type == "relu" ? algorithm::eltwise_relu @@ -128,9 +138,9 @@ void MKLDNNRNN::fillCellDesc() { if (!one_of(outs.size(), 2, 1)) THROW_ERROR << "Incorrect number of output ports for layer " << getName(); - auto in_data_dims = getParentEdgeAt(0)->getDims(); - auto in_h_state_dims = getParentEdgeAt(1)->getDims(); - auto out_h_state_dims = getChildEdgeAt(0)->getDims(); + auto in_data_dims = op->get_input_shape(0); + auto in_h_state_dims = op->get_input_shape(1); + auto out_h_state_dims = op->get_output_shape(0); if (in_data_dims.ndims() != 2 || in_h_state_dims.ndims() != 2) THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); @@ -145,7 +155,7 @@ void MKLDNNRNN::fillCellDesc() { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? 
G : G + 1; // Expected shapes - MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; + SizeVector D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; if (in_data_dims != D_shape || in_h_state_dims != S_shape @@ -153,8 +163,8 @@ void MKLDNNRNN::fillCellDesc() { THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); if (S == 2) { - auto in_c_state_dims = getParentEdgeAt(2)->getDims(); - auto out_c_state_dims = getChildEdgeAt(1)->getDims(); + auto in_c_state_dims = op->get_input_shape(2); + auto out_c_state_dims = op->get_output_shape(1); if (in_c_state_dims != S_shape || out_c_state_dims != S_shape) @@ -196,9 +206,15 @@ void MKLDNNRNN::fillCellDesc() { w_data_d = {{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo}; w_state_d = {{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo}; - if (bias) - w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + // Add 5th input + w_bias_d = {{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo}; + copyWeightsData(op); +} + +void MKLDNNRNN::fillCellDesc() { + // Expected shapes + MKLDNNDims D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; std::vector in_candidate, out_candidate; in_candidate.emplace_back(MKLDNNMemoryDesc {D_shape, dataType, memory::format_tag::nc}); in_candidate.emplace_back(MKLDNNMemoryDesc {S_shape, dataType, memory::format_tag::nc}); @@ -215,6 +231,11 @@ void MKLDNNRNN::fillCellDesc() { if (runtimePrecision == Precision::BF16 && weights_prec == Precision::FP32) convertWeightsBlobToBF16(); } + if (one_of(cell_type, algorithm::vanilla_rnn, algorithm::vanilla_gru, algorithm::lbr_gru, algorithm::vanilla_lstm)) { + in_candidate.emplace_back(MKLDNNMemoryDesc {WShape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(MKLDNNMemoryDesc {RShape, memory::data_type::f32, memory::format_tag::nc}); + in_candidate.emplace_back(MKLDNNMemoryDesc {BShape, memory::data_type::f32, memory::format_tag::x}); + } createDescriptor(in_candidate, out_candidate); } @@ -229,10 +250,10 @@ void MKLDNNRNN::fillSeqDesc() { if (!one_of(rnnLayer->cellType, _RNN::LSTM, _RNN::GRU, _RNN::GRU_LBR, _RNN::RNN)) THROW_ERROR << "RNN layer supports only LSTM/GRU/RNN cell"; - cell_type = ie2mkl(rnnLayer->cellType); + cell_type = ie2dnnl(op); cell_act = algorithm::undef; - if (!rnnLayer->activations.empty()) - cell_act = ie2mkl(rnnLayer->activations[0]); // Works only for RNN with one gate + if (!rnnCellBase->get_activations().empty()) + cell_act = ie2dnnl(rnnCellBase->get_activations()[0]); // Works only for RNN with one gate // TODO [oneDNN]: No more supported if (rnnLayer->clip != 0.0f) { @@ -256,12 +277,16 @@ void MKLDNNRNN::fillSeqDesc() { if (!one_of(outs.size(), 3, 2, 1)) THROW_ERROR << "Incorrect number of output ports for layer " << getName(); - auto in_data_dims = getParentEdgeAt(0)->getDims(); - auto out_data_dims = getChildEdgeAt(0)->getDims(); + auto in_data_dims = op->get_input_shape(0); + auto out_data_dims = op->get_output_shape(0); if (in_data_dims.ndims() != 3 || out_data_dims.ndims() != 3) THROW_ERROR << "Incorrect shape of input/output ports for layer " << getName(); + N = op->get_input_shape(1)[0]; + nativeOrder = N == in_data_dims[1]; + out_data_dims.erase(out_data_dims.begin() + 1); + if (!nativeOrder) { std::swap(in_data_dims[0], in_data_dims[1]); std::swap(out_data_dims[0], out_data_dims[1]); @@ -270,9 +295,8 @@ void MKLDNNRNN::fillSeqDesc() { G = gatesCount(cell_type); S = 
statesCount(cell_type); T = in_data_dims[0]; - N = in_data_dims[1]; DC = in_data_dims[2]; - SC = out_data_dims[2]; + SC = rnnCellBase->get_hidden_size(); Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index edf462f7cb0e89..df279c4b3c58a4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -4,7 +4,6 @@ #pragma once -#include #include #include #include @@ -17,6 +16,7 @@ class MKLDNNRNN : public MKLDNNNode { MKLDNNRNN(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNRNN() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; @@ -26,6 +26,8 @@ class MKLDNNRNN : public MKLDNNNode { void execute(mkldnn::stream strm) override; private: + void initCell(const std::shared_ptr& op); + void initSeq(const std::shared_ptr& op); void fillCellDesc(); void fillSeqDesc(); bool verifyWeightsPrecision(const InferenceEngine::Precision& layerPrec, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index 67e8c09dfeca3a..c9a53c79e07865 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -4,13 +4,12 @@ #include "mkldnn_tensoriterator_node.h" -#include -#include #include #include #include -#include #include +#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -18,28 +17,24 @@ using namespace InferenceEngine::details; namespace MKLDNNPlugin { -static InferenceEngine::LayerConfig make_plain_config(const InferenceEngine::CNNLayerPtr &layer) { - using namespace InferenceEngine; +static InferenceEngine::LayerConfig make_plain_config(const std::shared_ptr& op) { + InferenceEngine::LayerConfig config; - LayerConfig config; + for (size_t i = 0; i < op->get_input_size(); i++) { + const auto& dims = op->get_input_shape(i); + const auto prec = InferenceEngine::details::convertPrecision(op->get_input_element_type(i)); - for (const auto &in_w : layer->insData) { - const auto in = in_w.lock(); - - const auto dims = in->getDims(); - const auto prec = in->getPrecision(); - - DataConfig data_conf {}; - data_conf.desc = TensorDesc { prec, dims, TensorDesc::getLayoutByDims(dims) }; + InferenceEngine::DataConfig data_conf {}; + data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; config.inConfs.push_back(data_conf); } - for (const auto &out : layer->outData) { - const auto dims = out->getDims(); - const auto prec = out->getPrecision(); + for (size_t i = 0; i < op->get_output_size(); i++) { + const auto& dims = op->get_output_shape(i); + const auto prec = InferenceEngine::details::convertPrecision(op->get_output_element_type(i)); - DataConfig data_conf {}; - data_conf.desc = TensorDesc { prec, dims, TensorDesc::getLayoutByDims(dims) }; + InferenceEngine::DataConfig data_conf {}; + data_conf.desc = InferenceEngine::TensorDesc { prec, dims, InferenceEngine::TensorDesc::getLayoutByDims(dims) }; config.outConfs.push_back(data_conf); } @@ -50,7 +45,7 @@ static InferenceEngine::LayerConfig make_plain_config(const 
InferenceEngine::CNN class PortIteratorHelper : public PortMapHelper { public: PortIteratorHelper(const MKLDNNMemoryPtr &from, const MKLDNNMemoryPtr &to, bool sliced_src, - const InferenceEngine::TensorIterator::PortMap &slice_rule, const mkldnn::engine& eng) + const PortMap &slice_rule, const mkldnn::engine& eng) : sliced_src(sliced_src) { const auto &full_blob = sliced_src ? from : to; const auto &part_blob = !sliced_src ? from : to; @@ -186,52 +181,214 @@ class staticValueCheck : public PortChecker { } // namespace MKLDNNPlugin -MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +int getNumIteration(const std::shared_ptr& op, const std::vector& inputPortMap, const std::vector& outputPortMap) { + const auto isIterable = [](const PortMap& rule) { return rule.axis != -1; }; + + const auto getNumIterations = [](const PortMap& rule, const std::vector& dimensions) -> int { + const auto axis = rule.axis; + if (axis < 0 || static_cast(axis) >= dimensions.size()) { + IE_THROW() << R"(: Invalid "axis" value in an iteration component: )" + << rule.axis << ", dimensions number = " << dimensions.size() << " (out of range)"; + } + const auto space = dimensions[axis]; + const int start = static_cast((rule.start < 0 ? (space + 1) : 0) + rule.start); + const int end = static_cast((rule.end < 0 ? (space + 1) : 0) + rule.end); + + const auto stride = rule.stride; + if (stride == 0) { + IE_THROW() << R"(: Invalid "stride" value in an iteration component: )" << rule.stride << " (infinite loop)"; + } + const auto step = std::abs(stride); + + const auto src = stride < 0 ? end : start; + const auto dst = stride < 0 ? start : end; + const auto length = dst - src; + if (src < 0 || src >= dst || dst > static_cast(space) || length < step) { + IE_THROW() << R"(: Invalid "start"/"stride"/"end" values in an iteration component)" + << ": \"start\" = " << rule.start << ", \"stride\" = " << rule.stride << ", \"end\" = " << rule.end; + } + + if (length % step != 0) { + IE_THROW() << ": Each iteration must be the same size: length (" << length << ") is not divisible by step (" << step << ")"; + } + + return static_cast(length / step); + }; + + + int numIterations = 1; + bool isDefault = true; + for (const auto& rule : inputPortMap) { + if (!isIterable(rule)) { + continue; + } + + if (rule.from < 0 || rule.from >= static_cast(op->get_input_size())) { + IE_THROW() << R"(: Invalid "from" value: "from" = )" << rule.from + << " inputs number = " << op->get_input_size() << " (out of range)"; + } + + const auto currentNumIterations = getNumIterations(rule, op->get_input_shape(rule.from)); + if (isDefault) { + isDefault = false; + numIterations = currentNumIterations; + } else if (numIterations != currentNumIterations) { + IE_THROW() << ": There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations; + } + } + + for (const auto& rule : outputPortMap) { + if (!isIterable(rule)) { + continue; + } + + if (rule.from < 0 || rule.from >= static_cast(op->get_output_size())) { + IE_THROW() << R"(: Invalid "from" value: "from" = )" << rule.from + << " inputs number = " << op->get_output_size() << " (out of range)"; + } + + const auto currentNumIterations = getNumIterations(rule, op->get_output_shape(rule.from)); + if (isDefault) { + isDefault = false; + numIterations = currentNumIterations; + } else if (numIterations != currentNumIterations) { + IE_THROW() << ": 
There are at least two different iterations numbers: " << numIterations << " and " << currentNumIterations; + } + } + + return numIterations; +} + +bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (!one_of(op->get_type_info(), + ngraph::op::v0::TensorIterator::type_info, + ngraph::op::v5::Loop::type_info)) { + errorMessage = "Only opset1 TensorIterator or opset5 Loop operations are supported."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +MKLDNNTensorIteratorNode::MKLDNNTensorIteratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache), ngraphOp(op) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } +} void MKLDNNTensorIteratorNode::getSupportedDescriptors() { - auto *ti = dynamic_cast(getCnnLayer().get()); - if (ti == nullptr) - IE_THROW() << "Cannot convert to TensorIterator layer."; + auto tiOp = std::dynamic_pointer_cast(ngraphOp); + const std::shared_ptr body = tiOp->get_function(); + sub_graph.CreateGraph(body, ext_mng, weightCache); + + const auto &inMap = sub_graph.GetInputNodesMap(); + for (const auto ¶m : tiOp->get_function()->get_parameters()) { + auto inNode = inMap.find(param->get_friendly_name()); + if (inNode != inMap.end()) { + auto inMem = inNode->second->getChildEdgeAt(0)->getMemoryPtr(); + input_mem.push_back(inMem); + } + } - n_iter = getNumIteration(*ti); - sub_graph.CreateGraph(ti->body, ext_mng, weightCache); + const auto &outMap = sub_graph.GetOutputNodesMap(); + for (const auto &out : tiOp->get_function()->get_results()) { + auto prev = out->get_input_node_shared_ptr(0); + std::string inputID = prev->get_friendly_name(); + if (prev->get_output_size() > 1) { + inputID += "." 
+ std::to_string(out->get_input_source_output(0).get_index()); + } + auto outNode = outMap.find(inputID); + if (outNode != outMap.end()) { + auto outMem = outNode->second->getParentEdgeAt(0)->getMemoryPtr(); + output_mem.push_back(outMem); + } + } - // Try to detect inputs and outputs by indexes - const auto &in_map = sub_graph.GetInputNodes(); - for (const auto &in_data : ti->body.inputs) { - if (in_data->getName() == "const_holder") continue; + // Port map: outputs + for (const auto& desc : tiOp->get_output_descriptions()) { + auto body_output_idx = desc->m_body_value_index; + + std::string type_name = desc->get_type_info().name; + if (type_name == "ConcatOutputDescription") { + auto output_desc = ::ngraph::as_type_ptr(desc); + IE_ASSERT(output_desc != nullptr); + + outputPortMap.emplace_back(PortMap { + static_cast(output_desc->m_output_index), static_cast(body_output_idx), + static_cast(output_desc->m_axis), static_cast(output_desc->m_stride), + static_cast(output_desc->m_start), static_cast(output_desc->m_end), + static_cast(output_desc->m_part_size)}); + } else if (type_name == "BodyOutputDescription") { + auto output_desc = ::ngraph::as_type_ptr(desc); + IE_ASSERT(output_desc != nullptr); + + outputPortMap.emplace_back(PortMap { + static_cast(output_desc->m_output_index), static_cast(body_output_idx), -1, 1, 0, -1, 1}); + } else { + IE_THROW() << "Incorrect type of the output description."; + } + } - auto &in_node = in_map.at(in_data->getName()); - auto in_mem = in_node->getChildEdgeAt(0)->getMemoryPtr(); - input_mem.push_back(in_mem); + // Port map : inputs and back edges + for (const auto& desc : tiOp->get_input_descriptions()) { + auto body_input_index = desc->m_body_parameter_index; + + if (const auto slice_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(slice_desc->m_input_index), static_cast(body_input_index), + static_cast(slice_desc->m_axis), static_cast(slice_desc->m_stride), + static_cast(slice_desc->m_start), static_cast(slice_desc->m_end), + static_cast(slice_desc->m_part_size)}); + } else if (const auto merge_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(merge_desc->m_input_index), static_cast(body_input_index), -1, 1, 0, -1, 1}); + + auto body_output_idx = merge_desc->m_body_value_index; + + backEdges.emplace_back(PortMap { + static_cast(body_output_idx), static_cast(body_input_index), -1, 1, 0, -1, 1}); + } else if (const auto inv_desc = std::dynamic_pointer_cast(desc)) { + inputPortMap.emplace_back(PortMap { + static_cast(inv_desc->m_input_index), static_cast(body_input_index), -1, 1, 0, -1, 1}); + } else { + IE_THROW() << "Incorrect type of the input description."; + } } - // Assume that order of outputs in original TI and produces sub_graph is same - const auto &out_vec = sub_graph.GetOutputNodes(); - for (size_t i = 0; i < out_vec.size(); i++) { - auto out_mem = out_vec[i]->getParentEdgeAt(0)->getMemoryPtr(); - output_mem.push_back(out_mem); + n_iter = getNumIteration(ngraphOp, inputPortMap, outputPortMap); + + if (const auto loopOp = std::dynamic_pointer_cast(ngraphOp)) { + auto spec_port = loopOp->get_special_body_ports(); + if (spec_port.current_iteration_input_idx != -1) { + loopBodyCurrentIterationIdx.push_back(spec_port.current_iteration_input_idx); + } + if (spec_port.body_condition_output_idx != -1) { + loopBodyConditionOutputIdx = spec_port.body_condition_output_idx; + } + loopTripCountIdx = 0; + loopExecutionConditionIdx = 1; } + + config = 
make_plain_config(ngraphOp); } void MKLDNNTensorIteratorNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto config = make_plain_config(getCnnLayer()); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } void MKLDNNTensorIteratorNode::createPrimitive() { - auto ti = dynamic_cast(getCnnLayer().get()); - if (ti == nullptr) - IE_THROW() << "Cannot convert to TensorIterator layer."; - const auto &eng = getEngine(); - for (auto map_rule : ti->input_port_map) { + for (auto map_rule : inputPortMap) { auto &from_mem = getParentEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &to_mem = input_mem[map_rule.to]; @@ -241,7 +398,7 @@ void MKLDNNTensorIteratorNode::createPrimitive() { before_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, true, map_rule, eng)); } - for (auto map_rule : ti->output_port_map) { + for (auto map_rule : outputPortMap) { auto &to_mem = getChildEdgesAtPort(map_rule.from)[0]->getMemoryPtr(); auto &from_mem = output_mem[map_rule.to]; @@ -251,7 +408,7 @@ void MKLDNNTensorIteratorNode::createPrimitive() { after_mappers.emplace_back(new PortIteratorHelper(from_mem, to_mem, false, map_rule, eng)); } - for (auto map_rule : ti->back_edges) { + for (auto map_rule : backEdges) { auto from_mem = output_mem[map_rule.from]; auto to_mem = input_mem[map_rule.to]; @@ -259,38 +416,29 @@ void MKLDNNTensorIteratorNode::createPrimitive() { } // special purpose ports - constexpr auto key_cur_iter_port = "loop_body_current_iteration_idx"; - constexpr auto key_cond_port = "loop_body_condition_output_idx"; - constexpr auto key_trip_count_port = "loop_trip_count_idx"; - constexpr auto key_init_cond_port = "loop_execution_condition_idx"; - - auto iter_idx_ports = ti->GetParamAsInts(key_cur_iter_port, {}); - for (auto idx : iter_idx_ports) { + for (auto idx : loopBodyCurrentIterationIdx) { auto to_mem = input_mem[idx]; before_mappers.emplace_back(new IterCountPortHelper(to_mem, eng)); } - auto condition_port_idx = ti->GetParamAsInt(key_cond_port, -1); - if (condition_port_idx == -1) { + if (loopBodyConditionOutputIdx == -1) { continue_cond_check.reset(new staticValueCheck(true)); // always true } else { - auto mem = output_mem[condition_port_idx]; + auto mem = output_mem[loopBodyConditionOutputIdx]; continue_cond_check.reset(new asBoolCheck(mem)); } - auto trip_count_port_idx = ti->GetParamAsInt(key_trip_count_port, -1); - if (trip_count_port_idx == -1) { + if (loopTripCountIdx == -1) { trip_count_check.reset(new staticValueCheck(n_iter)); // use statically calculated num of iteration } else { - auto mem = getParentEdgesAtPort(trip_count_port_idx)[0]->getMemoryPtr(); + auto mem = getParentEdgesAtPort(loopTripCountIdx)[0]->getMemoryPtr(); trip_count_check.reset(new asIntCheck(mem)); } - auto init_cond_port_idx = ti->GetParamAsInt(key_init_cond_port, -1); - if (init_cond_port_idx == -1) { + if (loopExecutionConditionIdx == -1) { initial_cond_check.reset(new staticValueCheck(true)); } else { - auto mem = getParentEdgesAtPort(init_cond_port_idx)[0]->getMemoryPtr(); + auto mem = getParentEdgesAtPort(loopExecutionConditionIdx)[0]->getMemoryPtr(); initial_cond_check.reset(new asBoolCheck(mem)); } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h index 34821531659798..3f3dd96e6f1d7c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h
@@ -4,7 +4,6 @@
 #pragma once
 
-#include 
 #include 
 #include 
 #include 
@@ -13,6 +12,19 @@
 
 namespace MKLDNNPlugin {
 
+struct PortMap {
+    // Data map rule
+    int from; /**< Index of external data from ins/outs fields of node */
+    int to;   /**< Index of internal data in iterator body */
+
+    // Iteration rule
+    int axis;      /**< Axis to iterate through */
+    int stride;    /**< Stride to iterate through */
+    int start;     /**< Start index of iteration range */
+    int end;       /**< Last index of iteration range */
+    int part_size; /**< Part size which will be transferred to body subnetwork */
+};
+
 /**
  * Functor interface to perform some action with pointed tensors (captured in constructor)
  * Generally it's read, write or move data from specified tensors.
@@ -48,6 +60,7 @@ class MKLDNNTensorIteratorNode : public MKLDNNNode {
     MKLDNNTensorIteratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache);
     ~MKLDNNTensorIteratorNode() override = default;
 
+    static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
     void initSupportedPrimitiveDescriptors() override;
     void getSupportedDescriptors() override;
     void createPrimitive() override;
@@ -73,6 +86,19 @@ class MKLDNNTensorIteratorNode : public MKLDNNNode {
         trip_count_check,      /// < Perform check of trip count value. value >= -1
         initial_cond_check,    /// < Perform check of initial continue condition value. value [0, 1]
         continue_cond_check;   /// < Perform check of continue condition value of body. value [0, 1]
+
+    std::vector inputPortMap;  //!< Input ports map
+    std::vector outputPortMap; //!< Output ports map
+    std::vector backEdges;     //!< Back edges map
+
+    std::vector loopBodyCurrentIterationIdx;
+    int loopBodyConditionOutputIdx = -1;
+    int loopTripCountIdx = -1;
+    int loopExecutionConditionIdx = -1;
+
+    InferenceEngine::LayerConfig config;
+
+    const std::shared_ptr ngraphOp;
 };
 
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
index 9657f512a75e97..270839955cd3ac 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
@@ -64,19 +64,7 @@ std::vector disabledTestPatterns() {
         /* ********************************************************** TEMPORARILY DISABLED TESTS ********************************************************** */
         // shared SLT test
-        R"(.*GRUCellTest.*)",
-        R"(.*GRUSequenceTest.*)",
-        R"(.*StaticShapeLoopTest.*)",
-        R"(.*TrivialLoopTest.*)",
-        R"(.*LoopTest.*)",
-        R"(.*LSTMCellTest.*)",
-        R"(.*LSTMSequenceTest.*)",
-        R"(.*RNNCellTest.*)",
-        R"(.*RNNSequenceTest.*)",
-        R"(.*TensorIteratorTest.*)",
-
-        // shared subgraph test
-        R"(.*MultipleLSTMCellTest.*)",
+        R"(.*TensorIteratorCommonClip/TensorIteratorTest.*)"
     };
 
     // TODO [NM]: Disabled until BF16 transformer is migrated on CPU graph representation.
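
Note on the new Optimize*SequenceTransposes passes: they look for the Transpose(order={1,0,2}) -> Sequence -> Transpose(order={2,1,0,3}) pair that the TI-to-Sequences transformation inserts and, when the pattern matches, replace both Transposes with Reshapes so the sequence can execute with seq_axis = 0. The sketch below is illustrative only (it is not part of this patch): it builds such a pattern for the LSTM variant and runs the pass on it. The shapes, constant values, helper names and the standalone pass-manager wiring are assumptions made for the example.

// Illustrative sketch, assuming ngraph opset5 and the plugin's include path for
// ngraph_transformations/rnn_sequences_optimization.hpp.
#include <ngraph/ngraph.hpp>
#include <ngraph/opsets/opset5.hpp>
#include <ngraph/pass/manager.hpp>
#include "ngraph_transformations/rnn_sequences_optimization.hpp"

static std::shared_ptr<ngraph::Function> makeLstmSequencePattern() {
    using namespace ngraph;
    const size_t T = 10, N = 2, DC = 16, SC = 32, D = 1;  // seq_len, batch, input size, hidden size, num directions

    auto X  = std::make_shared<opset5::Parameter>(element::f32, Shape{T, N, DC});  // time-major input
    auto H0 = std::make_shared<opset5::Parameter>(element::f32, Shape{N, D, SC});
    auto C0 = std::make_shared<opset5::Parameter>(element::f32, Shape{N, D, SC});
    auto len = opset5::Constant::create(element::i32, Shape{N}, std::vector<int>(N, T));
    auto W = opset5::Constant::create(element::f32, Shape{D, 4 * SC, DC}, {0.f});
    auto R = opset5::Constant::create(element::f32, Shape{D, 4 * SC, SC}, {0.f});
    auto B = opset5::Constant::create(element::f32, Shape{D, 4 * SC}, {0.f});

    // Transpose in front of the sequence: [T, N, DC] -> [N, T, DC], order {1, 0, 2}
    auto orderIn = opset5::Constant::create(element::i32, Shape{3}, {1, 0, 2});
    auto transposeIn = std::make_shared<opset5::Transpose>(X, orderIn);

    auto lstm = std::make_shared<opset5::LSTMSequence>(transposeIn, H0, C0, len, W, R, B, SC,
                                                       op::RecurrentSequenceDirection::FORWARD);

    // Transpose behind the sequence output: [N, D, T, SC] -> [T, D, N, SC], order {2, 1, 0, 3}
    auto orderOut = opset5::Constant::create(element::i32, Shape{4}, {2, 1, 0, 3});
    auto transposeOut = std::make_shared<opset5::Transpose>(lstm->output(0), orderOut);

    return std::make_shared<Function>(OutputVector{transposeOut}, ParameterVector{X, H0, C0});
}

// With the pattern above, getSeqAxis() detects seqAxis == 0 and transform() is expected to
// replace both Transpose nodes with Reshape nodes around the LSTMSequence.
void optimizeSequenceTransposes(const std::shared_ptr<ngraph::Function>& f) {
    ngraph::pass::Manager manager;
    manager.register_pass<MKLDNNPlugin::OptimizeLSTMSequenceTransposes>();
    manager.run_passes(f);
}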