From 6cfba2b72ee5c8ccc413f1629681527d9b117e4a Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Tue, 9 Mar 2021 17:05:06 +0300 Subject: [PATCH] [CPU] Permute node migration on nGraph. (#15) --- .../src/mkldnn_plugin/CMakeLists.txt | 2 +- .../src/mkldnn_plugin/mkldnn_graph.cpp | 2 +- .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 106 ++-- .../mkldnn_plugin/mkldnn_graph_optimizer.h | 4 +- .../src/mkldnn_plugin/mkldnn_memory.cpp | 8 +- .../src/mkldnn_plugin/mkldnn_node.cpp | 6 +- .../src/mkldnn_plugin/mkldnn_node.h | 6 +- .../src/mkldnn_plugin/mkldnn_plugin.cpp | 3 +- .../nodes/mkldnn_concat_node.cpp | 571 +++++++++--------- .../mkldnn_plugin/nodes/mkldnn_concat_node.h | 1 + .../mkldnn_plugin/nodes/mkldnn_mvn_node.cpp | 128 ++-- .../src/mkldnn_plugin/nodes/mkldnn_mvn_node.h | 3 +- ...ute_node.cpp => mkldnn_transpose_node.cpp} | 276 +++++---- ...permute_node.h => mkldnn_transpose_node.h} | 13 +- .../{permute.cpp => transpose.cpp} | 16 +- ...reorder.hpp => fuse_transpose_reorder.hpp} | 12 +- ...reorder.cpp => fuse_transpose_reorder.cpp} | 112 ++-- 17 files changed, 644 insertions(+), 625 deletions(-) rename inference-engine/src/mkldnn_plugin/nodes/{mkldnn_permute_node.cpp => mkldnn_transpose_node.cpp} (65%) rename inference-engine/src/mkldnn_plugin/nodes/{mkldnn_permute_node.h => mkldnn_transpose_node.h} (70%) rename inference-engine/tests/functional/plugin/cpu/single_layer_tests/{permute.cpp => transpose.cpp} (91%) rename inference-engine/tests/functional/plugin/cpu/subgraph_tests/include/{fuse_permute_reorder.hpp => fuse_transpose_reorder.hpp} (69%) rename inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/{fuse_permute_reorder.cpp => fuse_transpose_reorder.cpp} (62%) diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index 0c2ad91af1b7be..7ab519456a58ab 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -29,7 +29,7 @@ set(LAYERS # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_lrn_node.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_memory_node.cpp # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pad_node.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_permute_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_transpose_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pooling_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_quantize_node.cpp ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reorder_node.cpp diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 1fbbae046fb08a..20e760a4318c0d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -1182,7 +1182,7 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa InsertNode(edge, newReorder, true); // Using the method MKLDNNEdge::getDesc() we can check that input and output tensor descriptors are equal. - // Due to the specificity of MKLDNNGraphOptimizer::MergePermuteAndReorder() that isOptimized flag uses, we shouldn't do these checks. + // Due to the specificity of MKLDNNGraphOptimizer::MergeTransposeAndReorder() that isOptimized flag uses, we shouldn't do these checks. 
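For context, the isOptimized flag mentioned in this comment is the one set by MergeTransposeAndReorder() further down in this patch; a minimal sketch of that call site (taken from the optimizer hunk below, variable names as they appear there) shows how an optimized reorder is requested:

    // An "optimized" reorder only reinterprets the layout without physically moving data
    // (see the MergeTransposeAndReorder() hunk below), which is why the getDesc() checks
    // here are intentionally skipped for it.
    auto reorderNode = graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc,
                                           true /*isOptimized*/);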
if (!isOptimized) { newReorder->getParentEdgeAt(0)->getDesc(); newReorder->getChildEdgeAt(0)->getDesc(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index ceda015d1b7ca0..76f12f8e02f038 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -14,7 +14,7 @@ #include "nodes/mkldnn_bin_conv_node.h" #include "nodes/mkldnn_quantize_node.h" #include "nodes/mkldnn_mvn_node.h" -#include +#include #include "nodes/mkldnn_interpolate_node.h" #include "nodes/mkldnn_input_node.h" @@ -171,7 +171,7 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap graph.RemoveDroppedNodes(); #endif - MergePermuteAndReorder(graph); + MergeTransposeAndReorder(graph); graph.RemoveDroppedNodes(); graph.RemoveDroppedEdges(); @@ -1677,43 +1677,6 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) { -// for (auto input : graph.GetNodes()) { -// if (input->getType() != Input) { -// continue; -// } -// -// auto inTD = input->getCnnLayer().get()->outData[0]->getTensorDesc(); -// for (size_t i = 0; i < input->getChildEdges().size(); i++) { -// auto inputEdge = input->getChildEdgeAt(i); -// auto convert = inputEdge->getChild(); -// if (convert->getType() == Convert) { -// for (int j = 0; j < convert->getChildEdges().size(); j++) { -// auto convertEdge = convert->getChildEdgeAt(j); -// auto reorder = convertEdge->getChild(); -// if (reorder->getType() == Reorder) { -// MKLDNNReorderNode* rn = dynamic_cast(reorder.get()); -// auto rnOutput = rn->getOutput(); -// if (inTD.getPrecision() == rnOutput.getPrecision() && -// inTD.getLayout() == rnOutput.getLayout() && -// inTD.getDims() == rnOutput.getDims()) { -// auto avterReorder = reorder->getChildEdgeAt(0)->getChild(); -// auto oldEdgeNum = reorder->getChildEdgeAt(0)->getOutputNum(); -// reorder->getChildEdgeAt(0)->drop(); -// convertEdge->drop(); -// -// MKLDNNEdgePtr newEdge(new MKLDNNEdge(input, avterReorder, i, oldEdgeNum)); -// graph.GetEdges().push_back(newEdge); -// input->addEdge(newEdge); -// j--; -// } -// } -// } -// } -// } -// } -} - // TODO [NM]: reuse common/general_utils version bool MKLDNNGraphOptimizer::IsOneOf(Type type, std::vector types) { for (auto tp : types) { @@ -1978,32 +1941,32 @@ void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { +void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == Permute && node->getChildEdges().size() == 1; + return node->getType() == Transpose && node->getChildEdges().size() == 1; }; auto isSutableChildNode = [](MKLDNNNodePtr node) { return node->getType() == Reorder && node->getChildEdges().size() == 1; }; - // Method checkAscendingSummaryOrder() checks that after the sequential execution of Permute and Reorder nodes, - // the order of the elements in the memory will not change. In other words, that Permute+Reorder is identical permutation. + // Method checkAscendingSummaryOrder() checks that after the sequential execution of Transpose and Reorder nodes, + // the order of the elements in the memory will not change. In other words, that Transpose+Reorder is identical permutation. 
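Before the lambda itself, a simplified standalone sketch of the identity test it performs: compose the Transpose permutation with the Reorder permutation and verify that every element maps back to its original position. The layout-order bookkeeping done by the real lambda is omitted here (plain layouts assumed), and the helper name is illustrative only.

    #include <cstddef>
    #include <vector>

    // True when applying transposeOrder and then reorderOrder yields the identity,
    // i.e. summaryOrder[i] == i for every position (plain, non-blocked layouts).
    static bool isIdentityPermutation(const std::vector<size_t>& transposeOrder,
                                      const std::vector<size_t>& reorderOrder) {
        if (transposeOrder.size() != reorderOrder.size())
            return false;
        for (size_t i = 0; i < transposeOrder.size(); i++) {
            if (reorderOrder[transposeOrder[i]] != i)
                return false;
        }
        return true;
    }

    // Example matching the comment below: Transpose(order=0312), i.e. {0, 3, 1, 2},
    // followed by Reorder(nchw->nhwc), i.e. {0, 2, 3, 1}, composes to the identity:
    //   isIdentityPermutation({0, 3, 1, 2}, {0, 2, 3, 1}) == true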
auto checkAscendingSummaryOrder = [](std::shared_ptr &parentNode, std::shared_ptr &childNode) -> bool { - auto* permuteNode = dynamic_cast(parentNode.get()); + auto* transposeNode = dynamic_cast(parentNode.get()); auto* reorderNode = dynamic_cast(childNode.get()); - if (!permuteNode || !reorderNode) { + if (!transposeNode || !reorderNode) { return false; } - auto& permuteOrder = permuteNode->getOrder(); - auto& layoutOrder = permuteNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); + auto& transposeOrder = transposeNode->getOrder(); + auto& layoutOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); auto& inOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc.getBlockingDesc().getOrder(); auto& outOrder = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc.getBlockingDesc().getOrder(); - if (permuteOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { + if (transposeOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { return false; } @@ -2013,10 +1976,10 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { revLayoutOrder[layoutOrder[i]] = i; } - // newPermuteOrder - Permute layout-aware permutation - auto newPermuteOrder = SizeVector(permuteOrder.size()); - for (int i = 0; i < newPermuteOrder.size(); i++) { - newPermuteOrder[i] = layoutOrder[permuteOrder[revLayoutOrder[i]]]; + // newTransposeOrder - Transpose layout-aware permutation + auto newTransposeOrder = SizeVector(transposeOrder.size()); + for (int i = 0; i < newTransposeOrder.size(); i++) { + newTransposeOrder[i] = layoutOrder[transposeOrder[revLayoutOrder[i]]]; } // reorderOrder - Reorder layout-aware permutation @@ -2030,13 +1993,13 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { } } - // summaryOrder - resulting Permute+Reorder permutation - auto summaryOrder = SizeVector(permuteOrder.size()); + // summaryOrder - resulting Transpose+Reorder permutation + auto summaryOrder = SizeVector(transposeOrder.size()); for (int i = 0; i < summaryOrder.size(); i++) { - summaryOrder[i] = reorderOrder[newPermuteOrder[i]]; + summaryOrder[i] = reorderOrder[newTransposeOrder[i]]; } - // check that Permute+Reorder is the identical permutation + // check that Transpose+Reorder is the identical permutation for (int i = 0; i < summaryOrder.size(); i++) { if (summaryOrder[i] != i) { return false; @@ -2046,22 +2009,34 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { return true; }; - // Permute and Reorder do opposite permutation to each other. + // Transpose and Reorder do opposite permutation to each other. // Example: - // chain [physical layout: NCHW, logical layout: NCHW] -> Permute(order=0312) -> [physical layout: NWCH, logical layout: NCHW] -> + // chain [physical layout: NCHW, logical layout: NCHW] -> Transpose(order=0312) -> [physical layout: NWCH, logical layout: NCHW] -> // Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true) // which will just reinterprets layout without physical change of the memory. // Two cases are possible: // 1) inPrec = outPrec - // In this case, we replace Permute+Reorder pattern with a new Reorder that does nothing. 
+ // In this case, we replace Transpose+Reorder pattern with a new Reorder that does nothing. // 2) inPrec != outPrec - // As in the first case, we also replace Permute+Reorder pattern with a new Reorder. + // As in the first case, we also replace Transpose+Reorder pattern with a new Reorder. // Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec) // to the output precision (outPrec) - auto mergePermuteAndReorder = [&](std::shared_ptr& parentNode, std::shared_ptr& childNode) { - auto parentParentNode = parentNode->getParentEdgeAt(0)->getParent(); + auto mergeTransposeAndReorder = [&](std::shared_ptr& parentNode, std::shared_ptr& childNode) { + auto parentParentNode = parentNode->getParentEdgesAtPort(0)[0]->getParent(); + auto parentParentConstNode = parentNode->getParentEdgesAtPort(1)[0]->getParent(); auto childChildNode = childNode->getChildEdgeAt(0)->getChild(); + auto &remEdge = parentParentConstNode->getChildEdgeAt(0); + remEdge->drop(); + auto& edges = graph.GetEdges(); + for (auto it = edges.begin(); it != edges.end(); it++) { + if ((*it) == remEdge) { + edges.erase(it); + parentParentConstNode->remove(); + break; + } + } + graph.DropNode(parentNode); graph.DropNode(childNode); @@ -2085,6 +2060,9 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { break; } } + if (!edge) { + IE_THROW() << "Transpose node '" << parentNode->getName() << "' has invalid edges."; + } auto reorderNode = graph.InsertReorder(edge, reorderlayerName, reorderInDesc, reorderOutDesc, true); @@ -2111,7 +2089,7 @@ void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { } if (checkAscendingSummaryOrder(parentNode, childNode)) { - mergePermuteAndReorder(parentNode, childNode); + mergeTransposeAndReorder(parentNode, childNode); } } -} \ No newline at end of file +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h index 94e0a50eade228..f19dbc1743439e 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h @@ -36,14 +36,12 @@ class MKLDNNGraphOptimizer { void FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph); void DropDoubleReorders(MKLDNNGraph& graph); - void DropConvertReorder(MKLDNNGraph& graph); - void AddConvertToReorder(MKLDNNGraph &graph); void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph); void FuseBroadcastAndEltwise(MKLDNNGraph &graph); void FuseEltwiseAndSimple(MKLDNNGraph &graph); void FuseScaleShiftAndQuantize(MKLDNNGraph &graph); void FuseClampAndQuantize(MKLDNNGraph &graph); - void MergePermuteAndReorder(MKLDNNGraph &graph); + void MergeTransposeAndReorder(MKLDNNGraph &graph); bool IsOneOf(Type type, std::vector types); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index ce7afca2e65ddb..28d8dd714682cf 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -489,8 +489,8 @@ static const std::map> form_tags_by mkldnn::memory::format_tag::aBCde4c8b2c, }}, {6, { // Popular mkldnn::memory::format_tag::abcdef, // plain - mkldnn::memory::format_tag::acbdef, // permuted - mkldnn::memory::format_tag::defcab, // permuted + mkldnn::memory::format_tag::acbdef, // permute + mkldnn::memory::format_tag::defcab, // permute mkldnn::memory::format_tag::aBcdef16b, // blocked 16c mkldnn::memory::format_tag::aBCdef16b16c, 
@@ -742,7 +742,7 @@ MKLDNNMemoryDesc::operator InferenceEngine::TensorDesc() const { MKLDNNMemory::convertToIePrec(desc.data_type()), SizeVector {begin(dims), end(dims)}, ie_blk_desc }; - // TODO: BLOCKED is the most common layout which covers all other permuted layout like NHWC. + // TODO: BLOCKED is the most common layout which covers all other permute layout like NHWC. // But for some cases we have to specify it more correctly.. may be.. or just keep // auto detected layout in constructor of TensorDesc. return res; @@ -809,7 +809,7 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const TensorDesc& tDesc): is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); } - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to permute blocked dims + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims // and may be we can achieve correct "descending strides" form which allow conversion. if (!is_descending_strides) IE_THROW() << "Unsupported case for conversion"; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 3f0fb24bab45db..e4a99d65d9825e 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include @@ -157,7 +157,7 @@ static const InferenceEngine::details::caseless_unordered_map // { "SoftMax", SoftMax }, // { "Split", Split }, // { "Slice", Split }, -// { "Concat", Concatenation }, + { "Concat", Concatenation }, // { "Deconvolution", Deconvolution }, // { "Eltwise", Eltwise }, // { "Mod", Eltwise }, @@ -171,7 +171,7 @@ static const InferenceEngine::details::caseless_unordered_map // { "BatchNormalization", BatchNormalization }, // { "Flatten", Flatten }, // { "Pad", Pad }, -// { "Permute", Permute }, + { "Transpose", Transpose }, // { "Copy", Copy }, // { "LSTMCell", RNNCell }, // { "GRUCell", RNNCell }, diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 1eac30438449ec..a9925e2aff1c91 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -61,7 +61,7 @@ enum Type { DepthToSpace, Flatten, Pad, - Permute, + Transpose, SpaceToDepth, StridedSlice, Copy, @@ -209,8 +209,8 @@ static std::string NameFromType(Type type) { return "Flatten"; case Pad: return "Pad"; - case Permute: - return "Permute"; + case Transpose: + return "Transpose"; case SpaceToDepth: return "SpaceToDepth"; case StridedSlice: diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 92699c19a84624..431395c811d4cd 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -240,7 +240,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { pass_config->set_callback( [](const_node_ptr &node) -> bool { - return MKLDNNMVNNode::checkAxesSuitability(node); + std::string errorMessage; + return MKLDNNMVNNode::isSupportedOperation(node, errorMessage); }); pass_config->set_callback( diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp index ec8b36a3138c3f..a0f414ff179618 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp @@ -26,291 +26,304 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; + +bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto concatOp = ngraph::as_type_ptr(op); + if (!concatOp) { + errorMessage = "Node is not an instance of the Concat operation."; + return false; + } + } catch (...) { + return false; + } + return true; +} + MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) {} + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + + auto concatOp = ngraph::as_type_ptr(op); + auto axis = concatOp->get_axis(); + if (axis < 0) { + this->axis = concatOp->get_input_shape(0).size() - axis; + } else { + this->axis = axis; + } +} void MKLDNNConcatNode::getSupportedDescriptors() { - IE_THROW() << "Not implemented"; -// TODO [NM]: reimplement w/o using CNNLayer -// auto * conLayer = dynamic_cast(getCnnLayer().get()); -// -// if (conLayer == nullptr) -// IE_THROW() << "Cannot convert concat layer."; -// -// axis = conLayer->_axis; -// -// if (getParentEdges().empty()) -// IE_THROW() << "Incorrect number of input edges for layer " << getName(); -// if (getChildEdges().empty()) -// IE_THROW() << "Incorrect number of output edges for layer " << getName(); -// auto& firstParentDims = getParentEdgeAt(0)->getDims(); -// for (size_t i = 1; i < getParentEdges().size(); i++) { -// auto& dims = getParentEdgeAt(i)->getDims(); -// bool incorrectDims = false; -// for (size_t j = 0; j < firstParentDims.ndims(); j++) { -// if (j == axis) -// continue; -// if (dims.ndims() != firstParentDims.ndims() || firstParentDims[j] != dims[j]) { -// incorrectDims = true; -// break; -// } -// } -// if (incorrectDims || firstParentDims.ndims() == 0) { -// IE_THROW() << "Incorrect input dimensions for concat node " << getName(); -// } -// } + auto& firstParentDims = getParentEdgeAt(0)->getDims(); + for (size_t i = 1; i < getParentEdges().size(); i++) { + auto& dims = getParentEdgeAt(i)->getDims(); + bool incorrectDims = false; + for (size_t j = 0; j < firstParentDims.ndims(); j++) { + if (j == axis) + continue; + if (dims.ndims() != firstParentDims.ndims() || firstParentDims[j] != dims[j]) { + incorrectDims = true; + break; + } + } + if (incorrectDims || firstParentDims.ndims() == 0) { + IE_THROW() << "Incorrect input dimensions for concat node " << getName(); + } + } } void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { - IE_THROW() << "Not implemented"; - // TODO [NM]: reimplement w/o using CNNLayer -// if (!supportedPrimitiveDescriptors.empty()) -// return; -// -// inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); -// bool isMixedPrecision = false; -// for (int i = 1; i < getCnnLayer()->insData.size(); i++) { -// if (getCnnLayer()->insData[0].lock()->getPrecision() != getCnnLayer()->insData[i].lock()->getPrecision()) { -// isMixedPrecision = true; -// break; -// } -// } -// -// // MKLDNN doesn't support different precision on inputs so fallback on FP32 in such case -// if (isMixedPrecision) -// inputPrecision = Precision::FP32; -// -// // Concat node supports int8 implementations only for NHWC and NDHWC layouts -// if (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) { -// int ndims = 
getChildEdgeAt(0)->getDims().ndims(); -// if (ndims != 2 && ndims != 4 && ndims != 5) -// inputPrecision = Precision::FP32; -// } -// -// // MKLDNN supports only equal precisions for inputs and output -// outputPrecision = inputPrecision; -// -// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); -// auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); -// -// MKLDNNDims dstDims = getChildEdgeAt(0)->getDims(); -// InferenceEngine::LayerConfig config; -// config.dynBatchSupport = true; -// -// for (size_t i = 0; i < getParentEdges().size(); i++) { -// auto parentEdge = getParentEdgeAt(i); -// -// InferenceEngine::DataConfig dataConfig; -// dataConfig.inPlace = -1; -// dataConfig.constant = false; -// auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? parentEdge->getDims().ndims() == 2 ? memory::format_tag::nc : -// parentEdge->getDims().ndims() == 4 ? memory::format_tag::nhwc : -// memory::format_tag::ndhwc -// : memory::format_tag::any; -// -// dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(parentEdge->getDims(), inputDataType, fmt)); -// config.inConfs.push_back(dataConfig); -// } -// -// auto dims = getChildEdgeAt(0)->getDims(); -// -// config.outConfs.resize(1); -// config.outConfs[0].inPlace = -1; -// config.outConfs[0].constant = false; -// if ((!isMixedPrecision && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) || axis != 1) { -// auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? dims.ndims() == 2 ? memory::format_tag::nc : -// dims.ndims() == 4 ? memory::format_tag::nhwc : -// memory::format_tag::ndhwc -// : MKLDNNMemory::GetPlainFormat(dims); -// -// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(dims, outputDataType, fmt)); -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, fmt); -// -// if (inputPrecision != Precision::U8 && inputPrecision != Precision::I8) { -// if (dims.ndims() == 4) { -// if (dims[1] % 8 == 0) { -// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( -// MKLDNNMemoryDesc(dims, outputDataType, memory::format_tag::nChw8c)); -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, memory::format_tag::nChw8c); -// -// if (dims[1] % 16 == 0) { -// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( -// MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nChw16c)); -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nChw16c); -// } -// } -// } else if (dims.ndims() == 5) { -// if (dims[1] % 8 == 0) { -// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( -// MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nCdhw8c)); -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nCdhw8c); -// -// if (dims[1] % 16 == 0) { -// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( -// MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nCdhw16c)); -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nCdhw16c); -// } -// } -// } -// } -// } -// -// if (axis != 1) -// return; -// -// auto numOfDim = static_cast(dstDims.ndims()); -// -// SizeVector order(numOfDim); -// SizeVector offsets(numOfDim, 0lu); -// size_t offset = (std::numeric_limits::max)(); -// for 
(size_t i = 0; i < numOfDim; i++) { -// order[i] = i; -// } -// -// if (outputPrecision == Precision::I8 || outputPrecision == Precision::U8) { -// if (numOfDim == 4) { -// // Here we assume NHWC layout (channels are the last) -// -// order = {0, 2, 3, 1}; -// offsets = {0, 0, 0, 0}; -// -// SizeVector blkDims = dstDims.ToSizeVector(); -// blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[1] }; -// -// SizeVector strides(numOfDim); -// strides.resize(numOfDim); -// // C is the last in NHWC, so all strides are max() -// for (size_t i = 0; i < numOfDim; i++) { -// strides[i] = (std::numeric_limits::max)(); -// } -// -// config.outConfs[0].desc = TensorDesc(outputPrecision, -// dstDims.ToSizeVector(), -// { blkDims, order, offset, offsets, strides }); -// for (size_t i = 0; i < getParentEdges().size(); i++) { -// auto parentEdge = getParentEdgeAt(i); -// -// SizeVector blkDims = parentEdge->getDims().ToSizeVector(); -// blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[1] }; -// -// config.inConfs[i].inPlace = -1; // Change to 0 here if inplace concat is supported for NHWC in mkldnn -// -// config.inConfs[i].desc = TensorDesc(inputPrecision, parentEdge->getDims().ToSizeVector(), -// {blkDims, order, offset, offsets, strides}); -// } -// -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nhwc); -// -// return; -// } else if (numOfDim == 5) { -// // Here we assume NDHWC layout (channels are the last) -// -// order = {0, 2, 3, 4, 1}; -// offsets = {0, 0, 0, 0, 0}; -// -// SizeVector blkDims = dstDims.ToSizeVector(); -// blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[4], blkDims[1] }; -// -// SizeVector strides(numOfDim); -// strides.resize(numOfDim); -// // C is the last in NDHWC, so all strides are max() -// for (size_t i = 0; i < numOfDim; i++) { -// strides[i] = (std::numeric_limits::max)(); -// } -// -// config.outConfs[0].desc = TensorDesc(outputPrecision, -// dstDims.ToSizeVector(), -// { blkDims, order, offset, offsets, strides }); -// for (size_t i = 0; i < getParentEdges().size(); i++) { -// auto parentEdge = getParentEdgeAt(i); -// -// SizeVector blkDims = parentEdge->getDims().ToSizeVector(); -// blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[4], blkDims[1] }; -// -// config.inConfs[i].inPlace = -1; // Change to 0 here if inplace concat is supported for NDHWC in mkldnn -// -// config.inConfs[i].desc = TensorDesc(inputPrecision, parentEdge->getDims().ToSizeVector(), -// {blkDims, order, offset, offsets, strides}); -// } -// -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::ndhwc); -// -// return; -// } -// } -// -// SizeVector strides(numOfDim); -// strides[numOfDim - 1] = 1; -// for (size_t i = 2; i <= numOfDim; i++) { -// if (numOfDim - i < axis) { -// strides[numOfDim - i] = (std::numeric_limits::max)(); -// } else { -// strides[numOfDim - i] = strides[numOfDim - i + 1] * dstDims[numOfDim - i + 1]; -// } -// } -// -// config.outConfs[0].desc = TensorDesc( -// MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType), -// dstDims.ToSizeVector(), -// {dstDims.ToSizeVector(), order, offset, offsets, strides}); -// for (size_t i = 0; i < getParentEdges().size(); i++) { -// auto parentEdge = getParentEdgeAt(i); -// config.inConfs[i].inPlace = 0; -// config.inConfs[i].desc = TensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(inputDataType), parentEdge->getDims().ToSizeVector(), -// {parentEdge->getDims().ToSizeVector(), order, offset, offsets, 
strides}); -// } -// -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemory::Convert(config.outConfs[0].desc.getLayout())); -// -// if (numOfDim == 4lu || numOfDim == 5lu) { -// size_t blkDimsLen = numOfDim + 1; -// order.resize(blkDimsLen); -// for (size_t i = 0; i < numOfDim; i++) { -// order[i] = i; -// } -// order[numOfDim] = 1lu; -// offsets = SizeVector(blkDimsLen, 0lu); -// -// // nChw8c, nChw16c, nCdhw8c, nCdhw16c -// for (size_t sizeS : {8lu, 16lu}) { -// SizeVector blkDims = dstDims.ToSizeVector(); -// if (blkDims[1] % sizeS) -// continue; -// blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu); -// blkDims.push_back(sizeS); -// -// strides.resize(blkDimsLen); -// strides[blkDimsLen - 1] = 1; -// for (size_t i = 2lu; i <= blkDimsLen; i++) { -// if (blkDimsLen - i < axis) { -// strides[blkDimsLen - i] = (std::numeric_limits::max)(); -// } else { -// strides[blkDimsLen - i] = strides[blkDimsLen - i + 1] * blkDims[blkDimsLen - i + 1]; -// } -// } -// config.outConfs[0].desc = TensorDesc( -// MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType), -// dstDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); -// -// bool canInplace = true; -// for (size_t i = 0lu; canInplace && i < getParentEdges().size(); i++) { -// auto parentEdge = getParentEdgeAt(i); -// blkDims = parentEdge->getDims().ToSizeVector(); -// if (blkDims[1] % sizeS) -// canInplace = false; -// -// blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu); -// blkDims.push_back(sizeS); -// config.inConfs[i].desc = TensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(inputDataType), parentEdge->getDims().ToSizeVector(), -// {blkDims, order, offset, offsets, strides}); -// } -// if (canInplace) { -// auto dstFormat = numOfDim == 4lu ? sizeS == 8lu ? mkldnn::memory::format_tag::nChw8c : mkldnn::memory::format_tag::nChw16c -// : sizeS == 8lu ? 
mkldnn::memory::format_tag::nCdhw8c : mkldnn::memory::format_tag::nCdhw16c; -// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, dstFormat); -// } -// } -// } + if (!supportedPrimitiveDescriptors.empty()) + return; + + auto& originInputPrecisions = getOriginalInputPrecisions(); + inputPrecision = originInputPrecisions[0]; + bool isMixedPrecision = false; + for (int i = 1; i < getOriginalInputsNumber(); i++) { + if (originInputPrecisions[0] != originInputPrecisions[i]) { + isMixedPrecision = true; + break; + } + } + + // MKLDNN doesn't support different precision on inputs so fallback on FP32 in such case + if (isMixedPrecision) + inputPrecision = Precision::FP32; + + // Concat node supports int8 implementations only for NHWC and NDHWC layouts + if (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) { + int ndims = getChildEdgeAt(0)->getDims().ndims(); + if (ndims != 2 && ndims != 4 && ndims != 5) + inputPrecision = Precision::FP32; + } + + // MKLDNN supports only equal precisions for inputs and output + outputPrecision = inputPrecision; + + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); + + MKLDNNDims dstDims = getChildEdgeAt(0)->getDims(); + InferenceEngine::LayerConfig config; + config.dynBatchSupport = true; + + for (size_t i = 0; i < getParentEdges().size(); i++) { + auto parentEdge = getParentEdgeAt(i); + + InferenceEngine::DataConfig dataConfig; + dataConfig.inPlace = -1; + dataConfig.constant = false; + auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? parentEdge->getDims().ndims() == 2 ? memory::format_tag::nc : + parentEdge->getDims().ndims() == 4 ? memory::format_tag::nhwc : + memory::format_tag::ndhwc + : memory::format_tag::any; + + dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(parentEdge->getDims(), inputDataType, fmt)); + config.inConfs.push_back(dataConfig); + } + + auto dims = getChildEdgeAt(0)->getDims(); + + config.outConfs.resize(1); + config.outConfs[0].inPlace = -1; + config.outConfs[0].constant = false; + if ((!isMixedPrecision && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) || axis != 1) { + auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? dims.ndims() == 2 ? memory::format_tag::nc : + dims.ndims() == 4 ? 
memory::format_tag::nhwc : + memory::format_tag::ndhwc + : MKLDNNMemory::GetPlainFormat(dims); + + config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(dims, outputDataType, fmt)); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, fmt); + + if (inputPrecision != Precision::U8 && inputPrecision != Precision::I8) { + if (dims.ndims() == 4) { + if (dims[1] % 8 == 0) { + config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( + MKLDNNMemoryDesc(dims, outputDataType, memory::format_tag::nChw8c)); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, memory::format_tag::nChw8c); + + if (dims[1] % 16 == 0) { + config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( + MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nChw16c)); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nChw16c); + } + } + } else if (dims.ndims() == 5) { + if (dims[1] % 8 == 0) { + config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( + MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nCdhw8c)); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nCdhw8c); + + if (dims[1] % 16 == 0) { + config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( + MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nCdhw16c)); + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nCdhw16c); + } + } + } + } + } + + if (axis != 1) + return; + + auto numOfDim = static_cast(dstDims.ndims()); + + SizeVector order(numOfDim); + SizeVector offsets(numOfDim, 0lu); + size_t offset = (std::numeric_limits::max)(); + for (size_t i = 0; i < numOfDim; i++) { + order[i] = i; + } + + if (outputPrecision == Precision::I8 || outputPrecision == Precision::U8) { + if (numOfDim == 4) { + // Here we assume NHWC layout (channels are the last) + + order = {0, 2, 3, 1}; + offsets = {0, 0, 0, 0}; + + SizeVector blkDims = dstDims.ToSizeVector(); + blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[1] }; + + SizeVector strides(numOfDim); + strides.resize(numOfDim); + // C is the last in NHWC, so all strides are max() + for (size_t i = 0; i < numOfDim; i++) { + strides[i] = (std::numeric_limits::max)(); + } + + config.outConfs[0].desc = TensorDesc(outputPrecision, + dstDims.ToSizeVector(), + { blkDims, order, offset, offsets, strides }); + for (size_t i = 0; i < getParentEdges().size(); i++) { + auto parentEdge = getParentEdgeAt(i); + + SizeVector blkDims = parentEdge->getDims().ToSizeVector(); + blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[1] }; + + config.inConfs[i].inPlace = -1; // Change to 0 here if inplace concat is supported for NHWC in mkldnn + + config.inConfs[i].desc = TensorDesc(inputPrecision, parentEdge->getDims().ToSizeVector(), + {blkDims, order, offset, offsets, strides}); + } + + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nhwc); + + return; + } else if (numOfDim == 5) { + // Here we assume NDHWC layout (channels are the last) + + order = {0, 2, 3, 4, 1}; + offsets = {0, 0, 0, 0, 0}; + + SizeVector blkDims = dstDims.ToSizeVector(); + blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[4], blkDims[1] }; + + SizeVector strides(numOfDim); + strides.resize(numOfDim); + // C is the last in NDHWC, so all strides are max() + for (size_t i = 0; i < numOfDim; i++) { + 
strides[i] = (std::numeric_limits::max)(); + } + + config.outConfs[0].desc = TensorDesc(outputPrecision, + dstDims.ToSizeVector(), + { blkDims, order, offset, offsets, strides }); + for (size_t i = 0; i < getParentEdges().size(); i++) { + auto parentEdge = getParentEdgeAt(i); + + SizeVector blkDims = parentEdge->getDims().ToSizeVector(); + blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[4], blkDims[1] }; + + config.inConfs[i].inPlace = -1; // Change to 0 here if inplace concat is supported for NDHWC in mkldnn + + config.inConfs[i].desc = TensorDesc(inputPrecision, parentEdge->getDims().ToSizeVector(), + {blkDims, order, offset, offsets, strides}); + } + + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::ndhwc); + + return; + } + } + + SizeVector strides(numOfDim); + strides[numOfDim - 1] = 1; + for (size_t i = 2; i <= numOfDim; i++) { + if (numOfDim - i < axis) { + strides[numOfDim - i] = (std::numeric_limits::max)(); + } else { + strides[numOfDim - i] = strides[numOfDim - i + 1] * dstDims[numOfDim - i + 1]; + } + } + + config.outConfs[0].desc = TensorDesc( + MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType), + dstDims.ToSizeVector(), + {dstDims.ToSizeVector(), order, offset, offsets, strides}); + for (size_t i = 0; i < getParentEdges().size(); i++) { + auto parentEdge = getParentEdgeAt(i); + config.inConfs[i].inPlace = 0; + config.inConfs[i].desc = TensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(inputDataType), parentEdge->getDims().ToSizeVector(), + {parentEdge->getDims().ToSizeVector(), order, offset, offsets, strides}); + } + + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemory::Convert(config.outConfs[0].desc.getLayout())); + + if (numOfDim == 4lu || numOfDim == 5lu) { + size_t blkDimsLen = numOfDim + 1; + order.resize(blkDimsLen); + for (size_t i = 0; i < numOfDim; i++) { + order[i] = i; + } + order[numOfDim] = 1lu; + offsets = SizeVector(blkDimsLen, 0lu); + + // nChw8c, nChw16c, nCdhw8c, nCdhw16c + for (size_t sizeS : {8lu, 16lu}) { + SizeVector blkDims = dstDims.ToSizeVector(); + if (blkDims[1] % sizeS) + continue; + blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu); + blkDims.push_back(sizeS); + + strides.resize(blkDimsLen); + strides[blkDimsLen - 1] = 1; + for (size_t i = 2lu; i <= blkDimsLen; i++) { + if (blkDimsLen - i < axis) { + strides[blkDimsLen - i] = (std::numeric_limits::max)(); + } else { + strides[blkDimsLen - i] = strides[blkDimsLen - i + 1] * blkDims[blkDimsLen - i + 1]; + } + } + config.outConfs[0].desc = TensorDesc( + MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType), + dstDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); + + bool canInplace = true; + for (size_t i = 0lu; canInplace && i < getParentEdges().size(); i++) { + auto parentEdge = getParentEdgeAt(i); + blkDims = parentEdge->getDims().ToSizeVector(); + if (blkDims[1] % sizeS) + canInplace = false; + + blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu); + blkDims.push_back(sizeS); + config.inConfs[i].desc = TensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(inputDataType), parentEdge->getDims().ToSizeVector(), + {blkDims, order, offset, offsets, strides}); + } + if (canInplace) { + auto dstFormat = numOfDim == 4lu ? sizeS == 8lu ? mkldnn::memory::format_tag::nChw8c : mkldnn::memory::format_tag::nChw16c + : sizeS == 8lu ? 
mkldnn::memory::format_tag::nCdhw8c : mkldnn::memory::format_tag::nCdhw16c; + supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, dstFormat); + } + } + } } void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h index cd693b443cf78a..ffb8ce22febd88 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h @@ -16,6 +16,7 @@ class MKLDNNConcatNode : public MKLDNNNode { MKLDNNConcatNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNConcatNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void initOptimalPrimitiveDescriptor() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index 121680e0d06974..4526a80119198c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -602,46 +602,93 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator }; ////////////////////////////////////////////////////////////////////////////////// -MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) { - std::string errPrefix = "MVN node with name '" + getName() + "' "; +bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + const auto& inDataShapeSize = op->input_value(0).get_shape().size(); + if (inDataShapeSize < 1 || inDataShapeSize > 5) { + errorMessage = "First input accepts ranks from 1 to 5. Actual: " + std::to_string(inDataShapeSize); + return false; + } + + if (auto mvnOp = ngraph::as_type_ptr(op)) { + auto axesOp = ngraph::as_type_ptr(mvnOp->get_input_node_shared_ptr(1)); + if (!axesOp) { + errorMessage = "Constant expected as the second input."; + return false; + } - if (op->get_output_size() != 1) - IE_THROW() << errPrefix << "has incorrect number of output edges."; + auto epsMode = mvnOp->get_eps_mode(); + if (epsMode != ngraph::op::MVNEpsMode::INSIDE_SQRT && + epsMode != ngraph::op::MVNEpsMode::OUTSIDE_SQRT) { + errorMessage = std::string("Just INSIDE_SQRT and OUTSIDE_SQRT epsilon mods are supported. Actual: ") + + std::to_string(static_cast(epsMode)); + return false; + } + // Validates MVN node axes to check whether it can be executed on the current CPU implementation. + // Supported cases: + // 1D: axes: [0] + // 2D: axes: [1] + // 3D: axes: [1,2], [2] + // 4D: axes: [1,2,3], [2,3] + // 5D: axes: [1,2,3,4], [2,3,4] + auto axesVal = axesOp->cast_vector(); + auto& mvnShape = mvnOp->get_output_shape(0); + for (int& axe : axesVal) + axe = axe < 0 ? 
axe + mvnShape.size() : axe; + std::sort(axesVal.begin(), axesVal.end()); + if (mvnShape.size() == 1) { + if (axesVal.size() != 1 || axesVal[0] != 0) { + errorMessage = "Unsupported axes."; + return false; + } + } else { + if (mvnShape.size() > 5 || (mvnShape.size() != axesVal.size() + 1 && mvnShape.size() != axesVal.size() + 2)) { + errorMessage = "Unsupported axes."; + return false; + } + int value = mvnShape.size() - 1; + for (int i = axesVal.size() - 1; i >= 0; i--, value--) { + if (axesVal[i] != value) { + errorMessage = "Unsupported axes."; + return false; + } + } + } + } else if (auto mvnOp = ngraph::as_type_ptr(op)) { + } else { + errorMessage = "Node is not an instance of the MVN operation."; + return false; + } + } catch (...) { + return false; + } + return true; +} - const auto& inDataShapeSize = op->input_value(0).get_shape().size(); - if (inDataShapeSize < 1 || inDataShapeSize > 5) - IE_THROW(NotImplemented) << errPrefix << "doesn't support input with size of dimensions: " << inDataShapeSize; +MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } if (auto mvnOp = ngraph::as_type_ptr(op)) { - if (mvnOp->get_input_size() != 2) - IE_THROW() << errPrefix << "has incorrect number of input edges."; - normalizeVariance_ = mvnOp->get_normalize_variance(); epsValue_ = mvnOp->get_eps(); - auto epsMode = mvnOp->get_eps_mode(); - if (epsMode == ngraph::op::MVNEpsMode::INSIDE_SQRT) { - epsMode_ = INSIDE_SQRT; - } else if (epsMode == ngraph::op::MVNEpsMode::OUTSIDE_SQRT) { + epsMode_ = INSIDE_SQRT; + if (mvnOp->get_eps_mode() == ngraph::op::MVNEpsMode::OUTSIDE_SQRT) { epsMode_ = INSIDE_SQRT; - } else { - IE_THROW(NotImplemented) << errPrefix << "does not support epsilon mode: " << epsMode; } acrossChannels_ = false; + const auto& inDataShapeSize = op->input_value(0).get_shape().size(); if (inDataShapeSize == mvnOp->input_value(1).get_shape()[0] + 1 || inDataShapeSize == 1) acrossChannels_ = true; } else if (auto mvnOp = ngraph::as_type_ptr(op)) { - if (mvnOp->get_input_size() != 1) - IE_THROW() << errPrefix << "has incorrect number of input edges."; - normalizeVariance_ = mvnOp->get_normalize_variance(); epsValue_ = mvnOp->get_eps(); epsMode_ = INSIDE_SQRT; acrossChannels_ = mvnOp->get_across_channels(); - } else { - IE_THROW(NotImplemented) - << "CPU MVN node doesn't support ngraph operation '" << op->get_type_name() << "' with name '" << op->get_friendly_name() << "'"; } } @@ -1340,41 +1387,6 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si } } -// Validates MVN node axes to check whether it can be executed on the current CPU implementation. -// Supported cases: -// 1D: axes: [0] -// 2D: axes: [1] -// 3D: axes: [1,2], [2] -// 4D: axes: [1,2,3], [2,3] -// 5D: axes: [1,2,3,4], [2,3,4] -bool MKLDNNMVNNode::checkAxesSuitability(const std::shared_ptr& node) { - const auto mvn = std::dynamic_pointer_cast(node); - if (mvn != nullptr && node->get_input_size() == 2) { - if (auto axesNode = dynamic_cast(mvn->get_input_node_ptr(1))) { - auto& mvnShape = mvn->get_output_shape(0); - auto axesVal = axesNode->cast_vector(); - for (int& axe : axesVal) - axe = axe < 0 ? 
axe + mvnShape.size() : axe; - std::sort(axesVal.begin(), axesVal.end()); - if (mvnShape.size() == 1) { - if (axesVal.size() == 1 && axesVal[0] == 0) - return true; - else - return false; - } - if (mvnShape.size() > 5 || (mvnShape.size() != axesVal.size() + 1 && mvnShape.size() != axesVal.size() + 2)) - return false; - int value = mvnShape.size() - 1; - for (int i = axesVal.size() - 1; i >= 0; i--, value--) { - if (axesVal[i] != value) - return false; - } - return true; - } - } - return false; -} - bool MKLDNNMVNNode::created() const { return getType() == MVN; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h index 41e4f9269098ee..45ffdcb9282044 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h @@ -75,6 +75,7 @@ class MKLDNNMVNNode : public MKLDNNNode { MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNMVNNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void createPrimitive() override; @@ -84,8 +85,6 @@ class MKLDNNMVNNode : public MKLDNNNode { return false; } - static bool checkAxesSuitability(const std::shared_ptr&); - inline bool getAcrossChannels() const { return acrossChannels_; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp similarity index 65% rename from inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp rename to inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp index 9515c642ca42c3..4273c2aa30a986 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp @@ -2,114 +2,130 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "mkldnn_permute_node.h" +#include "mkldnn_transpose_node.h" + +#include #include #include #include "ie_parallel.hpp" -#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNPermuteNode::MKLDNNPermuteNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(op, eng, cache) {} +bool MKLDNNTransposeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + auto transposeOp = ngraph::as_type_ptr(op); + if (!transposeOp) { + errorMessage = "Node is not an instance of the Transpose operation."; + return false; + } -void MKLDNNPermuteNode::getSupportedDescriptors() { - IE_THROW() << "[NM] Not implemented"; -// if (getParentEdges().size() != 1) -// IE_THROW() << "Incorrect number of input edges for layer " << getName(); -// if (!getChildEdges().size()) -// IE_THROW() << "Incorrect number of output edges for layer " << getName(); -// -// auto& layer = getCnnLayer(); -// if (!layer) { -// IE_THROW() << "Cannot get CNNLayer."; -// } -// -// order.clear(); -// std::vector layerOrder = layer->GetParamAsInts("order"); -// for (auto ord : layerOrder) -// order.push_back(static_cast(ord)); -// -// if (order.empty()) { -// size_t rank = getParentEdgeAt(0)->getDims().ndims(); -// for (size_t i = 1; i <= rank; ++i) { -// order.emplace_back(rank - i); -// } -// } + auto orderOp = 
ngraph::as_type_ptr(op->get_input_node_shared_ptr(1)); + if (!orderOp) { + errorMessage = "Constant expected as the second input."; + return false; + } + } catch (...) { + return false; + } + return true; } -void MKLDNNPermuteNode::initSupportedPrimitiveDescriptors() { - IE_THROW() << "[NM] Not implemented"; -// if (!supportedPrimitiveDescriptors.empty()) -// return; -// -// prec = getCnnLayer()->insData[0].lock()->getPrecision(); -// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); -// auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); -// -// InferenceEngine::LayerConfig config; -// config.dynBatchSupport = true; -// config.inConfs.resize(1); -// config.outConfs.resize(1); -// config.inConfs[0].inPlace = -1; -// config.inConfs[0].constant = false; -// config.outConfs[0].inPlace = -1; -// config.outConfs[0].constant = false; -// if (getParentEdgeAt(0)->getDims().ndims() == 4) { -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); -// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nchw}); -// -// auto srcDims = getParentEdgeAt(0)->getDims(); -// if (srcDims[1] % 8 == 0) { -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw8c); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw8c}); -// } -// -// if (srcDims[1] % 16 == 0) { -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw16c); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw16c}); -// } -// -// if (prec == Precision::I8 || prec == Precision::U8) { -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nhwc); -// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nhwc}); -// } -// } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); -// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ncdhw}); -// -// auto srcDims = getParentEdgeAt(0)->getDims(); -// if (srcDims[1] % 8 == 0) { -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw8c); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw8c}); -// } -// -// if (srcDims[1] % 16 == 0) { -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw16c); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw16c}); -// } +MKLDNNTransposeNode::MKLDNNTransposeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + 
IE_THROW(NotImplemented) << errorMessage; + } + + auto orderOp = ngraph::as_type_ptr(op->get_input_node_shared_ptr(1)); + order = orderOp->cast_vector(); + + if (order.empty()) { + size_t rank = op->get_input_shape(0).size(); + for (size_t i = 1lu; i <= rank; ++i) { + order.emplace_back(rank - i); + } + } +} + +void MKLDNNTransposeNode::getSupportedDescriptors() { +} + +void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; // -// if (prec == Precision::I8 || prec == Precision::U8) { -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ndhwc); -// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ndhwc); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ndhwc}); -// } -// } else { -// // general plain case -// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); -// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); -// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); -// } + prec = getOriginalInputPrecisions()[0]; + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); + auto inputOrderDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[1]); + + InferenceEngine::LayerConfig config; + config.dynBatchSupport = true; + config.inConfs.resize(2); + config.outConfs.resize(1); + config.inConfs[0].inPlace = -1; + config.inConfs[0].constant = false; + config.inConfs[1].inPlace = -1; + config.inConfs[1].constant = true; + config.outConfs[0].inPlace = -1; + config.outConfs[0].constant = false; + config.inConfs[1].desc = MKLDNNMemoryDesc(getParentEdgeAt(1)->getDims(), inputOrderDataType, memory::format_tag::x); + if (getParentEdgeAt(0)->getDims().ndims() == 4) { + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); + config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nchw}); + + auto srcDims = getParentEdgeAt(0)->getDims(); + if (srcDims[1] % 8 == 0) { + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw8c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw8c}); + } + + if (srcDims[1] % 16 == 0) { + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw16c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw16c}); + } + + if (prec == Precision::I8 || prec == Precision::U8) { + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nhwc); + config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nhwc}); + } + } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); + config.outConfs[0].desc = 
MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ncdhw}); + + auto srcDims = getParentEdgeAt(0)->getDims(); + if (srcDims[1] % 8 == 0) { + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw8c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw8c}); + } + + if (srcDims[1] % 16 == 0) { + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw16c); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw16c}); + } + + if (prec == Precision::I8 || prec == Precision::U8) { + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ndhwc); + config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ndhwc); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ndhwc}); + } + } else { + // general plain case + config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); + config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); + } } -void MKLDNNPermuteNode::createPrimitive() { +void MKLDNNTransposeNode::createPrimitive() { auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) @@ -134,7 +150,7 @@ void MKLDNNPermuteNode::createPrimitive() { permuteKernel = std::unique_ptr(new PermuteKernel(params)); } -static void permute_to_0231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); // Supports only NCHW to NHWC @@ -168,7 +184,7 @@ static void permute_to_0231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_0213(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0213(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); int block_size = 1; @@ -194,7 +210,7 @@ static void permute_to_0213(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_0312(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0312(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -212,7 +228,7 @@ static void permute_to_0312(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& } template -static void permute_to_014253(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_014253(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -248,7 +264,7 @@ static void permute_to_014253(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPt } } -static 
void permute_to_3012(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_3012(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -276,7 +292,7 @@ static void permute_to_3012(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& } } -static void permute_to_021(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_021(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -300,7 +316,7 @@ static void permute_to_021(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_034152(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_034152(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -336,7 +352,7 @@ static void permute_to_034152(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPt } } -static void permute_to_0132(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_0132(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); int src_block_size = 1; @@ -362,7 +378,7 @@ static void permute_to_0132(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_03142(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_03142(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -394,7 +410,7 @@ static void permute_to_03142(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr } } -static void permute_to_1203(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_1203(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -411,7 +427,7 @@ static void permute_to_1203(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_02134(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_02134(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -438,7 +454,7 @@ static void permute_to_02134(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -static void permute_to_02431(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_02431(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -465,7 +481,7 @@ static void permute_to_02431(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -static void permute_to_04231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_04231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = 
reinterpret_cast(dstMemPtr->GetPtr()); @@ -492,7 +508,7 @@ static void permute_to_04231(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -static void permute_to_102(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_102(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -516,7 +532,7 @@ static void permute_to_102(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& }); } -static void permute_to_02341(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_02341(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -543,7 +559,7 @@ static void permute_to_02341(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -static void permute_to_04123(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +static void transpose_to_04123(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { auto src_data = reinterpret_cast(srcMemPtr->GetPtr()); auto dst_data = reinterpret_cast(dstMemPtr->GetPtr()); @@ -570,61 +586,61 @@ static void permute_to_04123(int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr }); } -const std::multimap MKLDNNPermuteNode::OptimizedCases = { - {{0, 2, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_0231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { +const std::multimap MKLDNNTransposeNode::OptimizedCases = { + {{0, 2, 3, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_0231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return true; })}, // NCHW -> NHWC case - {{0, 1, 4, 2, 5, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_014253<2, 2>, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 1, 4, 2, 5, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_014253<2, 2>, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat() && srcMemPtr->GetDims()[2] == 2 && srcMemPtr->GetDims()[3] == 2; })}, // Dense upsample convolution case (scale = 2) - {{0, 1, 4, 2, 5, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_014253<0, 0>, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 1, 4, 2, 5, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_014253<0, 0>, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, // Dense upsample convolution case (generic) - {{3, 0, 1, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_3012, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{3, 0, 1, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_3012, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat() && MB == srcMemPtr->GetDims()[0]; })}, // LPR case - {{0, 2, 1, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_0213, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 1, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_0213, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, // shufflenet - {{0, 2, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_021, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 1}, 
MKLDNNTransposeNode::TransposeImpl(transpose_to_021, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, // self attention block - {{0, 3, 4, 1, 5, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_034152, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 3, 4, 1, 5, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_034152, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, // learning-to-see-in-the-dark-sony - {{0, 1, 3, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_0132, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 1, 3, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_0132, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return true; })}, - {{0, 3, 1, 4, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_03142, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 3, 1, 4, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_03142, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{1, 2, 0, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_1203, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{1, 2, 0, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_1203, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat() && MB == srcMemPtr->GetDims()[0]; })}, - {{0, 2, 1, 3, 4}, MKLDNNPermuteNode::PermuteImpl(permute_to_02134, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 1, 3, 4}, MKLDNNTransposeNode::TransposeImpl(transpose_to_02134, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{0, 2, 4, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_02431, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 4, 3, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_02431, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{0, 4, 2, 3, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_04231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 4, 2, 3, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_04231, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{0, 3, 1, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_0312, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 3, 1, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_0312, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, - {{1, 0, 2}, MKLDNNPermuteNode::PermuteImpl(permute_to_102, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{1, 0, 2}, MKLDNNTransposeNode::TransposeImpl(transpose_to_102, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat() && MB == srcMemPtr->GetDims()[0]; })}, - {{0, 2, 3, 4, 1}, MKLDNNPermuteNode::PermuteImpl(permute_to_02341, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 2, 3, 4, 1}, MKLDNNTransposeNode::TransposeImpl(transpose_to_02341, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, 
- {{0, 4, 1, 2, 3}, MKLDNNPermuteNode::PermuteImpl(permute_to_04123, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { + {{0, 4, 1, 2, 3}, MKLDNNTransposeNode::TransposeImpl(transpose_to_04123, [](int MB, MKLDNNMemoryPtr& srcMemPtr, MKLDNNMemoryPtr& dstMemPtr) { return srcMemPtr->GetDesc().isPlainFormat(); })}, }; -void MKLDNNPermuteNode::execute(mkldnn::stream strm) { +void MKLDNNTransposeNode::execute(mkldnn::stream strm) { auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); int MB = batchToProcess(); @@ -643,7 +659,7 @@ void MKLDNNPermuteNode::execute(mkldnn::stream strm) { permuteKernel->execute(srcData, dstData, MB); } -bool MKLDNNPermuteNode::created() const { - return getType() == Permute; +bool MKLDNNTransposeNode::created() const { + return getType() == Transpose; } -REG_MKLDNN_PRIM_FOR(MKLDNNPermuteNode, Permute); +REG_MKLDNN_PRIM_FOR(MKLDNNTransposeNode, Transpose); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.h similarity index 70% rename from inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h rename to inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.h index 92a3798975b8fb..07a7a7ac7cdb36 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.h @@ -17,9 +17,10 @@ namespace MKLDNNPlugin { class MKLDNNPermuteNode : public MKLDNNNode { public: - MKLDNNPermuteNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - ~MKLDNNPermuteNode() override = default; + MKLDNNTransposeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNTransposeNode() override = default; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; void createPrimitive() override; @@ -37,12 +38,12 @@ class MKLDNNPermuteNode : public MKLDNNNode { InferenceEngine::SizeVector order; InferenceEngine::Precision prec; - typedef std::function permuteImpl; + typedef std::function transposeImpl; typedef std::function isApplicable; - struct PermuteImpl { - PermuteImpl(permuteImpl f0, isApplicable f1): execute(std::move(f0)), isValidParams(std::move(f1)) {} + struct TransposeImpl { + TransposeImpl(transposeImpl f0, isApplicable f1): execute(std::move(f0)), isValidParams(std::move(f1)) {} - permuteImpl execute; + transposeImpl execute; isApplicable isValidParams; }; diff --git a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/permute.cpp b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp similarity index 91% rename from inference-engine/tests/functional/plugin/cpu/single_layer_tests/permute.cpp rename to inference-engine/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp index e95cb6ffa5f969..8c82bbc462d83c 100644 --- a/inference-engine/tests/functional/plugin/cpu/single_layer_tests/permute.cpp +++ b/inference-engine/tests/functional/plugin/cpu/single_layer_tests/transpose.cpp @@ -6,7 +6,7 @@ #include "ngraph_functions/builders.hpp" #include "test_utils/cpu_test_utils.hpp" -// Since the Transpose ngraph operation is converted to the permute node, we will use it in the permute test +// Since the Transpose ngraph operation is converted to the transpose node, we will use it 
in the transpose test using namespace InferenceEngine; using namespace CPUTestUtils; @@ -19,12 +19,12 @@ typedef std::tuple< std::vector, // Input shapes std::string, // Target device name std::map, // Additional network configuration - CPUSpecificParams> PermuteLayerCPUTestParamSet; + CPUSpecificParams> TransposeLayerCPUTestParamSet; -class PermuteLayerCPUTest : public testing::WithParamInterface, +class TransposeLayerCPUTest : public testing::WithParamInterface, virtual public LayerTestsUtils::LayerTestsCommon, public CPUTestsBase { public: - static std::string getTestCaseName(testing::TestParamInfo obj) { + static std::string getTestCaseName(testing::TestParamInfo obj) { Precision netPrecision; std::vector inputShape, inputOrder; std::string targetDevice; @@ -72,11 +72,11 @@ class PermuteLayerCPUTest : public testing::WithParamInterface; -class FusePermuteAndReorderTest : public testing::WithParamInterface, public CPUTestsBase, +class FuseTransposeAndReorderTest : public testing::WithParamInterface, public CPUTestsBase, virtual public LayerTestsUtils::LayerTestsCommon { public: - static std::string getTestCaseName(testing::TestParamInfo obj); + static std::string getTestCaseName(testing::TestParamInfo obj); protected: void SetUp() override; virtual void CreateGraph(); - void CheckPermuteCount(size_t expectedPermuteCount); + void CheckTransposeCount(size_t expectedTransposeCount); InferenceEngine::SizeVector inputShape; InferenceEngine::Precision inPrec; }; -class FusePermuteAndReorderTest1 : public FusePermuteAndReorderTest { +class FuseTransposeAndReorderTest1 : public FuseTransposeAndReorderTest { protected: void CreateGraph() override; }; -class FusePermuteAndReorderTest2 : public FusePermuteAndReorderTest { +class FuseTransposeAndReorderTest2 : public FuseTransposeAndReorderTest { protected: void CreateGraph() override; }; diff --git a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp similarity index 62% rename from inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp rename to inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp index 16f5e680229210..6cefb1b5be81c9 100644 --- a/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_permute_reorder.cpp +++ b/inference-engine/tests/functional/plugin/cpu/subgraph_tests/src/fuse_transpose_reorder.cpp @@ -2,14 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "subgraph_tests/include/fuse_permute_reorder.hpp" +#include "subgraph_tests/include/fuse_transpose_reorder.hpp" using namespace InferenceEngine; using namespace CPUTestUtils; namespace SubgraphTestsDefinitions { -std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfo obj) { +std::string FuseTransposeAndReorderTest::getTestCaseName(testing::TestParamInfo obj) { std::ostringstream result; SizeVector inputShape; Precision inPrec; @@ -21,11 +21,11 @@ std::string FusePermuteAndReorderTest::getTestCaseName(testing::TestParamInfoget_ops()) { const auto & rtInfo = node->get_rt_info(); auto getExecValue = [&rtInfo](const std::string & paramName) -> std::string { @@ -35,34 +35,34 @@ void FusePermuteAndReorderTest::CheckPermuteCount(size_t expectedPermuteCount) { IE_ASSERT(nullptr != value); return value->get(); }; - if (getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == "Permute") { - actualPermuteCount++; + if 
(getExecValue(ExecGraphInfoSerialization::LAYER_TYPE) == "Transpose") { + actualTransposeCount++; } } - ASSERT_EQ(expectedPermuteCount, actualPermuteCount); + ASSERT_EQ(expectedTransposeCount, actualTransposeCount); } -void FusePermuteAndReorderTest::SetUp() { +void FuseTransposeAndReorderTest::SetUp() { targetDevice = CommonTestUtils::DEVICE_CPU; std::tie(inputShape, inPrec) = this->GetParam(); CreateGraph(); } -const auto fusePermuteAndReorderCommonParams = ::testing::Combine( +const auto fuseTransposeAndReorderCommonParams = ::testing::Combine( ::testing::Values(SizeVector{1, 2, 3, 4}, SizeVector{1, 2, 3, 4, 5}), ::testing::Values(Precision::I8, Precision::U8) ); -/* FusePermuteAndReorderTest graph +/* FuseTransposeAndReorderTest graph --------- |Input | --------- | ------------- | --------- | - | |Permute| | + | |Transpose| | | --------- | | | | | --------- | @@ -75,7 +75,7 @@ const auto fusePermuteAndReorderCommonParams = ::testing::Combine( --------- */ -void FusePermuteAndReorderTest::CreateGraph() { +void FuseTransposeAndReorderTest::CreateGraph() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); @@ -83,37 +83,37 @@ void FusePermuteAndReorderTest::CreateGraph() { auto memFmt = inputShape.size() == 5 ? ndhwc : nhwc; auto constOrder = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute = std::make_shared(params[0], constOrder); - permute->get_rt_info() = makeCPUInfo({memFmt}, {memFmt}, {}); + auto transpose = std::make_shared(params[0], constOrder); + transpose->get_rt_info() = makeCPUInfo({memFmt}, {memFmt}, {}); - ngraph::ResultVector results{std::make_shared(permute)}; - function = std::make_shared(results, params, "PermuteReorder"); + ngraph::ResultVector results{std::make_shared(transpose)}; + function = std::make_shared(results, params, "TransposeReorder"); } -TEST_P(FusePermuteAndReorderTest, CompareWithRefs) { +TEST_P(FuseTransposeAndReorderTest, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckPermuteCount(0); + CheckTransposeCount(0); } -INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndReorderCommonParams, FusePermuteAndReorderTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Basic, FuseTransposeAndReorderTest, fuseTransposeAndReorderCommonParams, FuseTransposeAndReorderTest::getTestCaseName); -/* FusePermuteAndReorderTest1 graph +/* FuseTransposeAndReorderTest1 graph --------- |Input | --------- | --------- - |Permute| + |Transpose| --------- | ------------------- | | | ------------- | | --------- | - | | |Permute| | + | | |Transpose| | --------- | --------- | |Reshape| | | | --------- | --------- | @@ -122,7 +122,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndRe | |-----------| | | | --------- - | |Permute| + | |Transpose| | --------- | | -------- -------- @@ -136,60 +136,60 @@ INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest, fusePermuteAndRe --------- */ -void FusePermuteAndReorderTest1::CreateGraph() { +void FuseTransposeAndReorderTest1::CreateGraph() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); auto params = ngraph::builder::makeParams(ngPrc, {inputShape}); auto order = inputShape.size() == 5 ? 
std::vector{0, 2, 3, 4, 1} : std::vector{0, 2, 3, 1}; auto constOrder1 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute1 = std::make_shared(params[0], constOrder1); + auto transpose1 = std::make_shared(params[0], constOrder1); auto memFmt1 = inputShape.size() == 5 ? ndhwc : nhwc; - permute1->get_rt_info() = makeCPUInfo({memFmt1}, {memFmt1}, {}); + transpose1->get_rt_info() = makeCPUInfo({memFmt1}, {memFmt1}, {}); auto constOrder2 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute2 = std::make_shared(permute1, constOrder2); + auto transpose2 = std::make_shared(transpose1, constOrder2); auto memFmt2 = inputShape.size() == 5 ? ndhwc : nhwc; - permute2->get_rt_info() = makeCPUInfo({memFmt2}, {memFmt2}, {}); + transpose2->get_rt_info() = makeCPUInfo({memFmt2}, {memFmt2}, {}); auto constOrder3 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order); - auto permute3 = std::make_shared(permute2, constOrder3); + auto transpose3 = std::make_shared(transpose2, constOrder3); auto memFmt3 = inputShape.size() == 5 ? ncdhw : nchw; - permute3->get_rt_info() = makeCPUInfo({memFmt3}, {memFmt3}, {}); + transpose3->get_rt_info() = makeCPUInfo({memFmt3}, {memFmt3}, {}); - auto shape = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, permute3->get_output_shape(0)); - auto reshape = std::make_shared(permute1, shape, false); + auto shape = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, transpose3->get_output_shape(0)); + auto reshape = std::make_shared(transpose1, shape, false); - auto concat = ngraph::builder::makeConcat({permute3, reshape}, 1); + auto concat = ngraph::builder::makeConcat({transpose3, reshape}, 1); ngraph::ResultVector results{std::make_shared(concat)}; - function = std::make_shared(results, params, "Permute_PermuteReorderPermute_Reshape_Concat"); + function = std::make_shared(results, params, "Transpose_TransposeReorderTranspose_Reshape_Concat"); } -TEST_P(FusePermuteAndReorderTest1, CompareWithRefs) { +TEST_P(FuseTransposeAndReorderTest1, CompareWithRefs) { SKIP_IF_CURRENT_TEST_IS_DISABLED() Run(); - CheckPermuteCount(2); + CheckTransposeCount(2); } -INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest1, fusePermuteAndReorderCommonParams, FusePermuteAndReorderTest::getTestCaseName); +INSTANTIATE_TEST_CASE_P(smoke_Basic, FuseTransposeAndReorderTest1, fuseTransposeAndReorderCommonParams, FuseTransposeAndReorderTest::getTestCaseName); -/* FusePermuteAndReorderTest2 graph +/* FuseTransposeAndReorderTest2 graph --------- --------- |Input | |Input | --------- --------- | | | ------------- - --------- | --------- | - |Reorder| | |Permute| | - --------- | --------- | - | | | | - --------- | --------- | - |Permute| | |Reorder| | - --------- | --------- | - | |-----------| + --------- | ----------- | + |Reorder| | |Transpose| | + --------- | ----------- | + | | | | + --------- | ----------- | + |Transpose| | |Reorder| | + --------- | ----------- | + | |-------------| | | -------- -------- | | @@ -202,7 +202,7 @@ INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest1, fusePermuteAndR --------- */ -void FusePermuteAndReorderTest2::CreateGraph() { +void FuseTransposeAndReorderTest2::CreateGraph() { auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrec); auto inputShape2(inputShape); @@ -212,28 +212,28 @@ void FusePermuteAndReorderTest2::CreateGraph() { auto order = inputShape.size() == 5 ? 
std::vector{0, 4, 1, 2, 3} : std::vector{0, 3, 1, 2};
     auto constOrder1 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order);
-    auto permute1 = std::make_shared(params[0], constOrder1);
+    auto transpose1 = std::make_shared(params[0], constOrder1);
     auto memFmt1 = inputShape.size() == 5 ? ndhwc : nhwc;
-    permute1->get_rt_info() = makeCPUInfo({memFmt1}, {memFmt1}, {});
+    transpose1->get_rt_info() = makeCPUInfo({memFmt1}, {memFmt1}, {});
 
     auto constOrder2 = ngraph::builder::makeConstant(ngraph::element::i64, {inputShape.size()}, order);
-    auto permute2 = std::make_shared(params[1], constOrder2);
+    auto transpose2 = std::make_shared(params[1], constOrder2);
     auto memFmt2 = inputShape.size() == 5 ? ncdhw : nchw;
-    permute2->get_rt_info() = makeCPUInfo({memFmt2}, {memFmt2}, {});
+    transpose2->get_rt_info() = makeCPUInfo({memFmt2}, {memFmt2}, {});
 
-    auto concat = ngraph::builder::makeConcat({permute1, permute2}, 1);
+    auto concat = ngraph::builder::makeConcat({transpose1, transpose2}, 1);
     ngraph::ResultVector results{std::make_shared(concat)};
-    function = std::make_shared(results, params, "Permute_Permute_Concat");
+    function = std::make_shared(results, params, "Transpose_Transpose_Concat");
 }
 
-TEST_P(FusePermuteAndReorderTest2, CompareWithRefs) {
+TEST_P(FuseTransposeAndReorderTest2, CompareWithRefs) {
     SKIP_IF_CURRENT_TEST_IS_DISABLED()
 
     Run();
-    CheckPermuteCount(1);
+    CheckTransposeCount(1);
 }
 
-INSTANTIATE_TEST_CASE_P(smoke_Basic, FusePermuteAndReorderTest2, fusePermuteAndReorderCommonParams, FusePermuteAndReorderTest::getTestCaseName);
+INSTANTIATE_TEST_CASE_P(smoke_Basic, FuseTransposeAndReorderTest2, fuseTransposeAndReorderCommonParams, FuseTransposeAndReorderTest::getTestCaseName);
 
 } // namespace SubgraphTestsDefinitions
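The MKLDNNTransposeNode constructor above falls back to a reversed-axes permutation when the Transpose order input is an empty constant. A minimal standalone sketch of that rule follows (defaultTransposeOrder is an illustrative name, not a plugin symbol):

#include <cstddef>
#include <iostream>
#include <vector>

// Reversed-axes fallback used when Transpose gets an empty "order" input:
// for rank r the order becomes {r-1, r-2, ..., 0}.
static std::vector<size_t> defaultTransposeOrder(size_t rank) {
    std::vector<size_t> order;
    order.reserve(rank);
    for (size_t i = 1; i <= rank; ++i)
        order.push_back(rank - i);
    return order;
}

int main() {
    for (size_t axis : defaultTransposeOrder(4))
        std::cout << axis << ' ';   // prints: 3 2 1 0
    std::cout << '\n';
    return 0;
}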
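initSupportedPrimitiveDescriptors() registers layout candidates by a small rule set: the plain format is always offered, the channel-blocked formats only when the channel count is divisible by the block size, and the channels-last format only for I8/U8. A compilable sketch of that selection, using illustrative names (candidateInputFormats, a local Precision enum) rather than the plugin's types:

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

enum class Precision { FP32, I8, U8 };

// Mirrors the descriptor-selection rules in the node; the real code builds
// MKLDNNMemoryDesc objects instead of format-name strings.
std::vector<std::string> candidateInputFormats(size_t rank, size_t channels, Precision prec) {
    std::vector<std::string> fmts;
    if (rank == 4 || rank == 5) {
        fmts.push_back(rank == 4 ? "nchw" : "ncdhw");            // plain layout, always offered
        if (channels % 8 == 0)
            fmts.push_back(rank == 4 ? "nChw8c" : "nCdhw8c");    // 8-channel blocking
        if (channels % 16 == 0)
            fmts.push_back(rank == 4 ? "nChw16c" : "nCdhw16c");  // 16-channel blocking
        if (prec == Precision::I8 || prec == Precision::U8)
            fmts.push_back(rank == 4 ? "nhwc" : "ndhwc");        // channels-last, int8 only
    } else {
        fmts.push_back("plain");                                 // generic fallback for other ranks
    }
    return fmts;
}

int main() {
    for (const auto& f : candidateInputFormats(4, 16, Precision::U8))
        std::cout << f << '\n';   // nchw, nChw8c, nChw16c, nhwc
    return 0;
}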
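The OptimizedCases multimap pairs each hand-tuned transpose order with an applicability predicate, and execute() falls back to the generic PermuteKernel when no entry accepts the input. A minimal sketch of that dispatch pattern (Order, Impl, dispatch and the dummy kernels are placeholders, not plugin code):

#include <cstddef>
#include <functional>
#include <iostream>
#include <map>
#include <vector>

using Order = std::vector<size_t>;

struct Impl {
    std::function<void(int /*MB*/)> execute;       // specialized kernel
    std::function<bool(int /*MB*/)> isApplicable;  // extra layout/shape checks
};

// Several orders may have more than one specialization, hence a multimap.
static const std::multimap<Order, Impl> kOptimizedCases = {
    {{0, 2, 3, 1}, {[](int) { std::cout << "NCHW -> NHWC kernel\n"; },
                    [](int) { return true; }}},
    {{0, 2, 1, 3}, {[](int) { std::cout << "shufflenet-style kernel\n"; },
                    [](int) { return true; }}},
};

// Returns true when an optimized kernel ran; the caller would otherwise
// fall back to the generic permute implementation.
static bool dispatch(const Order& order, int MB) {
    auto range = kOptimizedCases.equal_range(order);
    for (auto it = range.first; it != range.second; ++it) {
        if (it->second.isApplicable(MB)) {
            it->second.execute(MB);
            return true;
        }
    }
    return false;
}

int main() {
    if (!dispatch({0, 2, 3, 1}, 1))
        std::cout << "generic fallback\n";
    return 0;
}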
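The fusion tests expect CheckTransposeCount() to find fewer Transpose nodes than the source graph contains because MergeTransposeAndReorder collapses a Transpose followed by a Reorder whenever the pair leaves the underlying memory untouched. A small self-contained check of that property for one such pair, Transpose order {0, 3, 1, 2} followed by a plain-to-channels-last reorder (shapes chosen arbitrarily for illustration):

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
    const size_t N = 1, H = 2, W = 2, C = 3;

    // Source tensor: shape (N, H, W, C), plain row-major layout.
    std::vector<int> src(N * H * W * C);
    for (size_t i = 0; i < src.size(); ++i) src[i] = static_cast<int>(i);

    // Step 1: Transpose with order {0, 3, 1, 2} -> shape (N, C, H, W), plain layout.
    std::vector<int> transposed(src.size());
    for (size_t n = 0; n < N; ++n)
        for (size_t c = 0; c < C; ++c)
            for (size_t h = 0; h < H; ++h)
                for (size_t w = 0; w < W; ++w)
                    transposed[((n * C + c) * H + h) * W + w] =
                        src[((n * H + h) * W + w) * C + c];

    // Step 2: Reorder the (N, C, H, W) plain tensor into channels-last (nhwc) layout.
    std::vector<int> reordered(src.size());
    for (size_t n = 0; n < N; ++n)
        for (size_t c = 0; c < C; ++c)
            for (size_t h = 0; h < H; ++h)
                for (size_t w = 0; w < W; ++w)
                    reordered[((n * H + h) * W + w) * C + c] =
                        transposed[((n * C + c) * H + h) * W + w];

    // The combined Transpose+Reorder did not move a single element, which is
    // why the pair can be replaced by a descriptor-only "optimized" reorder.
    assert(reordered == src);
    return 0;
}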