test fixes (#56)
* test fixes

* [CPU] GRN node migration to nGraph

* Performance problem fixes. Part 3

* unused node creation fix

* small fix

* serialize test fixes

* fixes after review
Maxim Andronov authored and dmitry-gorokhov committed May 4, 2021
1 parent 2e1001f commit 7867d79
Showing 47 changed files with 522 additions and 442 deletions.
2 changes: 1 addition & 1 deletion inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -72,7 +72,7 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_elements.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_nd.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_tree.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/nodes/grn.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/grn.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/non_max_suppression.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/log_softmax.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/math.cpp
95 changes: 45 additions & 50 deletions inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
@@ -20,6 +20,7 @@
#include <unordered_set>
#include <utility>
#include <cstring>
#include <ngraph/opsets/opset1.hpp>

using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@@ -254,56 +255,50 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
}

bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const {
// TODO [NM]: reimplement w/o using legacy API
return false;
// InputsDataMap inputs = network.getInputsInfo();
//
// CNNLayerSet inputLayers;
// std::unordered_set<CNNLayer *> allLayers;
//
// if (inputs.empty())
// return false;
//
// auto & secondLayers = getInputTo(inputs.begin()->second->getInputData());
// if (secondLayers.empty())
// return false;
//
// bool check_result = true;
// details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
// auto type = TypeFromName(layer->type);
// // This is WA for Tile layer
// auto tileLayer = dynamic_cast<TileLayer *>(layer.get());
// if (tileLayer && tileLayer->axis)
// return;
//
// auto reshapeLayer = dynamic_cast<ReshapeLayer *>(layer.get());
// if (reshapeLayer &&
// type == Reshape &&
// (reshapeLayer->outData[0]->getTensorDesc().getDims()[0] ==
// reshapeLayer->insData[0].lock()->getTensorDesc().getDims()[0])) {
// return;
// }
//
// if (type != Input &&
// type != Output &&
// type != Convolution &&
// type != Deconvolution &&
// type != Activation &&
// type != Depthwise &&
// type != Lrn &&
// type != Pooling &&
// type != FullyConnected &&
// type != Gemm &&
// type != Softmax &&
// type != Split &&
// type != Concatenation &&
// type != Eltwise &&
// type != Copy) {
// check_result = false;
// }
// }, false);
//
// return check_result;
InputsDataMap inputs = network.getInputsInfo();

if (inputs.empty())
return false;

auto function = network.getFunction();
if (function == nullptr) {
IE_THROW() << "CPU plug-in doesn't support not ngraph-based model!";
}

auto ops = function->get_ordered_ops();
for (auto op : ops) {
auto type = TypeFromName(op->get_type_name());
if (type == Tile) {
const auto tile = std::dynamic_pointer_cast<const ngraph::opset1::Tile>(op);
if (!tile)
return false;
const auto repeatsNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(tile->get_input_node_shared_ptr(1));
if (!repeatsNode)
return false;
if (repeatsNode->cast_vector<int64_t>()[0] == 1)
continue;
}

if (type == Reshape) {
if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0])
continue;
}

if (type != Input &&
type != Output &&
type != Convolution &&
type != Deconvolution &&
type != Lrn &&
type != Pooling &&
type != FullyConnected &&
type != MatMul &&
type != Softmax &&
type != Split &&
type != Concatenation &&
type != Eltwise) {
return false;
}
}

return true;
}
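For reference, the Tile check above can be read as one small predicate. A minimal sketch using the same ngraph calls as the diff (the helper name is hypothetical, not part of this change):

#include <cstdint>
#include <memory>
#include <ngraph/opsets/opset1.hpp>

// Hypothetical helper: true when a Tile op provably keeps the batch
// dimension intact (repeats[0] == 1), so dynamic batching stays safe.
static bool tileKeepsBatch(const std::shared_ptr<ngraph::Node>& op) {
    const auto tile = std::dynamic_pointer_cast<const ngraph::opset1::Tile>(op);
    if (!tile)
        return false;
    const auto repeats = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(
            tile->get_input_node_shared_ptr(1));
    if (!repeats)
        return false;  // non-constant repeats: batch safety cannot be proven
    return repeats->cast_vector<int64_t>()[0] == 1;
}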

IE_SUPPRESS_DEPRECATED_START
104 changes: 63 additions & 41 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -188,6 +188,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
InputsDataMap inputsInfo = network.getInputsInfo();
OutputsDataMap outputsInfo = network.getOutputsInfo();

this->_name = network.getName();

std::shared_ptr<const ngraph::Function> func = network.getFunction();
if (!func) {
IE_THROW() << "Function pointer inside CNNNetwork is nullptr";
@@ -270,10 +272,12 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
// Add stub output node for unused outputs
for (auto unusedOutput : unusedOutputs) {
auto parentNode = op2node[unusedOutput.get_node_shared_ptr()];
auto newResult = std::make_shared<ngraph::op::v0::Result>(unusedOutput);
newResult->set_friendly_name(std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName());
const MKLDNNNodePtr outNode(MKLDNNNode::factory().create(newResult, getEngine(), extMgr, weightsCache));
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, unusedOutput.get_index(), 0));
const auto port = unusedOutput.get_index();
const auto nodeName = std::string("stub_") + std::to_string(port) + "_" + parentNode->getName();
const MKLDNNNodePtr outNode = std::make_shared<MKLDNNInputNode>(parentNode->outDims[port].ToSizeVector(),
parentNode->getOriginalOutputPrecisionAtPort(port),
nodeName, "Result", getEngine(), weightsCache);
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0));
outNode->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(outNode);
@@ -300,25 +304,19 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
}
}

//
// // Replicate input nodes
// for (const auto& input : inputs) {
// auto inputLayer = getCreatorLayer(input.second->getInputData()).lock();
// inputNodesMap[input.first] = layer2node[inputLayer];
//
// // Loading mean images
// MKLDNNDims outDims;
// if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims())
// outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1));
// else
// outDims = MKLDNNDims(inputNodesMap[input.first]->getChildEdgeAt(0)->getDims());
// if (inputs.find(input.first) != inputs.end()) {
// InputInfo::Ptr ii = inputs[input.first];
// if (ii && ii->getPreProcess().getNumberOfChannels()) {
// _meanImages[input.first].Load(outDims, ii);
// }
// }
// }
// Loading mean images
for (const auto& input : inputsInfo) {
MKLDNNDims outDims;
if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims()) {
outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1));
} else {
outDims = inputNodesMap[input.first]->getChildEdgeAt(0)->getDims();
}
InputInfo::Ptr ii = inputsInfo[input.first];
if (ii && ii->getPreProcess().getNumberOfChannels()) {
_meanImages[input.first].Load(outDims, ii);
}
}
}
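A minimal distillation of the dims fallback in the loop above (hypothetical helper; plain std::vector<size_t> stands in for InferenceEngine::SizeVector):

#include <cstddef>
#include <vector>

// Scalar edges (ndims() == 0) are promoted to a one-element {1} shape
// before the mean image is loaded; anything else passes through.
static std::vector<size_t> meanImageDims(const std::vector<size_t>& edgeDims) {
    return edgeDims.empty() ? std::vector<size_t>(1, 1) : edgeDims;
}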

void MKLDNNGraph::InitGraph() {
@@ -477,23 +475,25 @@ void MKLDNNGraph::InitEdges() {

// Check if there is a reorder that supports the type conversion
if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() &&
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
IE_THROW() << "[NM] Not implemented";
// //If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
// std::string convertName = edge->getParent()->getName() + "_" +
// edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name();
//
// CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()}));
// auto convertNode = std::make_shared<MKLDNNConvertNode>(convert, this->getEngine(), this->weightsCache);
// convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc());
// InsertNode(edge, convertNode, true);
//
// //Check if reorder is still needed
// if (convertNode->getChildEdgeAt(0)->needReorder()) {
// edge = convertNode->getChildEdgeAt(0);
// } else {
// insertReorder = false;
// }
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
//If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
const auto inDesc = edge->getInputDesc();
const auto outDesc = edge->getOutputDesc();

std::string convertName = edge->getParent()->getName() + "_" +
inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name();

auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getDims(), inDesc.getPrecision(), outDesc.getPrecision(), convertName,
this->getEngine(), this->weightsCache);
convertNode->setDescs(inDesc, outDesc);
InsertNode(edge, convertNode, true);

//Check if reorder is still needed
if (convertNode->getChildEdgeAt(0)->needReorder()) {
edge = convertNode->getChildEdgeAt(0);
} else {
insertReorder = false;
}
}
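The Convert insertion above, together with the insertReorder check that follows, implements a two-step policy. A hedged distillation (the struct and helper are made up for illustration; isReorderAvailable is the real predicate used above):

#include <string>

// Toy model of the InitEdges() decision. Stand-in flags: the real code
// asks isReorderAvailable(inDesc, outDesc, engine) and needReorder().
struct EdgeInfo {
    std::string inPrec, outPrec;
    bool reorderSupportsConversion;  // result of isReorderAvailable(...)
    bool layoutStillMismatched;      // needReorder() after a Convert
};

// Returns the sequence of nodes to insert on the edge.
static std::string planEdgeInsertion(const EdgeInfo& e) {
    if (e.inPrec == e.outPrec || e.reorderSupportsConversion)
        return "Reorder";                        // a reorder handles everything
    // No reorder can convert inPrec -> outPrec: insert Convert first,
    // then re-check whether a layout Reorder is still required.
    return e.layoutStillMismatched ? "Convert -> Reorder" : "Convert";
}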

if (insertReorder) {
@@ -787,7 +787,29 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
MB_to_process = std::min<int>(config.batchLimit, MB_to_process);
size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB;

cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
const auto actualDesc = node->getParentEdgeAt(0)->getDesc();
const auto expectedDesc = ext_blob->getTensorDesc();

// TODO [NM]: need to create a universal reorder which will detect the cases when we really need to use it
// WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar
bool isScalarOutput = false;
if (actualDesc.getLayout() == SCALAR) {
isScalarOutput = expectedDesc.getLayout() == SCALAR ||
std::accumulate(expectedDesc.getDims().begin(), expectedDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1;
} else if (expectedDesc.getLayout() == SCALAR) {
isScalarOutput = actualDesc.getLayout() == SCALAR ||
std::accumulate(actualDesc.getDims().begin(), actualDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1;
}

if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
auto outBlobDesc = MKLDNNMemoryDesc{expectedDesc};
auto outBlobMem = MKLDNNMemory(eng);
outBlobMem.Create(outBlobDesc, ext_blob_ptr, false);

outBlobMem.SetData(intr_blob, 0, false);
} else {
cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
}
}
}
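The workaround above reduces to one predicate: a shape is treated as interchangeable with SCALAR when its element count is 1. A minimal sketch (hypothetical helper name, mirroring the accumulate expression in the diff):

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// True when dims describe a single element (e.g. {} or {1,1,1,1}),
// i.e. the tensor is interchangeable with a SCALAR layout.
static bool isScalarLike(const std::vector<size_t>& dims) {
    return std::accumulate(dims.begin(), dims.end(), (size_t)1,
                           std::multiplies<size_t>()) == 1;
}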

76 changes: 26 additions & 50 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -98,9 +98,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
FuseConvolutionAndDWConvolution(graph);
graph.RemoveDroppedNodes();

FuseBinaryConvolutionAndFakeQuantize(graph);
graph.RemoveDroppedNodes();

FuseConvolutionSumAndConvolutionSumActivation(graph);
graph.RemoveDroppedNodes();

@@ -800,7 +797,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
}

auto childNode = parentNode->getChildEdgeAt(0)->getChild();
if (!parentNode->canFuseSimpleOperation(childNode)) {
if (!parentNode->canFuse(childNode)) {
parent++;
continue;
}
@@ -1063,7 +1060,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph)
auto& graphNodes = graph.GetNodes();

auto isSutableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Convolution && node->getChildEdges().size() == 1;
return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1;
};

auto parent = graphNodes.begin();
@@ -1073,9 +1070,10 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph)
parent++;
continue;
}
const auto parentNodeType = parentNode->getType();

auto childNode = parentNode->getChildEdgeAt(0)->getChild();
if (!parentNode->canFuseSimpleOperation(childNode)) {
if (!parentNode->canFuse(childNode)) {
parent++;
continue;
}
@@ -1086,7 +1084,7 @@
auto parentEdges = childNode->parentEdges;
for (auto &parentEdge : parentEdges) {
auto p_edge = parentEdge.lock();
if (p_edge->getParent()->getType() == Convolution)
if (p_edge->getParent()->getType() == parentNodeType)
continue;

removeEdge(graph, p_edge);
@@ -1097,47 +1095,6 @@
}
}

void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndFakeQuantize(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

auto isSutableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == BinaryConvolution && node->getChildEdges().size() == 1;
};

auto isSutableChildNode = [](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
if ((parentNode->isConstant() && !childNode->isConstant()) || childNode->getType() != FakeQuantize)
return false;

auto* binConv = dynamic_cast<MKLDNNBinaryConvolutionNode *>(parentNode.get());
if (!binConv) {
return false;
}

return binConv->canFuse(childNode);
};

for (int i = 0; i < graphNodes.size(); i++) {
auto parent = graphNodes[i];
if (!isSutableParentNode(parent)) continue;

auto child = parent->getChildEdgeAt(0)->getChild();
if (!isSutableChildNode(parent, child)) continue;

child->fuseInto(parent);

auto parents = child->parentEdges;
for (size_t i = 0; i < parents.size(); i++) {
auto p_edge = parents[i].lock();
if (p_edge->getParent()->getType() == BinaryConvolution)
continue;

removeEdge(graph, p_edge);
}

graph.DropNode(child);
}
}

void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

@@ -1269,14 +1226,33 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
bool isSutableParent1 = parent1->getType() == Convolution || parent1->getType() == BinaryConvolution;
bool isSutableParent2 = parent2->getType() == Convolution || parent2->getType() == BinaryConvolution;

auto canFuseSum = [](MKLDNNBinaryConvolutionNode *binConv, MKLDNNNodePtr fuseCandidate) {
if (binConv->getImplType() == impl_desc_type::ref)
return false;

if (binConv->isFusedWith(FakeQuantize))
return false;

if (fuseCandidate->getAlgorithm() == EltwiseAdd) {
for (auto& fusedNode : binConv->fusedWith) {
const auto eltwise = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(fusedNode);
if (eltwise && eltwise->isSpecialConvolutionAddFusing()) {
return false;
}
}
return true;
}
return false;
};

auto* binConvNode1 = dynamic_cast<MKLDNNBinaryConvolutionNode *>(parent1.get());
if (binConvNode1) {
isSutableParent1 = isSutableParent1 && binConvNode1->canFuse(graphNode);
isSutableParent1 = isSutableParent1 && canFuseSum(binConvNode1, graphNode);
}

auto* binConvNode2 = dynamic_cast<MKLDNNBinaryConvolutionNode *>(parent2.get());
if (binConvNode2) {
isSutableParent2 = isSutableParent2 && binConvNode2->canFuse(graphNode);
isSutableParent2 = isSutableParent2 && canFuseSum(binConvNode2, graphNode);
}

auto* convNode1 = dynamic_cast<MKLDNNConvolutionNode *>(parent1.get());
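In isolation, the new canFuseSum rule above reads as follows — a hedged sketch with made-up enum tags; the real code inspects MKLDNNEltwiseNode::isSpecialConvolutionAddFusing() over fusedWith rather than a flat tag list:

#include <algorithm>
#include <vector>

enum class Alg { EltwiseAdd, EltwiseRelu, FakeQuantize };

// Distilled predicate; mirrors the lambda above under the assumptions
// stated in the lead-in.
static bool canFuseSumSketch(bool isRefImpl, const std::vector<Alg>& fusedOps, Alg candidate) {
    if (isRefImpl)
        return false;                    // ref kernels take no sum post-op
    if (std::find(fusedOps.begin(), fusedOps.end(), Alg::FakeQuantize) != fusedOps.end())
        return false;                    // already fused with FakeQuantize
    if (candidate != Alg::EltwiseAdd)
        return false;                    // only a plain Add maps to "sum"
    // at most one convolution-add ("sum") fusing per node
    return std::find(fusedOps.begin(), fusedOps.end(), Alg::EltwiseAdd) == fusedOps.end();
}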