Commit
reorders replaces converts (#9)
yury-intel authored and maxnick committed Nov 16, 2020
1 parent b6f0858 commit 10f1faa
Showing 4 changed files with 203 additions and 1 deletion.
104 changes: 103 additions & 1 deletion inference-engine/src/mkldnn_plugin/bf16transformer.cpp
@@ -66,7 +66,33 @@ void BF16Transformer::convertToBFloat16(InferenceEngine::CNNNetwork &network) {
iter->insData[0].lock()->getPrecision() == Precision::FP32) {
iter->insData[0].lock()->setPrecision(Precision::BF16);
}

if (_initbf16.find(iter->type) != _initbf16.end()) {
for (size_t o = 0; o < iter->insData.size(); o++) {
if (inputs.find(iter->insData[o].lock()->getName()) != inputs.end()) {
std::string iterType = iter->type;
std::transform(iterType.begin(), iterType.end(), iterType.begin(),
[](unsigned char c){ return std::tolower(c); });
if (iterType == "convolution") {
// TODO: remove this workaround once a suitable BF16 implementation for convolution is added
break;
}
if (iter->insData[o].lock()->getPrecision() != Precision::FP32 &&
iter->insData[o].lock()->getPrecision() != Precision::BF16) {
break;
}
// insert convert
std::string layerName = iter->insData[o].lock()->getName() + "_" + std::to_string(o);
LayerParams cnnLayerParams{ layerName, "Convert", Precision::FP32 };
auto lay = new CNNLayer(cnnLayerParams);
std::map<std::string, std::string> par = {{"name", layerName}, {"type", "Convert"}, {"precision", "FP32"}};
lay->params = par;
CNNLayerPtr convertLayer(lay);
BF16Transformer::addLayerToCNNNetworkAfterData(iter->insData[o].lock(), convertLayer, iter->name, network);
// switch the layer's input data to BF16
iter->insData[o].lock()->setPrecision(Precision::BF16);
}
}
}
for (size_t o = 0; o < iter->outData.size(); o++) {
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
&& outputs.find(iter->outData[o]->getName()) == outputs.end()
@@ -262,3 +288,79 @@ InferenceEngine::MemoryBlob::Ptr BF16Transformer::convertBF16ToFloat(InferenceEn
}
return weightsFP32;
}
void BF16Transformer::addLayerToCNNNetworkAfterData(
DataPtr parentOutData,
CNNLayer::Ptr layer,
const std::string& nextLayerName,
ICNNNetwork& net,
const int childInsDataIndex) {
CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&net);
if (netImpl == nullptr) {
THROW_IE_EXCEPTION << "unexpected network type";
}

CNNLayerPtr nextLayer;
if (!nextLayerName.empty()) {
netImpl->getLayerByName(nextLayerName.c_str(), nextLayer, nullptr);
}

if (layer && (nextLayerName.empty() || (parentOutData == nullptr) || (childInsDataIndex != -1) ||
(getInputTo(parentOutData).find(nextLayerName) != getInputTo(parentOutData).end()))) {
auto getTensorDesc = [](CNNLayerPtr& nextLayer) {
const DataPtr insData = nextLayer->insData[0].lock();
return insData->getTensorDesc();
};

const TensorDesc& parentTensorDesc = parentOutData != nullptr ? parentOutData->getTensorDesc() : getTensorDesc(nextLayer);
DataPtr newEdgeAfterLayer(new Data(layer->name, parentTensorDesc));
newEdgeAfterLayer->setName(layer->name);
getCreatorLayer(newEdgeAfterLayer) = layer;
getInputTo(newEdgeAfterLayer).clear();


if (netImpl == nullptr) {
THROW_IE_EXCEPTION << "unexpected network type";
}
netImpl->addData(layer->name.c_str(), newEdgeAfterLayer);
IE_SUPPRESS_DEPRECATED_START
netImpl->addLayer(layer);
IE_SUPPRESS_DEPRECATED_END

if (parentOutData != nullptr) {
getInputTo(parentOutData)[layer->name] = layer;
layer->insData.push_back(parentOutData);
}
layer->outData.push_back(newEdgeAfterLayer);

if (!nextLayerName.empty()) {
// CNNLayerPtr nextLayer = getInputTo(parentOutData)[nextLayerName];
getInputTo(newEdgeAfterLayer)[nextLayerName] = nextLayer;

if (parentOutData != nullptr) {
getInputTo(parentOutData).erase(nextLayerName);

if (childInsDataIndex == -1) {
for (size_t i = 0; i < nextLayer->insData.size(); i++) {
if (nextLayer->insData[i].lock() == parentOutData) {
nextLayer->insData[i] = newEdgeAfterLayer;
}
}
} else {
nextLayer->insData[childInsDataIndex] = newEdgeAfterLayer;
}
} else {
nextLayer->insData.push_back(newEdgeAfterLayer);
}
} else {
CNNLayerPtr parent = getCreatorLayer(parentOutData).lock();
if (parent == nullptr) {
THROW_IE_EXCEPTION << "parent data is absent";
}
netImpl->removeOutput(parent->name);
netImpl->addData(layer->name.c_str(), newEdgeAfterLayer);
netImpl->addOutput(layer->name);
}
} else {
THROW_IE_EXCEPTION << "Invalid argument";
}
}
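The addLayerToCNNNetworkAfterData helper added above rewires the legacy CNNNetwork graph around a Data object. The standalone sketch below is a rough model of the common path only (parentOutData given, childInsDataIndex == -1); the Tensor/Node structs and names are hypothetical simplifications, not the InferenceEngine types. It shows the splice: the new layer is attached to the parent tensor, gets a fresh output tensor, and the named consumer is repointed to that output.

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Node;
struct Tensor {
    std::string name;
    Node* creator = nullptr;
    std::map<std::string, Node*> inputTo;  // consumers keyed by layer name
};
struct Node {
    std::string name;
    std::vector<Tensor*> insData;
    std::vector<Tensor*> outData;
};

// Splice `layer` between `parent` and its consumer `nextName`; `newEdge` becomes
// the layer's output tensor (mirrors the parentOutData != nullptr, childInsDataIndex == -1 path).
void addLayerAfterData(Tensor* parent, Node* layer, Tensor* newEdge, const std::string& nextName) {
    Node* next = parent->inputTo.at(nextName);
    newEdge->creator = layer;
    parent->inputTo[layer->name] = layer;   // parent now feeds the new layer
    layer->insData.push_back(parent);
    layer->outData.push_back(newEdge);
    newEdge->inputTo[nextName] = next;      // the new tensor feeds the old consumer
    parent->inputTo.erase(nextName);
    for (auto& in : next->insData)          // repoint the consumer's matching input
        if (in == parent) in = newEdge;
}

int main() {
    Tensor input{"input"};
    Node consumer{"fullyconnected", {&input}, {}};
    input.inputTo["fullyconnected"] = &consumer;

    Node convert{"input_0"};
    Tensor convertOut{"input_0"};
    addLayerAfterData(&input, &convert, &convertOut, "fullyconnected");

    std::cout << input.inputTo.begin()->first << "\n";   // input_0 (the inserted layer)
    std::cout << consumer.insData[0]->name << "\n";      // input_0 (the inserted layer's output)
}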
11 changes: 11 additions & 0 deletions inference-engine/src/mkldnn_plugin/bf16transformer.h
@@ -8,6 +8,7 @@
#include <caseless.hpp>
#include <string>
#include <set>
#include <legacy/details/ie_cnn_network_tools.h>

namespace MKLDNNPlugin {

@@ -67,6 +68,16 @@ class BF16Transformer {
*/
void convertToBFloat16(InferenceEngine::CNNNetwork &network);

/**
* Inserts the given layer right after the given data tensor and rewires the consumer named nextLayerName to read from the new layer's output
*/
static void addLayerToCNNNetworkAfterData(
InferenceEngine::DataPtr parentOutData,
InferenceEngine::CNNLayerPtr layer,
const std::string& nextLayerName,
InferenceEngine::ICNNNetwork& net,
const int childInsDataIndex = -1);

InferenceEngine::MemoryBlob::Ptr convertBF16ToFloat(InferenceEngine::MemoryBlob::Ptr);
};

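A hedged usage sketch of the newly exposed static helper, mirroring the call added in convertToBFloat16 above. The wrapper name insertConvertAfter and the include are assumptions; inputData, consumerName, and network are expected to come from the caller, exactly as in the transformer code.

#include <string>
#include "bf16transformer.h"  // assumed path; matches the header shown in this commit

// Splice a Convert layer between a network input and the named consumer, then
// switch the input data to BF16 (the same pattern convertToBFloat16 now uses).
static void insertConvertAfter(InferenceEngine::DataPtr inputData,
                               const std::string& consumerName,
                               InferenceEngine::ICNNNetwork& network,
                               size_t port) {
    using namespace InferenceEngine;
    std::string name = inputData->getName() + "_" + std::to_string(port);
    LayerParams params{ name, "Convert", Precision::FP32 };
    CNNLayerPtr convert(new CNNLayer(params));
    convert->params = {{"name", name}, {"type", "Convert"}, {"precision", "FP32"}};
    MKLDNNPlugin::BF16Transformer::addLayerToCNNNetworkAfterData(inputData, convert, consumerName, network);
    inputData->setPrecision(Precision::BF16);
}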
88 changes: 88 additions & 0 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -144,6 +144,9 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedNodes();

#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
ChangeConvertToReorder(graph);
graph.RemoveDroppedNodes();

DropDoubleReorders(graph);
graph.RemoveDroppedNodes();

@@ -1941,6 +1944,91 @@ void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) {
}
}
}

void MKLDNNGraphOptimizer::ChangeConvertToReorder(MKLDNNGraph& graph) {
auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
std::string inArgs, outArgs;
if (parentDesc.getPrecision() != childDesc.getPrecision()) {
inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name());
outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name());
}
if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) {
inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat());
outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat());
}
return inArgs + "_" + outArgs;
};
std::vector<Precision> continuousPrecisions{
Precision::BF16,
Precision::FP32
};
for (int ind = 0; ind < graph.GetNodes().size(); ind++) {
auto convertCandidate = graph.GetNodes().at(ind);
std::string nodeType = convertCandidate->getTypeStr();
std::transform(nodeType.begin(), nodeType.end(), nodeType.begin(),
[](unsigned char c){ return std::tolower(c); });
if (nodeType != "convert") {
continue;
}
auto inputPrecision = convertCandidate->getCnnLayer()->insData[0].lock()->getPrecision();
auto outputPrecision = convertCandidate->getCnnLayer()->outData[0]->getPrecision();
if (std::find(continuousPrecisions.begin(), continuousPrecisions.end(), inputPrecision) == continuousPrecisions.end() ||
std::find(continuousPrecisions.begin(), continuousPrecisions.end(), outputPrecision) == continuousPrecisions.end()) {
continue;
}
std::unordered_set<std::string> uniqueLayerNames;
for (auto node : graph.GetNodes()) {
uniqueLayerNames.insert(node->getCnnLayer()->name);
}
auto parentEdge = convertCandidate->getParentEdges()[0].lock();
auto parentNode = parentEdge->getParent();
for (size_t j = 0; j < convertCandidate->getChildEdges().size(); j++) {
auto &childEdge = convertCandidate->getChildEdgeAt(j);
auto childNode = childEdge->getChild();
// create reorder node
std::string basicLayerName = childEdge->getParent()->getName() + "_" +
reorderArgs(convertCandidate->getCnnLayer()->insData[0].lock()->getTensorDesc(),
convertCandidate->getCnnLayer()->outData[0]->getTensorDesc()) + "_" +
childEdge->getChild()->getName();
std::string layerName = basicLayerName;
int idx = 0;
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
idx++;
layerName = basicLayerName + "_" + std::to_string(idx);
}
CNNLayerPtr layer(new CNNLayer({layerName,
"Reorder",
convertCandidate->getCnnLayer()->outData[0]->getPrecision()}));
auto newReorder = std::make_shared<MKLDNNReorderNode>(layer, graph.getEngine(), graph.weightsCache);
newReorder->setDescs(convertCandidate->getCnnLayer()->insData[0].lock()->getTensorDesc(),
convertCandidate->getCnnLayer()->outData[0]->getTensorDesc());
// create the new edges and drop the unused node and edges
auto oldParentOutputPort = parentEdge->getInputNum();
auto oldChildInputPort = childEdge->getOutputNum();

MKLDNNEdgePtr newEdge1(new MKLDNNEdge(parentNode, newReorder, oldParentOutputPort, 0));
MKLDNNEdgePtr newEdge2(new MKLDNNEdge(newReorder, childNode, j, oldChildInputPort));

newReorder->parentEdges.push_back(newEdge1);
parentNode->childEdges.at(oldParentOutputPort) = newEdge1;
newReorder->childEdges.push_back(newEdge2);

newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();

childNode->parentEdges.push_back(newEdge2);
graph.GetEdges().push_back(newEdge1);
parentNode->removeEdge(parentEdge);
graph.GetEdges().push_back(newEdge2);
graph.GetNodes().push_back(newReorder);

parentEdge->drop();
childEdge->drop();
graph.DropNode(convertCandidate);
}
}
}
#endif

void MKLDNNGraphOptimizer::RemoveIOScaleShifts(MKLDNNGraph &graph) {
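The Reorder names built by ChangeConvertToReorder above encode the precision (and, when it differs, the memory format) change between the Convert's input and output, with a counter suffix to keep them unique among existing layer names. Below is a minimal standalone sketch of that naming scheme, assuming plain precision strings instead of the MKLDNNMemoryDesc-based reorderArgs lambda; the helper names are illustrative only.

#include <iostream>
#include <string>
#include <unordered_set>

// Simplified stand-in for reorderArgs: only the precision part, memory formats omitted.
static std::string reorderArgs(const std::string& inPrec, const std::string& outPrec) {
    std::string inArgs, outArgs;
    if (inPrec != outPrec) {
        inArgs = inPrec;
        outArgs = outPrec;
    }
    return inArgs + "_" + outArgs;
}

// Append "_1", "_2", ... until the name is unique, as the optimizer does.
static std::string makeUnique(std::string base, std::unordered_set<std::string>& used) {
    std::string name = base;
    int idx = 0;
    while (used.count(name)) {
        ++idx;
        name = base + "_" + std::to_string(idx);
    }
    used.insert(name);
    return name;
}

int main() {
    std::unordered_set<std::string> used{"data_FP32_BF16_conv1"};  // names already in the graph
    std::string base = "data_" + reorderArgs("FP32", "BF16") + "_conv1";
    std::cout << makeUnique(base, used) << "\n";  // prints "data_FP32_BF16_conv1_1"
}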
1 change: 1 addition & 0 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
@@ -46,6 +46,7 @@ class MKLDNNGraphOptimizer {
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
void DropDoubleReorders(MKLDNNGraph& graph);
void DropConvertReorder(MKLDNNGraph& graph);
void ChangeConvertToReorder(MKLDNNGraph &graph);
#endif
void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
void FuseBroadcastAndEltwise(MKLDNNGraph &graph);