diff --git a/inference-engine/src/mkldnn_plugin/bf16transformer.cpp b/inference-engine/src/mkldnn_plugin/bf16transformer.cpp
index 6c0f220f99f52b..17364594628e36 100644
--- a/inference-engine/src/mkldnn_plugin/bf16transformer.cpp
+++ b/inference-engine/src/mkldnn_plugin/bf16transformer.cpp
@@ -66,7 +66,33 @@ void BF16Transformer::convertToBFloat16(InferenceEngine::CNNNetwork &network) {
             iter->insData[0].lock()->getPrecision() == Precision::FP32) {
             iter->insData[0].lock()->setPrecision(Precision::BF16);
         }
-
+        if (_initbf16.find(iter->type) != _initbf16.end()) {
+            for (size_t o = 0; o < iter->insData.size(); o++) {
+                if (inputs.find(iter->insData[o].lock()->getName()) != inputs.end()) {
+                    std::string iterType = iter->type;
+                    std::transform(iterType.begin(), iterType.end(), iterType.begin(),
+                        [](unsigned char c) { return std::tolower(c); });
+                    if (iterType == "convolution") {
+                        // TODO: have to be removed after adding suitable implementation for convolution
+                        break;
+                    }
+                    if (iter->insData[o].lock()->getPrecision() != Precision::FP32 &&
+                        iter->insData[o].lock()->getPrecision() != Precision::BF16) {
+                        break;
+                    }
+                    // insert convert
+                    std::string layerName = iter->insData[o].lock()->getName() + "_" + std::to_string(o);
+                    LayerParams cnnLayerParams{ layerName, "Convert", Precision::FP32 };
+                    auto lay = new CNNLayer(cnnLayerParams);
+                    std::map<std::string, std::string> par = {{"name", layerName}, {"type", "Convert"}, {"precision", "FP32"}};
+                    lay->params = par;
+                    CNNLayerPtr convertLayer(lay);
+                    BF16Transformer::addLayerToCNNNetworkAfterData(iter->insData[o].lock(), convertLayer, iter->name, network);
+                    // set conv input as bf
+                    iter->insData[o].lock()->setPrecision(Precision::BF16);
+                }
+            }
+        }
         for (size_t o = 0; o < iter->outData.size(); o++) {
             if (inputs.find(iter->outData[o]->getName()) == inputs.end()
                 && outputs.find(iter->outData[o]->getName()) == outputs.end()
@@ -262,3 +288,79 @@ InferenceEngine::MemoryBlob::Ptr BF16Transformer::convertBF16ToFloat(InferenceEn
     }
     return weightsFP32;
 }
+void BF16Transformer::addLayerToCNNNetworkAfterData(
+        DataPtr parentOutData,
+        CNNLayer::Ptr layer,
+        const std::string& nextLayerName,
+        ICNNNetwork& net,
+        const int childInsDataIndex) {
+    CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&net);
+    if (netImpl == nullptr) {
+        THROW_IE_EXCEPTION << "unexpected network type";
+    }
+
+    CNNLayerPtr nextLayer;
+    if (!nextLayerName.empty()) {
+        netImpl->getLayerByName(nextLayerName.c_str(), nextLayer, nullptr);
+    }
+
+    if (layer && (nextLayerName.empty() || (parentOutData == nullptr) || (childInsDataIndex != -1) ||
+                  (getInputTo(parentOutData).find(nextLayerName) != getInputTo(parentOutData).end()))) {
+        auto getTensorDesc = [](CNNLayerPtr& nextLayer) {
+            const DataPtr insData = nextLayer->insData[0].lock();
+            return insData->getTensorDesc();
+        };
+
+        const TensorDesc& parentTensorDesc = parentOutData != nullptr ? parentOutData->getTensorDesc() : getTensorDesc(nextLayer);
+        DataPtr newEdgeAfterLayer(new Data(layer->name, parentTensorDesc));
+        newEdgeAfterLayer->setName(layer->name);
+        getCreatorLayer(newEdgeAfterLayer) = layer;
+        getInputTo(newEdgeAfterLayer).clear();
+
+
+        if (netImpl == nullptr) {
+            THROW_IE_EXCEPTION << "unexpected network type";
+        }
+        netImpl->addData(layer->name.c_str(), newEdgeAfterLayer);
+        IE_SUPPRESS_DEPRECATED_START
+        netImpl->addLayer(layer);
+        IE_SUPPRESS_DEPRECATED_END
+
+        if (parentOutData != nullptr) {
+            getInputTo(parentOutData)[layer->name] = layer;
+            layer->insData.push_back(parentOutData);
+        }
+        layer->outData.push_back(newEdgeAfterLayer);
+
+        if (!nextLayerName.empty()) {
+            // CNNLayerPtr nextLayer = getInputTo(parentOutData)[nextLayerName];
+            getInputTo(newEdgeAfterLayer)[nextLayerName] = nextLayer;
+
+            if (parentOutData != nullptr) {
+                getInputTo(parentOutData).erase(nextLayerName);
+
+                if (childInsDataIndex == -1) {
+                    for (size_t i = 0; i < nextLayer->insData.size(); i++) {
+                        if (nextLayer->insData[i].lock() == parentOutData) {
+                            nextLayer->insData[i] = newEdgeAfterLayer;
+                        }
+                    }
+                } else {
+                    nextLayer->insData[childInsDataIndex] = newEdgeAfterLayer;
+                }
+            } else {
+                nextLayer->insData.push_back(newEdgeAfterLayer);
+            }
+        } else {
+            CNNLayerPtr parent = getCreatorLayer(parentOutData).lock();
+            if (parent == nullptr) {
+                THROW_IE_EXCEPTION << "parent data is absent";
+            }
+            netImpl->removeOutput(parent->name);
+            netImpl->addData(layer->name.c_str(), newEdgeAfterLayer);
+            netImpl->addOutput(layer->name);
+        }
+    } else {
+        THROW_IE_EXCEPTION << "Invalid argument";
+    }
+}
\ No newline at end of file
diff --git a/inference-engine/src/mkldnn_plugin/bf16transformer.h b/inference-engine/src/mkldnn_plugin/bf16transformer.h
index 63adabbc0cff9c..3b24c8ae38ce90 100644
--- a/inference-engine/src/mkldnn_plugin/bf16transformer.h
+++ b/inference-engine/src/mkldnn_plugin/bf16transformer.h
@@ -8,6 +8,7 @@
 #include <string>
 #include <set>
 #include <caseless.hpp>
+#include <legacy/cnn_network_impl.hpp>
 
 namespace MKLDNNPlugin {
 
@@ -67,6 +68,16 @@ class BF16Transformer {
      */
     void convertToBFloat16(InferenceEngine::CNNNetwork &network);
 
+    /**
+     * inserts given layer after current tensor
+     */
+    static void addLayerToCNNNetworkAfterData(
+        InferenceEngine::DataPtr parentOutData,
+        InferenceEngine::CNNLayerPtr layer,
+        const std::string& nextLayerName,
+        InferenceEngine::ICNNNetwork& net,
+        const int childInsDataIndex = -1);
+
     InferenceEngine::MemoryBlob::Ptr convertBF16ToFloat(InferenceEngine::MemoryBlob::Ptr);
 };
 
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
index c70539511d6507..984cf51be8e6f2 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -144,6 +144,9 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) {
     graph.RemoveDroppedNodes();
 
 #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
+    ChangeConvertToReorder(graph);
+    graph.RemoveDroppedNodes();
+
     DropDoubleReorders(graph);
     graph.RemoveDroppedNodes();
 
@@ -1941,6 +1944,91 @@ void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) {
         }
     }
 }
+
+void MKLDNNGraphOptimizer::ChangeConvertToReorder(MKLDNNGraph& graph) {
+    auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
+        std::string inArgs, outArgs;
+        if (parentDesc.getPrecision() != childDesc.getPrecision()) {
+            inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name());
+            outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name());
+        }
+        if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) {
+            inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat());
+            outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat());
+        }
+        return inArgs + "_" + outArgs;
+    };
+    std::vector<Precision> continuousPrecisions{
+        Precision::BF16,
+        Precision::FP32
+    };
+    for (int ind = 0; ind < graph.GetNodes().size(); ind++) {
+        auto convertCandidate = graph.GetNodes().at(ind);
+        std::string nodeType = convertCandidate->getTypeStr();
+        std::transform(nodeType.begin(), nodeType.end(), nodeType.begin(),
+            [](unsigned char c) { return std::tolower(c); });
+        if (nodeType != "convert") {
+            continue;
+        }
+        auto inputPrecision = convertCandidate->getCnnLayer()->insData[0].lock()->getPrecision();
+        auto outputPrecision = convertCandidate->getCnnLayer()->outData[0]->getPrecision();
+        if (std::find(continuousPrecisions.begin(), continuousPrecisions.end(), inputPrecision) == continuousPrecisions.end() ||
+            std::find(continuousPrecisions.begin(), continuousPrecisions.end(), outputPrecision) == continuousPrecisions.end()) {
+            continue;
+        }
+        std::unordered_set<std::string> uniqueLayerNames;
+        for (auto node : graph.GetNodes()) {
+            uniqueLayerNames.insert(node->getCnnLayer()->name);
+        }
+        auto parentEdge = convertCandidate->getParentEdges()[0].lock();
+        auto parentNode = parentEdge->getParent();
+        for (size_t j = 0; j < convertCandidate->getChildEdges().size(); j++) {
+            auto &childEdge = convertCandidate->getChildEdgeAt(j);
+            auto childNode = childEdge->getChild();
+            // create reorder node
+            std::string basicLayerName = childEdge->getParent()->getName() + "_" +
+                                         reorderArgs(convertCandidate->getCnnLayer()->insData[0].lock()->getTensorDesc(),
+                                                     convertCandidate->getCnnLayer()->outData[0]->getTensorDesc()) + "_" +
+                                         childEdge->getChild()->getName();
+            std::string layerName = basicLayerName;
+            int idx = 0;
+            while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
+                idx++;
+                layerName = basicLayerName + "_" + std::to_string(idx);
+            }
+            CNNLayerPtr layer(new CNNLayer({layerName,
+                                            "Reorder",
+                                            convertCandidate->getCnnLayer()->outData[0]->getPrecision()}));
+            auto newReorder = std::make_shared<MKLDNNReorderNode>(layer, graph.getEngine(), graph.weightsCache);
+            newReorder->setDescs(convertCandidate->getCnnLayer()->insData[0].lock()->getTensorDesc(),
+                                 convertCandidate->getCnnLayer()->outData[0]->getTensorDesc());
+            // create new edges edges and drop unused node and edges
+            auto oldParentOutputPort = parentEdge->getInputNum();
+            auto oldChildInputPort = childEdge->getOutputNum();
+
+            MKLDNNEdgePtr newEdge1(new MKLDNNEdge(parentNode, newReorder, oldParentOutputPort, 0));
+            MKLDNNEdgePtr newEdge2(new MKLDNNEdge(newReorder, childNode, j, oldChildInputPort));
+
+            newReorder->parentEdges.push_back(newEdge1);
+            parentNode->childEdges.at(oldParentOutputPort) = newEdge1;
+            newReorder->childEdges.push_back(newEdge2);
+
+            newReorder->getSupportedDescriptors();
+            newReorder->initSupportedPrimitiveDescriptors();
+            newReorder->selectOptimalPrimitiveDescriptor();
+
+            childNode->parentEdges.push_back(newEdge2);
+            graph.GetEdges().push_back(newEdge1);
+            parentNode->removeEdge(parentEdge);
+            graph.GetEdges().push_back(newEdge2);
+            graph.GetNodes().push_back(newReorder);
+
+            parentEdge->drop();
+            childEdge->drop();
+            graph.DropNode(convertCandidate);
+        }
+    }
+}
 #endif
 
 void MKLDNNGraphOptimizer::RemoveIOScaleShifts(MKLDNNGraph &graph) {
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
index 54bdda68e3020b..e0a0f20654844b 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
@@ -46,6 +46,7 @@ class MKLDNNGraphOptimizer {
 #if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
     void DropDoubleReorders(MKLDNNGraph& graph);
     void DropConvertReorder(MKLDNNGraph& graph);
+    void ChangeConvertToReorder(MKLDNNGraph &graph);
 #endif
     void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
     void FuseBroadcastAndEltwise(MKLDNNGraph &graph);