Commit
reorders replaces converts (#9)
yury-intel authored and maxnick committed Nov 16, 2020
1 parent b6f0858 commit 10f1faa
Showing 4 changed files with 203 additions and 1 deletion.
104 changes: 103 additions & 1 deletion inference-engine/src/mkldnn_plugin/bf16transformer.cpp
@@ -66,7 +66,33 @@ void BF16Transformer::convertToBFloat16(InferenceEngine::CNNNetwork &network) {
iter->insData[0].lock()->getPrecision() == Precision::FP32) {
iter->insData[0].lock()->setPrecision(Precision::BF16);
}

if (_initbf16.find(iter->type) != _initbf16.end()) {
for (size_t o = 0; o < iter->insData.size(); o++) {
if (inputs.find(iter->insData[o].lock()->getName()) != inputs.end()) {
std::string iterType = iter->type;
std::transform(iterType.begin(), iterType.end(), iterType.begin(),
[](unsigned char c){ return std::tolower(c); });
if (iterType == "convolution") {
// TODO: remove this workaround once a suitable BF16 implementation for convolution is added
break;
}
if (iter->insData[o].lock()->getPrecision() != Precision::FP32 &&
iter->insData[o].lock()->getPrecision() != Precision::BF16) {
break;
}
// insert convert
std::string layerName = iter->insData[o].lock()->getName() + "_" + std::to_string(o);
LayerParams cnnLayerParams{ layerName, "Convert", Precision::FP32 };
auto lay = new CNNLayer(cnnLayerParams);
std::map<std::string, std::string> par = {{"name", layerName}, {"type", "Convert"}, {"precision", "FP32"}};
lay->params = par;
CNNLayerPtr convertLayer(lay);
BF16Transformer::addLayerToCNNNetworkAfterData(iter->insData[o].lock(), convertLayer, iter->name, network);
// switch the layer's input data to BF16
iter->insData[o].lock()->setPrecision(Precision::BF16);
}
}
}
for (size_t o = 0; o < iter->outData.size(); o++) {
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
&& outputs.find(iter->outData[o]->getName()) == outputs.end()
@@ -262,3 +288,79 @@ InferenceEngine::MemoryBlob::Ptr BF16Transformer::convertBF16ToFloat(InferenceEn
}
return weightsFP32;
}
void BF16Transformer::addLayerToCNNNetworkAfterData(
DataPtr parentOutData,
CNNLayer::Ptr layer,
const std::string& nextLayerName,
ICNNNetwork& net,
const int childInsDataIndex) {
CNNNetworkImpl* netImpl = dynamic_cast<CNNNetworkImpl*>(&net);
if (netImpl == nullptr) {
THROW_IE_EXCEPTION << "unexpected network type";
}

CNNLayerPtr nextLayer;
if (!nextLayerName.empty()) {
netImpl->getLayerByName(nextLayerName.c_str(), nextLayer, nullptr);
}

if (layer && (nextLayerName.empty() || (parentOutData == nullptr) || (childInsDataIndex != -1) ||
(getInputTo(parentOutData).find(nextLayerName) != getInputTo(parentOutData).end()))) {
auto getTensorDesc = [](CNNLayerPtr& nextLayer) {
const DataPtr insData = nextLayer->insData[0].lock();
return insData->getTensorDesc();
};

const TensorDesc& parentTensorDesc = parentOutData != nullptr ? parentOutData->getTensorDesc() : getTensorDesc(nextLayer);
DataPtr newEdgeAfterLayer(new Data(layer->name, parentTensorDesc));
newEdgeAfterLayer->setName(layer->name);
getCreatorLayer(newEdgeAfterLayer) = layer;
getInputTo(newEdgeAfterLayer).clear();


if (netImpl == nullptr) {
THROW_IE_EXCEPTION << "unexpected network type";
}
netImpl->addData(layer->name.c_str(), newEdgeAfterLayer);
IE_SUPPRESS_DEPRECATED_START
netImpl->addLayer(layer);
IE_SUPPRESS_DEPRECATED_END

if (parentOutData != nullptr) {
getInputTo(parentOutData)[layer->name] = layer;
layer->insData.push_back(parentOutData);
}
layer->outData.push_back(newEdgeAfterLayer);

if (!nextLayerName.empty()) {
// CNNLayerPtr nextLayer = getInputTo(parentOutData)[nextLayerName];
getInputTo(newEdgeAfterLayer)[nextLayerName] = nextLayer;

if (parentOutData != nullptr) {
getInputTo(parentOutData).erase(nextLayerName);

if (childInsDataIndex == -1) {
for (size_t i = 0; i < nextLayer->insData.size(); i++) {
if (nextLayer->insData[i].lock() == parentOutData) {
nextLayer->insData[i] = newEdgeAfterLayer;
}
}
} else {
nextLayer->insData[childInsDataIndex] = newEdgeAfterLayer;
}
} else {
nextLayer->insData.push_back(newEdgeAfterLayer);
}
} else {
CNNLayerPtr parent = getCreatorLayer(parentOutData).lock();
if (parent == nullptr) {
THROW_IE_EXCEPTION << "parent data is absent";
}
netImpl->removeOutput(parent->name);
netImpl->addData(layer->name.c_str(), newEdgeAfterLayer);
netImpl->addOutput(layer->name);
}
} else {
THROW_IE_EXCEPTION << "Invalid argument";
}
}
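The addLayerToCNNNetworkAfterData helper added above rewires the legacy CNNNetwork graph around a Data object. The standalone sketch below is a rough model of the common path only (parentOutData given, childInsDataIndex == -1); the Tensor/Node structs and names are hypothetical simplifications, not the InferenceEngine types. It shows the splice: the new layer is attached to the parent tensor, gets a fresh output tensor, and the named consumer is repointed to that output.

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Node;
struct Tensor {
    std::string name;
    Node* creator = nullptr;
    std::map<std::string, Node*> inputTo;  // consumers keyed by layer name
};
struct Node {
    std::string name;
    std::vector<Tensor*> insData;
    std::vector<Tensor*> outData;
};

// Splice `layer` between `parent` and its consumer `nextName`; `newEdge` becomes
// the layer's output tensor (mirrors the parentOutData != nullptr, childInsDataIndex == -1 path).
void addLayerAfterData(Tensor* parent, Node* layer, Tensor* newEdge, const std::string& nextName) {
    Node* next = parent->inputTo.at(nextName);
    newEdge->creator = layer;
    parent->inputTo[layer->name] = layer;   // parent now feeds the new layer
    layer->insData.push_back(parent);
    layer->outData.push_back(newEdge);
    newEdge->inputTo[nextName] = next;      // the new tensor feeds the old consumer
    parent->inputTo.erase(nextName);
    for (auto& in : next->insData)          // repoint the consumer's matching input
        if (in == parent) in = newEdge;
}

int main() {
    Tensor input{"input"};
    Node consumer{"fullyconnected", {&input}, {}};
    input.inputTo["fullyconnected"] = &consumer;

    Node convert{"input_0"};
    Tensor convertOut{"input_0"};
    addLayerAfterData(&input, &convert, &convertOut, "fullyconnected");

    std::cout << input.inputTo.begin()->first << "\n";   // input_0 (the inserted layer)
    std::cout << consumer.insData[0]->name << "\n";      // input_0 (the inserted layer's output)
}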
11 changes: 11 additions & 0 deletions inference-engine/src/mkldnn_plugin/bf16transformer.h
@@ -8,6 +8,7 @@
#include <caseless.hpp>
#include <string>
#include <set>
#include <legacy/details/ie_cnn_network_tools.h>

namespace MKLDNNPlugin {

@@ -67,6 +68,16 @@ class BF16Transformer {
*/
void convertToBFloat16(InferenceEngine::CNNNetwork &network);

/**
* Inserts the given layer right after the given data tensor and rewires the consumer named nextLayerName to read from the new layer's output
*/
static void addLayerToCNNNetworkAfterData(
InferenceEngine::DataPtr parentOutData,
InferenceEngine::CNNLayerPtr layer,
const std::string& nextLayerName,
InferenceEngine::ICNNNetwork& net,
const int childInsDataIndex = -1);

InferenceEngine::MemoryBlob::Ptr convertBF16ToFloat(InferenceEngine::MemoryBlob::Ptr);
};

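A hedged usage sketch of the newly exposed static helper, mirroring the call added in convertToBFloat16 above. The wrapper name insertConvertAfter and the include are assumptions; inputData, consumerName, and network are expected to come from the caller, exactly as in the transformer code.

#include <string>
#include "bf16transformer.h"  // assumed path; matches the header shown in this commit

// Splice a Convert layer between a network input and the named consumer, then
// switch the input data to BF16 (the same pattern convertToBFloat16 now uses).
static void insertConvertAfter(InferenceEngine::DataPtr inputData,
                               const std::string& consumerName,
                               InferenceEngine::ICNNNetwork& network,
                               size_t port) {
    using namespace InferenceEngine;
    std::string name = inputData->getName() + "_" + std::to_string(port);
    LayerParams params{ name, "Convert", Precision::FP32 };
    CNNLayerPtr convert(new CNNLayer(params));
    convert->params = {{"name", name}, {"type", "Convert"}, {"precision", "FP32"}};
    MKLDNNPlugin::BF16Transformer::addLayerToCNNNetworkAfterData(inputData, convert, consumerName, network);
    inputData->setPrecision(Precision::BF16);
}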
88 changes: 88 additions & 0 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -144,6 +144,9 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap
graph.RemoveDroppedNodes();

#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
ChangeConvertToReorder(graph);
graph.RemoveDroppedNodes();

DropDoubleReorders(graph);
graph.RemoveDroppedNodes();

@@ -1941,6 +1944,91 @@ void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) {
}
}
}

void MKLDNNGraphOptimizer::ChangeConvertToReorder(MKLDNNGraph& graph) {
auto reorderArgs = [](const InferenceEngine::TensorDesc &parentDesc, const InferenceEngine::TensorDesc &childDesc) {
std::string inArgs, outArgs;
if (parentDesc.getPrecision() != childDesc.getPrecision()) {
inArgs += (inArgs.empty() ? "" : "_") + std::string(parentDesc.getPrecision().name());
outArgs += (outArgs.empty() ? "" : "_") + std::string(childDesc.getPrecision().name());
}
if (MKLDNNMemoryDesc(parentDesc).getFormat() != MKLDNNMemoryDesc(childDesc).getFormat()) {
inArgs += (inArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(parentDesc).getFormat());
outArgs += (outArgs.empty() ? "" : "_") + MKLDNNMemory::formatToString(MKLDNNMemoryDesc(childDesc).getFormat());
}
return inArgs + "_" + outArgs;
};
std::vector<Precision> continuousPrecisions{
Precision::BF16,
Precision::FP32
};
for (int ind = 0; ind < graph.GetNodes().size(); ind++) {
auto convertCandidate = graph.GetNodes().at(ind);
std::string nodeType = convertCandidate->getTypeStr();
std::transform(nodeType.begin(), nodeType.end(), nodeType.begin(),
[](unsigned char c){ return std::tolower(c); });
if (nodeType != "convert") {
continue;
}
auto inputPrecision = convertCandidate->getCnnLayer()->insData[0].lock()->getPrecision();
auto outputPrecision = convertCandidate->getCnnLayer()->outData[0]->getPrecision();
if (std::find(continuousPrecisions.begin(), continuousPrecisions.end(), inputPrecision) == continuousPrecisions.end() ||
std::find(continuousPrecisions.begin(), continuousPrecisions.end(), outputPrecision) == continuousPrecisions.end()) {
continue;
}
std::unordered_set<std::string> uniqueLayerNames;
for (auto node : graph.GetNodes()) {
uniqueLayerNames.insert(node->getCnnLayer()->name);
}
auto parentEdge = convertCandidate->getParentEdges()[0].lock();
auto parentNode = parentEdge->getParent();
for (size_t j = 0; j < convertCandidate->getChildEdges().size(); j++) {
auto &childEdge = convertCandidate->getChildEdgeAt(j);
auto childNode = childEdge->getChild();
// create reorder node
std::string basicLayerName = childEdge->getParent()->getName() + "_" +
reorderArgs(convertCandidate->getCnnLayer()->insData[0].lock()->getTensorDesc(),
convertCandidate->getCnnLayer()->outData[0]->getTensorDesc()) + "_" +
childEdge->getChild()->getName();
std::string layerName = basicLayerName;
int idx = 0;
while (uniqueLayerNames.find(layerName) != uniqueLayerNames.end()) {
idx++;
layerName = basicLayerName + "_" + std::to_string(idx);
}
CNNLayerPtr layer(new CNNLayer({layerName,
"Reorder",
convertCandidate->getCnnLayer()->outData[0]->getPrecision()}));
auto newReorder = std::make_shared<MKLDNNReorderNode>(layer, graph.getEngine(), graph.weightsCache);
newReorder->setDescs(convertCandidate->getCnnLayer()->insData[0].lock()->getTensorDesc(),
convertCandidate->getCnnLayer()->outData[0]->getTensorDesc());
// create the new edges and drop the unused node and edges
auto oldParentOutputPort = parentEdge->getInputNum();
auto oldChildInputPort = childEdge->getOutputNum();

MKLDNNEdgePtr newEdge1(new MKLDNNEdge(parentNode, newReorder, oldParentOutputPort, 0));
MKLDNNEdgePtr newEdge2(new MKLDNNEdge(newReorder, childNode, j, oldChildInputPort));

newReorder->parentEdges.push_back(newEdge1);
parentNode->childEdges.at(oldParentOutputPort) = newEdge1;
newReorder->childEdges.push_back(newEdge2);

newReorder->getSupportedDescriptors();
newReorder->initSupportedPrimitiveDescriptors();
newReorder->selectOptimalPrimitiveDescriptor();

childNode->parentEdges.push_back(newEdge2);
graph.GetEdges().push_back(newEdge1);
parentNode->removeEdge(parentEdge);
graph.GetEdges().push_back(newEdge2);
graph.GetNodes().push_back(newReorder);

parentEdge->drop();
childEdge->drop();
graph.DropNode(convertCandidate);
}
}
}
#endif

void MKLDNNGraphOptimizer::RemoveIOScaleShifts(MKLDNNGraph &graph) {
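The Reorder names built by ChangeConvertToReorder above encode the precision (and, when it differs, the memory format) change between the Convert's input and output, with a counter suffix to keep them unique among existing layer names. Below is a minimal standalone sketch of that naming scheme, assuming plain precision strings instead of the MKLDNNMemoryDesc-based reorderArgs lambda; the helper names are illustrative only.

#include <iostream>
#include <string>
#include <unordered_set>

// Simplified stand-in for reorderArgs: only the precision part, memory formats omitted.
static std::string reorderArgs(const std::string& inPrec, const std::string& outPrec) {
    std::string inArgs, outArgs;
    if (inPrec != outPrec) {
        inArgs = inPrec;
        outArgs = outPrec;
    }
    return inArgs + "_" + outArgs;
}

// Append "_1", "_2", ... until the name is unique, as the optimizer does.
static std::string makeUnique(std::string base, std::unordered_set<std::string>& used) {
    std::string name = base;
    int idx = 0;
    while (used.count(name)) {
        ++idx;
        name = base + "_" + std::to_string(idx);
    }
    used.insert(name);
    return name;
}

int main() {
    std::unordered_set<std::string> used{"data_FP32_BF16_conv1"};  // names already in the graph
    std::string base = "data_" + reorderArgs("FP32", "BF16") + "_conv1";
    std::cout << makeUnique(base, used) << "\n";  // prints "data_FP32_BF16_conv1_1"
}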
1 change: 1 addition & 0 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h
@@ -46,6 +46,7 @@ class MKLDNNGraphOptimizer {
#if defined (COMPILED_CPU_MKLDNN_REORDER_NODE)
void DropDoubleReorders(MKLDNNGraph& graph);
void DropConvertReorder(MKLDNNGraph& graph);
void ChangeConvertToReorder(MKLDNNGraph &graph);
#endif
void FuseConvolutionAndZeroPoints(MKLDNNGraph &graph);
void FuseBroadcastAndEltwise(MKLDNNGraph &graph);