test fixes (#56)
* test fixes

* [CPU] GRN node migration to nGraph

* Performance problem fixes. Part 3

* unused node creation fix

* small fix

* serialize test fixes

* fixes after review
Maxim Andronov authored and dmitry-gorokhov committed May 4, 2021
1 parent 2e1001f commit 7867d79
Showing 47 changed files with 522 additions and 442 deletions.
2 changes: 1 addition & 1 deletion inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -72,7 +72,7 @@ set(LAYERS
${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_elements.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_nd.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_tree.cpp
# ${CMAKE_CURRENT_SOURCE_DIR}/nodes/grn.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/grn.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/non_max_suppression.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/log_softmax.cpp
${CMAKE_CURRENT_SOURCE_DIR}/nodes/math.cpp
95 changes: 45 additions & 50 deletions inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp
@@ -20,6 +20,7 @@
#include <unordered_set>
#include <utility>
#include <cstring>
#include <ngraph/opsets/opset1.hpp>

using namespace MKLDNNPlugin;
using namespace InferenceEngine;
@@ -254,56 +255,50 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name)
}

bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const {
// TODO [NM]: reimplement w/o using legacy API
return false;
// InputsDataMap inputs = network.getInputsInfo();
//
// CNNLayerSet inputLayers;
// std::unordered_set<CNNLayer *> allLayers;
//
// if (inputs.empty())
// return false;
//
// auto & secondLayers = getInputTo(inputs.begin()->second->getInputData());
// if (secondLayers.empty())
// return false;
//
// bool check_result = true;
// details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) {
// auto type = TypeFromName(layer->type);
// // This is WA for Tile layer
// auto tileLayer = dynamic_cast<TileLayer *>(layer.get());
// if (tileLayer && tileLayer->axis)
// return;
//
// auto reshapeLayer = dynamic_cast<ReshapeLayer *>(layer.get());
// if (reshapeLayer &&
// type == Reshape &&
// (reshapeLayer->outData[0]->getTensorDesc().getDims()[0] ==
// reshapeLayer->insData[0].lock()->getTensorDesc().getDims()[0])) {
// return;
// }
//
// if (type != Input &&
// type != Output &&
// type != Convolution &&
// type != Deconvolution &&
// type != Activation &&
// type != Depthwise &&
// type != Lrn &&
// type != Pooling &&
// type != FullyConnected &&
// type != Gemm &&
// type != Softmax &&
// type != Split &&
// type != Concatenation &&
// type != Eltwise &&
// type != Copy) {
// check_result = false;
// }
// }, false);
//
// return check_result;
InputsDataMap inputs = network.getInputsInfo();

if (inputs.empty())
return false;

auto function = network.getFunction();
if (function == nullptr) {
IE_THROW() << "CPU plug-in doesn't support not ngraph-based model!";
}

auto ops = function->get_ordered_ops();
for (auto op : ops) {
auto type = TypeFromName(op->get_type_name());
if (type == Tile) {
const auto tile = std::dynamic_pointer_cast<const ngraph::opset1::Tile>(op);
if (!tile)
return false;
const auto repeatsNode = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(tile->get_input_node_shared_ptr(1));
if (!repeatsNode)
return false;
if (repeatsNode->cast_vector<int64_t>()[0] == 1)
continue;
}

if (type == Reshape) {
if (op->get_input_shape(0)[0] == op->get_output_shape(0)[0])
continue;
}

if (type != Input &&
type != Output &&
type != Convolution &&
type != Deconvolution &&
type != Lrn &&
type != Pooling &&
type != FullyConnected &&
type != MatMul &&
type != Softmax &&
type != Split &&
type != Concatenation &&
type != Eltwise) {
return false;
}
}

return true;
}
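For reference, the Tile check above can be read as one small predicate. A minimal sketch using the same ngraph calls as the diff (the helper name is hypothetical, not part of this change):

#include <cstdint>
#include <memory>
#include <ngraph/opsets/opset1.hpp>

// Hypothetical helper: true when a Tile op provably keeps the batch
// dimension intact (repeats[0] == 1), so dynamic batching stays safe.
static bool tileKeepsBatch(const std::shared_ptr<ngraph::Node>& op) {
    const auto tile = std::dynamic_pointer_cast<const ngraph::opset1::Tile>(op);
    if (!tile)
        return false;
    const auto repeats = std::dynamic_pointer_cast<const ngraph::opset1::Constant>(
            tile->get_input_node_shared_ptr(1));
    if (!repeats)
        return false;  // non-constant repeats: batch safety cannot be proven
    return repeats->cast_vector<int64_t>()[0] == 1;
}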

IE_SUPPRESS_DEPRECATED_START
104 changes: 63 additions & 41 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -188,6 +188,8 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
InputsDataMap inputsInfo = network.getInputsInfo();
OutputsDataMap outputsInfo = network.getOutputsInfo();

this->_name = network.getName();

std::shared_ptr<const ngraph::Function> func = network.getFunction();
if (!func) {
IE_THROW() << "Function pointer inside CNNNetwork is nullptr";
@@ -270,10 +272,12 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
// Add stub output node for unused outputs
for (auto unusedOutput : unusedOutputs) {
auto parentNode = op2node[unusedOutput.get_node_shared_ptr()];
auto newResult = std::make_shared<ngraph::op::v0::Result>(unusedOutput);
newResult->set_friendly_name(std::string("stub_") + std::to_string(unusedOutput.get_index()) + "_" + parentNode->getName());
const MKLDNNNodePtr outNode(MKLDNNNode::factory().create(newResult, getEngine(), extMgr, weightsCache));
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, unusedOutput.get_index(), 0));
const auto port = unusedOutput.get_index();
const auto nodeName = std::string("stub_") + std::to_string(port) + "_" + parentNode->getName();
const MKLDNNNodePtr outNode = std::make_shared<MKLDNNInputNode>(parentNode->outDims[port].ToSizeVector(),
parentNode->getOriginalOutputPrecisionAtPort(port),
nodeName, "Result", getEngine(), weightsCache);
MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, outNode, port, 0));
outNode->addEdge(edge);
graphEdges.push_back(edge);
graphNodes.push_back(outNode);
@@ -300,25 +304,19 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana
}
}

//
// // Replicate input nodes
// for (const auto& input : inputs) {
// auto inputLayer = getCreatorLayer(input.second->getInputData()).lock();
// inputNodesMap[input.first] = layer2node[inputLayer];
//
// // Loading mean images
// MKLDNNDims outDims;
// if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims())
// outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1));
// else
// outDims = MKLDNNDims(inputNodesMap[input.first]->getChildEdgeAt(0)->getDims());
// if (inputs.find(input.first) != inputs.end()) {
// InputInfo::Ptr ii = inputs[input.first];
// if (ii && ii->getPreProcess().getNumberOfChannels()) {
// _meanImages[input.first].Load(outDims, ii);
// }
// }
// }
// Loading mean images
for (const auto& input : inputsInfo) {
MKLDNNDims outDims;
if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims()) {
outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1));
} else {
outDims = inputNodesMap[input.first]->getChildEdgeAt(0)->getDims();
}
InputInfo::Ptr ii = inputsInfo[input.first];
if (ii && ii->getPreProcess().getNumberOfChannels()) {
_meanImages[input.first].Load(outDims, ii);
}
}
}
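A minimal distillation of the dims fallback in the loop above (hypothetical helper; plain std::vector<size_t> stands in for InferenceEngine::SizeVector):

#include <cstddef>
#include <vector>

// Scalar edges (ndims() == 0) are promoted to a one-element {1} shape
// before the mean image is loaded; anything else passes through.
static std::vector<size_t> meanImageDims(const std::vector<size_t>& edgeDims) {
    return edgeDims.empty() ? std::vector<size_t>(1, 1) : edgeDims;
}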

void MKLDNNGraph::InitGraph() {
@@ -477,23 +475,25 @@ void MKLDNNGraph::InitEdges() {

// Check if there is a reorder that supports the type conversion
if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() &&
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
IE_THROW() << "[NM] Not implemented";
// //If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
// std::string convertName = edge->getParent()->getName() + "_" +
// edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name();
//
// CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()}));
// auto convertNode = std::make_shared<MKLDNNConvertNode>(convert, this->getEngine(), this->weightsCache);
// convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc());
// InsertNode(edge, convertNode, true);
//
// //Check if reorder is still needed
// if (convertNode->getChildEdgeAt(0)->needReorder()) {
// edge = convertNode->getChildEdgeAt(0);
// } else {
// insertReorder = false;
// }
!isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) {
//If we are here, then we need to insert Convert, because there are no reorders that support such type conversion
const auto inDesc = edge->getInputDesc();
const auto outDesc = edge->getOutputDesc();

std::string convertName = edge->getParent()->getName() + "_" +
inDesc.getPrecision().name() + "_" + outDesc.getPrecision().name();

auto convertNode = std::make_shared<MKLDNNConvertNode>(inDesc.getDims(), inDesc.getPrecision(), outDesc.getPrecision(), convertName,
this->getEngine(), this->weightsCache);
convertNode->setDescs(inDesc, outDesc);
InsertNode(edge, convertNode, true);

//Check if reorder is still needed
if (convertNode->getChildEdgeAt(0)->needReorder()) {
edge = convertNode->getChildEdgeAt(0);
} else {
insertReorder = false;
}
}
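The Convert insertion above, together with the insertReorder check that follows, implements a two-step policy. A hedged distillation (the struct and helper are made up for illustration; isReorderAvailable is the real predicate used above):

#include <string>

// Toy model of the InitEdges() decision. Stand-in flags: the real code
// asks isReorderAvailable(inDesc, outDesc, engine) and needReorder().
struct EdgeInfo {
    std::string inPrec, outPrec;
    bool reorderSupportsConversion;  // result of isReorderAvailable(...)
    bool layoutStillMismatched;      // needReorder() after a Convert
};

// Returns the sequence of nodes to insert on the edge.
static std::string planEdgeInsertion(const EdgeInfo& e) {
    if (e.inPrec == e.outPrec || e.reorderSupportsConversion)
        return "Reorder";                        // a reorder handles everything
    // No reorder can convert inPrec -> outPrec: insert Convert first,
    // then re-check whether a layout Reorder is still required.
    return e.layoutStillMismatched ? "Convert -> Reorder" : "Convert";
}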

if (insertReorder) {
@@ -787,7 +787,29 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) {
MB_to_process = std::min<int>(config.batchLimit, MB_to_process);
size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB;

cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
const auto actualDesc = node->getParentEdgeAt(0)->getDesc();
const auto expectedDesc = ext_blob->getTensorDesc();

// TODO [NM]: need to create a universal reorder which will detect the cases when we really need to use it
// WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar
bool isScalarOutput = false;
if (actualDesc.getLayout() == SCALAR) {
isScalarOutput = expectedDesc.getLayout() == SCALAR ||
std::accumulate(expectedDesc.getDims().begin(), expectedDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1;
} else if (expectedDesc.getLayout() == SCALAR) {
isScalarOutput = actualDesc.getLayout() == SCALAR ||
std::accumulate(actualDesc.getDims().begin(), actualDesc.getDims().end(), (size_t)1, std::multiplies<size_t>()) == 1;
}

if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) {
auto outBlobDesc = MKLDNNMemoryDesc{expectedDesc};
auto outBlobMem = MKLDNNMemory(eng);
outBlobMem.Create(outBlobDesc, ext_blob_ptr, false);

outBlobMem.SetData(intr_blob, 0, false);
} else {
cpu_convert(intr_blob_ptr, ext_blob_ptr, srcPrec, dstPrec, size_to_copy);
}
}
}
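The workaround above reduces to one predicate: a shape is treated as interchangeable with SCALAR when its element count is 1. A minimal sketch (hypothetical helper name, mirroring the accumulate expression in the diff):

#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

// True when dims describe a single element (e.g. {} or {1,1,1,1}),
// i.e. the tensor is interchangeable with a SCALAR layout.
static bool isScalarLike(const std::vector<size_t>& dims) {
    return std::accumulate(dims.begin(), dims.end(), (size_t)1,
                           std::multiplies<size_t>()) == 1;
}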

76 changes: 26 additions & 50 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -98,9 +98,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) {
FuseConvolutionAndDWConvolution(graph);
graph.RemoveDroppedNodes();

FuseBinaryConvolutionAndFakeQuantize(graph);
graph.RemoveDroppedNodes();

FuseConvolutionSumAndConvolutionSumActivation(graph);
graph.RemoveDroppedNodes();

@@ -800,7 +797,7 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra
}

auto childNode = parentNode->getChildEdgeAt(0)->getChild();
if (!parentNode->canFuseSimpleOperation(childNode)) {
if (!parentNode->canFuse(childNode)) {
parent++;
continue;
}
@@ -1063,7 +1060,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph)
auto& graphNodes = graph.GetNodes();

auto isSutableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == Convolution && node->getChildEdges().size() == 1;
return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1;
};

auto parent = graphNodes.begin();
@@ -1073,9 +1070,10 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph)
parent++;
continue;
}
const auto parentNodeType = parentNode->getType();

auto childNode = parentNode->getChildEdgeAt(0)->getChild();
if (!parentNode->canFuseSimpleOperation(childNode)) {
if (!parentNode->canFuse(childNode)) {
parent++;
continue;
}
@@ -1086,7 +1084,7 @@
auto parentEdges = childNode->parentEdges;
for (auto &parentEdge : parentEdges) {
auto p_edge = parentEdge.lock();
if (p_edge->getParent()->getType() == Convolution)
if (p_edge->getParent()->getType() == parentNodeType)
continue;

removeEdge(graph, p_edge);
@@ -1097,47 +1095,6 @@
}
}

void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndFakeQuantize(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

auto isSutableParentNode = [](MKLDNNNodePtr node) {
return node->getType() == BinaryConvolution && node->getChildEdges().size() == 1;
};

auto isSutableChildNode = [](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) {
if ((parentNode->isConstant() && !childNode->isConstant()) || childNode->getType() != FakeQuantize)
return false;

auto* binConv = dynamic_cast<MKLDNNBinaryConvolutionNode *>(parentNode.get());
if (!binConv) {
return false;
}

return binConv->canFuse(childNode);
};

for (int i = 0; i < graphNodes.size(); i++) {
auto parent = graphNodes[i];
if (!isSutableParentNode(parent)) continue;

auto child = parent->getChildEdgeAt(0)->getChild();
if (!isSutableChildNode(parent, child)) continue;

child->fuseInto(parent);

auto parents = child->parentEdges;
for (size_t i = 0; i < parents.size(); i++) {
auto p_edge = parents[i].lock();
if (p_edge->getParent()->getType() == BinaryConvolution)
continue;

removeEdge(graph, p_edge);
}

graph.DropNode(child);
}
}

void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

@@ -1269,14 +1226,33 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG
bool isSutableParent1 = parent1->getType() == Convolution || parent1->getType() == BinaryConvolution;
bool isSutableParent2 = parent2->getType() == Convolution || parent2->getType() == BinaryConvolution;

auto canFuseSum = [](MKLDNNBinaryConvolutionNode *binConv, MKLDNNNodePtr fuseCandidate) {
if (binConv->getImplType() == impl_desc_type::ref)
return false;

if (binConv->isFusedWith(FakeQuantize))
return false;

if (fuseCandidate->getAlgorithm() == EltwiseAdd) {
for (auto& fusedNode : binConv->fusedWith) {
const auto eltwise = std::dynamic_pointer_cast<MKLDNNEltwiseNode>(fusedNode);
if (eltwise && eltwise->isSpecialConvolutionAddFusing()) {
return false;
}
}
return true;
}
return false;
};

auto* binConvNode1 = dynamic_cast<MKLDNNBinaryConvolutionNode *>(parent1.get());
if (binConvNode1) {
isSutableParent1 = isSutableParent1 && binConvNode1->canFuse(graphNode);
isSutableParent1 = isSutableParent1 && canFuseSum(binConvNode1, graphNode);
}

auto* binConvNode2 = dynamic_cast<MKLDNNBinaryConvolutionNode *>(parent2.get());
if (binConvNode2) {
isSutableParent2 = isSutableParent2 && binConvNode2->canFuse(graphNode);
isSutableParent2 = isSutableParent2 && canFuseSum(binConvNode2, graphNode);
}

auto* convNode1 = dynamic_cast<MKLDNNConvolutionNode *>(parent1.get());
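In isolation, the new canFuseSum rule above reads as follows — a hedged sketch with made-up enum tags; the real code inspects MKLDNNEltwiseNode::isSpecialConvolutionAddFusing() over fusedWith rather than a flat tag list:

#include <algorithm>
#include <vector>

enum class Alg { EltwiseAdd, EltwiseRelu, FakeQuantize };

// Distilled predicate; mirrors the lambda above under the assumptions
// stated in the lead-in.
static bool canFuseSumSketch(bool isRefImpl, const std::vector<Alg>& fusedOps, Alg candidate) {
    if (isRefImpl)
        return false;                    // ref kernels take no sum post-op
    if (std::find(fusedOps.begin(), fusedOps.end(), Alg::FakeQuantize) != fusedOps.end())
        return false;                    // already fused with FakeQuantize
    if (candidate != Alg::EltwiseAdd)
        return false;                    // only a plain Add maps to "sum"
    // at most one convolution-add ("sum") fusing per node
    return std::find(fusedOps.begin(), fusedOps.end(), Alg::EltwiseAdd) == fusedOps.end();
}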