diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
index fb06dc1e5fdcdb..0801af773632ae 100644
--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt
+++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt
@@ -12,8 +12,115 @@ if (WIN32)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX")
 endif()
 
-file(GLOB_RECURSE SOURCES
-        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
+## TODO
+set(LAYERS
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_batchnorm_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_bin_conv_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_concat_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_conv_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_crop_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_deconv_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_def_conv_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_eltwise_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_fullyconnected_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_gemm_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_generic_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_input_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_lrn_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_memory_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pad_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_permute_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pooling_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_quantize_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reorder_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reshape_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_rnn.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roi_align_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roi_pooling_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_softmax_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_split_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tensoriterator_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tile_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_mvn_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_scatter_update_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_interpolate_node.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reduce_node.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reference_node.cpp
+
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/batch_to_space.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/broadcast.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/convert.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_loss.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/depth_to_space.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput_onnx.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/embedding_bag_offset_sum.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/embedding_bag_packed_sum.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/embedding_bag_sum.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/embedding_segments_sum.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/extract_image_patches.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/fill.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_elements.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_nd.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_tree.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/grn.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/non_max_suppression.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/log_softmax.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/math.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/one_hot.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/powerfile.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/priorbox.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/priorbox_clustered.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/priorgridgenerator_onnx.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_onnx.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/psroi.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/range.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/region_yolo.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/reorg_yolo.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/reverse_sequence.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/roifeatureextractor_onnx.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/select.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/shuffle_channels.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/simplernms.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/space_to_batch.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/space_to_depth.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/sparse_fill_empty_rows.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/sparse_segment_reduce.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/sparse_weighted_reduce.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/sparse_to_dense.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/bucketize.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/squeeze.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/strided_slice.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topkrois_onnx.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unique.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/unsqueeze.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/softmax.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/emitter.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_eltwise_emitters.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/nodes/jit_mkldnn_emitters.cpp
+#
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/argmax_imp.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_imp.cpp
+#   ${CMAKE_CURRENT_SOURCE_DIR}/nodes/cum_sum.cpp
+)
+
+file(GLOB SOURCES
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/utils/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/utils/rt_info/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/*.cpp
+        ${CMAKE_CURRENT_SOURCE_DIR}/emitters/*.cpp
+        ${LAYERS}
+        ${OS_SPECIFIC_SRC}
+)
 
 file(GLOB HEADERS
         ${CMAKE_CURRENT_SOURCE_DIR}/*.h
@@ -50,8 +157,10 @@ if(SELECTIVE_BUILD STREQUAL "ON")
     endif()
 endif()
 
-target_link_libraries(${TARGET_NAME} PRIVATE mkldnn inference_engine inference_engine_legacy
-        inference_engine_transformations inference_engine_lp_transformations)
+target_link_libraries(${TARGET_NAME} PRIVATE mkldnn
+    inference_engine
+    inference_engine_transformations
+    inference_engine_lp_transformations)
 
 target_include_directories(${TARGET_NAME} PRIVATE $)
 
@@ -81,7 +190,6 @@ add_library(${TARGET_NAME}_obj OBJECT ${SOURCES} ${HEADERS})
 target_link_libraries(${TARGET_NAME}_obj PUBLIC mkldnn)
 
 target_include_directories(${TARGET_NAME}_obj PRIVATE $
-        $
        $
        $
        $

diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_eltwise_emitters.cpp b/inference-engine/src/mkldnn_plugin/emitters/jit_eltwise_emitters.cpp
index 0bb8b152be0c04..97d2ebf312254f 100644
--- a/inference-engine/src/mkldnn_plugin/emitters/jit_eltwise_emitters.cpp
+++ b/inference-engine/src/mkldnn_plugin/emitters/jit_eltwise_emitters.cpp
@@ -4,8 +4,6 @@
"jit_eltwise_emitters.hpp" #include -#include "legacy/ie_layers.h" - #include using namespace InferenceEngine; @@ -1307,13 +1305,17 @@ jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_ } jit_power_static_emitter::jit_power_static_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, Precision exec_prc) : jit_emitter(host, host_isa, node, exec_prc) { +<<<<<<< c9d0292929cecb8988b8473ae4fe13022a5f8a81 auto *powerLayer = dynamic_cast(node->getCnnLayer().get()); if (powerLayer == nullptr) IE_THROW() << "Cannot convert power layer."; +======= + THROW_IE_EXCEPTION << "[NM] Not implemented"; +>>>>>>> [CPU] Plug-in migration on ngraph initial commit - power = powerLayer->power; - scale = powerLayer->scale; - shift = powerLayer->offset; +// power = powerLayer->power; +// scale = powerLayer->scale; +// shift = powerLayer->offset; prepare_table(); } diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp index 3907bf5b9a0c84..276791b7d7de6d 100644 --- a/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/emitters/jit_load_store_emitters.cpp @@ -4,7 +4,6 @@ #include "jit_emitter.hpp" #include "jit_load_store_emitters.hpp" -#include "legacy/ie_layers.h" #include #include "utils/bfloat16.hpp" diff --git a/inference-engine/src/mkldnn_plugin/emitters/jit_mkldnn_emitters.cpp b/inference-engine/src/mkldnn_plugin/emitters/jit_mkldnn_emitters.cpp index af1555694b81cb..02371895f59bf3 100644 --- a/inference-engine/src/mkldnn_plugin/emitters/jit_mkldnn_emitters.cpp +++ b/inference-engine/src/mkldnn_plugin/emitters/jit_mkldnn_emitters.cpp @@ -25,7 +25,7 @@ jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, jit_mkldnn_emitter::jit_mkldnn_emitter(jit_generator *host, cpu_isa_t host_isa, const MKLDNNNode* node, InferenceEngine::Precision exec_prc) : jit_emitter(host, host_isa, node, exec_prc) { auto eltwiseNode = dynamic_cast(node); - kind = static_cast(eltwiseNode->getAlgorithm()); + kind = static_cast(eltwiseNode->getMKLDNNAlgorithm()); alpha = eltwiseNode->getAlpha(); beta = eltwiseNode->getBeta(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp index 0fcac236356f90..e6090c5cd15f65 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.cpp @@ -4,7 +4,6 @@ #include #include -#include #include "mkldnn_exec_network.h" #include "mkldnn_async_infer_request.h" @@ -12,8 +11,6 @@ #include "mkldnn_memory_state.h" #include "mkldnn_itt.h" #include "nodes/mkldnn_memory_node.hpp" -#include -#include #include #include @@ -23,7 +20,6 @@ #include #include #include -#include using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -43,184 +39,65 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, extensionManager(extMgr), _cfg{cfg}, _name{network.getName()}, - _numaNodesWeights(numaNodesWeights) { + _numaNodesWeights(numaNodesWeights), + _network(network) { OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "MKLDNNExecNetwork", "cloneNet"); - // we are cloning network if we have statistics and we can transform network. - _clonedNetwork = cloneNetwork(network); - - if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) { - // Check if network is INT8 or Binary. 
- // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution: - // BF16 + INT8 or BF16 + BIN. - bool isFloatModel = true; - CNNNetworkIterator iter(network); - while (iter != CNNNetworkIterator()) { - if (CaselessEq()((*iter)->type, "FakeQuantize")) { - isFloatModel = false; - break; - } - iter++; - } - - auto changePrecisionBF16 = [&](Precision current, Precision target) { - InputsDataMap inputs = _clonedNetwork.getInputsInfo(); - OutputsDataMap outputs = _clonedNetwork.getOutputsInfo(); - CNNNetworkIterator iter(_clonedNetwork); - while (iter != CNNNetworkIterator()) { - // check, if memory output node needs to be transformed - if (current == Precision::FP32 && - (*iter)->type == "Memory" && (*iter)->outData.size() == 0 && - (*iter)->insData[0].lock()->getPrecision() == current) { - (*iter)->insData[0].lock()->setPrecision(target); - } - - for (size_t o = 0; o < (*iter)->outData.size(); o++) { - if (inputs.find((*iter)->outData[o]->getName()) == inputs.end() - && outputs.find((*iter)->outData[o]->getName()) == outputs.end() - && !CaselessEq()((*iter)->type, "const") - && (*iter)->outData[o]->getPrecision() == current) { - (*iter)->outData[o]->setPrecision(target); - } - } - iter++; - } - }; - - if (with_cpu_x86_avx512_core() && isFloatModel) { - // If enforceBF16 flag was set, BF16 transformation applies for all layers supported by CPU plugin. - // Otherwise, only layers marked as BF16 in '_clonedNetwork' will be performed in bfloat16 mode. - // CPU plugin throws an exception, if marked as BF16 layers have not supported by CPU plugin. - if (cfg.enforceBF16 == true) - changePrecisionBF16(Precision::FP32, Precision::BF16); - } else { - changePrecisionBF16(Precision::BF16, Precision::FP32); - } - } - - OV_ITT_TASK_NEXT(taskChain, "createConstInputs"); - auto createConstInputTo = [&](CNNLayerPtr layer, Blob::Ptr blob, const std::vector& shape, const std::string& name) { - LayerParams attrs = {layer->name + "_const_" + name, "Const", blob->getTensorDesc().getPrecision()}; - auto constLayer = std::make_shared(attrs); - constLayer->blobs["custom"] = blob; - - const TensorDesc& td = {blob->getTensorDesc().getPrecision(), shape, TensorDesc::getLayoutByDims(shape)}; - - DataPtr newEdgeAfterLayer(new Data(constLayer->name, td)); - newEdgeAfterLayer->setName(constLayer->name); - getCreatorLayer(newEdgeAfterLayer) = constLayer; - getInputTo(newEdgeAfterLayer).clear(); - - IE_SUPPRESS_DEPRECATED_START - auto icnnnet = static_cast(_clonedNetwork); - IE_SUPPRESS_DEPRECATED_END - auto implNetwork = std::dynamic_pointer_cast(icnnnet); - IE_ASSERT(implNetwork != nullptr); - implNetwork->addData(constLayer->name.c_str(), newEdgeAfterLayer); - implNetwork->addLayer(constLayer); - - constLayer->outData.push_back(newEdgeAfterLayer); - getInputTo(newEdgeAfterLayer)[layer->name] = layer; - layer->insData.push_back(newEdgeAfterLayer); - }; - - // The code block below transforms legacy layers to the form more compatible with opset1 in order to simplify future migration - // TODO: remove after plug-in is migrated on opset1 - auto all_layers = details::CNNNetSortTopologically(_clonedNetwork); - for (auto &layer : all_layers) { - if (layer->type == "ScaleShift" && layer->insData.size() == 1) { - auto constDimsRank = layer->insData[0].lock()->getDims().size(); - - Blob::Ptr scalesBlob = layer->blobs["weights"]; - if (scalesBlob != nullptr) { - std::vector shape(constDimsRank, 1); - shape[shape.size() > 1 ? 
1 : 0] = scalesBlob->size(); - - createConstInputTo(layer, scalesBlob, shape, "weights"); - } - - Blob::Ptr shiftBlob = layer->blobs["biases"]; - if (shiftBlob != nullptr) { - std::vector shape(constDimsRank, 1); - shape[shape.size() > 1 ? 1 : 0] = shiftBlob->size(); - - createConstInputTo(layer, shiftBlob, shape, "biases"); - } else if (scalesBlob != nullptr) { - Blob::Ptr biases = make_shared_blob(scalesBlob->getTensorDesc()); - if (biases == nullptr) - IE_THROW() << "Cannot make 'biases' shared blob"; - biases->allocate(); - auto biasesPtr = biases->buffer().as(); - for (size_t i = 0; i < biases->size(); i++) - biasesPtr[i] = 0; - - std::vector shape(constDimsRank, 1); - shape[shape.size() > 1 ? 1 : 0] = biases->size(); - - createConstInputTo(layer, biases, shape, "biases"); - } - } else if (layer->type == "PReLU" && layer->insData.size() == 1) { - Blob::Ptr scalesBlob = layer->blobs["weights"]; - if (scalesBlob != nullptr) { - std::vector shape(layer->insData[0].lock()->getDims().size(), 1); - shape[shape.size() > 1 ? 1 : 0] = scalesBlob->size(); - - createConstInputTo(layer, scalesBlob, shape, "weights"); - } - } else if (layer->type == "DeformableConvolution") { - auto * defConvLayer = dynamic_cast(layer.get()); - if (defConvLayer == nullptr) - IE_THROW() << "Cannot convert deformable convolution layer."; - - Blob::Ptr weightsBlob = defConvLayer->blobs["weights"]; - if (weightsBlob != nullptr) { - std::vector shape; - - if (defConvLayer->_group != 1) { - shape.push_back(defConvLayer->_group); - } - shape.push_back(defConvLayer->_out_depth); - shape.push_back(defConvLayer->input()->getDims()[1]); - for (int i = 1; i <= defConvLayer->_kernel.size(); i++) { - shape.push_back(defConvLayer->_kernel[defConvLayer->_kernel.size() - i]); - } - - createConstInputTo(layer, weightsBlob, shape, "weights"); - - defConvLayer->blobs.clear(); - defConvLayer->_weights = nullptr; - } - } else if (layer->type == "BinaryConvolution") { - auto * binConvLayer = dynamic_cast(layer.get()); - if (binConvLayer == nullptr) - IE_THROW() << "Cannot convert binary convolution layer."; - - Blob::Ptr weightsBlob = binConvLayer->blobs["weights"]; - if (weightsBlob != nullptr) { - std::vector shape; - - if (binConvLayer->_group != 1) { - shape.push_back(binConvLayer->_group); - } - shape.push_back(binConvLayer->_out_depth); - shape.push_back(binConvLayer->input()->getDims()[1]); - for (int i = 1; i <= binConvLayer->_kernel.size(); i++) { - shape.push_back(binConvLayer->_kernel[binConvLayer->_kernel.size() - i]); - } - - createConstInputTo(layer, weightsBlob, shape, "weights"); - - binConvLayer->blobs.clear(); - binConvLayer->_weights = nullptr; - } - } - } +// TODO [NM]: reimplement w/o using legacy API +// if (_cfg.lpTransformsMode == Config::LPTransformsMode::On) { +// // Check if network is INT8 or Binary. +// // BF16 transformations were disabled since CPU plug-in doesn't support mixed precision execution: +// // BF16 + INT8 or BF16 + BIN. 
+// bool isFloatModel = true; +// CNNNetworkIterator iter(network); +// while (iter != CNNNetworkIterator()) { +// if (CaselessEq()((*iter)->type, "FakeQuantize")) { +// isFloatModel = false; +// break; +// } +// iter++; +// } +// +// auto changePrecisionBF16 = [&](Precision current, Precision target) { +// InputsDataMap inputs = _clonedNetwork.getInputsInfo(); +// OutputsDataMap outputs = _clonedNetwork.getOutputsInfo(); +// CNNNetworkIterator iter(_clonedNetwork); +// while (iter != CNNNetworkIterator()) { +// // check, if memory output node needs to be transformed +// if (current == Precision::FP32 && +// (*iter)->type == "Memory" && (*iter)->outData.size() == 0 && +// (*iter)->insData[0].lock()->getPrecision() == current) { +// (*iter)->insData[0].lock()->setPrecision(target); +// } +// +// for (size_t o = 0; o < (*iter)->outData.size(); o++) { +// if (inputs.find((*iter)->outData[o]->getName()) == inputs.end() +// && outputs.find((*iter)->outData[o]->getName()) == outputs.end() +// && !CaselessEq()((*iter)->type, "const") +// && (*iter)->outData[o]->getPrecision() == current) { +// (*iter)->outData[o]->setPrecision(target); +// } +// } +// iter++; +// } +// }; +// +// if (with_cpu_x86_avx512_core() && isFloatModel) { +// // If enforceBF16 flag was set, BF16 transformation applies for all layers supported by CPU plugin. +// // Otherwise, only layers marked as BF16 in '_clonedNetwork' will be performed in bfloat16 mode. +// // CPU plugin throws an exception, if marked as BF16 layers have not supported by CPU plugin. +// if (cfg.enforceBF16 == true) +// changePrecisionBF16(Precision::FP32, Precision::BF16); +// } else { +// changePrecisionBF16(Precision::BF16, Precision::FP32); +// } +// } OV_ITT_TASK_SKIP(taskChain); if (_cfg.batchLimit > 1) { // check topology for applicability - if (!CanProcessDynBatch(_clonedNetwork)) { + if (!CanProcessDynBatch(_network)) { IE_THROW() << "MKLDNNGraph::CreateGraph: such topology cannot be compiled for dynamic batch!"; } } @@ -260,16 +137,17 @@ MKLDNNExecNetwork::MKLDNNExecNetwork(const InferenceEngine::CNNNetwork &network, if (_graphs.size() == 1) { for (auto &node : GetGraph()._graph.GetNodes()) { if (node->getType() == MemoryInput) { - auto memoryNode = dynamic_cast(node.get()); - auto state_store = memoryNode->getStore(); - auto state_name = memoryNode->getId(); - - // Remove suffix with pair ID. Internal information. - auto suffix_idx = state_name.find("/id="); - if (suffix_idx != std::string::npos) - state_name = state_name.substr(0, suffix_idx); - - memoryStates.emplace_back(new MKLDNNVariableState(state_name, state_store)); + IE_THROW() << "[NM] Not implemented"; +// auto memoryNode = dynamic_cast(node.get()); +// auto state_store = memoryNode->getStore(); +// auto state_name = memoryNode->getId(); +// +// // Remove suffix with pair ID. Internal information. +// auto suffix_idx = state_name.find("/id="); +// if (suffix_idx != std::string::npos) +// state_name = state_name.substr(0, suffix_idx); +// +// memoryStates.emplace_back(new MKLDNNVariableState(state_name, state_store)); } } } @@ -288,12 +166,11 @@ MKLDNNExecNetwork::Graph::Lock MKLDNNExecNetwork::GetGraph() { std::exception_ptr exception; auto makeGraph = [&] { try { - auto localNetwork = cloneNetwork(_clonedNetwork); { std::lock_guard lock{_cfgMutex}; graphLock._graph.setConfig(_cfg); } - graphLock._graph.CreateGraph(localNetwork, extensionManager, _numaNodesWeights[numaNodeId]); + graphLock._graph.CreateGraph(_network, extensionManager, _numaNodesWeights[numaNodeId]); } catch(...) 
{ exception = std::current_exception(); } @@ -379,55 +256,58 @@ InferenceEngine::Parameter MKLDNNExecNetwork::GetMetric(const std::string &name) } bool MKLDNNExecNetwork::CanProcessDynBatch(const InferenceEngine::CNNNetwork &network) const { - InputsDataMap inputs = network.getInputsInfo(); - - CNNLayerSet inputLayers; - std::unordered_set allLayers; - - if (inputs.empty()) - return false; - - auto & secondLayers = getInputTo(inputs.begin()->second->getInputData()); - if (secondLayers.empty()) - return false; - - bool check_result = true; - details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) { - auto type = TypeFromName(layer->type); - // This is WA for Tile layer - auto tileLayer = dynamic_cast(layer.get()); - if (tileLayer && tileLayer->axis) - return; - - auto reshapeLayer = dynamic_cast(layer.get()); - if (reshapeLayer && - type == Reshape && - (reshapeLayer->outData[0]->getTensorDesc().getDims()[0] == - reshapeLayer->insData[0].lock()->getTensorDesc().getDims()[0])) { - return; - } - - if (type != Input && - type != Output && - type != Convolution && - type != Deconvolution && - type != Activation && - type != Depthwise && - type != Lrn && - type != Pooling && - type != FullyConnected && - type != Gemm && - type != SoftMax && - type != Split && - type != Concatenation && - type != Eltwise && - type != BatchNormalization && - type != Copy) { - check_result = false; - } - }, false); - - return check_result; + // TODO [NM]: reimplement w/o using legacy API + return false; +// InputsDataMap inputs = network.getInputsInfo(); +// +// CNNLayerSet inputLayers; +// std::unordered_set allLayers; +// +// if (inputs.empty()) +// return false; +// +// auto & secondLayers = getInputTo(inputs.begin()->second->getInputData()); +// if (secondLayers.empty()) +// return false; +// +// bool check_result = true; +// details::UnorderedDFS(allLayers, secondLayers.begin()->second, [&](CNNLayerPtr layer) { +// auto type = TypeFromName(layer->type); +// // This is WA for Tile layer +// auto tileLayer = dynamic_cast(layer.get()); +// if (tileLayer && tileLayer->axis) +// return; +// +// auto reshapeLayer = dynamic_cast(layer.get()); +// if (reshapeLayer && +// type == Reshape && +// (reshapeLayer->outData[0]->getTensorDesc().getDims()[0] == +// reshapeLayer->insData[0].lock()->getTensorDesc().getDims()[0])) { +// return; +// } +// +// if (type != Input && +// type != Output && +// type != Convolution && +// type != Deconvolution && +// type != Activation && +// type != Depthwise && +// type != Lrn && +// type != Pooling && +// type != FullyConnected && +// type != Gemm && +// type != Softmax && +// type != Split && +// type != Concatenation && +// type != Eltwise && +// type != Crop && +// type != BatchNormalization && +// type != Copy) { +// check_result = false; +// } +// }, false); +// +// return check_result; } IE_SUPPRESS_DEPRECATED_START diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h index 84e376a2a00306..88831dd6e054ed 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_exec_network.h @@ -14,7 +14,6 @@ #include #include #include -#include #include namespace MKLDNNPlugin { @@ -49,7 +48,7 @@ class MKLDNNExecNetwork: public InferenceEngine::ExecutableNetworkThreadSafeDefa friend class MKLDNNInferRequest; MKLDNNExtensionManager::Ptr extensionManager; std::vector memoryStates; - InferenceEngine::CNNNetwork _clonedNetwork; + const 
InferenceEngine::CNNNetwork _network; std::mutex _cfgMutex; Config _cfg; std::atomic_int _numRequests = {0}; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp index fac29d5e22e9a2..deb3fdff7d68f3 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.cpp @@ -31,17 +31,14 @@ InferenceEngine::ILayerImpl::Ptr MKLDNNExtensionManager::CreateImplementation(co return nullptr; } -std::shared_ptr MKLDNNExtensionManager::CreateExtensionFactory( - const InferenceEngine::CNNLayerPtr &layer) { - if (!layer) - IE_THROW() << "Cannot get cnn layer!"; +std::shared_ptr MKLDNNExtensionManager::CreateExtensionFactory(const std::shared_ptr& op) { std::shared_ptr factory; for (auto& ext : _extensions) { ResponseDesc responseDesc; StatusCode rc = GENERAL_ERROR; ILayerImplFactory* factory_ptr = nullptr; if (auto mkldnnExt = std::dynamic_pointer_cast(ext)) - rc = mkldnnExt->getFactoryFor(factory_ptr, layer.get(), &responseDesc); + rc = mkldnnExt->getFactoryFor(factory_ptr, op, &responseDesc); if (rc != OK) { factory = nullptr; continue; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h index e205993792ef9e..83ddfc3ffe1499 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_mngr.h @@ -8,7 +8,6 @@ #include #include #include -#include #include "nodes/list.hpp" namespace MKLDNNPlugin { @@ -18,7 +17,7 @@ class MKLDNNExtensionManager { using Ptr = std::shared_ptr; MKLDNNExtensionManager() = default; InferenceEngine::ILayerImpl::Ptr CreateImplementation(const std::shared_ptr& op); - std::shared_ptr CreateExtensionFactory(const InferenceEngine::CNNLayerPtr& Layer); + std::shared_ptr CreateExtensionFactory(const std::shared_ptr& op); void AddExtension(InferenceEngine::IExtensionPtr extension); private: diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index e9db4a3076e188..0722fcbea977fe 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -26,11 +26,9 @@ #include #include -#include #include #include -#include -#include +#include "nodes/common/cpu_memcpy.h" #include "nodes/common/cpu_convert.h" #include "precision_utils.h" @@ -38,6 +36,12 @@ #include "utils/blob_dump.h" #include "utils/general_utils.h" +#include "utils/ngraph_utils.hpp" + +#include +#include +#include +#include /***************************************************** * Debug capability @@ -71,31 +75,7 @@ typedef std::vector edge_clusters_t; mkldnn::engine MKLDNNGraph::eng(mkldnn::engine::kind::cpu, 0); template -void MKLDNNGraph::ApplyUnrollPasses(NET &net) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, "MKLDNNGraph::ApplyUnrollPasses"); - - NetPass::CombineRNNSeq(net); - bool ti_proc_ok = NetPass::UnrollRNN_if(net, [] (const RNNCellBase &rnn) -> bool { - if (rnn.clip != 0.0f) - return true; - if ((rnn.cellType == RNNCellBase::GRU || rnn.cellType == RNNCellBase::GRU_LBR) && - rnn.activations != std::vector {"sigmoid", "tanh"}) - return true; - if (rnn.cellType == RNNCellBase::LSTM && - rnn.activations != std::vector {"sigmoid", "tanh", "tanh"}) - return true; - return false; - }); - if (!ti_proc_ok) - IE_THROW() << "Plugin doesn't support Tensor Iterator in pure form. 
" - "None TI optimization pattern has been applied successfully"; -} - -template void MKLDNNGraph::ApplyUnrollPasses(TensorIterator::Body&); -template void MKLDNNGraph::ApplyUnrollPasses(CNNNetwork&); - -template -void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& extMgr, +void MKLDNNGraph::CreateGraph(NET &net, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) { OV_ITT_SCOPED_TASK(MKLDNNPlugin::itt::domains::MKLDNN_LT, "CreateGraph"); @@ -109,224 +89,242 @@ void MKLDNNGraph::CreateGraph(const NET &net, const MKLDNNExtensionManager::Ptr& status = Ready; } -template void MKLDNNGraph::CreateGraph(const TensorIterator::Body&, - const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); +//template void MKLDNNGraph::CreateGraph(const TensorIterator::Body&, +// const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); template void MKLDNNGraph::CreateGraph(const CNNNetwork&, const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); +//template void MKLDNNGraph::CreateGraph(CNNNetwork&, +// const MKLDNNExtensionManager::Ptr&, MKLDNNWeightsSharing::Ptr&); -void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { - this->_name = "subgraph"; - this->reuse_io_tensors = false; - - // Map data object onto producer layer(node) - std::unordered_map> data2node; - - // nodes which has no consumers (output or just unused). But doesn't marked as graph output. - // Will be stored as fake output separately. - std::unordered_set unused_data; - - // Step 1. Replicate input nodes - for (const auto &input : subgraph.inputs) { - if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder - - auto creator = getCreatorLayer(input).lock(); - if (creator == nullptr) { - creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()})); - creator->outData.push_back(input); - } - - const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache)); - data2node[input.get()] = {node, 0}; +//void MKLDNNGraph::Replicate(const TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr) { +// this->_name = "subgraph"; +// this->reuse_io_tensors = false; +// +// // Map data object onto producer layer(node) +// std::unordered_map> data2node; +// +// // nodes which has no consumers (output or just unused). But doesn't marked as graph output. +// // Will be stored as fake output separately. +// std::unordered_set unused_data; +// +// // Step 1. Replicate input nodes +// for (const auto &input : subgraph.inputs) { +// if (input->getPrecision() == Precision::UNSPECIFIED) continue; // const node holder +// +// auto creator = getCreatorLayer(input).lock(); +// if (creator == nullptr) { +// creator.reset(new CNNLayer({input->getName(), "Input", input->getTensorDesc().getPrecision()})); +// creator->outData.push_back(input); +// } +// +// const MKLDNNNodePtr node(MKLDNNNode::factory().create(creator, getEngine(), extMgr, weightsCache)); +// data2node[input.get()] = {node, 0}; +// +// graphNodes.push_back(node); +// inputNodesMap[input->getName()] = node; +// +// if (getInputTo(input).empty()) { +// unused_data.insert(input); +// } +// } +// +// // Step 2. Replicate all internal nodes. 
+// for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) { +// const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; +// graphNodes.push_back(node); +// +// for (int port = 0; port < layer->insData.size(); port++) { +// auto data = layer->insData[port].lock(); +// +// auto port_info = data2node[data.get()]; +// auto parent_node = port_info.first; +// auto parent_port_idx = port_info.second; +// +// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port)); +// node->addEdge(edge); +// graphEdges.push_back(edge); +// } +// int out_port_idx = 0; +// for (auto &out_data : layer->outData) { +// data2node[out_data.get()] = {node, out_port_idx++}; +// if (getInputTo(out_data).empty()) { +// unused_data.insert(out_data); +// } +// } +// } +// +// // Step 3. Add output nodes and output stubs for unused data objects. +// for (const auto &output : subgraph.outputs) { +// auto port_info = data2node[output.get()]; +// auto parent_node = port_info.first; +// auto parent_port_idx = port_info.second; +// +// CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()})); +// layer->insData.push_back(output); +// +// const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; +// +// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); +// node->addEdge(edge); +// graphEdges.push_back(edge); +// graphNodes.push_back(node); +// outputNodesMap.push_back(node); +// +// unused_data.erase(output); +// } +// +// // Add stub output node for unused data +// for (auto to_stub_data : unused_data) { +// auto port_info = data2node[to_stub_data.get()]; +// auto parent_node = port_info.first; +// auto parent_port_idx = port_info.second; +// +// CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()})); +// layer->insData.push_back(to_stub_data); +// +// const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); +// +// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); +// node->addEdge(edge); +// graphEdges.push_back(edge); +// graphNodes.push_back(node); +// } +//} - graphNodes.push_back(node); - inputNodes[input->getName()] = node; +void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { + InputsDataMap inputsInfo = network.getInputsInfo(); + OutputsDataMap outputsInfo = network.getOutputsInfo(); - if (getInputTo(input).empty()) { - unused_data.insert(input); - } + std::shared_ptr func = network.getFunction(); + if (!func) { + IE_THROW() << "Function pointer inside CNNNetwork is nullptr"; } - // Step 2. Replicate all internal nodes. 
- for (const auto layer : NetPass::TIBodySortTopologically(subgraph)) { - const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; - graphNodes.push_back(node); - - for (int port = 0; port < layer->insData.size(); port++) { - auto data = layer->insData[port].lock(); + auto orderedOps = func->get_ordered_ops(); - auto port_info = data2node[data.get()]; - auto parent_node = port_info.first; - auto parent_port_idx = port_info.second; - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, port)); - node->addEdge(edge); - graphEdges.push_back(edge); - } - int out_port_idx = 0; - for (auto &out_data : layer->outData) { - data2node[out_data.get()] = {node, out_port_idx++}; - if (getInputTo(out_data).empty()) { - unused_data.insert(out_data); +// // The input layer precision has to be equal to the InputData precision +// std::map changedPrecision; +// for (const auto& input : inputs) { +// auto inputLayer = getCreatorLayer(input.second->getInputData()).lock(); +// if (inputLayer) { +// inputLayer->precision = inputLayer->outData[0]->getTensorDesc().getPrecision(); +// } +// } +// +// // TODO [NM]: unordered_map is preferred from performance perspective. Needs hash for ngraph::Node +// std::unordered_map op2node; + std::map, MKLDNNNodePtr> op2node; + std::unordered_set unused_data; // nodes which has no consumers (output or just unused) +// + auto getParentPort = [](const std::shared_ptr op, const std::shared_ptr parentOp) -> int { + // TODO [NM]: do we have a better way how to determine parent port? + for (size_t port = 0; port < parentOp->get_output_size(); port++) { + auto outputs = parentOp->get_output_target_inputs(port); + for (auto& output : outputs) { + if (op == output.get_node()->shared_from_this()) + return static_cast(port); } } - } - - // Step 3. Add output nodes and output stubs for unused data objects. 
- for (const auto &output : subgraph.outputs) { - auto port_info = data2node[output.get()]; - auto parent_node = port_info.first; - auto parent_port_idx = port_info.second; - - CNNLayerPtr layer(new CNNLayer({"out_" + output->getName(), "Output", output->getTensorDesc().getPrecision()})); - layer->insData.push_back(output); - - const MKLDNNNodePtr node {MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)}; - - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); - node->addEdge(edge); - graphEdges.push_back(edge); - graphNodes.push_back(node); - outputNodes.push_back(node); - unused_data.erase(output); - } - - // Add stub output node for unused data - for (auto to_stub_data : unused_data) { - auto port_info = data2node[to_stub_data.get()]; - auto parent_node = port_info.first; - auto parent_port_idx = port_info.second; - - CNNLayerPtr layer(new CNNLayer({"stub_" + to_stub_data->getName(), "Output", to_stub_data->getTensorDesc().getPrecision()})); - layer->insData.push_back(to_stub_data); - - const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); - - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, parent_port_idx, 0)); - node->addEdge(edge); - graphEdges.push_back(edge); - graphNodes.push_back(node); - } -} - -void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr) { - InputsDataMap inputs = network.getInputsInfo(); - - this->_name = network.getName(); - - // The input layer precision has to be equal to the InputData precision - std::map changedPrecision; - for (const auto& input : inputs) { - auto inputLayer = getCreatorLayer(input.second->getInputData()).lock(); - if (inputLayer) { - inputLayer->precision = inputLayer->outData[0]->getTensorDesc().getPrecision(); - } - } - - std::unordered_map layer2node; - std::unordered_set unused_data; // nodes which has no consumers (output or just unused) - - auto _parent_port = [] (const DataPtr &data) -> int { - auto parent = getCreatorLayer(data).lock(); - for (int i = 0; parent->outData.size(); i++) - if (data == parent->outData[i]) - return i; return -1; }; // Replicate All Nodes in topological order - for (const auto layer : CNNNetSortTopologically(network)) { - CNNLayerPtr _layer = layer; - if (layer->type == "Memory" && layer->GetParamAsString("index") == "1") { - auto memoryId = layer->GetParamAsString("id"); - Precision portPrecision = layer->outData[0]->getTensorDesc().getPrecision(); - _layer.reset(new CNNLayer({layer->name + "/id=" + memoryId, "MemoryInput", portPrecision})); - _layer->params = layer->params; - _layer->outData = layer->outData; - } - - const MKLDNNNodePtr node(MKLDNNNode::factory().create(_layer, getEngine(), extMgr, weightsCache)); + for (const auto& op : orderedOps) { +// CNNLayerPtr _layer = layer; +// if (layer->type == "Memory" && layer->GetParamAsString("index") == "1") { +// auto memoryId = layer->GetParamAsString("id"); +// Precision portPrecision = layer->outData[0]->getTensorDesc().getPrecision(); +// _layer.reset(new CNNLayer({layer->name + "/id=" + memoryId, "MemoryInput", portPrecision})); +// _layer->params = layer->params; +// _layer->outData = layer->outData; +// } + + const MKLDNNNodePtr node(MKLDNNNode::factory().create(op, getEngine(), extMgr, weightsCache)); graphNodes.push_back(node); - layer2node[layer] = node; - if (layer->params.count("originalLayersNames")) { - node->originalLayers = layer->params["originalLayersNames"]; + if (op->get_type_info() == 
ngraph::op::v0::Parameter::type_info) { + if (inputsInfo.count(node->getName()) != 0) { + inputNodesMap[node->getName()] = node; + } } - for (int port = 0; port < layer->insData.size(); port++) { - auto data = layer->insData[port].lock(); - auto parent_layer = getCreatorLayer(data).lock(); - if (!parent_layer) continue; // no parent means that it is input data node (or memory/const layer) - - auto parent_node = layer2node[parent_layer]; + if (op->get_type_info() == ngraph::op::v0::Result::type_info) { + auto prev = op->get_input_node_shared_ptr(0); + std::string inputID; + inputID = prev->get_friendly_name(); + if (prev->get_output_size() > 1) { + inputID += "." + std::to_string(op->get_input_source_output(0).get_index()); + } - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), port)); - node->addEdge(edge); - graphEdges.push_back(edge); - } - for (auto &out_data : layer->outData) { - if (getInputTo(out_data).empty()) { - unused_data.insert(out_data); + if (outputsInfo.count(inputID) != 0) { + outputNodesMap[inputID] = node; } } - } - - OutputsDataMap outputs = network.getOutputsInfo(); - for (const auto &output : outputs) { - const auto data = output.second; - - auto parent_layer = getCreatorLayer(data).lock(); - auto parent_node = layer2node[parent_layer]; - - CNNLayerPtr layer(new CNNLayer({"out_" + output.first, "Output", data->getTensorDesc().getPrecision()})); - layer->insData.push_back(data); - - const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); - - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(data), 0)); - node->addEdge(edge); - graphEdges.push_back(edge); - - graphNodes.push_back(node); - outputNodes.push_back(node); - unused_data.erase(data); - } + op2node[op] = node; - // Add stub output node for unused data - for (auto to_stub_data : unused_data) { - auto parent_layer = getCreatorLayer(to_stub_data).lock(); - auto parent_node = layer2node[parent_layer]; + const auto& rtInfo = op->get_rt_info(); + if (rtInfo.count("originalLayersNames")) { + node->originalLayers = getRTInfoValue(rtInfo, "originalLayersNames"); + } - CNNLayerPtr layer(new CNNLayer({"stub_" + parent_layer->name, "Output", to_stub_data->getTensorDesc().getPrecision()})); - layer->insData.push_back(to_stub_data); + for (size_t port = 0; port < op->get_input_size(); port++) { + auto parentOp = op->get_input_node_shared_ptr(port); - const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); +// auto data = layer->insData[port].lock(); +// auto parent_layer = getCreatorLayer(data).lock(); +// if (!parent_layer) continue; // no parent means that it is input data node (or memory/const layer) - MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(to_stub_data), 0)); - node->addEdge(edge); - graphEdges.push_back(edge); - graphNodes.push_back(node); - } + auto parentNode = op2node[parentOp]; - // Replicate input nodes - for (const auto& input : inputs) { - auto inputLayer = getCreatorLayer(input.second->getInputData()).lock(); - inputNodes[input.first] = layer2node[inputLayer]; - - // Loading mean images - MKLDNNDims outDims; - if (!inputNodes[input.first]->getChildEdgeAt(0)->getDims().ndims()) - outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1)); - else - outDims = MKLDNNDims(inputNodes[input.first]->getChildEdgeAt(0)->getDims()); - if (inputs.find(input.first) != inputs.end()) { - InputInfo::Ptr ii = inputs[input.first]; - if (ii && ii->getPreProcess().getNumberOfChannels()) 
{ - _meanImages[input.first].Load(outDims, ii); - } + MKLDNNEdgePtr edge(new MKLDNNEdge(parentNode, node, getParentPort(op, parentOp), static_cast(port))); + node->addEdge(edge); + graphEdges.push_back(edge); } + +// for (auto &out_data : layer->outData) { +// if (getInputTo(out_data).empty()) { +// unused_data.insert(out_data); +// } +// } } +// +// // Add stub output node for unused data +// for (auto to_stub_data : unused_data) { +// auto parent_layer = getCreatorLayer(to_stub_data).lock(); +// auto parent_node = layer2node[parent_layer]; +// +// CNNLayerPtr layer(new CNNLayer({"stub_" + parent_layer->name, "Output", to_stub_data->getTensorDesc().getPrecision()})); +// layer->insData.push_back(to_stub_data); +// +// const MKLDNNNodePtr node(MKLDNNNode::factory().create(layer, getEngine(), extMgr, weightsCache)); +// +// MKLDNNEdgePtr edge(new MKLDNNEdge(parent_node, node, _parent_port(to_stub_data), 0)); +// node->addEdge(edge); +// graphEdges.push_back(edge); +// graphNodes.push_back(node); +// } +// +// // Replicate input nodes +// for (const auto& input : inputs) { +// auto inputLayer = getCreatorLayer(input.second->getInputData()).lock(); +// inputNodesMap[input.first] = layer2node[inputLayer]; +// +// // Loading mean images +// MKLDNNDims outDims; +// if (!inputNodesMap[input.first]->getChildEdgeAt(0)->getDims().ndims()) +// outDims = MKLDNNDims(InferenceEngine::SizeVector(1, 1)); +// else +// outDims = MKLDNNDims(inputNodesMap[input.first]->getChildEdgeAt(0)->getDims()); +// if (inputs.find(input.first) != inputs.end()) { +// InputInfo::Ptr ii = inputs[input.first]; +// if (ii && ii->getPreProcess().getNumberOfChannels()) { +// _meanImages[input.first].Load(outDims, ii); +// } +// } +// } } void MKLDNNGraph::InitGraph() { @@ -352,9 +350,9 @@ void MKLDNNGraph::InitGraph() { CreatePrimitives(); SetOriginalLayerNames(); - - if (!config.dumpToDot.empty()) - dumpToDotFile(config.dumpToDot + "_init.dot"); +// +// if (!config.dumpToDot.empty()) +// dumpToDotFile(config.dumpToDot + "_init.dot"); #ifndef DUMP_INTERNAL_BLOBS for (auto &graphNode : graphNodes) { @@ -385,24 +383,23 @@ void MKLDNNGraph::InitGraph() { void MKLDNNGraph::SetOriginalLayerNames() { OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraph::SetOriginalLayerNames"); - // Do it before cleanup. 
Because it will lose original layers information for (auto &graphNode : graphNodes) { auto nodeType = graphNode->getType(); if (nodeType == Reorder || nodeType == Output) continue; if (graphNode->getOriginalLayers().empty()) { - graphNode->addOriginalLayer(graphNode->getCnnLayer()); + graphNode->addOriginalLayer(graphNode->getOriginalName()); } - if (graphNode->getFusedWith().size() || graphNode->getMergeWith().size()) { + if (!graphNode->getFusedWith().empty() || !graphNode->getMergeWith().empty()) { // Original layer names std::vector internal = graphNode->getFusedWith(); auto &merged = graphNode->getMergeWith(); internal.insert(internal.end(), merged.begin(), merged.end()); for (auto &sub_node : internal) { - graphNode->addOriginalLayer(sub_node->getCnnLayer()); + graphNode->addOriginalLayer(sub_node->getOriginalName()); } } } @@ -517,7 +514,7 @@ void MKLDNNGraph::InitEdges() { std::unordered_set uniqueLayerNames; for (auto node : graphNodes) { - uniqueLayerNames.insert(node->getCnnLayer()->name); + uniqueLayerNames.insert(node->getName()); } for (auto i = 0; i < numberOfEdges; i++) { @@ -528,21 +525,22 @@ void MKLDNNGraph::InitEdges() { // Check if there is a reorder that supports the type conversion if (edge->getInputDesc().getPrecision() != edge->getOutputDesc().getPrecision() && !isReorderAvailable(edge->getInputDesc(), edge->getOutputDesc(), this->getEngine())) { - //If we are here, then we need to insert Convert, because there are no reorders that support such type conversion - std::string convertName = edge->getParent()->getName() + "_" + - edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name(); - - CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()})); - auto convertNode = std::make_shared(convert, this->getEngine(), this->weightsCache); - convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc()); - InsertNode(edge, convertNode, true); - - //Check if reorder is still needed - if (convertNode->getChildEdgeAt(0)->needReorder()) { - edge = convertNode->getChildEdgeAt(0); - } else { - insertReorder = false; - } + IE_THROW() << "[NM] Not implemented"; +// //If we are here, then we need to insert Convert, because there are no reorders that support such type conversion +// std::string convertName = edge->getParent()->getName() + "_" + +// edge->getInputDesc().getPrecision().name() + "_" + edge->getOutputDesc().getPrecision().name(); +// +// CNNLayerPtr convert(new CNNLayer(LayerParams{convertName, "Convert", edge->getInputDesc().getPrecision()})); +// auto convertNode = std::make_shared(convert, this->getEngine(), this->weightsCache); +// convertNode->setDescs(edge->getInputDesc(), edge->getOutputDesc()); +// InsertNode(edge, convertNode, true); +// +// //Check if reorder is still needed +// if (convertNode->getChildEdgeAt(0)->needReorder()) { +// edge = convertNode->getChildEdgeAt(0); +// } else { +// insertReorder = false; +// } } if (insertReorder) { @@ -758,8 +756,8 @@ void MKLDNNGraph::CreatePrimitives() { void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in) { if (!IsReady()) IE_THROW()<< "Wrong state. 
Topology not ready."; - auto input = inputNodes.find(name); - if (input != inputNodes.end()) { + auto input = inputNodesMap.find(name); + if (input != inputNodesMap.end()) { MKLDNNDims outDims = input->second->getChildEdgeAt(0)->getDims(); const void *ext_data_ptr = in->cbuffer(); @@ -791,11 +789,14 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { if (!IsReady()) IE_THROW() << "Wrong state. Topology not ready."; - for (MKLDNNNodePtr &node : outputNodes) { + for (auto &outputMap : outputNodesMap) { + auto name = outputMap.first; + auto node = outputMap.second; // remove out_ from node name - std::string name = node->getName().substr(4); +// std::string name = node->getName().substr(4); const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory(); if (out.find(name) == out.end()) { + // TODO [NM]: Do we really need this path? // TODO: Create blob from MemoryDesc Blob::Ptr outBlob = make_shared_blob({Precision::FP32, node->getParentEdgeAt(0)->getDims().ToSizeVector(), TensorDesc::getLayoutByDims(node->getParentEdgeAt(0)->getDims().ToSizeVector())}, @@ -981,7 +982,7 @@ void MKLDNNGraph::GetPerfData(std::map(it.second.get()); - if (!node || node->isConstant()) - continue; - resp[it.first] = node->getChildEdgeAt(0)->getBlob(); + for (auto &it : inputNodesMap) { +// TODO [NM]: Do we still need this? +// MKLDNNInputNode* node = dynamic_cast(it.second.get()); +// if (!node || node->isConstant()) +// continue; + resp[it.first] = it.second->getChildEdgeAt(0)->getBlob(); } } void MKLDNNGraph::getOutputBlobs(InferenceEngine::BlobMap &resp) { - for (auto &it : outputNodes) { - std::string name = it->getName().substr(4); - resp[name] = it->getParentEdgeAt(0)->getBlob(); + for (auto &it : outputNodesMap) { + resp[it.first] = it.second->getParentEdgeAt(0)->getBlob(); } } @@ -1164,10 +1165,7 @@ void MKLDNNGraph::RemoveDroppedEdges() { MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerName, const TensorDesc& inDesc, const TensorDesc& outDesc, bool isOptimized, InferenceEngine::Blob::Ptr scales) { - CNNLayerPtr layer(new CNNLayer({layerName, - "Reorder", - inDesc.getPrecision()})); - MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layer, getEngine(), weightsCache)); + MKLDNNNodePtr newReorder(new MKLDNNReorderNode(layerName, getEngine(), weightsCache)); auto *reorderPtr = dynamic_cast(newReorder.get()); if (reorderPtr == nullptr) { IE_THROW() << "MKLDNNGraph::InsertReorder: Cannot cast to MKLDNNReorderNode"; @@ -1188,15 +1186,6 @@ MKLDNNNodePtr MKLDNNGraph::InsertReorder(MKLDNNEdgePtr edge, std::string layerNa return newReorder; } -void MKLDNNGraph::dumpToDotFile(std::string file) const { - std::ofstream dot; - dot.open(file); - if (!dot.is_open()) IE_THROW() << "CPU Plugin cannot create dot file " << file << "."; - - dump_graph_as_dot(*this, dot); - dot.close(); -} - void MKLDNNGraph::do_before(const std::string &dir, const MKLDNNNodePtr &node) { auto exec_order = std::to_string(node->execIndex); std::string nodeName = node->name; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 5e8d9d7d009a0c..47c1bdc35ecc2f 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -48,7 +48,7 @@ class MKLDNNGraph { void getOutputBlobs(InferenceEngine::BlobMap &out_map); template - void CreateGraph(const NET &network, + void CreateGraph(NET &network, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache); @@ -73,15 +73,14 @@ 
class MKLDNNGraph { return graphEdges; } - std::vector& GetOutputNodes() { - return outputNodes; + std::map& GetInputNodesMap() { + return inputNodesMap; } - std::map& GetInputNodes() { - return inputNodes; + std::map& GetOutputNodesMap() { + return outputNodesMap; } - mkldnn::engine getEngine() const { return eng; } @@ -152,9 +151,6 @@ class MKLDNNGraph { InferenceEngine::CNNNetwork dump() const; - template - static void ApplyUnrollPasses(NET &net); - void ResetInferCount() { infer_count = 0; } void SortTopologically(); @@ -166,8 +162,8 @@ class MKLDNNGraph { status = NotReady; eng = mkldnn::engine(mkldnn::engine::kind::cpu, 0); - inputNodes.clear(); - outputNodes.clear(); + inputNodesMap.clear(); + outputNodesMap.clear(); graphNodes.clear(); graphEdges.clear(); _meanImages.clear(); @@ -183,8 +179,8 @@ class MKLDNNGraph { MKLDNNMemoryPtr memWorkspace; - std::map inputNodes; - std::vector outputNodes; + std::map inputNodesMap; + std::map outputNodesMap; std::vector graphNodes; std::vector graphEdges; @@ -194,7 +190,7 @@ class MKLDNNGraph { static mkldnn::engine eng; void Replicate(const InferenceEngine::CNNNetwork &network, const MKLDNNExtensionManager::Ptr& extMgr); - void Replicate(const InferenceEngine::TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr); +// void Replicate(const InferenceEngine::TensorIterator::Body &subgraph, const MKLDNNExtensionManager::Ptr& extMgr); void InitGraph(); void InitNodes(); void InitDescriptors(); @@ -211,16 +207,10 @@ class MKLDNNGraph { friend class MKLDNNInferRequest; friend class MKLDNNGraphlessInferRequest; - friend InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph); friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); private: - void dumpToDotFile(std::string file) const; - struct ParsedLayer { - MKLDNNNodePtr parent; - InferenceEngine::CNNLayerPtr cnnLayer; - size_t outIdx; - }; + std::atomic cancelation_requested; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index c22512cd97240c..14d2f6a28ae7e8 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -3,8 +3,6 @@ // #include "mkldnn_graph_dumper.h" -#include -#include #include #include "exec_graph_info.hpp" #include "mkldnn_debug.h" @@ -22,25 +20,93 @@ namespace MKLDNNPlugin { namespace { -std::map extract_node_metadata(const MKLDNNNodePtr &); -void drawer_callback(const InferenceEngine::CNNLayerPtr, ordered_properties &, ordered_properties &); +std::map extract_node_metadata(const MKLDNNNodePtr &node) { + std::map serialization_info; -} // namespace + if (node->getType() == Input && node->isConstant()) { + // We need to separate Input and Const layers + serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = "Const"; + } else if (node->getType() == Generic) { + // Path to print actual name for extension layers + serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = node->getTypeStr(); + } else { + serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = NameFromType(node->getType()); + } + + // Original layers + serialization_info[ExecGraphInfoSerialization::ORIGINAL_NAMES] = node->getOriginalLayers(); + + // Implementation type name + serialization_info[ExecGraphInfoSerialization::IMPL_TYPE] = node->getPrimitiveDescriptorType(); + + std::string outputPrecisionsStr; + if 
(!node->getChildEdges().empty()) { + outputPrecisionsStr = node->getChildEdgeAt(0)->getDesc().getPrecision().name(); + + bool isAllEqual = true; + for (size_t i = 1; i < node->getChildEdges().size(); i++) { + if (node->getChildEdgeAt(i-1)->getDesc().getPrecision() != node->getChildEdgeAt(i)->getDesc().getPrecision()) { + isAllEqual = false; + break; + } + } -CNNLayer::Ptr create_cnnlayer(const MKLDNNNodePtr &node) { - CNNLayer::Ptr layer(new CNNLayer({node->getName(), "type", Precision::FP32})); + // If all output precisions are the same, we store the name only once + if (!isAllEqual) { + for (size_t i = 1; i < node->getChildEdges().size(); i++) + outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getDesc().getPrecision().name()); + } + } else { + // Branch to correctly handle output nodes + if (!node->getParentEdges().empty()) { + outputPrecisionsStr = node->getParentEdgeAt(0)->getDesc().getPrecision().name(); + } + } + serialization_info[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = outputPrecisionsStr; - layer->params = extract_node_metadata(node); - layer->type = layer->params[ExecGraphInfoSerialization::LAYER_TYPE]; - layer->params.erase(ExecGraphInfoSerialization::LAYER_TYPE); + std::string outputLayoutsStr; + auto outDescs = node->getSelectedPrimitiveDescriptor()->getConfig().outConfs; - auto &cfg = node->getSelectedPrimitiveDescriptor()->getConfig(); - layer->insData.resize(cfg.inConfs.size()); - layer->outData.resize(cfg.outConfs.size()); + if (!outDescs.empty()) { + auto fmt0 = MKLDNNMemoryDesc(outDescs[0].desc).getFormat(); + outputLayoutsStr = mkldnn::utils::fmt2str(fmt0); - return layer; + bool isAllEqual = true; + for (size_t i = 1; i < outDescs.size(); i++) { + if (MKLDNNMemoryDesc(outDescs[i - 1].desc).getFormat() != MKLDNNMemoryDesc(outDescs[i].desc).getFormat()) { + isAllEqual = false; + break; + } + } + + // If all output layouts are the same, we store the name only once + if (!isAllEqual) { + for (size_t i = 1; i < outDescs.size(); i++) { + auto fmt = MKLDNNMemoryDesc(outDescs[i].desc).getFormat(); + outputLayoutsStr += "," + std::string(mkldnn::utils::fmt2str(fmt)); + } + } + } else { + outputLayoutsStr = mkldnn::utils::fmt2str(mkldnn::memory::format_tag::undef); + } + serialization_info[ExecGraphInfoSerialization::OUTPUT_LAYOUTS] = outputLayoutsStr; + + // Performance + if (node->PerfCounter().avg() != 0) { + serialization_info[ExecGraphInfoSerialization::PERF_COUNTER] = std::to_string(node->PerfCounter().avg()); + } else { + serialization_info[ExecGraphInfoSerialization::PERF_COUNTER] = "not_executed"; // it means it was not calculated yet + } + + serialization_info[ExecGraphInfoSerialization::EXECUTION_ORDER] = std::to_string(node->getExecIndex()); + + serialization_info[ExecGraphInfoSerialization::RUNTIME_PRECISION] = node->getRuntimePrecision().name(); + + return serialization_info; } +} // namespace + InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph) { std::map > node2layer; @@ -69,15 +135,15 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph auto create_ngraph_node = [&](const MKLDNNNodePtr &node) { bool is_input = false, is_output = false, should_be_hold = false; - for (auto && kvp : graph.inputNodes) { + for (auto && kvp : graph.inputNodesMap) { if (kvp.second == node) { is_input = true; break; } } - for (auto && onode : graph.outputNodes) { - if (onode == node) { + for (auto && kvp : graph.outputNodesMap) { + if (kvp.second == node) { is_output = true; break; } @@ -141,187 
+207,4 @@ InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph return net; } -InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph) { - auto net = std::make_shared(); - - net->setName(graph._name); - std::map node2layer; - - // Copy all nodes to network - for (auto &node : graph.graphNodes) { - auto layer = create_cnnlayer(node); - node2layer[node] = layer; - net->addLayer(layer); - } - - // Copy all edges to network - for (auto &node : graph.graphNodes) { - auto pr = node2layer[node]; - auto ch_edges = node->getChildEdges(); - - for (int i = 0; i < ch_edges.size(); i++) { - auto edge = node->getChildEdgeAt(i); - int in_port = edge->getOutputNum(); - auto ch_node = edge->getChild(); - auto ch = node2layer[ch_node]; - - DataPtr data; - if (i < pr->outData.size()) { - std::string data_name = node->getName() + "_out" + std::to_string(i); - pr->outData[i] = std::make_shared(data_name, edge->getDesc()); - data = pr->outData[i]; - getCreatorLayer(data) = pr; - } else { - data = pr->outData[0]; - } - - getInputTo(data)[ch->name] = ch; - ch->insData[in_port] = data; - } - } - - // Specify inputs data - for (auto kvp : graph.inputNodes) { - auto in_node = kvp.second; - auto in_layer = node2layer[in_node]; - - auto in_info = std::make_shared(); - in_info->setInputData(in_layer->outData[0]); - net->setInputInfo(in_info); - } - - return InferenceEngine::CNNNetwork{net}; -} - -void dump_graph_as_dot(const MKLDNNGraph &graph, std::ostream &out) { - InferenceEngine::CNNNetwork dump_net = dump_graph_as_ie_net(graph); - InferenceEngine::saveGraphToDot(dump_net, out, drawer_callback); -} - -//********************************** -// Special converters of meta data -//********************************** - -namespace { - -std::map extract_node_metadata(const MKLDNNNodePtr &node) { - std::map serialization_info; - - if (node->getType() == Input && node->isConstant()) { - // We need to separate Input and Const layers - serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = "Const"; - } else if (node->getType() == Generic) { - // Path to print actual name for extension layers - serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = node->getTypeStr(); - } else { - serialization_info[ExecGraphInfoSerialization::LAYER_TYPE] = NameFromType(node->getType()); - } - - // Original layers - serialization_info[ExecGraphInfoSerialization::ORIGINAL_NAMES] = node->getOriginalLayers(); - - // Implementation type name - serialization_info[ExecGraphInfoSerialization::IMPL_TYPE] = node->getPrimitiveDescriptorType(); - - std::string outputPrecisionsStr; - if (!node->getChildEdges().empty()) { - outputPrecisionsStr = node->getChildEdgeAt(0)->getDesc().getPrecision().name(); - - bool isAllEqual = true; - for (size_t i = 1; i < node->getChildEdges().size(); i++) { - if (node->getChildEdgeAt(i-1)->getDesc().getPrecision() != node->getChildEdgeAt(i)->getDesc().getPrecision()) { - isAllEqual = false; - break; - } - } - - // If all output precisions are the same, we store the name only once - if (!isAllEqual) { - for (size_t i = 1; i < node->getChildEdges().size(); i++) - outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getDesc().getPrecision().name()); - } - } else { - // Branch to correctly handle output nodes - if (!node->getParentEdges().empty()) { - outputPrecisionsStr = node->getParentEdgeAt(0)->getDesc().getPrecision().name(); - } - } - serialization_info[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = outputPrecisionsStr; - - std::string 
outputLayoutsStr; - auto outDescs = node->getSelectedPrimitiveDescriptor()->getConfig().outConfs; - - if (!outDescs.empty()) { - auto fmt0 = MKLDNNMemoryDesc(outDescs[0].desc).getFormat(); - outputLayoutsStr = mkldnn::utils::fmt2str(fmt0); - - bool isAllEqual = true; - for (size_t i = 1; i < outDescs.size(); i++) { - if (MKLDNNMemoryDesc(outDescs[i - 1].desc).getFormat() != MKLDNNMemoryDesc(outDescs[i].desc).getFormat()) { - isAllEqual = false; - break; - } - } - - // If all output layouts are the same, we store the name only once - if (!isAllEqual) { - for (size_t i = 1; i < outDescs.size(); i++) { - auto fmt = MKLDNNMemoryDesc(outDescs[i].desc).getFormat(); - outputLayoutsStr += "," + std::string(mkldnn::utils::fmt2str(fmt)); - } - } - } else { - outputLayoutsStr = mkldnn::utils::fmt2str(mkldnn::memory::format_tag::undef); - } - serialization_info[ExecGraphInfoSerialization::OUTPUT_LAYOUTS] = outputLayoutsStr; - - // Performance - if (node->PerfCounter().avg() != 0) { - serialization_info[ExecGraphInfoSerialization::PERF_COUNTER] = std::to_string(node->PerfCounter().avg()); - } else { - serialization_info[ExecGraphInfoSerialization::PERF_COUNTER] = "not_executed"; // it means it was not calculated yet - } - - serialization_info[ExecGraphInfoSerialization::EXECUTION_ORDER] = std::to_string(node->getExecIndex()); - - serialization_info[ExecGraphInfoSerialization::RUNTIME_PRECISION] = node->getRuntimePrecision().name(); - - return serialization_info; -} - -const char BLUE[] = "#D8D9F1"; -const char GREEN[] = "#D9EAD3"; - -void drawer_callback(const InferenceEngine::CNNLayerPtr layer, - ordered_properties &printed_properties, - ordered_properties &node_properties) { - const auto ¶ms = layer->params; - - // Implementation - auto impl = params.find(ExecGraphInfoSerialization::IMPL_TYPE); - if (impl != params.end()) { - printed_properties.push_back({"impl", impl->second}); - } - - // Original names - auto orig = params.find(ExecGraphInfoSerialization::ORIGINAL_NAMES); - if (orig != params.end()) { - printed_properties.push_back({"originals", orig->second}); - } - - // Precision - auto prec = params.find(ExecGraphInfoSerialization::OUTPUT_PRECISIONS); - if (prec != params.end()) { - printed_properties.push_back({"precision", prec->second}); - // Set color - node_properties.push_back({"fillcolor", prec->second == "FP32" ? GREEN : BLUE}); - } - - // Set xlabel containing PM data if calculated - auto perf = layer->params.find(ExecGraphInfoSerialization::PERF_COUNTER); - node_properties.push_back({"xlabel", (perf != layer->params.end()) ? 
perf->second : ""}); -} - -} // namespace - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h index e68c8a99be6f91..d954695baaa050 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.h @@ -11,9 +11,6 @@ namespace MKLDNNPlugin { -void dump_graph_as_dot(const MKLDNNGraph &graph, std::ostream &out); - -InferenceEngine::CNNNetwork dump_graph_as_ie_net(const MKLDNNGraph &graph); InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index 477de31b769261..ee6003a039a807 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -21,7 +21,6 @@ #include "mkldnn/ie_mkldnn.h" #include -#include #include "utils/general_utils.h" // WA for xbyak.h @@ -51,33 +50,42 @@ MKLDNNGraphOptimizer::MKLDNNGraphOptimizer() {} void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations"); - - MergeTwoEqualScaleShifts(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// MergeTwoEqualScaleShifts(graph); +// graph.RemoveDroppedNodes(); FuseBroadcastAndEltwise(graph); graph.RemoveDroppedNodes(); - FuseClampAndQuantize(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// or move to LPT +// FuseClampAndQuantize(graph); +// graph.RemoveDroppedNodes(); - FuseScaleShiftAndQuantize(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// or move to LPT +// FuseScaleShiftAndQuantize(graph); +// graph.RemoveDroppedNodes(); - MergeGroupConvolution(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: do we still have networks that requires this optimizations? Preferable should be removed. 
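At this point the common-optimization pipeline is being thinned out: every pass follows the same "mutate the graph, then purge dropped nodes" rhythm, and the disabled passes keep that shape as commented-out pairs with a TODO explaining why they are off. Below is a minimal, self-contained C++ sketch of that driver pattern under stated assumptions; the Graph type and the pass list are hypothetical stand-ins for illustration only, not the plugin's real MKLDNNGraph or MKLDNNGraphOptimizer API.

#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for the graph being optimized (not MKLDNNGraph).
struct Graph {
    int droppedNodes = 0;
    // Cleanup step that, as in the diff above, immediately follows every pass.
    void RemoveDroppedNodes() {
        std::cout << "purged " << droppedNodes << " dropped node(s)\n";
        droppedNodes = 0;
    }
};

int main() {
    Graph graph;

    // Each entry is one optimization pass; a pass may mark nodes as dropped.
    // Disabled passes simply stay out of the list, mirroring the commented-out calls.
    std::vector<std::pair<std::string, std::function<void(Graph&)>>> passes = {
        {"FuseBroadcastAndEltwise",    [](Graph& g) { g.droppedNodes += 1; }},
        {"FuseConvolutionAndQuantize", [](Graph& g) { g.droppedNodes += 2; }},
    };

    for (auto& [name, pass] : passes) {
        std::cout << name << ": ";
        pass(graph);                 // mutate the graph
        graph.RemoveDroppedNodes();  // then purge whatever the pass dropped
    }
    return 0;
}

The point of the pairing is that later passes never see half-removed nodes; each fusion or merge leaves the node list consistent before the next pattern match runs.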
+// MergeGroupConvolution(graph); +// graph.RemoveDroppedNodes(); - FuseConvolutionAndZeroPoints(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseConvolutionAndZeroPoints(graph); +// graph.RemoveDroppedNodes(); - FuseConvolutionAndDepthwise(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseConvolutionAndDepthwise(graph); +// graph.RemoveDroppedNodes(); - FuseConvolutionAndActivation(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseConvolutionAndActivation(graph); +// graph.RemoveDroppedNodes(); - FuseConvolutionAndDepthwise(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseConvolutionAndDepthwise(graph); +// graph.RemoveDroppedNodes(); FuseConvolutionAndQuantize(graph); graph.RemoveDroppedNodes(); @@ -85,8 +93,9 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { graph.SortTopologically(); graph.RemoveDroppedEdges(); - FuseConvolutionAndDepthwise(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseConvolutionAndDepthwise(graph); +// graph.RemoveDroppedNodes(); FusePoolingAndQuantize(graph); graph.RemoveDroppedNodes(); @@ -94,8 +103,9 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { graph.SortTopologically(); graph.RemoveDroppedEdges(); - FuseConvolutionAndDWConvolution(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseConvolutionAndDWConvolution(graph); +// graph.RemoveDroppedNodes(); FuseBinaryConvolutionAndQuantize(graph); graph.RemoveDroppedNodes(); @@ -103,26 +113,27 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { FuseBatchNormWithScale(graph); graph.RemoveDroppedNodes(); - RemoveIdentityOperator(graph); - graph.RemoveDroppedNodes(); - FuseConvolutionSumAndConvolutionSumActivation(graph); graph.RemoveDroppedNodes(); - FuseConvolutionAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseConvolutionAndSimpleOperation(graph); +// graph.RemoveDroppedNodes(); - FuseFullyConnectedAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseFullyConnectedAndSimpleOperation(graph); +// graph.RemoveDroppedNodes(); - FuseMVNAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseMVNAndSimpleOperation(graph); +// graph.RemoveDroppedNodes(); FuseInterpolateAndSimpleOperation(graph); graph.RemoveDroppedNodes(); - FuseNormalizeAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); +// TODO [NM]: transformation should be implemented w/o using of CNNLayer +// FuseNormalizeAndSimpleOperation(graph); +// graph.RemoveDroppedNodes(); FuseEltwiseAndSimple(graph); graph.RemoveDroppedNodes(); @@ -133,9 +144,6 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) { OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); - RemoveIOScaleShifts(graph); - graph.RemoveDroppedNodes(); - 
DropDoubleReorders(graph); graph.RemoveDroppedNodes(); @@ -153,96 +161,39 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap } void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableConvNode = [](MKLDNNNodePtr node) { - if (node->getType() != Convolution) - return false; - - if (node->getParentEdges().size() < 2) - return false; - - auto* convLayer = dynamic_cast(node->getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot get convolution layer " << node->getName(); - - return true; - }; - - auto initializeInputZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0, MKLDNNNodePtr parent1) { - auto* convNode = dynamic_cast(node.get()); - if (convNode == nullptr) - IE_THROW() << "Cannot get convolution node " << node->getName(); - - int IC = node->getParentEdgesAtPort(0)[0]->getDims()[1]; - int OC = node->getChildEdgesAtPort(0)[0]->getDims()[1]; - - if (parent0->getType() == Eltwise) { - // The plug-in doesn't support FP32 convolution with input/weights zero points. - // In case weights are in FP32 (or we have zero points on weights which are not supported by INT8 convolution) we cannot use - // INT8 implementation so we have to disable input zero points fusing as well. - auto weightsLayer = parent1->getCnnLayer(); - if (!weightsLayer || weightsLayer->type != "Const" || weightsLayer->outData[0]->getPrecision() != Precision::I8) { - return false; - } - - auto* eltwiseNode = dynamic_cast(parent0.get()); - if (eltwiseNode->getOpType() != Subtract) - return false; - - if (parent0->getParentEdges().size() != 2) - return false; - - if (parent0->getParentEdgesAtPort(1)[0]->getParent()->getCnnLayer()->type == "Const") { - auto arg0 = parent0->getParentEdgesAtPort(1)[0]->getParent(); - if (arg0->getCnnLayer()->outData[0]->getPrecision() != Precision::U8) - return false; - - if (parent0->getParentEdgesAtPort(1)[0]->getDims().size() < 2) { - return false; - } - - if (parent0->getParentEdgesAtPort(1)[0]->getDims()[1] != 1 && - parent0->getParentEdgesAtPort(1)[0]->getDims()[1] != IC) - return false; - - auto arg1 = parent0->getParentEdgesAtPort(0)[0]->getParent(); - if (arg1->getCnnLayer()->outData[0]->getPrecision() != Precision::U8) - return false; - - auto zeroPointsBlob = dynamic_cast*>(arg0->getCnnLayer()->blobs["custom"].get()); - if (zeroPointsBlob == nullptr) - IE_THROW() << "Cannot cast to TBlob internal zero points blob"; - - auto zeroPointsData = zeroPointsBlob->buffer().as(); - if (zeroPointsData == nullptr) - IE_THROW() << "zeroPointsBlob has not allocated buffer"; - - for (int j = 0; j < parent0->getParentEdgesAtPort(1)[0]->getDims()[1]; j++) { - convNode->inputZeroPoints.push_back(zeroPointsData[j]); - } - } else { - return false; - } - } else { - return false; - } - - if (convNode->outputCompensation.empty()) { - convNode->outputCompensation.resize(OC); - } - - return true; - }; - -// auto initializeWeightsZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0) { +// auto& graphNodes = graph.GetNodes(); +// +// auto isSutableConvNode = [](MKLDNNNodePtr node) { +// if (node->getType() != Convolution) +// return false; +// +// if (node->getParentEdges().size() < 2) +// return false; +// +// auto* convLayer = dynamic_cast(node->getCnnLayer().get()); +// if (convLayer == nullptr) +// IE_THROW() << "Cannot get convolution layer " << node->getName(); +// +// return true; +// }; +// +// auto initializeInputZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr 
parent0, MKLDNNNodePtr parent1) { // auto* convNode = dynamic_cast(node.get()); // if (convNode == nullptr) // IE_THROW() << "Cannot get convolution node " << node->getName(); // +// int IC = node->getParentEdgesAtPort(0)[0]->getDims()[1]; // int OC = node->getChildEdgesAtPort(0)[0]->getDims()[1]; // // if (parent0->getType() == Eltwise) { +// // The plug-in doesn't support FP32 convolution with input/weights zero points. +// // In case weights are in FP32 (or we have zero points on weights which are not supported by INT8 convolution) we cannot use +// // INT8 implementation so we have to disable input zero points fusing as well. +// auto weightsLayer = parent1->getCnnLayer(); +// if (!weightsLayer || weightsLayer->type != "Const" || weightsLayer->outData[0]->getPrecision() != Precision::I8) { +// return false; +// } +// // auto* eltwiseNode = dynamic_cast(parent0.get()); // if (eltwiseNode->getOpType() != Subtract) // return false; @@ -252,27 +203,31 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { // // if (parent0->getParentEdgesAtPort(1)[0]->getParent()->getCnnLayer()->type == "Const") { // auto arg0 = parent0->getParentEdgesAtPort(1)[0]->getParent(); -// if (arg0->getCnnLayer()->outData[0]->getPrecision() != Precision::I8) +// if (arg0->getCnnLayer()->outData[0]->getPrecision() != Precision::U8) +// return false; +// +// if (parent0->getParentEdgesAtPort(1)[0]->getDims().size() < 2) { // return false; +// } // -// if (parent0->getParentEdgesAtPort(1)[0]->getDims()[0] != 1 && -// parent0->getParentEdgesAtPort(1)[0]->getDims()[0] != OC) +// if (parent0->getParentEdgesAtPort(1)[0]->getDims()[1] != 1 && +// parent0->getParentEdgesAtPort(1)[0]->getDims()[1] != IC) // return false; // // auto arg1 = parent0->getParentEdgesAtPort(0)[0]->getParent(); -// if (arg1->getCnnLayer()->outData[0]->getPrecision() != Precision::I8) +// if (arg1->getCnnLayer()->outData[0]->getPrecision() != Precision::U8) // return false; // -// auto zeroPointsBlob = dynamic_cast*>(arg0->getCnnLayer()->blobs["custom"].get()); +// auto zeroPointsBlob = dynamic_cast*>(arg0->getCnnLayer()->blobs["custom"].get()); // if (zeroPointsBlob == nullptr) // IE_THROW() << "Cannot cast to TBlob internal zero points blob"; // -// auto zeroPointsData = zeroPointsBlob->buffer().as(); +// auto zeroPointsData = zeroPointsBlob->buffer().as(); // if (zeroPointsData == nullptr) // IE_THROW() << "zeroPointsBlob has not allocated buffer"; // -// for (int j = 0; j < parent0->getParentEdgesAtPort(1)[0]->getDims()[0]; j++) { -// convNode->weightsZeroPoints.push_back(static_cast(zeroPointsData[j])); +// for (int j = 0; j < parent0->getParentEdgesAtPort(1)[0]->getDims()[1]; j++) { +// convNode->inputZeroPoints.push_back(zeroPointsData[j]); // } // } else { // return false; @@ -281,100 +236,153 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { // return false; // } // +// if (convNode->outputCompensation.empty()) { +// convNode->outputCompensation.resize(OC); +// } +// // return true; // }; - - auto initializeOutputCompensation = [](MKLDNNNodePtr node) { - auto* convNode = dynamic_cast(node.get()); - if (convNode == nullptr) - IE_THROW() << "Cannot get convolution node " << node->getName(); - - auto * convLayer = dynamic_cast(convNode->getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot get eltwise layer " << node->getName(); - - for (int i = 0; i < convLayer->insData.size(); i++) - if (convLayer->insData[i].lock() == nullptr) - IE_THROW() << "Node '"<< 
node->getName() << "' has invalid input data with index " << i; - - if (convNode->inputZeroPoints.empty()) - return; - - auto weightsLayer = getCreatorLayer(convLayer->insData[1].lock()).lock(); - if (weightsLayer->type != "Const") { - weightsLayer = getCreatorLayer(weightsLayer->insData[0].lock()).lock(); - } - - - auto weightsBlob = dynamic_cast*>(weightsLayer->blobs["custom"].get()); - if (weightsBlob == nullptr) - IE_THROW() << "Cannot cast to TBlob internal weights blob"; - - auto weightsPtr = weightsBlob->buffer().as(); - if (weightsPtr == nullptr) - IE_THROW() << "weightsBlob has not allocated buffer"; - - ptrdiff_t G = convLayer->_group; - ptrdiff_t OC = weightsLayer->outData[0]->getDims()[0] / G; - ptrdiff_t IC = weightsLayer->outData[0]->getDims()[1]; - ptrdiff_t KD = weightsLayer->outData[0]->getDims().size() == 5 ? weightsLayer->outData[0]->getDims()[2] : 1; - ptrdiff_t KH = weightsLayer->outData[0]->getDims()[weightsLayer->outData[0]->getDims().size() - 2]; - ptrdiff_t KW = weightsLayer->outData[0]->getDims()[weightsLayer->outData[0]->getDims().size() - 1]; - - for (size_t g = 0; g < G; g++) { - for (size_t oc = 0; oc < OC; oc++) { - int32_t a = 0; - for (size_t ic = 0; ic < IC; ic++) { - for (size_t kd = 0; kd < KD; kd++) { - for (size_t kh = 0; kh < KH; kh++) { - for (size_t kw = 0; kw < KW; kw++) { - size_t widx = g * OC * IC * KD * KH * KW + - oc * IC * KD * KH * KW + - ic * KD * KH * KW + - kd * KH * KW + - kh * KW + - kw; - - auto w = static_cast(weightsPtr[widx]); - - auto izp = !convNode->inputZeroPoints.empty() ? static_cast(convNode->inputZeroPoints[g * IC + ic]) : 0; - a += w * izp; - - auto wzp = !convNode->weightsZeroPoints.empty() ? static_cast(convNode->weightsZeroPoints[g * OC + oc]) : 0; - a -= wzp * izp; - } - } - } - } - convNode->outputCompensation[g * OC + oc] = -a; - } - } - }; - - for (int i = 0; i < graphNodes.size(); i++) { - auto conv = graphNodes[i]; - if (!isSutableConvNode(conv)) continue; - - auto dataEltwise = conv->getParentEdgesAtPort(0)[0]->getParent(); - auto weightsEltwise = conv->getParentEdgesAtPort(1)[0]->getParent(); - if (initializeInputZeroPoints(conv, dataEltwise, weightsEltwise)) { - auto p_edge = dataEltwise->getParentEdgesAtPort(1)[0]; - removeEdge(graph, p_edge); - - graph.DropNode(dataEltwise); - } - -// [TODO] Weights zero point is not supported on oneDNN side for the moment +// +//// auto initializeWeightsZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0) { +//// auto* convNode = dynamic_cast(node.get()); +//// if (convNode == nullptr) +//// IE_THROW() << "Cannot get convolution node " << node->getName(); +//// +//// int OC = node->getChildEdgesAtPort(0)[0]->getDims()[1]; +//// +//// if (parent0->getType() == Eltwise) { +//// auto* eltwiseNode = dynamic_cast(parent0.get()); +//// if (eltwiseNode->getOpType() != Subtract) +//// return false; +//// +//// if (parent0->getParentEdges().size() != 2) +//// return false; +//// +//// if (parent0->getParentEdgesAtPort(1)[0]->getParent()->getCnnLayer()->type == "Const") { +//// auto arg0 = parent0->getParentEdgesAtPort(1)[0]->getParent(); +//// if (arg0->getCnnLayer()->outData[0]->getPrecision() != Precision::I8) +//// return false; +//// +//// if (parent0->getParentEdgesAtPort(1)[0]->getDims()[0] != 1 && +//// parent0->getParentEdgesAtPort(1)[0]->getDims()[0] != OC) +//// return false; +//// +//// auto arg1 = parent0->getParentEdgesAtPort(0)[0]->getParent(); +//// if (arg1->getCnnLayer()->outData[0]->getPrecision() != Precision::I8) +//// return false; +//// +//// auto 
zeroPointsBlob = dynamic_cast*>(arg0->getCnnLayer()->blobs["custom"].get()); +//// if (zeroPointsBlob == nullptr) +//// IE_THROW() << "Cannot cast to TBlob internal zero points blob"; +//// +//// auto zeroPointsData = zeroPointsBlob->buffer().as(); +//// if (zeroPointsData == nullptr) +//// IE_THROW() << "zeroPointsBlob has not allocated buffer"; +//// +//// for (int j = 0; j < parent0->getParentEdgesAtPort(1)[0]->getDims()[0]; j++) { +//// convNode->weightsZeroPoints.push_back(static_cast(zeroPointsData[j])); +//// } +//// } else { +//// return false; +//// } +//// } else { +//// return false; +//// } +//// +//// return true; +//// }; +// +// auto initializeOutputCompensation = [](MKLDNNNodePtr node) { +// auto* convNode = dynamic_cast(node.get()); +// if (convNode == nullptr) +// IE_THROW() << "Cannot get convolution node " << node->getName(); +// +// auto * convLayer = dynamic_cast(convNode->getCnnLayer().get()); +// if (convLayer == nullptr) +// IE_THROW() << "Cannot get eltwise layer " << node->getName(); +// +// for (int i = 0; i < convLayer->insData.size(); i++) +// if (convLayer->insData[i].lock() == nullptr) +// IE_THROW() << "Node '"<< node->getName() << "' has invalid input data with index " << i; +// +// if (convNode->inputZeroPoints.empty()) +// return; +// +// auto weightsLayer = getCreatorLayer(convLayer->insData[1].lock()).lock(); +// if (weightsLayer->type != "Const") { +// weightsLayer = getCreatorLayer(weightsLayer->insData[0].lock()).lock(); +// } +// +// +// auto weightsBlob = dynamic_cast*>(weightsLayer->blobs["custom"].get()); +// if (weightsBlob == nullptr) +// IE_THROW() << "Cannot cast to TBlob internal weights blob"; +// +// auto weightsPtr = weightsBlob->buffer().as(); +// if (weightsPtr == nullptr) +// IE_THROW() << "weightsBlob has not allocated buffer"; +// +// ptrdiff_t G = convLayer->_group; +// ptrdiff_t OC = weightsLayer->outData[0]->getDims()[0] / G; +// ptrdiff_t IC = weightsLayer->outData[0]->getDims()[1]; +// ptrdiff_t KD = weightsLayer->outData[0]->getDims().size() == 5 ? weightsLayer->outData[0]->getDims()[2] : 1; +// ptrdiff_t KH = weightsLayer->outData[0]->getDims()[weightsLayer->outData[0]->getDims().size() - 2]; +// ptrdiff_t KW = weightsLayer->outData[0]->getDims()[weightsLayer->outData[0]->getDims().size() - 1]; +// +// for (size_t g = 0; g < G; g++) { +// for (size_t oc = 0; oc < OC; oc++) { +// int32_t a = 0; +// for (size_t ic = 0; ic < IC; ic++) { +// for (size_t kd = 0; kd < KD; kd++) { +// for (size_t kh = 0; kh < KH; kh++) { +// for (size_t kw = 0; kw < KW; kw++) { +// size_t widx = g * OC * IC * KD * KH * KW + +// oc * IC * KD * KH * KW + +// ic * KD * KH * KW + +// kd * KH * KW + +// kh * KW + +// kw; +// +// auto w = static_cast(weightsPtr[widx]); +// +// auto izp = !convNode->inputZeroPoints.empty() ? static_cast(convNode->inputZeroPoints[g * IC + ic]) : 0; +// a += w * izp; +// +// auto wzp = !convNode->weightsZeroPoints.empty() ? 
static_cast(convNode->weightsZeroPoints[g * OC + oc]) : 0; +// a -= wzp * izp; +// } +// } +// } +// } +// convNode->outputCompensation[g * OC + oc] = -a; +// } +// } +// }; +// +// for (int i = 0; i < graphNodes.size(); i++) { +// auto conv = graphNodes[i]; +// if (!isSutableConvNode(conv)) continue; +// +// auto dataEltwise = conv->getParentEdgesAtPort(0)[0]->getParent(); // auto weightsEltwise = conv->getParentEdgesAtPort(1)[0]->getParent(); -// if (initializeWeightsZeroPoints(conv, weightsEltwise)) { -// auto p_edge = weightsEltwise->getParentEdgesAtPort(1)[0]; +// if (initializeInputZeroPoints(conv, dataEltwise, weightsEltwise)) { +// auto p_edge = dataEltwise->getParentEdgesAtPort(1)[0]; // removeEdge(graph, p_edge); // -// graph.DropNode(weightsEltwise); +// graph.DropNode(dataEltwise); // } - - initializeOutputCompensation(conv); - } +// +//// [TODO] Weights zero point is not supported on oneDNN side for the moment +//// auto weightsEltwise = conv->getParentEdgesAtPort(1)[0]->getParent(); +//// if (initializeWeightsZeroPoints(conv, weightsEltwise)) { +//// auto p_edge = weightsEltwise->getParentEdgesAtPort(1)[0]; +//// removeEdge(graph, p_edge); +//// +//// graph.DropNode(weightsEltwise); +//// } +// +// initializeOutputCompensation(conv); +// } } void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) { @@ -436,285 +444,216 @@ void MKLDNNGraphOptimizer::MergeGroupConvolution(MKLDNNGraph &graph) { // WA: We need it until LP transformations will not optimize this pattern inside void MKLDNNGraphOptimizer::MergeTwoEqualScaleShifts(MKLDNNGraph& graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableScaleShiftNode = [](MKLDNNNodePtr node) { - if (node->getType() != Eltwise) - return false; - - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to Eltwise node"; - - if (eltwiseNode->getChildEdges().size() != 1) - return false; - - if (eltwiseNode->getOpType() != MulAdd) - return false; - - return true; - }; - - auto isEqualScaleShiftNodes = [](MKLDNNNodePtr node1, MKLDNNNodePtr node2) { - if (node1->getParentEdgeAt(0) != node2->getParentEdgeAt(0)) - return false; - - auto *eltwiseNode1 = dynamic_cast(node1.get()); - auto *eltwiseNode2 = dynamic_cast(node2.get()); - - auto eltwiseLayer1 = eltwiseNode1->getCnnLayer(); - auto eltwiseLayer2 = eltwiseNode2->getCnnLayer(); - - Blob::Ptr scalesBlob1 = eltwiseLayer1->blobs["weights"]; - Blob::Ptr shiftsBlob1 = eltwiseLayer1->blobs["biases"]; - Blob::Ptr scalesBlob2 = eltwiseLayer2->blobs["weights"]; - Blob::Ptr shiftsBlob2 = eltwiseLayer2->blobs["biases"]; - if (scalesBlob1 == nullptr || shiftsBlob1 == nullptr || scalesBlob2 == nullptr || shiftsBlob2 == nullptr) - return false; - - if (scalesBlob1->size() != shiftsBlob1->size() || scalesBlob2->size() != shiftsBlob2->size() - || scalesBlob1->size() != scalesBlob2->size()) return false; - - const float *scalesBufferPtr1 = scalesBlob1->buffer().as(); - const float *shiftsBufferPtr1 = shiftsBlob1->buffer().as(); - const float *scalesBufferPtr2 = scalesBlob2->buffer().as(); - const float *shiftsBufferPtr2 = shiftsBlob2->buffer().as(); - - for (int i = 0; i < scalesBlob1->size(); i++) - if (scalesBufferPtr1[i] != scalesBufferPtr2[i] || shiftsBufferPtr1[i] != shiftsBufferPtr2[i]) - return false; - - return true; - }; - - auto MergeScaleShiftNodes = [&](MKLDNNNodePtr childNode1, MKLDNNNodePtr childNode2) { - auto parentNode = childNode2->getParentEdgeAt(0)->getParent(); - auto ccNode2 = 
childNode2->getChildEdgeAt(0)->getChild(); - - auto parentEdges = childNode2->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent() == parentNode) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(childNode2); - - MKLDNNEdgePtr remEdge; - for (auto edge : parentNode->getChildEdges()) { - if (edge.lock()->getChild() == ccNode2) { - remEdge = edge.lock(); - break; - } - } - if (remEdge == nullptr) - IE_THROW() << "Edge was not found"; - remEdge->drop(); - graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), remEdge), graph.GetEdges().end()); - - if (childNode1->getChildEdgeAt(0)->getChild() != ccNode2) { - auto iIndex = childNode1->getChildEdgeAt(0)->getInputNum(); - auto oIndex = remEdge->getOutputNum(); - MKLDNNEdgePtr newEdge(new MKLDNNEdge(childNode1, ccNode2, iIndex, oIndex)); - childNode1->addEdge(newEdge); - graph.GetEdges().push_back(newEdge); - } - }; - - for (int i = 0; i < graphNodes.size(); i++) { - auto parentNode = graphNodes[i]; - if (parentNode->getChildEdges().size() != 2) continue; - - auto childNode1 = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableScaleShiftNode(childNode1)) continue; - - auto childNode2 = parentNode->getChildEdgeAt(1)->getChild(); - if (!isSutableScaleShiftNode(childNode2)) continue; - - if (!isEqualScaleShiftNodes(childNode1, childNode2)) continue; - - MergeScaleShiftNodes(childNode1, childNode2); - } +// auto& graphNodes = graph.GetNodes(); +// +// auto isSutableScaleShiftNode = [](MKLDNNNodePtr node) { +// if (node->getType() != Eltwise) +// return false; +// +// auto* eltwiseNode = dynamic_cast(node.get()); +// if (eltwiseNode == nullptr) +// IE_THROW() << "Cannot cast " << node->getName() << " to Eltwise node"; +// +// if (eltwiseNode->getChildEdges().size() != 1) +// return false; +// +// if (eltwiseNode->getOpType() != MulAdd) +// return false; +// +// return true; +// }; +// +// auto isEqualScaleShiftNodes = [](MKLDNNNodePtr node1, MKLDNNNodePtr node2) { +// if (node1->getParentEdgeAt(0) != node2->getParentEdgeAt(0)) +// return false; +// +// auto *eltwiseNode1 = dynamic_cast(node1.get()); +// auto *eltwiseNode2 = dynamic_cast(node2.get()); +// +// auto eltwiseLayer1 = eltwiseNode1->getCnnLayer(); +// auto eltwiseLayer2 = eltwiseNode2->getCnnLayer(); +// +// Blob::Ptr scalesBlob1 = eltwiseLayer1->blobs["weights"]; +// Blob::Ptr shiftsBlob1 = eltwiseLayer1->blobs["biases"]; +// Blob::Ptr scalesBlob2 = eltwiseLayer2->blobs["weights"]; +// Blob::Ptr shiftsBlob2 = eltwiseLayer2->blobs["biases"]; +// if (scalesBlob1 == nullptr || shiftsBlob1 == nullptr || scalesBlob2 == nullptr || shiftsBlob2 == nullptr) +// return false; +// +// if (scalesBlob1->size() != shiftsBlob1->size() || scalesBlob2->size() != shiftsBlob2->size() +// || scalesBlob1->size() != scalesBlob2->size()) return false; +// +// const float *scalesBufferPtr1 = scalesBlob1->buffer().as(); +// const float *shiftsBufferPtr1 = shiftsBlob1->buffer().as(); +// const float *scalesBufferPtr2 = scalesBlob2->buffer().as(); +// const float *shiftsBufferPtr2 = shiftsBlob2->buffer().as(); +// +// for (int i = 0; i < scalesBlob1->size(); i++) +// if (scalesBufferPtr1[i] != scalesBufferPtr2[i] || shiftsBufferPtr1[i] != shiftsBufferPtr2[i]) +// return false; +// +// return true; +// }; +// +// auto MergeScaleShiftNodes = [&](MKLDNNNodePtr childNode1, MKLDNNNodePtr childNode2) { +// auto parentNode = childNode2->getParentEdgeAt(0)->getParent(); +// auto ccNode2 = 
childNode2->getChildEdgeAt(0)->getChild(); +// +// auto parentEdges = childNode2->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent() == parentNode) +// continue; +// +// removeEdge(graph, p_edge); +// } +// +// graph.DropNode(childNode2); +// +// MKLDNNEdgePtr remEdge; +// for (auto edge : parentNode->getChildEdges()) { +// if (edge.lock()->getChild() == ccNode2) { +// remEdge = edge.lock(); +// break; +// } +// } +// if (remEdge == nullptr) +// IE_THROW() << "Edge was not found"; +// remEdge->drop(); +// graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), remEdge), graph.GetEdges().end()); +// +// if (childNode1->getChildEdgeAt(0)->getChild() != ccNode2) { +// auto iIndex = childNode1->getChildEdgeAt(0)->getInputNum(); +// auto oIndex = remEdge->getOutputNum(); +// MKLDNNEdgePtr newEdge(new MKLDNNEdge(childNode1, ccNode2, iIndex, oIndex)); +// childNode1->addEdge(newEdge); +// graph.GetEdges().push_back(newEdge); +// } +// }; +// +// for (int i = 0; i < graphNodes.size(); i++) { +// auto parentNode = graphNodes[i]; +// if (parentNode->getChildEdges().size() != 2) continue; +// +// auto childNode1 = parentNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableScaleShiftNode(childNode1)) continue; +// +// auto childNode2 = parentNode->getChildEdgeAt(1)->getChild(); +// if (!isSutableScaleShiftNode(childNode2)) continue; +// +// if (!isEqualScaleShiftNodes(childNode1, childNode2)) continue; +// +// MergeScaleShiftNodes(childNode1, childNode2); +// } } void MKLDNNGraphOptimizer::FuseBatchNormWithScale(MKLDNNGraph &graph) { - auto &graphNodes = graph.GetNodes(); - - for (int i = 0; i < graphNodes.size(); i++) { - const auto& bn = graphNodes[i]; - if (bn->getType() == BatchNormalization) { - const auto& outputNodes = graph.GetOutputNodes(); - const std::string node_name = bn->getName(); - // Check that the node is not output node - if (std::find_if(outputNodes.begin(), outputNodes.end(), - [&node_name](const MKLDNNNodePtr& x) { - return x->getName() == node_name;}) == outputNodes.end()) { - if (bn->getChildEdges().size() == 1) { - auto child = bn->getChildEdgeAt(0)->getChild(); - if (child->type == Eltwise && child->getCnnLayer()->type == "ScaleShift") { - bn->fuseWith(child); - - auto parentEdges = child->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == BatchNormalization) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(child); - } - } - } - } - } +// auto &graphNodes = graph.GetNodes(); +// +// for (int i = 0; i < graphNodes.size(); i++) { +// const auto& bn = graphNodes[i]; +// if (bn->getType() == BatchNormalization) { +// const auto& outputNodesMap = graph.GetOutputNodesMap(); +// const std::string node_name = bn->getName(); +// // Check that the node is not output node +// if (std::find_if(outputNodesMap.begin(), outputNodesMap.end(), +// [&node_name](const MKLDNNNodePtr& x) { +// return x->getName() == node_name;}) == outputNodesMap.end()) { +// if (bn->getChildEdges().size() == 1) { +// auto child = bn->getChildEdgeAt(0)->getChild(); +// if (child->type == Eltwise && child->getCnnLayer()->type == "ScaleShift") { +// bn->fuseWith(child); +// +// auto parentEdges = child->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == BatchNormalization) +// continue; +// +// removeEdge(graph, p_edge); +// } +// +// 
graph.DropNode(child); +// } +// } +// } +// } +// } } void MKLDNNGraphOptimizer::FuseConvolutionAndActivation(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) { - auto* binConv = dynamic_cast(conv.get()); - if (binConv) { - if (!binConv->canFuse(activation)) - return false; - } - - if (!activation->getCnnLayer()) - return false; - - auto* eltwiseNode = dynamic_cast(activation.get()); - - return eltwiseNode && - (eltwiseNode->getOpType() == Relu || - (conv->getCnnLayer()->precision == Precision::FP32 && - IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid, - Round}))); - }; - - for (int i = 0; i < graphNodes.size(); i++) { - if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) { - auto conv = graphNodes[i]; - - auto fuse = [&] (MKLDNNNodePtr relu) { - conv->fuseWith(relu); - }; - - if (conv->getChildEdges().size() == 1) { - auto ch1 = conv->getChildEdgeAt(0)->getChild(); - - if (isFusingSupported(conv, ch1)) { - fuse(ch1); - - if (ch1->getChildEdges().size() == 1) { - auto ch2 = ch1->getChildEdgeAt(0)->getChild(); - - if (isFusingSupported(conv, ch2)) { - fuse(ch2); - graph.DropNode(ch2); - } - } - graph.DropNode(ch1); - } else { - if (ch1->type == Pooling) { - auto pool = ch1; - - auto* pLayer = dynamic_cast(pool->getCnnLayer().get()); - if (pLayer == nullptr) - IE_THROW() << "Cannot get pooling layer " << pool->getName(); - bool is_max_pool = pLayer->_type == PoolingLayer::PoolType::MAX; - - if (is_max_pool && pool->getChildEdges().size() == 1) { - auto ch2 = pool->getChildEdgeAt(0)->getChild(); - if (isFusingSupported(conv, ch2)) { - fuse(ch2); - graph.DropNode(ch2); - } - } - } - } - } - } - } +// auto& graphNodes = graph.GetNodes(); +// +// auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) { +// auto* binConv = dynamic_cast(conv.get()); +// if (binConv) { +// if (!binConv->canFuse(activation)) +// return false; +// } +// +// if (!activation->getCnnLayer()) +// return false; +// +// auto* eltwiseNode = dynamic_cast(activation.get()); +// +// return eltwiseNode && +// (eltwiseNode->getOpType() == Relu || +// (conv->getCnnLayer()->precision == Precision::FP32 && +// IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid, +// Round}))); +// }; +// +// for (int i = 0; i < graphNodes.size(); i++) { +// if (graphNodes[i]->getType() == Convolution || graphNodes[i]->getType() == BinaryConvolution) { +// auto conv = graphNodes[i]; +// +// auto fuse = [&] (MKLDNNNodePtr relu) { +// conv->fuseWith(relu); +// }; +// +// if (conv->getChildEdges().size() == 1) { +// auto ch1 = conv->getChildEdgeAt(0)->getChild(); +// +// if (isFusingSupported(conv, ch1)) { +// fuse(ch1); +// +// if (ch1->getChildEdges().size() == 1) { +// auto ch2 = ch1->getChildEdgeAt(0)->getChild(); +// +// if (isFusingSupported(conv, ch2)) { +// fuse(ch2); +// graph.DropNode(ch2); +// } +// } +// graph.DropNode(ch1); +// } else { +// if (ch1->type == Pooling) { +// auto pool = ch1; +// +// auto* pLayer = dynamic_cast(pool->getCnnLayer().get()); +// if (pLayer == nullptr) +// IE_THROW() << "Cannot get pooling layer " << pool->getName(); +// bool is_max_pool = pLayer->_type == PoolingLayer::PoolType::MAX; +// +// if (is_max_pool && pool->getChildEdges().size() == 1) { +// auto ch2 = pool->getChildEdgeAt(0)->getChild(); +// if (isFusingSupported(conv, ch2)) { +// fuse(ch2); +// 
graph.DropNode(ch2); +// } +// } +// } +// } +// } +// } +// } } void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && - node->getChildEdges().size() == 1; - }; - - auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { - if (!childNode->getCnnLayer()) - return false; - - if (childNode->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(childNode.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << childNode->getName(); - - if (parentNode->getParentEdgesAtPort(0)[0]->getDims().ndims() != 3) { - return !quantizeNode->isBinarization(); - } else { - return (quantizeNode->isInputLowBroadcast() && quantizeNode->isInputHighBroadcast() && - quantizeNode->isOutputLowBroadcast() && quantizeNode->isOutputHighBroadcast() && - !quantizeNode->isBinarization()); - } - } else if (childNode->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(childNode.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get Eltwise node " << childNode->getName(); - - if (IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, - Hsigmoid, Round})) { - return true; - } else if (IsOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu})) { - if (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() != 2) - return false; - - if (parentNode->getParentEdgesAtPort(0)[0]->getDims().ndims() != 3) { - return true; - } else { - const auto &eltwiseLayer = eltwiseNode->getCnnLayer(); - if (eltwiseLayer == nullptr) - IE_THROW() << "Cannot get scale shift layer " << eltwiseNode->getName(); - - if (eltwiseNode->getOpType() != MulAdd) - return false; - - Blob::Ptr scalesBlob = eltwiseLayer->blobs["weights"]; - if (scalesBlob == nullptr) - return false; - - Blob::Ptr shiftsBlob = eltwiseLayer->blobs["biases"]; - if (shiftsBlob == nullptr) - return false; - - const float *scalesBufferPtr = scalesBlob->buffer().as(); - const float *shiftsBufferPtr = shiftsBlob->buffer().as(); - - if (scalesBlob->size() != shiftsBlob->size()) - return false; - - for (int i = 1; i < scalesBlob->size(); i++) - if (scalesBufferPtr[0] != scalesBufferPtr[i]) - return false; - - for (int i = 1; i < shiftsBlob->size(); i++) - if (shiftsBufferPtr[0] != shiftsBufferPtr[i]) - return false; - - return true; - } - } - } - - return false; + return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getDims().ndims() != 3; }; auto parent = graphNodes.begin(); @@ -726,14 +665,14 @@ void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &gra } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(parentNode, childNode)) { + if (!parentNode->canFuseSimpleOperation(childNode)) { parent++; continue; } - parentNode->fuseWith(childNode); + childNode->fuseInto(parentNode); - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { + if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { auto parentEdges = childNode->parentEdges; for (auto &parentEdge : parentEdges) { auto p_edge = parentEdge.lock(); @@ -956,10 +895,6 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndQuantize(MKLDNNGraph &graph) { bool isSutableBinConv = node->getType() == Convolution; if (isSutableBinConv) { - auto *convLayer = 
dynamic_cast(node->getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot get convolution layer " << node->getName(); - return isSutableBinConv && node->getChildEdges().size() == 1; } else { return false; @@ -967,9 +902,6 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndQuantize(MKLDNNGraph &graph) { }; auto isSutableChildNode = [](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - if (node->getType() != Quantize) return false; @@ -1003,67 +935,67 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndQuantize(MKLDNNGraph &graph) { } void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == Convolution && - node->getChildEdges().size() == 1 && - node->getCnnLayer()->precision == Precision::FP32; - }; - - auto isSutableChildNode = [&](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - - return !quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << node->getName(); - - return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) || - (eltwiseNode->getOpType() == Prelu) || - IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, - Hsigmoid, Round})); - } - - return false; - }; - - auto parent = graphNodes.begin(); - while (parent != graphNodes.end()) { - auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { - parent++; - continue; - } - - auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(childNode)) { - parent++; - continue; - } - - parentNode->fuseWith(childNode); - - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { - auto parentEdges = childNode->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Convolution) - continue; - - removeEdge(graph, p_edge); - } - } - - graph.DropNode(childNode); - } +// auto& graphNodes = graph.GetNodes(); +// +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// return node->getType() == Convolution && +// node->getChildEdges().size() == 1 && +// node->getCnnLayer()->precision == Precision::FP32; +// }; +// +// auto isSutableChildNode = [&](MKLDNNNodePtr node) { +// if (!node->getCnnLayer()) +// return false; +// +// if (node->getType() == Quantize) { +// auto* quantizeNode = dynamic_cast(node.get()); +// if (quantizeNode == nullptr) +// IE_THROW() << "Cannot get quantize layer " << node->getName(); +// +// return !quantizeNode->isBinarization(); +// } else if (node->getType() == Eltwise) { +// auto* eltwiseNode = dynamic_cast(node.get()); +// if (eltwiseNode == nullptr) +// IE_THROW() << "Cannot get eltwise node " << node->getName(); +// +// return ((eltwiseNode->getOpType() == MulAdd && node->getCnnLayer()->blobs.size() == 2) || +// (eltwiseNode->getOpType() == Prelu) || +// IsOneOf(eltwiseNode->getOpType(), {Relu, Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, +// Hsigmoid, Round})); +// } +// +// return false; +// }; +// +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto 
parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } +// +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(childNode)) { +// parent++; +// continue; +// } +// +// parentNode->fuseWith(childNode); +// +// if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == Convolution) +// continue; +// +// removeEdge(graph, p_edge); +// } +// } +// +// graph.DropNode(childNode); +// } } void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) { @@ -1109,61 +1041,56 @@ void MKLDNNGraphOptimizer::FuseBinaryConvolutionAndQuantize(MKLDNNGraph &graph) } void MKLDNNGraphOptimizer::FusePoolingAndQuantize(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutablePooling = node->getType() == Pooling; - - if (isSutablePooling) { - auto *poolingLayer = dynamic_cast(node->getCnnLayer().get()); - if (poolingLayer == nullptr) - IE_THROW() << "Cannot get Pooling layer " << node->getName(); - - // Optimized FP32 Pooling doesn't support fusing with FQ - auto inputPrecision = poolingLayer->insData[0].lock()->getPrecision(); - if (inputPrecision != Precision::U8 && inputPrecision != Precision::I8) - return false; - - return node->getChildEdges().size() == 1 && poolingLayer->_type == PoolingLayer::AVG; - } else { - return false; - } - }; - - auto isSutableChildNode = [](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() != Quantize) - return false; - - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - - return !quantizeNode->isBinarization(); - }; - - for (int i = 0; i < graphNodes.size(); i++) { - auto parent = graphNodes[i]; - if (!isSutableParentNode(parent)) continue; - - auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(child)) continue; - - parent->fuseWith(child); - - auto parents = child->parentEdges; - for (size_t i = 0; i < parents.size(); i++) { - auto p_edge = parents[i].lock(); - if (p_edge->getParent()->getType() == Pooling) - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(child); - } +// auto& graphNodes = graph.GetNodes(); +// +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// bool isSutablePooling = node->getType() == Pooling; +// +// if (isSutablePooling) { +// auto *poolingLayer = dynamic_cast(node->getCnnLayer().get()); +// if (poolingLayer == nullptr) +// IE_THROW() << "Cannot get Pooling layer " << node->getName(); +// +// return node->getChildEdges().size() == 1 && poolingLayer->_type == PoolingLayer::AVG; +// } else { +// return false; +// } +// }; +// +// auto isSutableChildNode = [](MKLDNNNodePtr node) { +// if (!node->getCnnLayer()) +// return false; +// +// if (node->getType() != Quantize) +// return false; +// +// auto* quantizeNode = dynamic_cast(node.get()); +// if (quantizeNode == nullptr) +// IE_THROW() << "Cannot get quantize layer " << node->getName(); +// +// return !quantizeNode->isBinarization(); +// }; +// +// for (int i = 0; i < graphNodes.size(); i++) { +// auto parent = graphNodes[i]; +// if (!isSutableParentNode(parent)) continue; +// +// auto child = parent->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(child)) 
continue; +// +// parent->fuseWith(child); +// +// auto parents = child->parentEdges; +// for (size_t i = 0; i < parents.size(); i++) { +// auto p_edge = parents[i].lock(); +// if (p_edge->getParent()->getType() == Pooling) +// continue; +// +// removeEdge(graph, p_edge); +// } +// +// graph.DropNode(child); +// } } /** @@ -1235,24 +1162,17 @@ static bool is_data_dependency(const std::shared_ptr &parent, void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNGraph &graph) { std::vector &graphNodes = graph.GetNodes(); - auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr activation) { - if (!activation->getCnnLayer()) - return false; - - auto* eltwiseNode = dynamic_cast(activation.get()); - - return eltwiseNode && - (eltwiseNode->getOpType() == Relu || - (conv->getCnnLayer()->precision == Precision::FP32 && - IsOneOf(eltwiseNode->getOpType(), {Elu, Logistic, BoundedRelu, Clamp, Swish, Hswish, Mish, Hsigmoid, - Round}))); + auto isFusingSupported = [&](MKLDNNNodePtr conv, MKLDNNNodePtr child) { + return child->getType() == Eltwise && + one_of(child->getAlgorithm(), EltwiseRelu, EltwiseElu, EltwiseSigmoid, EltwiseBoundedRelu, EltwiseClamp, EltwiseSwish, EltwiseHswish, + EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero); }; for (auto &graphNode : graphNodes) { if (graphNode->getType() != Eltwise) continue; - if (!std::dynamic_pointer_cast(graphNode)->isSum()) continue; + if (!(std::dynamic_pointer_cast(graphNode)->getAlgorithm() == EltwiseAdd)) continue; if (std::dynamic_pointer_cast(graphNode)->isWithBroadcast()) continue; // TODO: Enlarge to several inputs @@ -1389,73 +1309,73 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG } void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutableMVN = (node->getType() == MVN) && (node->inDims[0].ndims() == 4 || node->inDims[0].ndims() == 5); - - if (isSutableMVN) { - auto *mvnLayer = dynamic_cast(node->getCnnLayer().get()); - if (mvnLayer == nullptr) - IE_THROW() << "Cannot get MVN layer " << node->getName(); - - return node->getChildEdges().size() == 1 && mvnLayer->across_channels == 0 && mvnLayer->normalize == 1; - } else { - return false; - } - }; - - auto isSutableChildNode = [](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - return !quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << node->getName(); - - return ((eltwiseNode->getOpType() == MulAdd) || - (eltwiseNode->getOpType() == Prelu) || - eltwiseNode->getOpType() == Relu); - } - - return false; - }; - - auto parent = graphNodes.begin(); - while (parent != graphNodes.end()) { - auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { - parent++; - continue; - } - - auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(childNode)) { - parent++; - continue; - } - - parentNode->fuseWith(childNode); - - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { - auto parentEdges = childNode->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = 
parentEdge.lock(); - if (p_edge->getParent()->getType() == MVN) - continue; - - removeEdge(graph, p_edge); - } - } - - graph.DropNode(childNode); - } +// auto& graphNodes = graph.GetNodes(); +// +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// bool isSutableMVN = (node->getType() == MVN) && (node->inDims[0].ndims() == 4 || node->inDims[0].ndims() == 5); +// +// if (isSutableMVN) { +// auto *mvnLayer = dynamic_cast(node->getCnnLayer().get()); +// if (mvnLayer == nullptr) +// IE_THROW() << "Cannot get MVN layer " << node->getName(); +// +// return node->getChildEdges().size() == 1 && mvnLayer->across_channels == 0 && mvnLayer->normalize == 1; +// } else { +// return false; +// } +// }; +// +// auto isSutableChildNode = [](MKLDNNNodePtr node) { +// if (!node->getCnnLayer()) +// return false; +// +// if (node->getType() == Quantize) { +// auto* quantizeNode = dynamic_cast(node.get()); +// if (quantizeNode == nullptr) +// IE_THROW() << "Cannot get quantize layer " << node->getName(); +// return !quantizeNode->isBinarization(); +// } else if (node->getType() == Eltwise) { +// auto* eltwiseNode = dynamic_cast(node.get()); +// if (eltwiseNode == nullptr) +// IE_THROW() << "Cannot get eltwise node " << node->getName(); +// +// return ((eltwiseNode->getOpType() == MulAdd) || +// (eltwiseNode->getOpType() == Prelu) || +// eltwiseNode->getOpType() == Relu); +// } +// +// return false; +// }; +// +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } +// +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(childNode)) { +// parent++; +// continue; +// } +// +// parentNode->fuseWith(childNode); +// +// if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == MVN) +// continue; +// +// removeEdge(graph, p_edge); +// } +// } +// +// graph.DropNode(childNode); +// } } void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) { @@ -1516,69 +1436,69 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) } void MKLDNNGraphOptimizer::FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - bool isSutableNormalize = node->getType() == Normalize; - - if (isSutableNormalize) { - return node->getChildEdges().size() == 1; - } else { - return false; - } - }; - - auto isSutableChildNode = [&](MKLDNNNodePtr node) { - if (!node->getCnnLayer()) - return false; - - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize layer " << node->getName(); - return !quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto *eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get Eltwise node " << node->getName(); - return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish, - Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) || - ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) || - (eltwiseNode->getOpType() == Prelu)); - } - - return false; - }; - - auto parent = 
graphNodes.begin(); - while (parent != graphNodes.end()) { - auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { - parent++; - continue; - } - - auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(childNode)) { - parent++; - continue; - } - - parentNode->fuseWith(childNode); - - if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { - auto parentEdges = childNode->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == Normalize) - continue; - - removeEdge(graph, p_edge); - } - } - - graph.DropNode(childNode); - } +// auto& graphNodes = graph.GetNodes(); +// +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// bool isSutableNormalize = node->getType() == Normalize; +// +// if (isSutableNormalize) { +// return node->getChildEdges().size() == 1; +// } else { +// return false; +// } +// }; +// +// auto isSutableChildNode = [&](MKLDNNNodePtr node) { +// if (!node->getCnnLayer()) +// return false; +// +// if (node->getType() == Quantize) { +// auto* quantizeNode = dynamic_cast(node.get()); +// if (quantizeNode == nullptr) +// IE_THROW() << "Cannot get quantize layer " << node->getName(); +// return !quantizeNode->isBinarization(); +// } else if (node->getType() == Eltwise) { +// auto *eltwiseNode = dynamic_cast(node.get()); +// if (eltwiseNode == nullptr) +// IE_THROW() << "Cannot get Eltwise node " << node->getName(); +// return IsOneOf(eltwiseNode->getOpType(), {Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, Tanh, Swish, +// Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) || +// ((eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2) || +// (eltwiseNode->getOpType() == Prelu)); +// } +// +// return false; +// }; +// +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } +// +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(childNode)) { +// parent++; +// continue; +// } +// +// parentNode->fuseWith(childNode); +// +// if (childNode->getType() == Quantize || childNode->getType() == Eltwise) { +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == Normalize) +// continue; +// +// removeEdge(graph, p_edge); +// } +// } +// +// graph.DropNode(childNode); +// } } void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { @@ -1701,35 +1621,6 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { } } -void MKLDNNGraphOptimizer::RemoveIdentityOperator(MKLDNNGraph &graph) { - for (MKLDNNNodePtr& node : graph.GetNodes()) { - bool toDrop = false; - - if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode->getOpType() == PowerStatic) { - PowerLayer *l = dynamic_cast(node->getCnnLayer().get()); - if (l == nullptr) - IE_THROW() << "Cannot get power layer " << node->getName(); - - if (l->power == 1.0f && l->scale == 1.0f && l->offset == 0.0f) toDrop = true; - } - } - - if (node->getType() == Eltwise && node->getCnnLayer()->type == "ScaleShift") { - ScaleShiftLayer* l = dynamic_cast(node->getCnnLayer().get()); - if (l == nullptr) - IE_THROW() << "Cannot get scale shift layer " << node->getName(); - - if (l->_weights == nullptr && l->_biases == nullptr) toDrop = 
true; - } - - if (node->getType() == Copy) toDrop = true; - - if (toDrop) graph.DropNode(node); - } -} - void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { std::set processed; int graphNodesSize = graph.GetNodes().size(); @@ -1783,88 +1674,43 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { } void MKLDNNGraphOptimizer::DropConvertReorder(MKLDNNGraph& graph) { - for (auto input : graph.GetNodes()) { - if (input->getType() != Input) { - continue; - } - - auto inTD = input->getCnnLayer().get()->outData[0]->getTensorDesc(); - for (size_t i = 0; i < input->getChildEdges().size(); i++) { - auto inputEdge = input->getChildEdgeAt(i); - auto convert = inputEdge->getChild(); - if (convert->getType() == Convert) { - for (int j = 0; j < convert->getChildEdges().size(); j++) { - auto convertEdge = convert->getChildEdgeAt(j); - auto reorder = convertEdge->getChild(); - if (reorder->getType() == Reorder) { - MKLDNNReorderNode* rn = dynamic_cast(reorder.get()); - auto rnOutput = rn->getOutput(); - if (inTD.getPrecision() == rnOutput.getPrecision() && - inTD.getLayout() == rnOutput.getLayout() && - inTD.getDims() == rnOutput.getDims()) { - /** - * TODO: just drop extra nodes instead of moving edges - * graph.DropNode(convert); - * graph.DropNode(reorder); - */ - auto avterReorder = reorder->getChildEdgeAt(0)->getChild(); - auto oldEdgeNum = reorder->getChildEdgeAt(0)->getOutputNum(); - reorder->getChildEdgeAt(0)->drop(); - convertEdge->drop(); - - MKLDNNEdgePtr newEdge(new MKLDNNEdge(input, avterReorder, i, oldEdgeNum)); - graph.GetEdges().push_back(newEdge); - input->addEdge(newEdge); - j--; - } - } - } - } - } - } -} - -void MKLDNNGraphOptimizer::RemoveIOScaleShifts(MKLDNNGraph &graph) { - for (MKLDNNNodePtr& node : graph.GetNodes()) { - if (node->getType() == Eltwise && node->getCnnLayer()->type == "ScaleShift") { - ScaleShiftLayer* l = dynamic_cast(node->getCnnLayer().get()); - if (l == nullptr) - IE_THROW() << "Cannot get scale shift layer " << node->getName(); - - auto cur = l->insData[0].lock(); - if (cur == nullptr) { - IE_THROW() << "[MKLDNN] error - invalid input data"; - } - if (cur->getTensorDesc().getPrecision() != l->outData[0]->getTensorDesc().getPrecision()) { - if (node->name.find("_iScaleShift_") != std::string::npos) { - auto child = node->childEdges[0].lock()->getChild(); - if (child->type == Reorder) { - MKLDNNReorderNode* rn = dynamic_cast(child.get()); - if (rn != nullptr) { - rn->_scales = l->_weights; - graph.DropNode(node); - } - } else { - IE_THROW() << "Strange case. No Reorder after iScaleShift"; - } - } else if (node->name.find("_oScaleShift_") != std::string::npos) { - auto parent = node->parentEdges[0].lock()->getParent(); - - if (parent->type == Reorder) { - MKLDNNReorderNode* rn = dynamic_cast(parent.get()); - if (rn != nullptr) { - rn->_scales = l->_weights; - graph.DropNode(node); - } - } else { - IE_THROW() << "Strange case. 
No Reorder before oScaleShift"; - } - } - } - } - } +// for (auto input : graph.GetNodes()) { +// if (input->getType() != Input) { +// continue; +// } +// +// auto inTD = input->getCnnLayer().get()->outData[0]->getTensorDesc(); +// for (size_t i = 0; i < input->getChildEdges().size(); i++) { +// auto inputEdge = input->getChildEdgeAt(i); +// auto convert = inputEdge->getChild(); +// if (convert->getType() == Convert) { +// for (int j = 0; j < convert->getChildEdges().size(); j++) { +// auto convertEdge = convert->getChildEdgeAt(j); +// auto reorder = convertEdge->getChild(); +// if (reorder->getType() == Reorder) { +// MKLDNNReorderNode* rn = dynamic_cast(reorder.get()); +// auto rnOutput = rn->getOutput(); +// if (inTD.getPrecision() == rnOutput.getPrecision() && +// inTD.getLayout() == rnOutput.getLayout() && +// inTD.getDims() == rnOutput.getDims()) { +// auto avterReorder = reorder->getChildEdgeAt(0)->getChild(); +// auto oldEdgeNum = reorder->getChildEdgeAt(0)->getOutputNum(); +// reorder->getChildEdgeAt(0)->drop(); +// convertEdge->drop(); +// +// MKLDNNEdgePtr newEdge(new MKLDNNEdge(input, avterReorder, i, oldEdgeNum)); +// graph.GetEdges().push_back(newEdge); +// input->addEdge(newEdge); +// j--; +// } +// } +// } +// } +// } +// } } +// TODO [NM]: reuse common/general_utils version bool MKLDNNGraphOptimizer::IsOneOf(Type type, std::vector types) { for (auto tp : types) { if (type == tp) { @@ -1874,15 +1720,6 @@ bool MKLDNNGraphOptimizer::IsOneOf(Type type, std::vector types) { return false; } -bool MKLDNNGraphOptimizer::IsOneOf(EltwiseOpType alg, std::vector algs) { - for (auto a : algs) { - if (alg == a) { - return true; - } - } - return false; -} - void MKLDNNGraphOptimizer::removeEdge(MKLDNNGraph &graph, MKLDNNEdgePtr& edge) { auto& edges = graph.GetEdges(); for (auto it = edges.begin(); it != edges.end(); it++) { @@ -1930,14 +1767,10 @@ void MKLDNNGraphOptimizer::FuseClampAndQuantize(MKLDNNGraph &graph) { if (node->getType() != Eltwise) return false; - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to Eltwise node"; - - if (eltwiseNode->getChildEdges().size() != 1) + if (node->getChildEdges().size() != 1) return false; - if (eltwiseNode->getOpType() != Clamp) + if (node->getAlgorithm() != EltwiseClamp) return false; return true; @@ -1993,130 +1826,130 @@ void MKLDNNGraphOptimizer::FuseClampAndQuantize(MKLDNNGraph &graph) { } void MKLDNNGraphOptimizer::FuseScaleShiftAndQuantize(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableScaleShiftNode = [](MKLDNNNodePtr node) { - if (node->getType() != Eltwise) - return false; - - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to eltwise node"; - - if (eltwiseNode->getChildEdges().size() != 1) - return false; - - if (eltwiseNode->getOpType() != MulAdd) - return false; - - return true; - }; - - auto isSutableQuantizeNode = [](MKLDNNNodePtr node) { - if (node->getType() != Quantize) - return false; - - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot cast " << node->getName() << " to Quantize node"; - - return !quantizeNode->isBinarization(); - }; - - auto fuseScaleShiftAndQuantizeNodes = [](MKLDNNNodePtr parent, MKLDNNNodePtr child) { - auto* eltwiseNode = dynamic_cast(parent.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot cast " << parent->getName() << " to eltwise node"; - - 
auto eltwiseLayer = eltwiseNode->getCnnLayer(); - if (eltwiseLayer == nullptr) - IE_THROW() << "Cannot get scale shift layer " << eltwiseNode->getName(); - - auto* quantizeNode = dynamic_cast(child.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot cast " << child->getName() << " to Quantize node"; - - Blob::Ptr scalesBlob = eltwiseLayer->blobs["weights"]; - if (scalesBlob == nullptr) - return false; - - Blob::Ptr shiftsBlob = eltwiseLayer->blobs["biases"]; - if (shiftsBlob == nullptr) - return false; - - const float* scalesBufferPtr = scalesBlob->buffer().as(); - const float* shiftsBufferPtr = shiftsBlob->buffer().as(); - - if (scalesBlob->size() != shiftsBlob->size()) - return false; - - for (int i = 0; i < scalesBlob->size(); i++) - if (scalesBufferPtr[i] <= 0.f) - return false; - - const std::vector& cropLowData = quantizeNode->getCropLow(); - const std::vector& cropHighData = quantizeNode->getCropHigh(); - const std::vector& inputScaleData = quantizeNode->getInputScale(); - const std::vector& inputShiftData = quantizeNode->getInputShift(); - - std::vector newCropLow(scalesBlob->size()); - std::vector newCropHigh(scalesBlob->size()); - std::vector newInputScale(scalesBlob->size()); - std::vector newInputShift(scalesBlob->size()); - - for (int i = 0; i < newCropLow.size(); i++) { - float cl = cropLowData.size() == 1 ? cropLowData[0] : cropLowData[i]; - - newCropLow[i] = (cl - shiftsBufferPtr[i]) / scalesBufferPtr[i]; - } - - for (int i = 0; i < newCropHigh.size(); i++) { - float ch = cropHighData.size() == 1 ? cropHighData[0] : cropHighData[i]; - - newCropHigh[i] = (ch - shiftsBufferPtr[i]) / scalesBufferPtr[i]; - } - - for (int i = 0; i < newInputScale.size(); i++) { - float isc = inputScaleData.size() == 1 ? inputScaleData[0] : inputScaleData[i]; - - newInputScale[i] = isc * scalesBufferPtr[i]; - } - - for (int i = 0; i < newInputShift.size(); i++) { - float isc = inputScaleData.size() == 1 ? inputScaleData[0] : inputScaleData[i]; - float ish = inputShiftData.size() == 1 ? 
inputShiftData[0] : inputShiftData[i]; - - newInputShift[i] = ish + shiftsBufferPtr[i] * isc; - } - - quantizeNode->setCropLow(newCropLow); - quantizeNode->setCropHigh(newCropHigh); - quantizeNode->setInputScale(newInputScale); - quantizeNode->setInputShift(newInputShift); - - return true; - }; - - for (int i = 0; i < graphNodes.size(); i++) { - auto parent = graphNodes[i]; - if (!isSutableScaleShiftNode(parent)) continue; - - auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableQuantizeNode(child)) continue; - - if (fuseScaleShiftAndQuantizeNodes(parent, child)) { - auto parentEdges = parent->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getCnnLayer()->type != "Const") - continue; - - removeEdge(graph, p_edge); - } - - graph.DropNode(parent); - } - } +// auto& graphNodes = graph.GetNodes(); +// +// auto isSutableScaleShiftNode = [](MKLDNNNodePtr node) { +// if (node->getType() != Eltwise) +// return false; +// +// auto* eltwiseNode = dynamic_cast(node.get()); +// if (eltwiseNode == nullptr) +// IE_THROW() << "Cannot cast " << node->getName() << " to eltwise node"; +// +// if (eltwiseNode->getChildEdges().size() != 1) +// return false; +// +// if (eltwiseNode->getOpType() != MulAdd) +// return false; +// +// return true; +// }; +// +// auto isSutableQuantizeNode = [](MKLDNNNodePtr node) { +// if (node->getType() != Quantize) +// return false; +// +// auto* quantizeNode = dynamic_cast(node.get()); +// if (quantizeNode == nullptr) +// IE_THROW() << "Cannot cast " << node->getName() << " to Quantize node"; +// +// return !quantizeNode->isBinarization(); +// }; +// +// auto fuseScaleShiftAndQuantizeNodes = [](MKLDNNNodePtr parent, MKLDNNNodePtr child) { +// auto* eltwiseNode = dynamic_cast(parent.get()); +// if (eltwiseNode == nullptr) +// IE_THROW() << "Cannot cast " << parent->getName() << " to eltwise node"; +// +// auto eltwiseLayer = eltwiseNode->getCnnLayer(); +// if (eltwiseLayer == nullptr) +// IE_THROW() << "Cannot get scale shift layer " << eltwiseNode->getName(); +// +// auto* quantizeNode = dynamic_cast(child.get()); +// if (quantizeNode == nullptr) +// IE_THROW() << "Cannot cast " << child->getName() << " to Quantize node"; +// +// Blob::Ptr scalesBlob = eltwiseLayer->blobs["weights"]; +// if (scalesBlob == nullptr) +// return false; +// +// Blob::Ptr shiftsBlob = eltwiseLayer->blobs["biases"]; +// if (shiftsBlob == nullptr) +// return false; +// +// const float* scalesBufferPtr = scalesBlob->buffer().as(); +// const float* shiftsBufferPtr = shiftsBlob->buffer().as(); +// +// if (scalesBlob->size() != shiftsBlob->size()) +// return false; +// +// for (int i = 0; i < scalesBlob->size(); i++) +// if (scalesBufferPtr[i] <= 0.f) +// return false; +// +// const std::vector& cropLowData = quantizeNode->getCropLow(); +// const std::vector& cropHighData = quantizeNode->getCropHigh(); +// const std::vector& inputScaleData = quantizeNode->getInputScale(); +// const std::vector& inputShiftData = quantizeNode->getInputShift(); +// +// std::vector newCropLow(scalesBlob->size()); +// std::vector newCropHigh(scalesBlob->size()); +// std::vector newInputScale(scalesBlob->size()); +// std::vector newInputShift(scalesBlob->size()); +// +// for (int i = 0; i < newCropLow.size(); i++) { +// float cl = cropLowData.size() == 1 ? 
cropLowData[0] : cropLowData[i]; +// +// newCropLow[i] = (cl - shiftsBufferPtr[i]) / scalesBufferPtr[i]; +// } +// +// for (int i = 0; i < newCropHigh.size(); i++) { +// float ch = cropHighData.size() == 1 ? cropHighData[0] : cropHighData[i]; +// +// newCropHigh[i] = (ch - shiftsBufferPtr[i]) / scalesBufferPtr[i]; +// } +// +// for (int i = 0; i < newInputScale.size(); i++) { +// float isc = inputScaleData.size() == 1 ? inputScaleData[0] : inputScaleData[i]; +// +// newInputScale[i] = isc * scalesBufferPtr[i]; +// } +// +// for (int i = 0; i < newInputShift.size(); i++) { +// float isc = inputScaleData.size() == 1 ? inputScaleData[0] : inputScaleData[i]; +// float ish = inputShiftData.size() == 1 ? inputShiftData[0] : inputShiftData[i]; +// +// newInputShift[i] = ish + shiftsBufferPtr[i] * isc; +// } +// +// quantizeNode->setCropLow(newCropLow); +// quantizeNode->setCropHigh(newCropHigh); +// quantizeNode->setInputScale(newInputScale); +// quantizeNode->setInputShift(newInputShift); +// +// return true; +// }; +// +// for (int i = 0; i < graphNodes.size(); i++) { +// auto parent = graphNodes[i]; +// if (!isSutableScaleShiftNode(parent)) continue; +// +// auto child = parent->getChildEdgeAt(0)->getChild(); +// if (!isSutableQuantizeNode(child)) continue; +// +// if (fuseScaleShiftAndQuantizeNodes(parent, child)) { +// auto parentEdges = parent->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getCnnLayer()->type != "Const") +// continue; +// +// removeEdge(graph, p_edge); +// } +// +// graph.DropNode(parent); +// } +// } } void MKLDNNGraphOptimizer::MergePermuteAndReorder(MKLDNNGraph &graph) { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h index 60034d6cbec812..01282cca50e65e 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.h @@ -34,9 +34,7 @@ class MKLDNNGraphOptimizer { void FuseMVNAndSimpleOperation(MKLDNNGraph &graph); void FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph); void FuseNormalizeAndSimpleOperation(MKLDNNGraph &graph); - void RemoveIdentityOperator(MKLDNNGraph& graph); - void RemoveIOScaleShifts(MKLDNNGraph& graph); void DropDoubleReorders(MKLDNNGraph& graph); void DropConvertReorder(MKLDNNGraph& graph); void AddConvertToReorder(MKLDNNGraph &graph); @@ -48,7 +46,6 @@ class MKLDNNGraphOptimizer { void MergePermuteAndReorder(MKLDNNGraph &graph); bool IsOneOf(Type type, std::vector types); - bool IsOneOf(EltwiseOpType alg, std::vector algs); void removeEdge(MKLDNNGraph &graph, MKLDNNEdgePtr& edge); }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 24e1a3a144b549..0842b01097e31a 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -46,16 +46,17 @@ MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsData if (execNetwork->_numRequests > 1 || execNetwork->QueryState().size() == 0) { for (auto &node : graph->GetNodes()) { if (node->getType() == MemoryInput) { - auto memoryNode = dynamic_cast(node.get()); - auto state_store = memoryNode->getStore(); - auto state_name = memoryNode->getId(); - - // Remove suffix with pair ID. Internal information. 
-            auto suffix_idx = state_name.find("/id=");
-            if (suffix_idx != std::string::npos)
-                state_name = state_name.substr(0, suffix_idx);
-
-            memoryStates.emplace_back(new MKLDNNVariableState(state_name, state_store));
+                THROW_IE_EXCEPTION << "[NM] Not implemented";
+//                auto memoryNode = dynamic_cast(node.get());
+//                auto state_store = memoryNode->getStore();
+//                auto state_name = memoryNode->getId();
+//
+//                // Remove suffix with pair ID. Internal information.
+//                auto suffix_idx = state_name.find("/id=");
+//                if (suffix_idx != std::string::npos)
+//                    state_name = state_name.substr(0, suffix_idx);
+//
+//                memoryStates.emplace_back(new MKLDNNVariableState(state_name, state_store));
             }
         }
     } else {
@@ -141,41 +142,45 @@ void MKLDNNPlugin::MKLDNNInferRequest::PushInputData() {
 }
 
 void MKLDNNPlugin::MKLDNNInferRequest::PushStates() {
-    for (auto &node : graph->GetNodes()) {
-        if (node->getType() == MemoryInput) {
-            auto cur_node = dynamic_cast(node.get());
-            auto cur_id = cur_node->getId();
-            for (const auto& state : memoryStates) {
-                if (state->GetName() == cur_id) {
-                    auto cur_state_mem = cur_node->getStore();
-                    auto data_ptr = state->GetState()->cbuffer().as();
-                    auto data_size = state->GetState()->byteSize();
-                    auto cur_state_mem_buf = static_cast(cur_state_mem->GetPtr());
-
-                    cpu_memcpy(cur_state_mem_buf, data_ptr, data_size);
-                }
-            }
-        }
-    }
+    THROW_IE_EXCEPTION << "Not implemented";
+    // TODO [NM]: disabled until MKLDNNMemoryInputNode is migrated to ngraph
+//    for (auto &node : graph->GetNodes()) {
+//        if (node->getType() == MemoryInput) {
+//            auto cur_node = dynamic_cast(node.get());
+//            auto cur_id = cur_node->getId();
+//            for (const auto& state : memoryStates) {
+//                if (state->GetName() == cur_id) {
+//                    auto cur_state_mem = cur_node->getStore();
+//                    auto data_ptr = state->GetState()->cbuffer().as();
+//                    auto data_size = state->GetState()->byteSize();
+//                    auto cur_state_mem_buf = static_cast(cur_state_mem->GetPtr());
+//
+//                    cpu_memcpy(cur_state_mem_buf, data_ptr, data_size);
+//                }
+//            }
+//        }
+//    }
 }
 
 void MKLDNNPlugin::MKLDNNInferRequest::PullStates() {
-    for (auto &node : graph->GetNodes()) {
-        if (node->getType() == MemoryInput) {
-            auto cur_node = dynamic_cast(node.get());
-            auto cur_id = cur_node->getId();
-            for (const auto& state : memoryStates) {
-                if (state->GetName() == cur_id) {
-                    auto cur_state_mem = cur_node->getStore();
-                    auto data_ptr = state->GetState()->cbuffer().as();
-                    auto data_size = state->GetState()->byteSize();
-                    auto cur_state_mem_buf = static_cast(cur_state_mem->GetPtr());
-
-                    cpu_memcpy(data_ptr, cur_state_mem_buf, data_size);
-                }
-            }
-        }
-    }
+    THROW_IE_EXCEPTION << "Not implemented";
+    // TODO [NM]: disabled until MKLDNNMemoryInputNode is migrated to ngraph
+//    for (auto &node : graph->GetNodes()) {
+//        if (node->getType() == MemoryInput) {
+//            auto cur_node = dynamic_cast(node.get());
+//            auto cur_id = cur_node->getId();
+//            for (const auto& state : memoryStates) {
+//                if (state->GetName() == cur_id) {
+//                    auto cur_state_mem = cur_node->getStore();
+//                    auto data_ptr = state->GetState()->cbuffer().as();
+//                    auto data_size = state->GetState()->byteSize();
+//                    auto cur_state_mem_buf = static_cast(cur_state_mem->GetPtr());
+//
+//                    cpu_memcpy(data_ptr, cur_state_mem_buf, data_size);
+//                }
+//            }
+//        }
+//    }
 }
 
 
@@ -272,7 +277,8 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std::
         return data;
     }
 
-    InferenceEngine::TensorDesc desc = blobs[name]->getTensorDesc();
+    InferenceEngine::TensorDesc desc = 
_networkOutputs[name]->getTensorDesc();//blobs[name]->getTensorDesc(); + InferenceEngine::Precision originPrecision = blobs[name]->getTensorDesc().getPrecision(); // WA: need to avoid exception thrown when we compare blocking desc in SetBlob // in situation if we push output blobs as inputs for next network (in Hetero plugin) @@ -283,7 +289,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: _outputs[name] = make_blob_with_precision(desc); _outputs[name]->allocate(); - if (desc.getPrecision() == InferenceEngine::Precision::FP32 && !graph->getProperty().batchLimit) { + if (desc.getPrecision() == originPrecision && !graph->getProperty().batchLimit) { externalPtr[name] = _outputs[name]->buffer(); } data = _outputs[name]; @@ -396,8 +402,8 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void * void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { for (auto& it : externalPtr) { - auto input = graph->inputNodes.find(it.first); - if (input != graph->inputNodes.end()) { + auto input = graph->inputNodesMap.find(it.first); + if (input != graph->inputNodesMap.end()) { if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second) continue; // Input cannot be in-place with other primitives @@ -430,9 +436,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { } MKLDNNNodePtr output; - for (auto& out : graph->outputNodes) { - if (out->getName() == "out_" + it.first) { - output = out; + for (auto& out : graph->outputNodesMap) { + if (out.first == it.first) { + output = out.second; break; } } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index ebef14038577d8..5ef1fa5f8dc48b 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include "mkldnn_extension_utils.h" @@ -47,6 +48,8 @@ #include "mkldnn_debug.h" #include "utils/rt_info/memory_formats_attribute.hpp" +#include + using namespace mkldnn; using namespace MKLDNNPlugin; using namespace openvino; @@ -54,23 +57,43 @@ using namespace openvino; using namespace InferenceEngine::details; namespace MKLDNNPlugin { static const InferenceEngine::details::caseless_unordered_map type_to_name_tbl = { - { "Unknown", Unknown }, - { "Input", Input }, - { "Const", Input }, - { "Output", Output }, - { "Reorder", Reorder }, + { "Constant", Input }, + { "Parameter", Input }, + { "Result", Output }, { "Convolution", Convolution }, - { "ReLU", Eltwise }, - { "GELU", Eltwise }, - { "ELU", Eltwise }, + { "GroupConvolution", Convolution }, + { "MatMul", Gemm }, + { "MaxPool", Pooling }, + { "AvgPool", Pooling }, + { "Add", Eltwise }, + { "Subtract", Eltwise }, + { "Multiply", Eltwise }, + { "Divide", Eltwise }, + { "SquaredDifference", Eltwise }, + { "Maximum", Eltwise }, + { "Minimum", Eltwise }, + { "Mod", Eltwise }, + { "FloorMod", Eltwise }, + { "Power", Eltwise }, + { "Equal", Eltwise }, + { "NotEqual", Eltwise }, + { "Greater", Eltwise }, + { "GreaterEqual", Eltwise }, + { "Less", Eltwise }, + { "LessEqual", Eltwise }, + { "LogicalAnd", Eltwise }, + { "LogicalOr", Eltwise }, + { "LogicalXor", Eltwise }, + { "LogicalNot", Eltwise }, + { "Relu", Eltwise }, + { "Gelu", Eltwise }, + { "Elu", Eltwise }, + { "Tanh", Eltwise }, { "Sigmoid", Eltwise }, - { "Logistic", Eltwise }, - { "TanH", Eltwise }, - { "ReLU6", Eltwise }, - { "Exp", Eltwise }, - { "Not", 
Eltwise }, - { "Activation", Eltwise }, + { "Abs", Eltwise }, + { "Sqrt", Eltwise }, { "Clamp", Eltwise }, + { "Exp", Eltwise }, { "Swish", Eltwise }, { "HSwish", Eltwise }, { "Mish", Eltwise }, @@ -156,44 +179,54 @@ MKLDNNNode::NodesFactory & MKLDNNNode::factory() { return factoryInstance; } -MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &w_cache) +MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown), - weightCache(w_cache), cnnLayer(layer), engine(eng), name(layer->name), typeStr(layer->type), - type(TypeFromName(layer->type)), profiling(layer->name) { - if (!layer->outData.empty()) { - for (const auto& outData : layer->outData) { - outDims.emplace_back(outData->getDims()); + weightCache(w_cache), engine(eng), name(op->get_friendly_name()), typeStr(op->get_type_name()), + type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { + algorithm = Algorithm::Undefined; + fusingPort = -1; + + originalName = name; + originalInputsNumber = op->get_input_size(); + for (size_t i = 0; i < op->get_input_size(); i++) { + inDims.emplace_back(op->get_input_shape(i)); + originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i))); + } + + if (op->get_output_size() != 0 && typeStr != "Result") { + for (size_t i = 0; i < op->get_output_size(); i++) { + outDims.emplace_back(op->get_output_shape(i)); + originalOutputPrecisions.emplace_back(details::convertPrecision(op->get_output_element_type(i))); } } else { - if (!(CaselessEq()(layer->type, "memory") || - CaselessEq()(layer->type, "memoryinput") || - CaselessEq()(layer->type, "output") || - CaselessEq()(layer->type, "reorder") || - CaselessEq()(layer->type, "convert"))) { - IE_THROW() << "Inappropriate layer type: " << layer->type << " name: " << layer->name; + // TODO [NM]: get rid of this condition + if (!(CaselessEq()(typeStr, "Result"))) { + IE_THROW() << "Inappropriate layer type: " << typeStr << " name: " << name; } +// if (!(CaselessEq()(typeStr, "memory") || +// CaselessEq()(typeStr, "memoryinput") || +// CaselessEq()(typeStr, "output") || +// CaselessEq()(typeStr, "reorder"))) { +// IE_THROW() << "Inappropriate layer type: " << typeStr << " name: " << name; +// } } - for (const auto& inData : layer->insData) { - inDims.emplace_back(inData.lock()->getDims()); - } - if (layer->params.find("PrimitivesPriority") != layer->params.end()) { - std::istringstream stream(layer->params["PrimitivesPriority"]); - std::string str; - while (getline(stream, str, ',')) { - if (str.substr(0, 4) != "cpu:") - continue; - implPriorities.push_back(parse_impl_name(str)); - if (implPriorities[implPriorities.size() - 1] == impl_desc_type::unknown && - str != "cpu:unknown") - IE_THROW() << "Unsupported CPU implementation " << str << " for node " << getName(); - } - } - auto ngraphNode = layer->getNode(); - if (ngraphNode != nullptr) { - std::string inputMemoryFormats = ngraph::getMLKDNNInputMemoryFormats(ngraphNode); +// if (op->params.find("PrimitivesPriority") != layer->params.end()) { +// std::istringstream stream(layer->params["PrimitivesPriority"]); +// std::string str; +// while (getline(stream, str, ',')) { +// if (str.substr(0, 4) != "cpu:") +// continue; +// implPriorities.push_back(parse_impl_name(str)); +// if (implPriorities[implPriorities.size() - 1] == 
impl_desc_type::unknown && +// str != "cpu:unknown") +// IE_THROW() << "Unsupported CPU implementation " << str << " for node " << getName(); +// } +// } + + if (op != nullptr) { + std::string inputMemoryFormats = ngraph::getMLKDNNInputMemoryFormats(op); if (!inputMemoryFormats.empty()) { std::istringstream stream(inputMemoryFormats); std::string str; @@ -204,7 +237,7 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn:: } } - std::string outputMemoryFormats = ngraph::getMLKDNNOutputMemoryFormats(ngraphNode); + std::string outputMemoryFormats = ngraph::getMLKDNNOutputMemoryFormats(op); if (!outputMemoryFormats.empty()) { std::istringstream stream(outputMemoryFormats); std::string str; @@ -217,6 +250,13 @@ MKLDNNNode::MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn:: } } +MKLDNNNode::MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) + : selectedPrimitiveDescriptorIndex(-1), permanent(false), temporary(false), constant(ConstantType::Unknown), + weightCache(w_cache), engine(eng), name(name), typeStr(type), + type(TypeFromName(type)), profiling(name) { + // TODO [NM]: What about filling inDims and outDims? +} + void MKLDNNNode::addEdge(const MKLDNNEdgeWeakPtr& edge) { auto edgePtr = edge.lock(); if (!edgePtr) @@ -663,66 +703,66 @@ void MKLDNNNode::initDescriptor(const InferenceEngine::LayerConfig &config) { selectedPD->getConfig() = rightConfig; } -InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeVector dims, bool weights, bool isGrouped) { - auto checkSize = [](size_t dst_size, size_t src_size) { - if (dst_size < src_size) { - IE_THROW() << "Cannot create internal buffer. Buffer can be overrun."; - } - }; - auto * wLayer = dynamic_cast(getCnnLayer().get()); - if (wLayer == nullptr) - IE_THROW() << "Cannot get weightable layer for node " << getName() << "."; - - InferenceEngine::Blob::Ptr blb = weights ? wLayer->_weights : wLayer->_biases; - - if (blb == nullptr) - IE_THROW() << "Cannot get internal blob layer for node " << getName() << "."; - - auto intLayout = getWeightsLayoutByDims(dims, isGrouped); - - InferenceEngine::TensorDesc desc(blb->getTensorDesc().getPrecision(), dims, intLayout); - - auto fillInternalBlob = [&](char *data, size_t intBuffSize) { - size_t offset = blb->byteSize(); - checkSize(intBuffSize, offset); - cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize()); - data += blb->byteSize(); - for (const auto &merged : getMergeWith()) { - wLayer = dynamic_cast(merged->getCnnLayer().get()); - if (wLayer == nullptr) - IE_THROW() << "Cannot convert merged weightable layer for node " - << getName() << "."; - blb = weights ? 
wLayer->_weights : wLayer->_biases; - - if (blb == nullptr) - IE_THROW() << "Cannot get internal blob layer for node " << getName() << "."; - offset += blb->byteSize(); - checkSize(intBuffSize, offset); - cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize()); - data += blb->byteSize(); - } - }; - - Blob::Ptr internalBlob; - if (blb->getTensorDesc().getPrecision() == Precision::BIN) { - internalBlob = InferenceEngine::make_shared_blob(desc); - } else if (blb->getTensorDesc().getPrecision() == Precision::I8) { - internalBlob = InferenceEngine::make_shared_blob(desc); - } else if (blb->getTensorDesc().getPrecision() == Precision::I32) { - internalBlob = InferenceEngine::make_shared_blob(desc); - } else if (blb->getTensorDesc().getPrecision() == Precision::BF16) { - internalBlob = InferenceEngine::make_shared_blob(desc); - } else { - internalBlob = InferenceEngine::make_shared_blob(desc); - } - internalBlob->allocate(); - char *data = internalBlob->buffer(); - size_t intBuffSize = internalBlob->byteSize(); - - fillInternalBlob(data, intBuffSize); - - return internalBlob; -} +//InferenceEngine::Blob::Ptr MKLDNNNode::createInternalBlob(InferenceEngine::SizeVector dims, bool weights, bool isGrouped) { +// auto checkSize = [](size_t dst_size, size_t src_size) { +// if (dst_size < src_size) { +// IE_THROW() << "Cannot create internal buffer. Buffer can be overrun."; +// } +// }; +// auto * wLayer = dynamic_cast(getCnnLayer().get()); +// if (wLayer == nullptr) +// IE_THROW() << "Cannot get weightable layer for node " << getName() << "."; +// +// InferenceEngine::Blob::Ptr blb = weights ? wLayer->_weights : wLayer->_biases; +// +// if (blb == nullptr) +// IE_THROW() << "Cannot get internal blob layer for node " << getName() << "."; +// +// auto intLayout = getWeightsLayoutByDims(dims, isGrouped); +// +// InferenceEngine::TensorDesc desc(blb->getTensorDesc().getPrecision(), dims, intLayout); +// +// auto fillInternalBlob = [&](char *data, size_t intBuffSize) { +// size_t offset = blb->byteSize(); +// checkSize(intBuffSize, offset); +// cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize()); +// data += blb->byteSize(); +// for (const auto &merged : getMergeWith()) { +// wLayer = dynamic_cast(merged->getCnnLayer().get()); +// if (wLayer == nullptr) +// IE_THROW() << "Cannot convert merged weightable layer for node " +// << getName() << "."; +// blb = weights ? 
wLayer->_weights : wLayer->_biases; +// +// if (blb == nullptr) +// IE_THROW() << "Cannot get internal blob layer for node " << getName() << "."; +// offset += blb->byteSize(); +// checkSize(intBuffSize, offset); +// cpu_memcpy_s(data, intBuffSize, blb->buffer(), blb->byteSize()); +// data += blb->byteSize(); +// } +// }; +// +// Blob::Ptr internalBlob; +// if (blb->getTensorDesc().getPrecision() == Precision::BIN) { +// internalBlob = InferenceEngine::make_shared_blob(desc); +// } else if (blb->getTensorDesc().getPrecision() == Precision::I8) { +// internalBlob = InferenceEngine::make_shared_blob(desc); +// } else if (blb->getTensorDesc().getPrecision() == Precision::I32) { +// internalBlob = InferenceEngine::make_shared_blob(desc); +// } else if (blb->getTensorDesc().getPrecision() == Precision::BF16) { +// internalBlob = InferenceEngine::make_shared_blob(desc); +// } else { +// internalBlob = InferenceEngine::make_shared_blob(desc); +// } +// internalBlob->allocate(); +// char *data = internalBlob->buffer(); +// size_t intBuffSize = internalBlob->byteSize(); +// +// fillInternalBlob(data, intBuffSize); +// +// return internalBlob; +//} void MKLDNNNode::prepareMemory(const PrimitiveDescInfo *selected_pd, mkldnn::primitive_desc_iterator& itpd) { for (size_t i = 0; i < getChildEdges().size(); i++) { @@ -831,18 +871,18 @@ MKLDNNNode::ConstantType MKLDNNNode::checkConstant(LOOK look, std::vectorname; + originalLayers = layerName; } else { - originalLayers += "," + layer->name; + originalLayers += "," + layerName; } } void MKLDNNNode::cleanup() { internalBlobs.clear(); - cnnLayer.reset(); +// cnnLayer.reset(); for (auto it : fusedWith) { it->cleanup(); @@ -1179,29 +1219,51 @@ InferenceEngine::Precision MKLDNNNode::getRuntimePrecision() const { return runtimePrecision; } -MKLDNNNode* MKLDNNNode::NodesFactory::create(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, +MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr& op, const mkldnn::engine& eng, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache) { MKLDNNNode *newNode = nullptr; - std::unique_ptr ol(createNodeIfRegistered(MKLDNNPlugin, Generic, layer, eng, w_cache)); - if (ol != nullptr && ol->created(extMgr)) - newNode = ol.release(); - - if (newNode == nullptr) { - std::unique_ptr ol(createNodeIfRegistered(MKLDNNPlugin, TypeFromName(layer->type), layer, eng, w_cache)); + try { + std::unique_ptr ol(createNodeIfRegistered(MKLDNNPlugin, Generic, op, eng, w_cache)); if (ol != nullptr && ol->created(extMgr)) newNode = ol.release(); + } catch (const InferenceEngine::Exception& ex) { + if (ex.getStatus() != NOT_IMPLEMENTED) + throw; + } + + if (newNode == nullptr) { + try { + std::unique_ptr ol(createNodeIfRegistered(MKLDNNPlugin, TypeFromName(op->get_type_name()), op, eng, w_cache)); + if (ol != nullptr && ol->created(extMgr)) + newNode = ol.release(); + } catch (const InferenceEngine::Exception& ex) { + if (ex.getStatus() != NOT_IMPLEMENTED) + throw; + } + } + + if (newNode == nullptr) { + try { + std::unique_ptr ol(new MKLDNNReferenceNode(op, eng, w_cache)); + if (ol != nullptr && ol->created(extMgr)) + newNode = ol.release(); + } catch (const InferenceEngine::Exception& ex) { + if (ex.getStatus() != NOT_IMPLEMENTED) + throw; + } } - // WA-start : TI node requires all attributes to construct internal subgpath - // including extManager, socket and mkldnn::eng. 
- MKLDNNTensorIteratorNode *ti = dynamic_cast(newNode); - if (ti != nullptr) - ti->setExtManager(extMgr); - // WA-end +// TODO [NM]: Not implemented +// // WA-start : TI node requires all attributes to construct internal subgpath +// // including extManager, socket and mkldnn::eng. +// MKLDNNTensorIteratorNode *ti = dynamic_cast(newNode); +// if (ti != nullptr) +// ti->setExtManager(extMgr); +// // WA-end if (!newNode) - IE_THROW() << "Unsupported primitive of type: " << layer->type << " name: " << layer->name; + IE_THROW() << "Unsupported primitive of type: " << op->get_type_name() << " name: " << op->get_friendly_name(); return newNode; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 4ed5daee429f20..0a663c5c7fad81 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -23,6 +23,9 @@ #include "mkldnn_weights_cache.hpp" #include "mkldnn.hpp" #include +#include +#include "utils/ngraph_utils.hpp" +#include #include namespace MKLDNNPlugin { @@ -30,6 +33,7 @@ namespace MKLDNNPlugin { using MKLDNNNodePtr = std::shared_ptr; using MKLDNNNodeWeakPtr = std::weak_ptr; +// TODO [NM]: move into separate header enum Type { Unknown, Generic, @@ -43,7 +47,7 @@ enum Type { Lrn, Pooling, FullyConnected, - SoftMax, + Softmax, Split, Concatenation, Eltwise, @@ -85,7 +89,69 @@ enum Type { ReduceOr, ReduceProd, ReduceSum, - ReduceSumSquare + ReduceSumSquare, + Reference +}; + +enum Algorithm { + Undefined, + + // Pooling algorithms + PoolingMax, + PoolingAvg, + + // Convolution algorithms + ConvolutionCommon, + ConvolutionGrouped, + + // Convolution algorithms + DeconvolutionCommon, + DeconvolutionGrouped, + + // Elementwise algorithms + EltwiseAdd, + EltwiseMultiply, + EltwiseSubtract, + EltwiseDivide, + EltwiseFloorMod, + EltwiseMod, + EltwiseMaximum, + EltwiseMinimum, + EltwiseSquaredDifference, + EltwisePowerDynamic, + EltwisePowerStatic, + EltwiseMulAdd, + EltwiseEqual, + EltwiseNotEqual, + EltwiseGreater, + EltwiseGreaterEqual, + EltwiseLess, + EltwiseLessEqual, + EltwiseLogicalAnd, + EltwiseLogicalOr, + EltwiseLogicalXor, + EltwiseLogicalNot, + EltwiseRelu, + EltwiseGelu, + EltwiseElu, + EltwiseTanh, + EltwiseSigmoid, + EltwiseSquare, // TODO [NM]: looks like unused - remove + EltwiseAbs, + EltwiseSqrt, + EltwiseLinear, // TODO [NM]: looks like unused - remove + EltwiseBoundedRelu, // TODO [NM]: looks like unused - remove + EltwiseSoftRelu, // TODO [NM]: looks like unused - remove + EltwiseRelu6, // TODO [NM]: looks like unused - remove + EltwiseExp, + EltwiseClamp, + EltwiseSwish, + EltwisePrelu, + EltwiseMish, + EltwiseHswish, + EltwiseHsigmoid, + EltwiseRoundHalfToEven, + EltwiseRoundHalfAwayFromZero }; Type TypeFromName(const std::string type); @@ -114,8 +180,8 @@ static std::string NameFromType(Type type) { return "FullyConnected"; case Gemm: return "Gemm"; - case SoftMax: - return "SoftMax"; + case Softmax: + return "Softmax"; case Split: return "Split"; case Concatenation: @@ -334,8 +400,15 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { bool isFusedWith(Type type) const; - void fuseWith(const MKLDNNNodePtr &fuse) { - fusedWith.push_back(fuse); + void fuseWith(const MKLDNNNodePtr &fusingNode) { + fusedWith.push_back(fusingNode); + + for (int i = 0; i< inDims.size(); i++) { + if (fusingNode->getParentEdgesAtPort(i)[0]->getParent().get() == this) { + setFusingPort(i); + break; + } + } } void clearFusedWith() { @@ -346,7 +419,7 @@ class MKLDNNNode 
: public InferenceEngine::details::no_copy { mergedWith.push_back(merge); } - void addOriginalLayer(const InferenceEngine::CNNLayerPtr &layer); + void addOriginalLayer(const std::string& layerName); const std::vector &getMergeWith() { return mergedWith; @@ -356,6 +429,14 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { return fusedWith; } + int getFusingPort() const { + return fusingPort; + } + + void setFusingPort(int fusingPort) { + this->fusingPort = fusingPort; + } + const std::string getName() const { return name; } @@ -368,9 +449,9 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { return type; } - const InferenceEngine::CNNLayerPtr &getCnnLayer() const { - return cnnLayer; - } +// const InferenceEngine::CNNLayerPtr &getCnnLayer() const { +// return cnnLayer; +// } const std::vector& getSupportedPrimitiveDescriptors() const { return supportedPrimitiveDescriptors; @@ -484,15 +565,6 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { IE_THROW() << "Primitive descriptor was not found for node " << getName() << "."; } - static void invertVectorCopyUtoI(const InferenceEngine::PropertyVector& src, std::vector& dst) { - dst.clear(); - for (int i = 1; i <= src.size(); i++) { - dst.push_back(static_cast(src[src.size() - i])); - } - } - - std::vector inDims; - int getExecIndex() const { return execIndex; } @@ -523,9 +595,26 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { */ virtual InferenceEngine::Precision getRuntimePrecision() const; + const std::vector& getOriginalInputPrecisions() const { + return originalInputPrecisions; + } + const std::vector& getOriginalOutputPrecisions() const { + return originalOutputPrecisions; + } + + size_t getOriginalInputsNumber() const { + return originalInputsNumber; + } + + std::string getOriginalName() const { + return originalName; + } + + Algorithm getAlgorithm() const { + return algorithm; + } + protected: - // TODO: It is necessary only in order to avoid modifications of cnnLayers and original topology - std::vector outDims; void setType(Type type) { this->type = type; } @@ -550,6 +639,9 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { GetPrimitiveMemoryFormatFunc; std::vector internalBlobDesc; + std::vector inDims; + std::vector outDims; + std::vector fusedWith; std::vector mergedWith; std::vector implPriorities; @@ -558,7 +650,8 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { std::string originalLayers; // contains names of the original layers separated by comma - MKLDNNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache); + MKLDNNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache); + MKLDNNNode(const std::string& type, const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache); int selectedPrimitiveDescriptorIndex = -1; bool permanent = false; @@ -580,6 +673,8 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { InferenceEngine::Blob::Ptr ext_scales; MKLDNNWeightsSharing::Ptr weightCache; + Algorithm algorithm; + friend class MKLDNNEdge; friend class MKLDNNGraph; friend class MKLDNNGraphOptimizer; @@ -594,7 +689,7 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { virtual std::vector getAvailableFormatsForDims(const MKLDNNDims& dims) const; int batchToProcess(); - InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, bool weights, bool is_grouped = false); +// 
InferenceEngine::Blob::Ptr createInternalBlob(InferenceEngine::SizeVector dims, bool weights, bool is_grouped = false); InferenceEngine::Layout getWeightsLayoutByDims(InferenceEngine::SizeVector dims, bool isGrouped); @@ -614,7 +709,13 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { std::vector parentEdges; std::vector childEdges; - InferenceEngine::CNNLayerPtr cnnLayer; + std::string originalName; + size_t originalInputsNumber; + std::vector originalInputPrecisions; + std::vector originalOutputPrecisions; + + int fusingPort; + mkldnn::engine engine; std::string name; @@ -650,21 +751,21 @@ class MKLDNNNode : public InferenceEngine::details::no_copy { }; class MKLDNNNode::NodesFactory : public openvino::cc::Factory& op, const mkldnn::engine &, MKLDNNWeightsSharing::Ptr &)> { public: NodesFactory() : Factory("NodesFactory") {} - MKLDNNNode* create(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, + MKLDNNNode* create(const std::shared_ptr& op, const mkldnn::engine& eng, const MKLDNNExtensionManager::Ptr& extMgr, MKLDNNWeightsSharing::Ptr &w_cache); }; template struct MKLDNNNodeImpl : public MKLDNNNodeType { - MKLDNNNodeImpl(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNodeType(layer, eng, cache) { + MKLDNNNodeImpl(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNodeType(op, eng, cache) { MKLDNNNodeType::perfCounters().template buildClassCounters(NameFromType(MKLDNNNodeType::getType())); } }; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index 5dd4fa84f1dc48..d261acb4f1cbd1 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -8,7 +8,6 @@ #include "mkldnn_weights_cache.hpp" #include "mkldnn_itt.h" -#include #include #include #include @@ -16,19 +15,8 @@ #include #include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include - #include #include @@ -126,8 +114,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { } // WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass - manager.register_pass(); - manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); @@ -170,12 +156,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { node->input_value(0).get_shape().size() == node->get_output_shape(0).size(); }); - // Disable FC reshaping for 3D case - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - return node->input_value(0).get_shape().size() == 3ul; - }); - pass_config->set_callback( [](const_node_ptr &node) -> bool { @@ -277,7 +257,6 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { pass_config->disable(); pass_config->disable(); pass_config->disable(); - pass_config->disable(); pass_config->disable(); pass_config->disable(); @@ -323,57 +302,35 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { transformer.transform(nGraphFunc); } - bool has_fake_quantize = ::ngraph::op::util::has_op_with_type(nGraphFunc); - - ngraph::pass::Manager legacyManager; - - legacyManager.register_pass(); - legacyManager.register_pass(); - legacyManager.register_pass(ngraph::element::i64, ngraph::element::i32); - // not legacy actually, but it should be the last 
transformation in the transformation pipeline - legacyManager.register_pass(); + ngraph::pass::Manager postLPTPassManager; + postLPTPassManager.register_pass(); + postLPTPassManager.register_pass(); auto legacyPassConfig = legacyManager.get_pass_config(); legacyPassConfig->disable(); - legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { + postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { return !MKLDNNQuantizeNode::isNeedToDecompose(node); }); - - legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { + postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { if (auto mul_op = std::dynamic_pointer_cast(node)) { auto add_op = std::dynamic_pointer_cast(mul_op->get_input_node_shared_ptr(0)); auto constant = std::dynamic_pointer_cast(mul_op->get_input_node_shared_ptr(1)); bool is_dequantization = mul_op->get_rt_info().count("DEQUANTIZATION") != 0; if (add_op && constant && is_dequantization) { return ngraph::is_type(add_op->get_input_node_shared_ptr(0)) || - ngraph::is_type(add_op->get_input_node_shared_ptr(0)) || - ngraph::is_type(add_op->get_input_node_shared_ptr(0)); + ngraph::is_type(add_op->get_input_node_shared_ptr(0)) || + ngraph::is_type(add_op->get_input_node_shared_ptr(0)); } } return false; }); - - legacyPassConfig->set_callback([](const_node_ptr &node) -> bool { + postLPTPassManager.get_pass_config()->set_callback([](const_node_ptr &node) -> bool { // UnrollTI transformation is disabled by default, is turned on by LowLatency transformation return node->get_rt_info().count("UNROLL_TI") == 0; }); - legacyManager.run_passes(nGraphFunc); - - OV_ITT_TASK_CHAIN(taskChain, MKLDNNPlugin::itt::domains::MKLDNN_LT, "Transformation", "convertFunctionToICNNNetwork"); - - clonedNetwork = CNNNetwork(InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, clonedNetwork, has_fake_quantize)); - - OV_ITT_TASK_NEXT(taskChain, "ConvertIOPrecision"); - - // WA: after conversion to CNNNetwork user precision can redefine input/output precisions - // so we need to apply additional precision conversion but only for inputs and outputs - for (auto & precision : convert_precision_list) { - NetPass::ConvertIOPrecision(clonedNetwork, - InferenceEngine::details::convertPrecision(precision.first), - InferenceEngine::details::convertPrecision(precision.second)); - } + postLPTPassManager.run_passes(nGraphFunc); } InferenceEngine::ExecutableNetworkInternal::Ptr @@ -409,34 +366,33 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std conf.batchLimit = static_cast(network.getBatchSize()); } - CNNNetwork clonedNetwork = InferenceEngine::cloneNetwork(network); + CNNNetwork clonedNetwork = InferenceEngine::details::cloneNetwork(network); - bool is_transformed = false; - if (clonedNetwork.getFunction()) { +// bool is_transformed = false; +// if (clonedNetwork.getFunction()) { Transformation(clonedNetwork, conf); - is_transformed = true; - } - IE_SUPPRESS_DEPRECATED_START - auto icnnnet = static_cast(clonedNetwork); - IE_SUPPRESS_DEPRECATED_END - auto implNetwork = std::dynamic_pointer_cast(icnnnet); - if (implNetwork) { - OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding"); - // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network - ConstTransformer transformator(implNetwork.get()); - transformator.fullTrim(); - if (!is_transformed) { - InferenceEngine::CNNNetwork implNetworkWrapper(implNetwork); - 
NetPass::ConvertPrecision(implNetworkWrapper, Precision::I64, Precision::I32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::U64, Precision::I32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::U32, Precision::I32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::FP64, Precision::FP32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::FP16, Precision::FP32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::BOOL, Precision::U8); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::U16, Precision::I32); - NetPass::ConvertPrecision(implNetworkWrapper, Precision::I16, Precision::I32); - } - } +// is_transformed = true; +// } +// IE_SUPPRESS_DEPRECATED_START +// auto icnnnet = static_cast(clonedNetwork); +// IE_SUPPRESS_DEPRECATED_END +// auto implNetwork = std::dynamic_pointer_cast(icnnnet); +// if (implNetwork) { +// OV_ITT_SCOPED_TASK(itt::domains::MKLDNN_LT, "CNNNet_based_ConstFolding"); +// // valid for CNNNetworkImpl only, while there's no API in ICNNNetwork to change network +// ConstTransformer transformator(implNetwork.get()); +// transformator.fullTrim(); +// if (!is_transformed) { +// InferenceEngine::CNNNetwork implNetworkWrapper(implNetwork); +// NetPass::ConvertPrecision(implNetworkWrapper, Precision::I64, Precision::I32); +// NetPass::ConvertPrecision(implNetworkWrapper, Precision::U64, Precision::I32); +// NetPass::ConvertPrecision(implNetworkWrapper, Precision::U32, Precision::I32); +// NetPass::ConvertPrecision(implNetworkWrapper, Precision::FP16, Precision::FP32); +// NetPass::ConvertPrecision(implNetworkWrapper, Precision::BOOL, Precision::U8); +// NetPass::ConvertPrecision(implNetworkWrapper, Precision::U16, Precision::I32); +// NetPass::ConvertPrecision(implNetworkWrapper, Precision::I16, Precision::I32); +// } +// } return std::make_shared(clonedNetwork, conf, extensionManager, weightsSharing); } @@ -538,92 +494,96 @@ void Engine::AddExtension(InferenceEngine::IExtensionPtr extension) { QueryNetworkResult Engine::QueryNetwork(const CNNNetwork& network, const std::map& config) const { QueryNetworkResult res; - MKLDNNWeightsSharing::Ptr fake_w_cache; - auto function = network.getFunction(); - if (function != nullptr) { - std::unordered_set originalOps; - for (auto&& node : function->get_ops()) { - originalOps.emplace(node->get_friendly_name()); - } - - // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not? 
- Config conf = engConfig; - conf.readProperties(config); - if (conf.enableDynamicBatch) { - conf.batchLimit = static_cast(network.getBatchSize()); - } - - auto clonedNetwork = InferenceEngine::cloneNetwork(network); - Transformation(clonedNetwork, conf); - std::unordered_set supported; - std::unordered_set unsupported; - for (details::CNNNetworkIterator itLayer{clonedNetwork}; itLayer != details::CNNNetworkIterator(); itLayer++) { - auto layerIsSupported = [&] { - std::unique_ptr ptr; - try { - ptr.reset(MKLDNNNode::factory().create(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache)); - } catch (InferenceEngine::Exception&) { - return false; - } - return true; - } (); - for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) { - if (InferenceEngine::details::contains(originalOps, fusedLayerName)) { - if (layerIsSupported) { - supported.emplace(fusedLayerName); - } else { - unsupported.emplace(fusedLayerName); - } - } - } - } - for (auto&& unsupportedNode : unsupported) { - supported.erase(unsupportedNode); - } - for (auto&& node : function->get_ops()) { - if (InferenceEngine::details::contains(supported, node->get_friendly_name())) { - for (auto&& inputNodeOutput : node->input_values()) { - if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) { - supported.emplace(inputNodeOutput.get_node()->get_friendly_name()); - } - } - for (auto&& outputs : node->outputs()) { - for (auto&& outputNodeInput : outputs.get_target_inputs()) { - if (ngraph::op::is_output(outputNodeInput.get_node())) { - supported.emplace(outputNodeInput.get_node()->get_friendly_name()); - } - } - } - } - - if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) { - if (!InferenceEngine::details::contains(supported, node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) { - supported.erase(node->get_friendly_name()); - } - } else if (ngraph::op::is_output(node)) { - if (!InferenceEngine::details::contains(supported, node->input_values().begin()->get_node()->get_friendly_name())) { - supported.erase(node->get_friendly_name()); - } - } - } - - for (auto&& layerName : supported) { - res.supportedLayersMap.emplace(layerName, GetName()); - } - } else { - details::CNNNetworkIterator i(network); - while (i != details::CNNNetworkIterator()) { - try { - mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0)); - // if we can create and have not thrown exception, then layer is supported - std::unique_ptr (MKLDNNNode::factory().create(*i, eng, extensionManager, fake_w_cache)); - res.supportedLayersMap.insert({ (*i)->name, GetName() }); - } catch (InferenceEngine::Exception&) { - } - i++; - } - } + IE_THROW() << "Not implemented"; +// TODO [NM]: reimplement QueryNetwork on ngraph +// +// MKLDNNWeightsSharing::Ptr fake_w_cache; +// auto function = network.getFunction(); +// if (function != nullptr) { +// std::unordered_set originalOps; +// for (auto&& node : function->get_ops()) { +// originalOps.emplace(node->get_friendly_name()); +// } +// +// // TODO: Clarify the behavior of SetConfig method. Skip eng_config or not? 
+// Config conf = engConfig; +// conf.readProperties(config); +// +// if (conf.enableDynamicBatch) { +// conf.batchLimit = static_cast(network.getBatchSize()); +// } +// +// auto clonedNetwork = InferenceEngine::cloneNetwork(network); +// Transformation(clonedNetwork, conf); +// std::unordered_set supported; +// std::unordered_set unsupported; +// for (details::CNNNetworkIterator itLayer{clonedNetwork}; itLayer != details::CNNNetworkIterator(); itLayer++) { +// auto layerIsSupported = [&] { +// std::unique_ptr ptr; +// try { +// ptr.reset(MKLDNNNode::factory().create(*itLayer, {mkldnn::engine::kind::cpu, 0}, extensionManager, fake_w_cache)); +// } catch (InferenceEngine::details::InferenceEngineException&) { +// return false; +// } +// return true; +// } (); +// for (auto&& fusedLayerName : ngraph::getFusedNamesVector((*itLayer)->getNode())) { +// if (InferenceEngine::details::contains(originalOps, fusedLayerName)) { +// if (layerIsSupported) { +// supported.emplace(fusedLayerName); +// } else { +// unsupported.emplace(fusedLayerName); +// } +// } +// } +// } +// for (auto&& unsupportedNode : unsupported) { +// supported.erase(unsupportedNode); +// } +// for (auto&& node : function->get_ops()) { +// if (InferenceEngine::details::contains(supported, node->get_friendly_name())) { +// for (auto&& inputNodeOutput : node->input_values()) { +// if (ngraph::op::is_constant(inputNodeOutput.get_node()) || ngraph::op::is_parameter(inputNodeOutput.get_node())) { +// supported.emplace(inputNodeOutput.get_node()->get_friendly_name()); +// } +// } +// for (auto&& outputs : node->outputs()) { +// for (auto&& outputNodeInput : outputs.get_target_inputs()) { +// if (ngraph::op::is_output(outputNodeInput.get_node())) { +// supported.emplace(outputNodeInput.get_node()->get_friendly_name()); +// } +// } +// } +// } +// +// if (ngraph::op::is_constant(node) || ngraph::op::is_parameter(node)) { +// if (!InferenceEngine::details::contains(supported, node->output(0).get_target_inputs().begin()->get_node()->get_friendly_name())) { +// supported.erase(node->get_friendly_name()); +// } +// } else if (ngraph::op::is_output(node)) { +// if (!InferenceEngine::details::contains(supported, node->input_values().begin()->get_node()->get_friendly_name())) { +// supported.erase(node->get_friendly_name()); +// } +// } +// } +// +// for (auto&& layerName : supported) { +// res.supportedLayersMap.emplace(layerName, GetName()); +// } +// } else { +// details::CNNNetworkIterator i(network); +// while (i != details::CNNNetworkIterator()) { +// try { +// mkldnn::engine eng(mkldnn::engine(mkldnn::engine::kind::cpu, 0)); +// // if we can create and have not thrown exception, then layer is supported +// std::unique_ptr (MKLDNNNode::factory().create(*i, eng, extensionManager, fake_w_cache)); +// res.supportedLayersMap.insert({ (*i)->name, GetName() }); +// } catch (InferenceEngine::details::InferenceEngineException&) { +// } +// i++; +// } +// } return res; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/base.hpp b/inference-engine/src/mkldnn_plugin/nodes/base.hpp index 7720ad36762ca9..2b847ef1ae4007 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/base.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/base.hpp @@ -5,11 +5,12 @@ #pragma once #include -#include #include "nodes/list.hpp" +#include #include #include +#include namespace InferenceEngine { namespace Extensions { @@ -53,99 +54,6 @@ class ExtLayerBase: public ILayerExecImpl { } protected: - enum class ConfLayout { ANY, PLN, BLK8, BLK16 }; - - class 
DataConfigurator { - public: - explicit DataConfigurator(ConfLayout l): - layout(l) {} - - DataConfigurator(ConfLayout l, bool constant, int inplace = -1, Precision::ePrecision prc = Precision::UNSPECIFIED): - layout(l), constant(constant), inplace(inplace), prc(prc) {} - - DataConfigurator(ConfLayout l, Precision::ePrecision prc): - layout(l), prc(prc) {} - - ConfLayout layout; - bool constant = false; - int inplace = -1; - Precision::ePrecision prc = Precision::UNSPECIFIED; // by default use the layer precision - }; - - void addConfig(const CNNLayer* layer, std::vector in_l, - std::vector out_l, bool dynBatchSupport = false) { - LayerConfig config; - - if (in_l.size() != layer->insData.size()) - IE_THROW() << "Incorrect number of input edges for layer " << layer->name << ". Expected " << layer->insData.size() - << " but layout specification provided for " << in_l.size(); - if (out_l.size() != layer->outData.size()) - IE_THROW() << "Incorrect number of output edges for layer " << layer->name << ". Expected " << layer->outData.size() - << " but layout specification provided for " << out_l.size(); - - // Fill tensor parameters into config - auto fill_port = [] (std::vector& port, DataConfigurator conf, const DataPtr& data) { - auto div_up = [](const int a, const int b) -> int { - if (!b) - return 0; - return (a + b - 1) / b; - }; - if (!data) IE_THROW() << "Cannot get input data!"; - - DataConfig dataConfig; - dataConfig.inPlace = conf.inplace; - dataConfig.constant = conf.constant; - - const TensorDesc& data_desc = data->getTensorDesc(); - const SizeVector& data_dims = data_desc.getDims(); - - std::vector blocks = data_dims; - std::vector order(blocks.size()); - for (size_t i = 0; i < order.size(); i++) order[i] = i; - - const bool isInt8 = (data->getPrecision() == Precision::I8 || data->getPrecision() == Precision::U8); - - if (conf.layout == ConfLayout::BLK8 || conf.layout == ConfLayout::BLK16) { - if (data_dims.size() < 4 || data_dims.size() > 5) - IE_THROW() << "Inapplicable blocking layout." - << "Tensor should be 4D or 5D."; - - int blk_size = conf.layout == ConfLayout::BLK8 ? 8 : 16; - - // Blocking through Channel dimension. Like [nChwXc] - order.push_back(1); - blocks[1] = div_up(blocks[1], blk_size); - blocks.push_back(blk_size); - } else if (isInt8) { - if (data_dims.size() == 4) { - order = {0, 2, 3, 1}; - blocks = {data_dims[0], data_dims[2], data_dims[3], data_dims[1]}; - } else if (data_dims.size() == 5) { - order = {0, 2, 3, 4, 1}; - blocks = {data_dims[0], data_dims[2], data_dims[3], data_dims[4], data_dims[1]}; - } // all over keep original plain format - - conf.layout = ConfLayout::PLN; - } - - InferenceEngine::Precision precision = (conf.prc == Precision::UNSPECIFIED) ? 
data_desc.getPrecision() : Precision(conf.prc); - if (conf.layout == ConfLayout::ANY) { - dataConfig.desc = TensorDesc(precision, data_dims, InferenceEngine::Layout::ANY); - } else { - dataConfig.desc = TensorDesc(precision, data_dims, {blocks, order}); - } - port.push_back(dataConfig); - }; - - for (size_t i = 0; i < in_l.size(); i++) - fill_port(config.inConfs, in_l[i], layer->insData[i].lock()); - - for (size_t i = 0; i < out_l.size(); i++) - fill_port(config.outConfs, out_l[i], layer->outData[i]); - - config.dynBatchSupport = dynBatchSupport; - confs.push_back(config); - } std::string errorMsg; std::vector confs; }; @@ -153,20 +61,19 @@ class ExtLayerBase: public ILayerExecImpl { template class ImplFactory : public ILayerImplFactory { public: - explicit ImplFactory(const CNNLayer *layer) { - cnnLayer = InferenceEngine::clonelayer(*layer); - cnnLayer->_fusedWith = layer->_fusedWith; - cnnLayer->insData = layer->insData; - cnnLayer->outData = layer->outData; - } + explicit ImplFactory(const std::shared_ptr& op) : ngraphOp(op) {} // First implementation has more priority than next StatusCode getImplementations(std::vector& impls, ResponseDesc *resp) noexcept override { - impls.push_back(ILayerImpl::Ptr(new IMPL(cnnLayer.get()))); + try { + impls.push_back(ILayerImpl::Ptr(new IMPL(ngraphOp))); + } catch (const InferenceEngine::Exception& ex) { + return ex.getStatus(); + } return OK; } protected: - InferenceEngine::CNNLayerPtr cnnLayer; + const std::shared_ptr ngraphOp; }; #define REG_FACTORY_FOR(__prim, __type) \ diff --git a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp index 9479e1d2e5894b..06a3d8815ae3a8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/gather.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/gather.cpp @@ -13,6 +13,10 @@ #include "ie_parallel.hpp" #include "common/cpu_memcpy.h" #include "common/fp16_utils.h" +#include +#include +#include +#include namespace InferenceEngine { namespace Extensions { @@ -20,25 +24,36 @@ namespace Cpu { class GatherImpl: public ExtLayerBase { public: - explicit GatherImpl(const CNNLayer* layer) { + explicit GatherImpl(const std::shared_ptr& op) { try { - if (layer->insData.size() != 2 || layer->outData.empty()) - IE_THROW() << layer->name << " Incorrect number of input/output edges!"; + errorPrefix_ = std::string("Layer Gather with name '") + op->get_friendly_name() + "' "; - Precision inIdxPrecision = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getPrecision(); + auto gatherOp = ngraph::as_type_ptr(op); + if (!gatherOp) + IE_THROW() << "CPU Gather node doesn't support ngraph operation " + << op->get_type_name() << " with name " << op->get_friendly_name(); + + if (gatherOp->get_input_size() != 3 || gatherOp->get_output_size() == 0) + IE_THROW() << errorPrefix_ << "has incorrect number of input/output edges!"; + + Precision inIdxPrecision = details::convertPrecision(gatherOp->get_input_element_type(GATHER_INDEXES)); if (inIdxPrecision != Precision::FP32 && inIdxPrecision != Precision::I32 && inIdxPrecision != Precision::FP16) inIdxPrecision = Precision::I32; - axis = layer->GetParamAsInt("axis"); - - const SizeVector& dictionary_dims = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getDims(); + const SizeVector& dictionary_dims = gatherOp->get_input_shape(GATHER_DICTIONARY); if (dictionary_dims.size() == 0) - IE_THROW() << errorPrefix_ << "has incorrect input parameters dimension!"; + + auto axesOp = gatherOp->get_input_node_shared_ptr(GATHER_AXIS); + if (!ngraph::as_type_ptr(axesOp)) + IE_THROW() << errorPrefix_ << "supports only Constant op on 'axis' input."; + + axis = static_cast(gatherOp->get_axis()); if (axis < 0) axis += dictionary_dims.size(); + // Dictionary must be at least rank axis + 1 + IE_ASSERT(-static_cast(dictionary_dims.size()) <= axis && axis < static_cast(dictionary_dims.size())) + << errorPrefix_ << "has incorrect input parameters dimensions and axis number!"; // Find number of dictionaries, index range and data length for (int i = 0; i < axis; i++) @@ -48,28 +63,29 @@ class GatherImpl: public ExtLayerBase { dataLength *= dictionary_dims[i]; if (dataLength == 0) - IE_THROW() << layer->name << " Incorrect input parameters dimension!"; + IE_THROW() << errorPrefix_ << "has incorrect input parameters dimension!"; LayerConfig config; - DataConfig dataConfigIdx, dataConfigDct; - Precision dataPrecision = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getPrecision(); - dataConfigDct.desc = TensorDesc(dataPrecision, dictionary_dims, - layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getLayoutByDims(dictionary_dims)); + DataConfig dataConfigIdx, dataConfigDct, dataConfigAxis; + Precision dataPrecision = details::convertPrecision(gatherOp->get_input_element_type(GATHER_DICTIONARY)); + dataConfigDct.desc = TensorDesc(dataPrecision, dictionary_dims, TensorDesc::getLayoutByDims(dictionary_dims)); config.inConfs.push_back(dataConfigDct); - const SizeVector& indexes_dims = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getDims(); - dataConfigIdx.desc = TensorDesc(inIdxPrecision, indexes_dims, - layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getLayout()); + const SizeVector& indexes_dims = gatherOp->get_input_shape(GATHER_INDEXES); + dataConfigIdx.desc = TensorDesc(inIdxPrecision, indexes_dims, TensorDesc::getLayoutByDims(indexes_dims)); config.inConfs.push_back(dataConfigIdx); + const SizeVector& axis_dims = gatherOp->get_input_shape(GATHER_AXIS); + dataConfigAxis.desc = TensorDesc(Precision::I32, axis_dims, TensorDesc::getLayoutByDims(axis_dims)); + config.inConfs.push_back(dataConfigAxis); DataConfig dataConfigOut; - const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims(); - dataConfigOut.desc = TensorDesc(dataPrecision, out_dims, - layer->outData[0]->getTensorDesc().getLayoutByDims(out_dims)); + const SizeVector& out_dims = gatherOp->get_output_shape(0); + dataConfigOut.desc = TensorDesc(dataPrecision, out_dims, TensorDesc::getLayoutByDims(out_dims)); config.outConfs.push_back(dataConfigOut); config.dynBatchSupport = false; confs.push_back(config); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } @@ -144,6 +160,9 @@ class GatherImpl: public ExtLayerBase { size_t dataLength = 1; const size_t GATHER_DICTIONARY = 0; const size_t GATHER_INDEXES = 1; + const size_t GATHER_AXIS = 2; + + std::string errorPrefix_; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.hpp b/inference-engine/src/mkldnn_plugin/nodes/list.hpp index 65275b91cf6664..f87d890d3c21c6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/list.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/list.hpp @@ -7,12 +7,12 @@ #include #include -#include
IE_ASSERT(-static_cast(dictionary_dims.size()) <= axis && axis < static_cast(dictionary_dims.size())) - << layer->name << " Incorrect input parameters dimensions and axis number!"; + IE_THROW() << errorPrefix_ << "has incorrect input parameters dimension!"; + + auto axesOp = gatherOp->get_input_node_shared_ptr(GATHER_AXIS); + if (!ngraph::as_type_ptr(axesOp)) + IE_THROW() << errorPrefix_ << "supports only Constant op on 'axis' input."; + + axis = static_cast(gatherOp->get_axis()); if (axis < 0) axis += dictionary_dims.size(); + // Dictionary must be at least rank axis + 1 + IE_ASSERT(-static_cast(dictionary_dims.size()) <= axis && axis < static_cast(dictionary_dims.size())) + << errorPrefix_ << "has incorrect input parameters dimensions and axis number!"; // Find number of dictionaries, index range and data length for (int i = 0; i < axis; i++) @@ -48,28 +63,29 @@ class GatherImpl: public ExtLayerBase { dataLength *= dictionary_dims[i]; if (dataLength == 0) - IE_THROW() << layer->name << " Incorrect input parameters dimension!"; + IE_THROW() << errorPrefix_ << "had incorrect input parameters dimension!"; LayerConfig config; - DataConfig dataConfigIdx, dataConfigDct; - Precision dataPrecision = layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getPrecision(); - dataConfigDct.desc = TensorDesc(dataPrecision, dictionary_dims, - layer->insData[GATHER_DICTIONARY].lock()->getTensorDesc().getLayoutByDims(dictionary_dims)); + DataConfig dataConfigIdx, dataConfigDct, dataConfigAxis; + Precision dataPrecision = details::convertPrecision(gatherOp->get_input_element_type(GATHER_DICTIONARY)); + dataConfigDct.desc = TensorDesc(dataPrecision, dictionary_dims, TensorDesc::getLayoutByDims(dictionary_dims)); config.inConfs.push_back(dataConfigDct); - const SizeVector& indexes_dims = layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getDims(); - dataConfigIdx.desc = TensorDesc(inIdxPrecision, indexes_dims, - layer->insData[GATHER_INDEXES].lock()->getTensorDesc().getLayout()); + const SizeVector& indexes_dims = gatherOp->get_input_shape(GATHER_INDEXES); + dataConfigIdx.desc = TensorDesc(inIdxPrecision, indexes_dims, TensorDesc::getLayoutByDims(indexes_dims)); config.inConfs.push_back(dataConfigIdx); + const SizeVector& axis_dims = gatherOp->get_input_shape(GATHER_AXIS); + dataConfigAxis.desc = TensorDesc(Precision::I32, axis_dims, TensorDesc::getLayoutByDims(axis_dims)); + config.inConfs.push_back(dataConfigAxis); DataConfig dataConfigOut; - const SizeVector& out_dims = layer->outData[0]->getTensorDesc().getDims(); - dataConfigOut.desc = TensorDesc(dataPrecision, out_dims, - layer->outData[0]->getTensorDesc().getLayoutByDims(out_dims)); + const SizeVector& out_dims = gatherOp->get_output_shape(0); + dataConfigOut.desc = TensorDesc(dataPrecision, out_dims, TensorDesc::getLayoutByDims(out_dims)); config.outConfs.push_back(dataConfigOut); config.dynBatchSupport = false; confs.push_back(config); } catch (InferenceEngine::Exception &ex) { errorMsg = ex.what(); + throw; } } @@ -144,6 +160,9 @@ class GatherImpl: public ExtLayerBase { size_t dataLength = 1; const size_t GATHER_DICTIONARY = 0; const size_t GATHER_INDEXES = 1; + const size_t GATHER_AXIS = 2; + + std::string errorPrefix_; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/list.hpp b/inference-engine/src/mkldnn_plugin/nodes/list.hpp index 65275b91cf6664..f87d890d3c21c6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/list.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/list.hpp @@ -7,12 +7,12 @@ #include #include -#include 
#include #include #include #include +#include namespace InferenceEngine { @@ -43,7 +43,7 @@ class ILayerImplFactory { namespace Extensions { namespace Cpu { -using ext_factory = std::function; +using ext_factory = std::function& op)>; struct ExtensionsHolder { std::map list; @@ -60,11 +60,11 @@ class MKLDNNExtensions : public IExtension { } virtual StatusCode - getFactoryFor(ILayerImplFactory*& factory, const CNNLayer* cnnLayer, ResponseDesc* resp) noexcept { + getFactoryFor(ILayerImplFactory*& factory, const std::shared_ptr& op, ResponseDesc* resp) noexcept { using namespace MKLDNNPlugin; - factory = layersFactory.createNodeIfRegistered(MKLDNNPlugin, cnnLayer->type, cnnLayer); + factory = layersFactory.createNodeIfRegistered(MKLDNNPlugin, op->get_type_name(), op); if (!factory) { - std::string errorMsg = std::string("Factory for ") + cnnLayer->type + " wasn't found!"; + std::string errorMsg = std::string("Factory for ") + op->get_type_name() + " wasn't found!"; errorMsg.copy(resp->msg, sizeof(resp->msg) - 1); return NOT_FOUND; } @@ -85,7 +85,7 @@ class MKLDNNExtensions : public IExtension { using LayersFactory = openvino::cc::Factory< std::string, - InferenceEngine::ILayerImplFactory*(const InferenceEngine::CNNLayer*)>; + InferenceEngine::ILayerImplFactory*(const std::shared_ptr& op)>; LayersFactory layersFactory; diff --git a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp index 12604eef081122..24e0403d6bf516 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/list_tbl.hpp @@ -7,78 +7,80 @@ # define MKLDNN_EXTENSION_NODE(__prim, __type) #endif -MKLDNN_EXTENSION_NODE(EmbeddingBagOffsetsSumImpl, EmbeddingBagOffsetsSum); -MKLDNN_EXTENSION_NODE(EmbeddingBagPackedSumImpl, EmbeddingBagPackedSum); -MKLDNN_EXTENSION_NODE(EmbeddingSegmentsSumImpl, EmbeddingSegmentsSum); -MKLDNN_EXTENSION_NODE(CTCLossImpl, CTCLoss); -MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox); -MKLDNN_EXTENSION_NODE(MathImpl, Abs); -MKLDNN_EXTENSION_NODE(MathImpl, Acos); -MKLDNN_EXTENSION_NODE(MathImpl, Acosh); -MKLDNN_EXTENSION_NODE(MathImpl, Asin); -MKLDNN_EXTENSION_NODE(MathImpl, Asinh); -MKLDNN_EXTENSION_NODE(MathImpl, Atan); -MKLDNN_EXTENSION_NODE(MathImpl, Atanh); -MKLDNN_EXTENSION_NODE(MathImpl, Ceil); -MKLDNN_EXTENSION_NODE(MathImpl, Ceiling); -MKLDNN_EXTENSION_NODE(MathImpl, Cos); -MKLDNN_EXTENSION_NODE(MathImpl, Cosh); -MKLDNN_EXTENSION_NODE(MathImpl, Floor); -MKLDNN_EXTENSION_NODE(MathImpl, HardSigmoid); -MKLDNN_EXTENSION_NODE(MathImpl, Log); -MKLDNN_EXTENSION_NODE(MathImpl, Neg); -MKLDNN_EXTENSION_NODE(MathImpl, Reciprocal); -MKLDNN_EXTENSION_NODE(MathImpl, Selu); -MKLDNN_EXTENSION_NODE(MathImpl, Sign); -MKLDNN_EXTENSION_NODE(MathImpl, Sin); -MKLDNN_EXTENSION_NODE(MathImpl, Sinh); -MKLDNN_EXTENSION_NODE(MathImpl, SoftPlus); -MKLDNN_EXTENSION_NODE(MathImpl, Softsign); -MKLDNN_EXTENSION_NODE(MathImpl, Tan); -MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs); -MKLDNN_EXTENSION_NODE(ExtractImagePatchesImpl, ExtractImagePatches); -MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence); -MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput); -MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax); -MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze); -MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput); -MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo); -MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax); 
-MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo); -MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze); -MKLDNN_EXTENSION_NODE(FillImpl, Fill); -MKLDNN_EXTENSION_NODE(UniqueImpl, Unique); -MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling); -MKLDNN_EXTENSION_NODE(DepthToSpaceImpl, DepthToSpace); -MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot); -MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast); -MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum); -MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense); -MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor); -MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage); -MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression); -MKLDNN_EXTENSION_NODE(TopKImpl, TopK); -MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels); -MKLDNN_EXTENSION_NODE(SpaceToDepthImpl, SpaceToDepth); -MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile); -MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace); -MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator); -MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS); -MKLDNN_EXTENSION_NODE(GRNImpl, GRN); -MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows); -MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize); -MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder); -MKLDNN_EXTENSION_NODE(CTCGreedyDecoderSeqLenImpl, CTCGreedyDecoderSeqLen); +//MKLDNN_EXTENSION_NODE(EmbeddingBagOffsetsSumImpl, EmbeddingBagOffsetsSum); +//MKLDNN_EXTENSION_NODE(EmbeddingBagPackedSumImpl, EmbeddingBagPackedSum); +//MKLDNN_EXTENSION_NODE(EmbeddingSegmentsSumImpl, EmbeddingSegmentsSum); +//MKLDNN_EXTENSION_NODE(CTCLossImpl, CTCLoss); +//MKLDNN_EXTENSION_NODE(PriorBoxImpl, PriorBox); +//MKLDNN_EXTENSION_NODE(MathImpl, Abs); +//MKLDNN_EXTENSION_NODE(MathImpl, Acos); +//MKLDNN_EXTENSION_NODE(MathImpl, Acosh); +//MKLDNN_EXTENSION_NODE(MathImpl, Asin); +//MKLDNN_EXTENSION_NODE(MathImpl, Asinh); +//MKLDNN_EXTENSION_NODE(MathImpl, Atan); +//MKLDNN_EXTENSION_NODE(MathImpl, Atanh); +//MKLDNN_EXTENSION_NODE(MathImpl, Ceil); +//MKLDNN_EXTENSION_NODE(MathImpl, Ceiling); +//MKLDNN_EXTENSION_NODE(MathImpl, Cos); +//MKLDNN_EXTENSION_NODE(MathImpl, Cosh); +//MKLDNN_EXTENSION_NODE(MathImpl, Erf); +//MKLDNN_EXTENSION_NODE(MathImpl, Floor); +//MKLDNN_EXTENSION_NODE(MathImpl, HardSigmoid); +//MKLDNN_EXTENSION_NODE(MathImpl, Log); +//MKLDNN_EXTENSION_NODE(MathImpl, Neg); +//MKLDNN_EXTENSION_NODE(MathImpl, Reciprocal); +//MKLDNN_EXTENSION_NODE(MathImpl, Selu); +//MKLDNN_EXTENSION_NODE(MathImpl, Sign); +//MKLDNN_EXTENSION_NODE(MathImpl, Sin); +//MKLDNN_EXTENSION_NODE(MathImpl, Sinh); +//MKLDNN_EXTENSION_NODE(MathImpl, SoftPlus); +//MKLDNN_EXTENSION_NODE(MathImpl, Softsign); +//MKLDNN_EXTENSION_NODE(MathImpl, Tan); +//MKLDNN_EXTENSION_NODE(ExperimentalDetectronTopKROIsImpl, ExperimentalDetectronTopKROIs); +//MKLDNN_EXTENSION_NODE(ExtractImagePatchesImpl, ExtractImagePatches); +//MKLDNN_EXTENSION_NODE(ReverseSequenceImpl, ReverseSequence); +//MKLDNN_EXTENSION_NODE(DetectionOutputImpl, DetectionOutput); +//MKLDNN_EXTENSION_NODE(ArgMaxImpl, ArgMax); +//MKLDNN_EXTENSION_NODE(UnsqueezeImpl, Unsqueeze); +//MKLDNN_EXTENSION_NODE(StridedSliceImpl, StridedSlice); +//MKLDNN_EXTENSION_NODE(ExperimentalDetectronDetectionOutputImpl, ExperimentalDetectronDetectionOutput); +//MKLDNN_EXTENSION_NODE(RegionYoloImpl, RegionYolo); +//MKLDNN_EXTENSION_NODE(LogSoftmaxImpl, LogSoftmax); 
+//MKLDNN_EXTENSION_NODE(ReorgYoloImpl, ReorgYolo); +//MKLDNN_EXTENSION_NODE(SqueezeImpl, Squeeze); +//MKLDNN_EXTENSION_NODE(FillImpl, Fill); +//MKLDNN_EXTENSION_NODE(UniqueImpl, Unique); +//MKLDNN_EXTENSION_NODE(PSROIPoolingImpl, PSROIPooling); +//MKLDNN_EXTENSION_NODE(DepthToSpaceImpl, DepthToSpace); +//MKLDNN_EXTENSION_NODE(OneHotImpl, OneHot); +//MKLDNN_EXTENSION_NODE(BroadcastImpl, Broadcast); +//MKLDNN_EXTENSION_NODE(ExperimentalSparseWeightedReduceImpl, ExperimentalSparseWeightedSum); +//MKLDNN_EXTENSION_NODE(SparseToDenseImpl, SparseToDense); +//MKLDNN_EXTENSION_NODE(ExperimentalDetectronROIFeatureExtractorImpl, ExperimentalDetectronROIFeatureExtractor); +//MKLDNN_EXTENSION_NODE(ONNXCustomProposalImpl, ExperimentalDetectronGenerateProposalsSingleImage); +//MKLDNN_EXTENSION_NODE(NonMaxSuppressionImpl, NonMaxSuppression); +//MKLDNN_EXTENSION_NODE(TopKImpl, TopK); +//MKLDNN_EXTENSION_NODE(ShuffleChannelsImpl, ShuffleChannels); +//MKLDNN_EXTENSION_NODE(SpaceToDepthImpl, SpaceToDepth); +//MKLDNN_EXTENSION_NODE(PowerFileImpl, PowerFile); +//MKLDNN_EXTENSION_NODE(BatchToSpaceImpl, BatchToSpace); +//MKLDNN_EXTENSION_NODE(ExperimentalDetectronPriorGridGeneratorImpl, ExperimentalDetectronPriorGridGenerator); +//MKLDNN_EXTENSION_NODE(SimplerNMSImpl, SimplerNMS); +//MKLDNN_EXTENSION_NODE(GRNImpl, GRN); +//MKLDNN_EXTENSION_NODE(SparseFillEmptyRowsImpl, SparseFillEmptyRows); +//MKLDNN_EXTENSION_NODE(BucketizeImpl, Bucketize); +//MKLDNN_EXTENSION_NODE(CTCGreedyDecoderImpl, CTCGreedyDecoder); +//MKLDNN_EXTENSION_NODE(CTCGreedyDecoderSeqLenImpl, CTCGreedyDecoderSeqLen); MKLDNN_EXTENSION_NODE(GatherImpl, Gather); -MKLDNN_EXTENSION_NODE(GatherElementsImpl, GatherElements); -MKLDNN_EXTENSION_NODE(GatherNDImpl, GatherND); -MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal); -MKLDNN_EXTENSION_NODE(RangeImpl, Range); -MKLDNN_EXTENSION_NODE(SelectImpl, Select); -MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree); -MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered); -MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch); -MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean); -MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN); -MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum); -MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum); +//MKLDNN_EXTENSION_NODE(GatherElementsImpl, GatherElements); +//MKLDNN_EXTENSION_NODE(GatherNDImpl, GatherND); +//MKLDNN_EXTENSION_NODE(ProposalImpl, Proposal); +//MKLDNN_EXTENSION_NODE(RangeImpl, Range); +//MKLDNN_EXTENSION_NODE(SelectImpl, Select); +//MKLDNN_EXTENSION_NODE(GatherTreeImpl, GatherTree); +//MKLDNN_EXTENSION_NODE(PriorBoxClusteredImpl, PriorBoxClustered); +//MKLDNN_EXTENSION_NODE(SpaceToBatchImpl, SpaceToBatch); +//MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentMean); +//MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSqrtN); +//MKLDNN_EXTENSION_NODE(SparseSegmentReduceImpl, SparseSegmentSum); +//MKLDNN_EXTENSION_NODE(CumSumImpl, CumSum); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp index b885f8c0c159ca..8aff5ca6a0b5c7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.cpp @@ -10,9 +10,9 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNBatchNormalizationNode::MKLDNNBatchNormalizationNode(const InferenceEngine::CNNLayerPtr& layer, 
+MKLDNNBatchNormalizationNode::MKLDNNBatchNormalizationNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) { + : MKLDNNNode(op, eng, cache) { internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { return GetVarianceDesc(primitive_desc_it); }); @@ -27,82 +27,90 @@ MKLDNNBatchNormalizationNode::MKLDNNBatchNormalizationNode(const InferenceEngine }); } -void MKLDNNBatchNormalizationNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - auto * bnLayer = dynamic_cast(getCnnLayer().get()); - if (bnLayer == nullptr) - IE_THROW() << "Cannot convert batch normalization layer."; - if (bnLayer->_weights == nullptr || bnLayer->_biases == nullptr) { - IE_THROW() << "Weights/biases are empty for layer: " << bnLayer->name - << " used in MKLDNN node: " << getName() << "\n" - << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" - << " to load them from .bin part of the IR"; - } - - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (!getChildEdges().size()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - eps = bnLayer->epsilon; - - size_t variancesSize = MKLDNNDims(bnLayer->_weights->getTensorDesc().getDims()).size(); - size_t meansSize = MKLDNNDims(bnLayer->_biases->getTensorDesc().getDims()).size(); - - if (variancesSize != meansSize && variancesSize != 1) - IE_THROW() << "Incorrect weights and biases sizes!"; - - internalBlobs.push_back(createInternalBlob(bnLayer->_weights->getTensorDesc().getDims(), true)); - internalBlobs.push_back(createInternalBlob(bnLayer->_biases->getTensorDesc().getDims(), false)); - - auto parentOutDims = getParentEdgeAt(0)->getDims(); - - if (fusedWith.size() > 1) - IE_THROW() << "BatchNorm fusion is possible with only one layer!"; - - for (const auto &node : fusedWith) { - auto * scshLayer = dynamic_cast(node->getCnnLayer().get()); - if (scshLayer == nullptr) - IE_THROW() << "Cannot cast to the ScaleShift layer to fuse with BatchNorm."; - - size_t C = static_cast(getChildEdgeAt(0)->getDims()[1]); - SizeVector mkldnn_weights = {2, C}; - TensorDesc desc(scshLayer->_weights->getTensorDesc().getPrecision(), mkldnn_weights, InferenceEngine::NC); - InferenceEngine::TBlob::Ptr internalBlob = InferenceEngine::make_shared_blob(desc); - internalBlob->allocate(); - float * data = internalBlob->buffer(); - if (data == nullptr) - IE_THROW() << "Cannot get memory!"; - - InferenceEngine::Blob::Ptr blb = scshLayer->_weights; - if (blb == nullptr) - IE_THROW() << "Cannot get weights blob for node " << getName() << "."; - - size_t weightsByteSize = blb->byteSize(); - cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize); - data += blb->size(); - blb = scshLayer->_biases; - - if (blb == nullptr) { - memset(data, 0, weightsByteSize); - } else { - if (weightsByteSize != blb->byteSize()) - IE_THROW() << "ScaleShift has incorrect weights!"; - cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize); - } - internalBlobs.push_back(internalBlob); - } - - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (precision != InferenceEngine::Precision::FP32) - precision = InferenceEngine::Precision::FP32; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +bool MKLDNNBatchNormalizationNode::fusedWithScale() const { + return false; + IE_THROW() << 
"Not implemented"; + // TODO [NM]: reimplement w/o using CNNLayer +// return fusedWith.size() == 1 && fusedWith[0]->getType() == Eltwise +// && fusedWith[0]->getCnnLayer()->type == "ScaleShift"; +} - for (auto format : getAvailableFormatsForDims(parentOutDims)) { - MKLDNNMemoryDesc in_candidate(parentOutDims, inputDataType, format); - createDescriptor({in_candidate}, {}); - } +void MKLDNNBatchNormalizationNode::getSupportedDescriptors() { +// if (!descs.empty()) +// return; +// auto * bnLayer = dynamic_cast(getCnnLayer().get()); +// if (bnLayer == nullptr) +// IE_THROW() << "Cannot convert batch normalization layer."; +// if (bnLayer->_weights == nullptr || bnLayer->_biases == nullptr) { +// IE_THROW() << "Weights/biases are empty for layer: " << bnLayer->name +// << " used in MKLDNN node: " << getName() << "\n" +// << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" +// << " to load them from .bin part of the IR"; +// } +// +// if (getParentEdges().size() != 1) +// IE_THROW() << "Incorrect number of input edges for layer " << getName(); +// if (!getChildEdges().size()) +// IE_THROW() << "Incorrect number of output edges for layer " << getName(); +// +// eps = bnLayer->epsilon; +// +// size_t variancesSize = MKLDNNDims(bnLayer->_weights->getTensorDesc().getDims()).size(); +// size_t meansSize = MKLDNNDims(bnLayer->_biases->getTensorDesc().getDims()).size(); +// +// if (variancesSize != meansSize && variancesSize != 1) +// IE_THROW() << "Incorrect weights and biases sizes!"; +// +// internalBlobs.push_back(createInternalBlob(bnLayer->_weights->getTensorDesc().getDims(), true)); +// internalBlobs.push_back(createInternalBlob(bnLayer->_biases->getTensorDesc().getDims(), false)); +// +// auto parentOutDims = getParentEdgeAt(0)->getDims(); +// +// if (fusedWith.size() > 1) +// IE_THROW() << "BatchNorm fusion is possible with only one layer!"; +// +// for (const auto &node : fusedWith) { +// auto * scshLayer = dynamic_cast(node->getCnnLayer().get()); +// if (scshLayer == nullptr) +// IE_THROW() << "Cannot cast to the ScaleShift layer to fuse with BatchNorm."; +// +// size_t C = static_cast(getChildEdgeAt(0)->getDims()[1]); +// SizeVector mkldnn_weights = {2, C}; +// TensorDesc desc(scshLayer->_weights->getTensorDesc().getPrecision(), mkldnn_weights, InferenceEngine::NC); +// InferenceEngine::TBlob::Ptr internalBlob = InferenceEngine::make_shared_blob(desc); +// internalBlob->allocate(); +// float * data = internalBlob->buffer(); +// if (data == nullptr) +// IE_THROW() << "Cannot get memory!"; +// +// InferenceEngine::Blob::Ptr blb = scshLayer->_weights; +// if (blb == nullptr) +// IE_THROW() << "Cannot get weights blob for node " << getName() << "."; +// +// size_t weightsByteSize = blb->byteSize(); +// cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize); +// data += blb->size(); +// blb = scshLayer->_biases; +// +// if (blb == nullptr) { +// memset(data, 0, weightsByteSize); +// } else { +// if (weightsByteSize != blb->byteSize()) +// IE_THROW() << "ScaleShift has incorrect weights!"; +// cpu_memcpy_s(data, internalBlob->byteSize(), blb->buffer(), weightsByteSize); +// } +// internalBlobs.push_back(internalBlob); +// } +// +// InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); +// if (precision != InferenceEngine::Precision::FP32) +// precision = InferenceEngine::Precision::FP32; +// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +// +// for (auto format : 
getAvailableFormatsForDims(parentOutDims)) { +// MKLDNNMemoryDesc in_candidate(parentOutDims, inputDataType, format); +// createDescriptor({in_candidate}, {}); +// } } static MKLDNNMemoryDesc get_bn_mdesc_by_index(const mkldnn::primitive_desc_iterator &primitive_desc, int idx) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h index 46d79425fb2154..ed64cb06fd77a9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batchnorm_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNBatchNormalizationNode : public MKLDNNNode { public: - MKLDNNBatchNormalizationNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, + MKLDNNBatchNormalizationNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNBatchNormalizationNode() override = default; @@ -26,8 +26,7 @@ class MKLDNNBatchNormalizationNode : public MKLDNNNode { void createPrimitive() override; bool created() const override; - bool fusedWithScale() const {return fusedWith.size() == 1 && fusedWith[0]->getType() == Eltwise - && fusedWith[0]->getCnnLayer()->type == "ScaleShift";} + bool fusedWithScale() const; MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; MKLDNNMemoryDesc getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 68c554ceef0345..df0ebd7d54b5d4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -8,12 +8,10 @@ #include "mkldnn_eltwise_node.h" #include "mkldnn_quantize_node.h" #include "mkldnn_conv_node.h" -#include #include #include #include #include -#include #include "ie_parallel.hpp" #include "cpu/x64/jit_generator.hpp" #include "cpu/x64/jit_uni_eltwise_injector.hpp" @@ -873,9 +871,9 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ } }; -MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, +MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) { + : MKLDNNNode(op, eng, cache) { if (mayiuse(x64::avx512_common)) { implType = impl_desc_type::jit_avx512; } else if (mayiuse(x64::avx2)) { @@ -888,63 +886,65 @@ MKLDNNBinaryConvolutionNode::MKLDNNBinaryConvolutionNode(const InferenceEngine:: } void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - - auto* binConvLayer = dynamic_cast(getCnnLayer().get()); - if (binConvLayer == nullptr) - IE_THROW() << "Cannot convert convolution layer."; - - std::string errorPrefix = "BinaryConvolution layer with name '" + getName() + "' "; - - withBinarization = isFusedWith(Quantize); - withSum = false; - int expectedInputEdgesNum = 2; - for (int i = 0; i < fusedWith.size(); i++) { - auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); - if (eltwiseNode && eltwiseNode->isSum()) { - withSum = true; - expectedInputEdgesNum++; - } - } - - group = binConvLayer->_group; - if (group != 1) { - IE_THROW() << errorPrefix << "doesn't support parameter group != 1"; - } - - if 
(getParentEdges().size() != expectedInputEdgesNum) - IE_THROW() << errorPrefix << "has incorrect number of input edges"; - - if (getChildEdges().empty()) - IE_THROW() << errorPrefix << "has incorrect number of output edges"; - - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); - } - - if (getParentEdgeAt(1)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); - } - - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); - } - - if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) { - IE_THROW() << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input."; - } - - pad_value = binConvLayer->_pad_value; - - invertVectorCopyUtoI(binConvLayer->_stride, stride); - for (int i = 1; i <= binConvLayer->_dilation.size(); i++) { - dilation.push_back(static_cast(binConvLayer->_dilation[binConvLayer->_dilation.size() - i]) - 1); - } - - auto allPads = getPaddings(*binConvLayer); - invertVectorCopyUtoI(allPads.begin, paddingL); - invertVectorCopyUtoI(allPads.end, paddingR); + // TODO [NM]: reimplement w/o using CNNLayer + IE_THROW() << "Not implemented"; +// if (!descs.empty()) +// return; +// +// auto* binConvLayer = dynamic_cast(getCnnLayer().get()); +// if (binConvLayer == nullptr) +// IE_THROW() << "Cannot convert convolution layer."; +// +// std::string errorPrefix = "BinaryConvolution layer with name '" + getName() + "' "; +// +// withBinarization = isFusedWith(Quantize); +// withSum = false; +// int expectedInputEdgesNum = 2; +// for (int i = 0; i < fusedWith.size(); i++) { +// auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); +// if (eltwiseNode && eltwiseNode->isSum()) { +// withSum = true; +// expectedInputEdgesNum++; +// } +// } +// +// group = binConvLayer->_group; +// if (group != 1) { +// IE_THROW() << errorPrefix << "doesn't support parameter group != 1"; +// } +// +// if (getParentEdges().size() != expectedInputEdgesNum) +// IE_THROW() << errorPrefix << "has incorrect number of input edges"; +// +// if (getChildEdges().empty()) +// IE_THROW() << errorPrefix << "has incorrect number of output edges"; +// +// if (getParentEdgeAt(0)->getDims().ndims() != 4) { +// IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); +// } +// +// if (getParentEdgeAt(1)->getDims().ndims() != 4) { +// IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); +// } +// +// if (getChildEdgeAt(0)->getDims().ndims() != 4) { +// IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); +// } +// +// if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) { +// IE_THROW() << "Convolution layer. Unsupported mode. 
Only 4D and 5D blobs are supported as input."; +// } +// +// pad_value = binConvLayer->_pad_value; +// +// invertVectorCopyUtoI(binConvLayer->_stride, stride); +// for (int i = 1; i <= binConvLayer->_dilation.size(); i++) { +// dilation.push_back(static_cast(binConvLayer->_dilation[binConvLayer->_dilation.size() - i]) - 1); +// } +// +// auto allPads = getPaddings(*binConvLayer); +// invertVectorCopyUtoI(allPads.begin, paddingL); +// invertVectorCopyUtoI(allPads.end, paddingR); } void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { @@ -1077,15 +1077,6 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { } bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { - auto isOneOf = [](EltwiseOpType alg, std::vector algs) { - for (auto a : algs) { - if (alg == a) { - return true; - } - } - return false; - }; - if (implType == impl_desc_type::ref) return false; @@ -1099,23 +1090,18 @@ bool MKLDNNBinaryConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { IE_THROW() << "Cannot get quantize node " << node->getName(); return quantizeNode->isBinarization(); } else if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << node->getName(); - // Only one Add operation can be fused since it is implemented via output blob reuse - if (eltwiseNode->isSum()) { + if (node->getAlgorithm() == EltwiseAdd) { for (auto& fusedNode : fusedWith) { - auto* fusedEltwiseNode = dynamic_cast(fusedNode.get()); - if (fusedEltwiseNode->isSum()) { + if (fusedNode->getType() == Eltwise && fusedNode->getAlgorithm() == EltwiseAdd) { return false; } } } - return eltwiseNode->isSum() || - isOneOf(eltwiseNode->getOpType(), {MulAdd, Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, - Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}); + return one_of(node->getAlgorithm(), EltwiseAdd, EltwiseMulAdd, EltwisePrelu, EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseBoundedRelu, + EltwiseClamp, EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, + EltwiseRoundHalfAwayFromZero, EltwiseLinear, EltwiseAbs, EltwiseSquare, EltwiseSqrt); } return false; @@ -1127,7 +1113,7 @@ void MKLDNNBinaryConvolutionNode::setPostOps(mkldnn::primitive_attr &attr) { for (auto &node : fusedWith) { auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { - if (eltwiseNode->isSum()) + if (eltwiseNode->getAlgorithm() == EltwiseAdd) ops.append_sum(1.0); else eltwiseNode->appendPostOps(ops); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h index 60892aae208733..7b9b9db1fc056d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h @@ -74,7 +74,7 @@ struct jit_uni_bin_conv_kernel { class MKLDNNBinaryConvolutionNode : public MKLDNNNode { public: - MKLDNNBinaryConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNBinaryConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNBinaryConvolutionNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp index 
63d51589ad4459..ec8b36a3138c3f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp @@ -9,7 +9,6 @@ #include #include -#include #include "mkldnn.hpp" #include "mkldnn/iml_type_mapper.h" #include "mkldnn_dims.h" @@ -27,287 +26,291 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNConcatNode::MKLDNNConcatNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) {} void MKLDNNConcatNode::getSupportedDescriptors() { - auto * conLayer = dynamic_cast(getCnnLayer().get()); - - if (conLayer == nullptr) - IE_THROW() << "Cannot convert concat layer."; - - axis = conLayer->_axis; - - if (getParentEdges().empty()) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - auto& firstParentDims = getParentEdgeAt(0)->getDims(); - for (size_t i = 1; i < getParentEdges().size(); i++) { - auto& dims = getParentEdgeAt(i)->getDims(); - bool incorrectDims = false; - for (size_t j = 0; j < firstParentDims.ndims(); j++) { - if (j == axis) - continue; - if (dims.ndims() != firstParentDims.ndims() || firstParentDims[j] != dims[j]) { - incorrectDims = true; - break; - } - } - if (incorrectDims || firstParentDims.ndims() == 0) { - IE_THROW() << "Incorrect input dimensions for concat node " << getName(); - } - } + IE_THROW() << "Not implemented"; +// TODO [NM]: reimplement w/o using CNNLayer +// auto * conLayer = dynamic_cast(getCnnLayer().get()); +// +// if (conLayer == nullptr) +// IE_THROW() << "Cannot convert concat layer."; +// +// axis = conLayer->_axis; +// +// if (getParentEdges().empty()) +// IE_THROW() << "Incorrect number of input edges for layer " << getName(); +// if (getChildEdges().empty()) +// IE_THROW() << "Incorrect number of output edges for layer " << getName(); +// auto& firstParentDims = getParentEdgeAt(0)->getDims(); +// for (size_t i = 1; i < getParentEdges().size(); i++) { +// auto& dims = getParentEdgeAt(i)->getDims(); +// bool incorrectDims = false; +// for (size_t j = 0; j < firstParentDims.ndims(); j++) { +// if (j == axis) +// continue; +// if (dims.ndims() != firstParentDims.ndims() || firstParentDims[j] != dims[j]) { +// incorrectDims = true; +// break; +// } +// } +// if (incorrectDims || firstParentDims.ndims() == 0) { +// IE_THROW() << "Incorrect input dimensions for concat node " << getName(); +// } +// } } void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) - return; - - inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); - bool isMixedPrecision = false; - for (int i = 1; i < getCnnLayer()->insData.size(); i++) { - if (getCnnLayer()->insData[0].lock()->getPrecision() != getCnnLayer()->insData[i].lock()->getPrecision()) { - isMixedPrecision = true; - break; - } - } - - // MKLDNN doesn't support different precision on inputs so fallback on FP32 in such case - if (isMixedPrecision) - inputPrecision = Precision::FP32; - - // Concat node supports int8 implementations only for NHWC and NDHWC layouts - if (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) { - int ndims = 
getChildEdgeAt(0)->getDims().ndims(); - if (ndims != 2 && ndims != 4 && ndims != 5) - inputPrecision = Precision::FP32; - } - - // MKLDNN supports only equal precisions for inputs and output - outputPrecision = inputPrecision; - - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); - - MKLDNNDims dstDims = getChildEdgeAt(0)->getDims(); - InferenceEngine::LayerConfig config; - config.dynBatchSupport = true; - - for (size_t i = 0; i < getParentEdges().size(); i++) { - auto parentEdge = getParentEdgeAt(i); - - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? parentEdge->getDims().ndims() == 2 ? memory::format_tag::nc : - parentEdge->getDims().ndims() == 4 ? memory::format_tag::nhwc : - memory::format_tag::ndhwc - : memory::format_tag::any; - - dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(parentEdge->getDims(), inputDataType, fmt)); - config.inConfs.push_back(dataConfig); - } - - auto dims = getChildEdgeAt(0)->getDims(); - - config.outConfs.resize(1); - config.outConfs[0].inPlace = -1; - config.outConfs[0].constant = false; - if ((!isMixedPrecision && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) || axis != 1) { - auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? dims.ndims() == 2 ? memory::format_tag::nc : - dims.ndims() == 4 ? memory::format_tag::nhwc : - memory::format_tag::ndhwc - : MKLDNNMemory::GetPlainFormat(dims); - - config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(dims, outputDataType, fmt)); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, fmt); - - if (inputPrecision != Precision::U8 && inputPrecision != Precision::I8) { - if (dims.ndims() == 4) { - if (dims[1] % 8 == 0) { - config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( - MKLDNNMemoryDesc(dims, outputDataType, memory::format_tag::nChw8c)); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, memory::format_tag::nChw8c); - - if (dims[1] % 16 == 0) { - config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( - MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nChw16c)); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nChw16c); - } - } - } else if (dims.ndims() == 5) { - if (dims[1] % 8 == 0) { - config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( - MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nCdhw8c)); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nCdhw8c); - - if (dims[1] % 16 == 0) { - config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( - MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nCdhw16c)); - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nCdhw16c); - } - } - } - } - } - - if (axis != 1) - return; - - auto numOfDim = static_cast(dstDims.ndims()); - - SizeVector order(numOfDim); - SizeVector offsets(numOfDim, 0lu); - size_t offset = (std::numeric_limits::max)(); - for (size_t i = 0; i < numOfDim; i++) { - order[i] = i; - } - - if (outputPrecision == Precision::I8 || outputPrecision == Precision::U8) { - if (numOfDim == 4) { - // 
Here we assume NHWC layout (channels are the last) - - order = {0, 2, 3, 1}; - offsets = {0, 0, 0, 0}; - - SizeVector blkDims = dstDims.ToSizeVector(); - blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[1] }; - - SizeVector strides(numOfDim); - strides.resize(numOfDim); - // C is the last in NHWC, so all strides are max() - for (size_t i = 0; i < numOfDim; i++) { - strides[i] = (std::numeric_limits::max)(); - } - - config.outConfs[0].desc = TensorDesc(outputPrecision, - dstDims.ToSizeVector(), - { blkDims, order, offset, offsets, strides }); - for (size_t i = 0; i < getParentEdges().size(); i++) { - auto parentEdge = getParentEdgeAt(i); - - SizeVector blkDims = parentEdge->getDims().ToSizeVector(); - blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[1] }; - - config.inConfs[i].inPlace = -1; // Change to 0 here if inplace concat is supported for NHWC in mkldnn - - config.inConfs[i].desc = TensorDesc(inputPrecision, parentEdge->getDims().ToSizeVector(), - {blkDims, order, offset, offsets, strides}); - } - - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nhwc); - - return; - } else if (numOfDim == 5) { - // Here we assume NDHWC layout (channels are the last) - - order = {0, 2, 3, 4, 1}; - offsets = {0, 0, 0, 0, 0}; - - SizeVector blkDims = dstDims.ToSizeVector(); - blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[4], blkDims[1] }; - - SizeVector strides(numOfDim); - strides.resize(numOfDim); - // C is the last in NDHWC, so all strides are max() - for (size_t i = 0; i < numOfDim; i++) { - strides[i] = (std::numeric_limits::max)(); - } - - config.outConfs[0].desc = TensorDesc(outputPrecision, - dstDims.ToSizeVector(), - { blkDims, order, offset, offsets, strides }); - for (size_t i = 0; i < getParentEdges().size(); i++) { - auto parentEdge = getParentEdgeAt(i); - - SizeVector blkDims = parentEdge->getDims().ToSizeVector(); - blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[4], blkDims[1] }; - - config.inConfs[i].inPlace = -1; // Change to 0 here if inplace concat is supported for NDHWC in mkldnn - - config.inConfs[i].desc = TensorDesc(inputPrecision, parentEdge->getDims().ToSizeVector(), - {blkDims, order, offset, offsets, strides}); - } - - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::ndhwc); - - return; - } - } - - SizeVector strides(numOfDim); - strides[numOfDim - 1] = 1; - for (size_t i = 2; i <= numOfDim; i++) { - if (numOfDim - i < axis) { - strides[numOfDim - i] = (std::numeric_limits::max)(); - } else { - strides[numOfDim - i] = strides[numOfDim - i + 1] * dstDims[numOfDim - i + 1]; - } - } - - config.outConfs[0].desc = TensorDesc( - MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType), - dstDims.ToSizeVector(), - {dstDims.ToSizeVector(), order, offset, offsets, strides}); - for (size_t i = 0; i < getParentEdges().size(); i++) { - auto parentEdge = getParentEdgeAt(i); - config.inConfs[i].inPlace = 0; - config.inConfs[i].desc = TensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(inputDataType), parentEdge->getDims().ToSizeVector(), - {parentEdge->getDims().ToSizeVector(), order, offset, offsets, strides}); - } - - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemory::Convert(config.outConfs[0].desc.getLayout())); - - if (numOfDim == 4lu || numOfDim == 5lu) { - size_t blkDimsLen = numOfDim + 1; - order.resize(blkDimsLen); - for (size_t i = 0; i < numOfDim; i++) { - order[i] = i; - } - order[numOfDim] = 1lu; - 
offsets = SizeVector(blkDimsLen, 0lu); - - // nChw8c, nChw16c, nCdhw8c, nCdhw16c - for (size_t sizeS : {8lu, 16lu}) { - SizeVector blkDims = dstDims.ToSizeVector(); - if (blkDims[1] % sizeS) - continue; - blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu); - blkDims.push_back(sizeS); - - strides.resize(blkDimsLen); - strides[blkDimsLen - 1] = 1; - for (size_t i = 2lu; i <= blkDimsLen; i++) { - if (blkDimsLen - i < axis) { - strides[blkDimsLen - i] = (std::numeric_limits::max)(); - } else { - strides[blkDimsLen - i] = strides[blkDimsLen - i + 1] * blkDims[blkDimsLen - i + 1]; - } - } - config.outConfs[0].desc = TensorDesc( - MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType), - dstDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); - - bool canInplace = true; - for (size_t i = 0lu; canInplace && i < getParentEdges().size(); i++) { - auto parentEdge = getParentEdgeAt(i); - blkDims = parentEdge->getDims().ToSizeVector(); - if (blkDims[1] % sizeS) - canInplace = false; - - blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu); - blkDims.push_back(sizeS); - config.inConfs[i].desc = TensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(inputDataType), parentEdge->getDims().ToSizeVector(), - {blkDims, order, offset, offsets, strides}); - } - if (canInplace) { - auto dstFormat = numOfDim == 4lu ? sizeS == 8lu ? mkldnn::memory::format_tag::nChw8c : mkldnn::memory::format_tag::nChw16c - : sizeS == 8lu ? mkldnn::memory::format_tag::nCdhw8c : mkldnn::memory::format_tag::nCdhw16c; - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, dstFormat); - } - } - } + IE_THROW() << "Not implemented"; + // TODO [NM]: reimplement w/o using CNNLayer +// if (!supportedPrimitiveDescriptors.empty()) +// return; +// +// inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); +// bool isMixedPrecision = false; +// for (int i = 1; i < getCnnLayer()->insData.size(); i++) { +// if (getCnnLayer()->insData[0].lock()->getPrecision() != getCnnLayer()->insData[i].lock()->getPrecision()) { +// isMixedPrecision = true; +// break; +// } +// } +// +// // MKLDNN doesn't support different precision on inputs so fallback on FP32 in such case +// if (isMixedPrecision) +// inputPrecision = Precision::FP32; +// +// // Concat node supports int8 implementations only for NHWC and NDHWC layouts +// if (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) { +// int ndims = getChildEdgeAt(0)->getDims().ndims(); +// if (ndims != 2 && ndims != 4 && ndims != 5) +// inputPrecision = Precision::FP32; +// } +// +// // MKLDNN supports only equal precisions for inputs and output +// outputPrecision = inputPrecision; +// +// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); +// auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); +// +// MKLDNNDims dstDims = getChildEdgeAt(0)->getDims(); +// InferenceEngine::LayerConfig config; +// config.dynBatchSupport = true; +// +// for (size_t i = 0; i < getParentEdges().size(); i++) { +// auto parentEdge = getParentEdgeAt(i); +// +// InferenceEngine::DataConfig dataConfig; +// dataConfig.inPlace = -1; +// dataConfig.constant = false; +// auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? parentEdge->getDims().ndims() == 2 ? memory::format_tag::nc : +// parentEdge->getDims().ndims() == 4 ? 
memory::format_tag::nhwc : +// memory::format_tag::ndhwc +// : memory::format_tag::any; +// +// dataConfig.desc = MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(parentEdge->getDims(), inputDataType, fmt)); +// config.inConfs.push_back(dataConfig); +// } +// +// auto dims = getChildEdgeAt(0)->getDims(); +// +// config.outConfs.resize(1); +// config.outConfs[0].inPlace = -1; +// config.outConfs[0].constant = false; +// if ((!isMixedPrecision && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) || axis != 1) { +// auto fmt = (inputPrecision == Precision::U8 || inputPrecision == Precision::I8) ? dims.ndims() == 2 ? memory::format_tag::nc : +// dims.ndims() == 4 ? memory::format_tag::nhwc : +// memory::format_tag::ndhwc +// : MKLDNNMemory::GetPlainFormat(dims); +// +// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc(MKLDNNMemoryDesc(dims, outputDataType, fmt)); +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, fmt); +// +// if (inputPrecision != Precision::U8 && inputPrecision != Precision::I8) { +// if (dims.ndims() == 4) { +// if (dims[1] % 8 == 0) { +// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( +// MKLDNNMemoryDesc(dims, outputDataType, memory::format_tag::nChw8c)); +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, memory::format_tag::nChw8c); +// +// if (dims[1] % 16 == 0) { +// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( +// MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nChw16c)); +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nChw16c); +// } +// } +// } else if (dims.ndims() == 5) { +// if (dims[1] % 8 == 0) { +// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( +// MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nCdhw8c)); +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nCdhw8c); +// +// if (dims[1] % 16 == 0) { +// config.outConfs[0].desc = MKLDNNExtensionUtils::getUninitTensorDesc( +// MKLDNNMemoryDesc(dims, outputDataType, mkldnn::memory::format_tag::nCdhw16c)); +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nCdhw16c); +// } +// } +// } +// } +// } +// +// if (axis != 1) +// return; +// +// auto numOfDim = static_cast(dstDims.ndims()); +// +// SizeVector order(numOfDim); +// SizeVector offsets(numOfDim, 0lu); +// size_t offset = (std::numeric_limits::max)(); +// for (size_t i = 0; i < numOfDim; i++) { +// order[i] = i; +// } +// +// if (outputPrecision == Precision::I8 || outputPrecision == Precision::U8) { +// if (numOfDim == 4) { +// // Here we assume NHWC layout (channels are the last) +// +// order = {0, 2, 3, 1}; +// offsets = {0, 0, 0, 0}; +// +// SizeVector blkDims = dstDims.ToSizeVector(); +// blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[1] }; +// +// SizeVector strides(numOfDim); +// strides.resize(numOfDim); +// // C is the last in NHWC, so all strides are max() +// for (size_t i = 0; i < numOfDim; i++) { +// strides[i] = (std::numeric_limits::max)(); +// } +// +// config.outConfs[0].desc = TensorDesc(outputPrecision, +// dstDims.ToSizeVector(), +// { blkDims, order, offset, offsets, strides }); +// for (size_t i = 0; i < getParentEdges().size(); i++) { +// auto parentEdge = getParentEdgeAt(i); +// +// SizeVector blkDims = parentEdge->getDims().ToSizeVector(); +// blkDims = { 
blkDims[0], blkDims[2], blkDims[3], blkDims[1] }; +// +// config.inConfs[i].inPlace = -1; // Change to 0 here if inplace concat is supported for NHWC in mkldnn +// +// config.inConfs[i].desc = TensorDesc(inputPrecision, parentEdge->getDims().ToSizeVector(), +// {blkDims, order, offset, offsets, strides}); +// } +// +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::nhwc); +// +// return; +// } else if (numOfDim == 5) { +// // Here we assume NDHWC layout (channels are the last) +// +// order = {0, 2, 3, 4, 1}; +// offsets = {0, 0, 0, 0, 0}; +// +// SizeVector blkDims = dstDims.ToSizeVector(); +// blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[4], blkDims[1] }; +// +// SizeVector strides(numOfDim); +// strides.resize(numOfDim); +// // C is the last in NDHWC, so all strides are max() +// for (size_t i = 0; i < numOfDim; i++) { +// strides[i] = (std::numeric_limits::max)(); +// } +// +// config.outConfs[0].desc = TensorDesc(outputPrecision, +// dstDims.ToSizeVector(), +// { blkDims, order, offset, offsets, strides }); +// for (size_t i = 0; i < getParentEdges().size(); i++) { +// auto parentEdge = getParentEdgeAt(i); +// +// SizeVector blkDims = parentEdge->getDims().ToSizeVector(); +// blkDims = { blkDims[0], blkDims[2], blkDims[3], blkDims[4], blkDims[1] }; +// +// config.inConfs[i].inPlace = -1; // Change to 0 here if inplace concat is supported for NDHWC in mkldnn +// +// config.inConfs[i].desc = TensorDesc(inputPrecision, parentEdge->getDims().ToSizeVector(), +// {blkDims, order, offset, offsets, strides}); +// } +// +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref, mkldnn::memory::format_tag::ndhwc); +// +// return; +// } +// } +// +// SizeVector strides(numOfDim); +// strides[numOfDim - 1] = 1; +// for (size_t i = 2; i <= numOfDim; i++) { +// if (numOfDim - i < axis) { +// strides[numOfDim - i] = (std::numeric_limits::max)(); +// } else { +// strides[numOfDim - i] = strides[numOfDim - i + 1] * dstDims[numOfDim - i + 1]; +// } +// } +// +// config.outConfs[0].desc = TensorDesc( +// MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType), +// dstDims.ToSizeVector(), +// {dstDims.ToSizeVector(), order, offset, offsets, strides}); +// for (size_t i = 0; i < getParentEdges().size(); i++) { +// auto parentEdge = getParentEdgeAt(i); +// config.inConfs[i].inPlace = 0; +// config.inConfs[i].desc = TensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(inputDataType), parentEdge->getDims().ToSizeVector(), +// {parentEdge->getDims().ToSizeVector(), order, offset, offsets, strides}); +// } +// +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, MKLDNNMemory::Convert(config.outConfs[0].desc.getLayout())); +// +// if (numOfDim == 4lu || numOfDim == 5lu) { +// size_t blkDimsLen = numOfDim + 1; +// order.resize(blkDimsLen); +// for (size_t i = 0; i < numOfDim; i++) { +// order[i] = i; +// } +// order[numOfDim] = 1lu; +// offsets = SizeVector(blkDimsLen, 0lu); +// +// // nChw8c, nChw16c, nCdhw8c, nCdhw16c +// for (size_t sizeS : {8lu, 16lu}) { +// SizeVector blkDims = dstDims.ToSizeVector(); +// if (blkDims[1] % sizeS) +// continue; +// blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 
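// Worked example for the nChw8c / nChw16c handling in the surrounding commented-out code (a sketch of the
// existing logic with illustrative numbers, not new behaviour): these blocked layouts split the channel
// dimension, so a concat output of shape {1, 32, 14, 14} becomes
//   sizeS = 8  -> blkDims = {1, 4, 14, 14, 8},  order = {0, 1, 2, 3, 1}
//   sizeS = 16 -> blkDims = {1, 2, 14, 14, 16}
// and the blocked-format descriptor is only added when every input's channel count is also divisible by
// sizeS (the canInplace check), since otherwise the inputs could not share the output's blocked strides.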
1lu : 0lu); +// blkDims.push_back(sizeS); +// +// strides.resize(blkDimsLen); +// strides[blkDimsLen - 1] = 1; +// for (size_t i = 2lu; i <= blkDimsLen; i++) { +// if (blkDimsLen - i < axis) { +// strides[blkDimsLen - i] = (std::numeric_limits::max)(); +// } else { +// strides[blkDimsLen - i] = strides[blkDimsLen - i + 1] * blkDims[blkDimsLen - i + 1]; +// } +// } +// config.outConfs[0].desc = TensorDesc( +// MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType), +// dstDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); +// +// bool canInplace = true; +// for (size_t i = 0lu; canInplace && i < getParentEdges().size(); i++) { +// auto parentEdge = getParentEdgeAt(i); +// blkDims = parentEdge->getDims().ToSizeVector(); +// if (blkDims[1] % sizeS) +// canInplace = false; +// +// blkDims[1] = blkDims[1] / sizeS + (blkDims[1] % sizeS ? 1lu : 0lu); +// blkDims.push_back(sizeS); +// config.inConfs[i].desc = TensorDesc(MKLDNNExtensionUtils::DataTypeToIEPrecision(inputDataType), parentEdge->getDims().ToSizeVector(), +// {blkDims, order, offset, offsets, strides}); +// } +// if (canInplace) { +// auto dstFormat = numOfDim == 4lu ? sizeS == 8lu ? mkldnn::memory::format_tag::nChw8c : mkldnn::memory::format_tag::nChw16c +// : sizeS == 8lu ? mkldnn::memory::format_tag::nCdhw8c : mkldnn::memory::format_tag::nCdhw16c; +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, dstFormat); +// } +// } +// } } void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h index d337232a5921e3..cd693b443cf78a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.h @@ -13,7 +13,7 @@ namespace MKLDNNPlugin { class MKLDNNConcatNode : public MKLDNNNode { public: - MKLDNNConcatNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNConcatNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNConcatNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index d6106b6816587c..e2f38b4778a795 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -9,94 +9,116 @@ #include "mkldnn_quantize_node.h" #include "mkldnn_pooling_node.h" #include "mkldnn_concat_node.h" -#include #include #include #include #include -#include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNConvolutionNode::MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache), withBiases(false), withSum(false), withDWConv(false), isDW(false), isMerged(false), - isGrouped(false), dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef), - groupNum(1lu), baseInputsNumber(1), eltwisePrecision(Precision::FP32) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); - }); - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, 
size_t idx) -> MKLDNNMemoryDesc { - if (!withBiases) - return MKLDNNMemoryDesc(); - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1)); - }); - - auto ws = layer->blobs.find("w-scale"); - if (ws != layer->blobs.end()) { - wScale = ws->second; - } +MKLDNNConvolutionNode::MKLDNNConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache), withBiases(false), withSum(false), withDWConv(false), isDW(false), isMerged(false), + isGrouped(false), /* dw_conv_oc(0), dw_conv_ih(0), dw_conv_iw(0), dw_conv_in_dt(memory::data_type::undef), */ + groupNum(1lu), eltwisePrecision(Precision::FP32) { + // TODO [NM]: do we still have networks that require this optimization? Preferably it should be removed. + isMerged = false; // (!getMergeWith().empty()); // grouped convolution was constructed from split->concat subgraph + isPrimitivesPriorityDefined = op->get_rt_info().count("PrimitivesPriority") != 0; + + auto convolutionOp = ngraph::as_type_ptr(op); + auto groupConvolutionOp = ngraph::as_type_ptr(op); + + if (convolutionOp) { + algorithm = ConvolutionCommon; + + groupNum = 1; + isGrouped = false; + + weightDims = convolutionOp->input_value(1).get_shape(); + + IC = weightDims[1]; + groupIC = IC; + groupOC = weightDims[0]; + + isDW = groupNum == groupOC && groupNum == groupIC; + + if (isMerged) { + groupNum = getMergeWith().size() + 1; + } - // Trying to find oi-scale - if (getCnnLayer()->type == "Convolution" && getCnnLayer()->precision == Precision::I8) { - auto ois = layer->blobs.find("oi-scale"); - if ((getCnnLayer()->outData[0]->getPrecision() == Precision::I8 || getCnnLayer()->outData[0]->getPrecision() == Precision::U8) - && ois == layer->blobs.end()) { - IE_THROW() << "Internal error of graph quantization - mismatch of intermediate scales and next layer type for convolution " - << getCnnLayer()->name; + withBiases = getOriginalInputsNumber() == 3; + biasesDims = { groupOC }; + + for (int i = 0; i < convolutionOp->get_strides().size(); i++) { + stride.push_back(static_cast(convolutionOp->get_strides()[i])); } - if (ois != layer->blobs.end()) { - // If we can find an oi-scale, then the next layer has to be an INT8.
- oScale = ois->second; + for (int i = 0; i < convolutionOp->get_dilations().size(); i++) { + dilation.push_back(static_cast(convolutionOp->get_dilations()[i]) - 1); } - } + paddingL = convolutionOp->get_pads_begin(); + paddingR = convolutionOp->get_pads_end(); + } else if (groupConvolutionOp) { + algorithm = ConvolutionGrouped; - if (getCnnLayer()->type == "Convolution") { - baseInputsNumber = getCnnLayer().get()->insData.size(); - } -} + groupNum = groupConvolutionOp->input_value(1).get_shape()[0]; + isGrouped = true; -mkldnn::memory::data_type MKLDNNConvolutionNode::precisionToDataType(InferenceEngine::Precision prec) { - // MKLDNN Plugin doesn't support U16 layout so upcast to FP32 in this case - if (prec == Precision::U16) - prec = Precision::FP32; + weightDims = groupConvolutionOp->input_value(1).get_shape(); - return MKLDNNExtensionUtils::IEPrecisionToDataType(prec); -} + IC = weightDims[2]; + groupIC = IC; + groupOC = weightDims[1]; -bool MKLDNNConvolutionNode::canBeExecutedInInt8() { - auto * convLayer = dynamic_cast(getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot convert convolution layer."; - - if (baseInputsNumber > 1) { - auto inputDataType = precisionToDataType(getCnnLayer()->insData[0].lock()->getPrecision()); - if (!inputZeroPoints.empty()) - inputDataType = memory::data_type::u8; - - auto weightsDataType = precisionToDataType(Precision::FP32); - if (baseInputsNumber > 1) { - weightsDataType = precisionToDataType(getCnnLayer()->insData[1].lock()->getPrecision()); - if (!weightsZeroPoints.empty()) - weightsDataType = memory::data_type::s8; - } + isDW = groupNum == groupOC && groupNum == groupIC; + + if (isMerged) { + groupNum = getMergeWith().size() + 1; + } - return (inputDataType == mkldnn_s8 || inputDataType == mkldnn_u8) && weightsDataType == mkldnn_s8; + withBiases = getOriginalInputsNumber() == 3; + biasesDims = {groupOC}; + + for (int i = 0; i < groupConvolutionOp->get_strides().size(); i++) { + stride.push_back(static_cast(groupConvolutionOp->get_strides()[i])); + } + for (int i = 0; i < groupConvolutionOp->get_dilations().size(); i++) { + dilation.push_back(static_cast(groupConvolutionOp->get_dilations()[i]) - 1); + } + paddingL = groupConvolutionOp->get_pads_begin(); + paddingR = groupConvolutionOp->get_pads_end(); } else { - return this->getCnnLayer()->precision == Precision::I8; + IE_THROW(NotImplemented) + << "CPU Convolution node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); } } -InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(MKLDNNEltwiseNode *eltwiseNode, int findex) { +bool MKLDNNConvolutionNode::canBeExecutedInInt8() { + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[0]); + if (!inputZeroPoints.empty()) + inputDataType = memory::data_type::u8; + + auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[1]); + if (!weightsZeroPoints.empty()) + weightsDataType = memory::data_type::s8; + + return one_of(inputDataType, memory::data_type::u8, memory::data_type::s8) && weightsDataType == memory::data_type::s8; +} + +InferenceEngine::Precision MKLDNNConvolutionNode::fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const { InferenceEngine::Precision eltwisePrecision; - auto parent0 = getCreatorLayer(eltwiseNode->getCnnLayer()->insData[0].lock()).lock(); - auto parent1 = getCreatorLayer(eltwiseNode->getCnnLayer()->insData[1].lock()).lock(); - auto fusedParent = findex 
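// Shape conventions assumed by the constructor above (standard ngraph opset1 layouts; the concrete numbers
// are only an illustration): Convolution weights come as {OC, IC, kH, kW}, e.g. {64, 3, 3, 3} gives
// groupOC = 64, IC = 3, groupNum = 1, while GroupConvolution weights come as {G, OC/G, IC/G, kH, kW},
// e.g. {32, 1, 1, 3, 3} gives groupNum = 32, groupOC = 1, IC = 1. Dilations are converted to oneDNN's
// zero-based convention, so an ngraph dilation of 1 (no dilation) is stored here as 0.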
!= 0 ? fusedWith[findex - 1].get()->getCnnLayer() : this->getCnnLayer(); - eltwisePrecision = fusedParent == parent0 ? eltwiseNode->getCnnLayer()->insData[1].lock()->getPrecision() : - eltwiseNode->getCnnLayer()->insData[0].lock()->getPrecision(); + int fusingPort = fusingNode->getFusingPort(); + if (fusingPort == 0) { + eltwisePrecision = fusingNode->getOriginalInputPrecisions()[1]; + } else if (fusingPort == 1) { + eltwisePrecision = fusingNode->getOriginalInputPrecisions()[0]; + } else { + IE_THROW() << "Cannot determine Eltwise post op precision for Convolution node with name '" << getName() << "'"; + } + return eltwisePrecision; } @@ -104,53 +126,41 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { if (!descs.empty()) return; - auto * convLayer = dynamic_cast(getCnnLayer().get()); - if (convLayer == nullptr) - IE_THROW() << "Cannot convert convolution layer."; - withSum = false; - int expectedInputEdgesNum = baseInputsNumber; + int expectedInputEdgesNum = static_cast(getOriginalInputsNumber()); for (int i = 0; i < fusedWith.size(); i++) { - auto *convolutionNode = dynamic_cast(fusedWith[i].get()); - if (convolutionNode) { - expectedInputEdgesNum += convolutionNode->getBaseIntputsNumber() - 1; + if (fusedWith[i]->getType() == Convolution) { + expectedInputEdgesNum += static_cast(fusedWith[i]->getOriginalInputsNumber()) - 1; } - auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); - if (eltwiseNode && eltwiseNode->isSum()) { + if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { withSum = true; expectedInputEdgesNum++; } } - auto inputDataType = precisionToDataType(getCnnLayer()->insData[0].lock()->getPrecision()); + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[0]); if (!inputZeroPoints.empty()) inputDataType = memory::data_type::u8; - auto outputDataType = precisionToDataType(getCnnLayer()->outData[0]->getPrecision()); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalOutputPrecisions()[0]); eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); - if (baseInputsNumber > 1) { - if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputDataType = precisionToDataType(lastFusedLayer->outData[0]->getPrecision()); - eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); - } - } + if (!fusedWith.empty()) { + outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(fusedWith[fusedWith.size() - 1]->getOriginalInputPrecisions()[0]); + eltwisePrecision = MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType); + } - // We need to make sure that convolution output and second input of fused Eltwise operation - // have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32. 
- if (outputDataType != memory::data_type::f32 && outputDataType != memory::data_type::bf16 && withSum) { - for (int i = 0; i < fusedWith.size(); i++) { - auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); - if (eltwiseNode && eltwiseNode->isSum()) { - eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i); - if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) { - eltwisePrecision = Precision::FP32; - outputDataType = memory::data_type::f32; - } - break; + // We need to make sure that convolution output and second input of fused Eltwise operation + // have equal precision sizes since they use the same physical memory. In case precisions are different we upscale to FP32. + if (outputDataType != memory::data_type::f32 && outputDataType != memory::data_type::bf16 && withSum) { + for (int i = 0; i < fusedWith.size(); i++) { + if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { + eltwisePrecision = fusedEltwisePrecision(fusedWith[i]); + if (MKLDNNExtensionUtils::DataTypeToIEPrecision(outputDataType).size() != eltwisePrecision.size()) { + eltwisePrecision = Precision::FP32; + outputDataType = memory::data_type::f32; } + break; } } } @@ -160,139 +170,72 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - if ((getParentEdgeAt(0)->getDims().ndims() < 4) || (getParentEdgeAt(0)->getDims().ndims() > 5)) { + int ndims = getParentEdgesAtPort(0)[0]->getDims().ndims(); + if ((ndims < 4) || (ndims > 5)) { IE_THROW() << "Convolution layer. Unsupported mode. Only 4D and 5D blobs are supported as input."; } - isMerged = (!getMergeWith().empty()); // grouped convolution was constructed from split->concat subgraph - isGrouped = convLayer->_group != 1; // group info available from IR if (isMerged && isGrouped) IE_THROW() << "Convolution initialization. Group splitted mode are used together with direct group specification."; - // default values. 
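// Illustrative example of the element-size check above (hypothetical precisions): if the convolution
// output is U8 (1 byte) but the second input of the fused Eltwise sum is FP32 (4 bytes), the sizes differ
// and both eltwisePrecision and outputDataType fall back to FP32. The fallback is needed because the sum
// post-op accumulates in place into the destination buffer, so the fused sum input and the convolution
// output must share one element size.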
Can be replaced in next steps - groupNum = convLayer->_group; - size_t IC = convLayer->input()->getDims()[1]; - size_t groupIC = IC; - size_t groupOC = convLayer->_out_depth; - - isDW = groupNum == groupOC && groupNum == groupIC; - - if (isMerged) { - groupNum = getMergeWith().size() + 1; - } - if (isGrouped) { - groupIC /= groupNum; - groupOC /= groupNum; - } - - weightDims.clear(); - weightDims.push_back(groupOC); - weightDims.push_back(groupIC); - for (int i = 1; i <= convLayer->_kernel.size(); i++) { - weightDims.push_back(convLayer->_kernel[convLayer->_kernel.size() - i]); - } - biasesDims = { groupOC * groupNum }; - - if (isGrouped || isMerged) weightDims.insert(weightDims.begin(), groupNum); - - withBiases = (convLayer->_biases != nullptr && convLayer->_biases->size() != 0) || baseInputsNumber == 3; - - if (baseInputsNumber == 1) { - internalBlobs.push_back(createInternalBlob(weightDims, true, isGrouped)); - - if (withBiases) { - internalBlobs.push_back(createInternalBlob(biasesDims, false)); - } - - Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second; - if (weights->getTensorDesc().getPrecision() == Precision::I8) { - // The weights blob has incorrect dims, so we have to fix it - TensorDesc wdesc = internalBlobs[0]->getTensorDesc(); - wdesc.setPrecision(Precision::I8); - InferenceEngine::TBlob::Ptr reshapedInt8Weights = - InferenceEngine::TBlob::Ptr( - new InferenceEngine::TBlob(wdesc, static_cast(weights->buffer()), weights->byteSize())); - - internalBlobs[0] = reshapedInt8Weights; - if (withBiases) { - Blob::Ptr biases = this->getCnnLayer()->blobs.find("biases")->second; - TensorDesc bdesc = internalBlobs[1]->getTensorDesc(); - bdesc.setPrecision(Precision::I32); - InferenceEngine::TBlob::Ptr reshapedInt32Biases = - InferenceEngine::TBlob::Ptr( - new InferenceEngine::TBlob(bdesc, static_cast(biases->buffer()), biases->byteSize())); - internalBlobs[1] = reshapedInt32Biases; - } - } - } - - invertVectorCopyUtoI(convLayer->_stride, stride); - for (int i = 1; i <= convLayer->_dilation.size(); i++) { - dilation.push_back(static_cast(convLayer->_dilation[convLayer->_dilation.size() - i]) - 1); - } - - auto allPads = getPaddings(*convLayer); - invertVectorCopyUtoI(allPads.begin, paddingL); - invertVectorCopyUtoI(allPads.end, paddingR); - MKLDNNDims weightsDims = MKLDNNDims(weightDims); withDWConv = isFusedWith(Convolution); - for (int i = 0; i < fusedWith.size(); i++) { - auto *convolutionNode = dynamic_cast(fusedWith[i].get()); - if (convolutionNode) { - auto *convLayer = reinterpret_cast(convolutionNode->getCnnLayer().get()); - dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2]; - dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1]; - dw_conv_oc = convLayer->_out_depth; - for (int j = 0; j < convLayer->_kernel.size(); j++) { - dw_conv_kernel.push_back(convLayer->_kernel[j]); - } - for (int j = 0; j < convLayer->_stride.size(); j++) { - dw_conv_strides.push_back(convLayer->_stride[j]); - } - - if (canBeExecutedInInt8()) { - if (i == 0) { - dw_conv_in_dt = precisionToDataType(getCnnLayer()->outData[0]->getPrecision()); - } else { - dw_conv_in_dt = precisionToDataType(fusedWith[i - 1].get()->getCnnLayer()->outData[0]->getPrecision()); - } - } else { - dw_conv_in_dt = memory::data_type::f32; - } - - for (int j = 0; j < paddingR.size(); j++) { - int with_group = (isGrouped || isMerged) ? 
1 : 0; - int krn = weightsDims[with_group + 2 + j]; - int src = getParentEdgeAt(0)->getDims()[2 + j]; - int dst = getChildEdgeAt(0)->getDims()[2 + j]; - - krn = (krn - 1)*(dilation[j] + 1) + 1; - int calc_dst = (src - krn + paddingL[j]) / stride[j] + 1; - paddingR[j] = (dst - calc_dst) * stride[j]; - } - } - } + // TODO: fusing with Convolution is not ported yet +// for (int i = 0; i < fusedWith.size(); i++) { +// auto *convolutionNode = dynamic_cast(fusedWith[i].get()); +// if (convolutionNode) { +// auto *convLayer = reinterpret_cast(convolutionNode->getCnnLayer().get()); +// dw_conv_ih = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 2]; +// dw_conv_iw = convolutionNode->inDims[0][convolutionNode->inDims[0].ndims() - 1]; +// dw_conv_oc = convLayer->_out_depth; +// for (int j = 0; j < convLayer->_kernel.size(); j++) { +// dw_conv_kernel.push_back(convLayer->_kernel[j]); +// } +// for (int j = 0; j < convLayer->_stride.size(); j++) { +// dw_conv_strides.push_back(convLayer->_stride[j]); +// } +// +// if (canBeExecutedInInt8()) { +// if (i == 0) { +// dw_conv_in_dt = precisionToDataType(getCnnLayer()->outData[0]->getPrecision()); +// } else { +// dw_conv_in_dt = precisionToDataType(fusedWith[i - 1].get()->getCnnLayer()->outData[0]->getPrecision()); +// } +// } else { +// dw_conv_in_dt = memory::data_type::f32; +// } +// +// for (int j = 0; j < paddingR.size(); j++) { +// int with_group = (isGrouped || isMerged) ? 1 : 0; +// int krn = weightsDims[with_group + 2 + j]; +// int src = getParentEdgeAt(0)->getDims()[2 + j]; +// int dst = getChildEdgeAt(0)->getDims()[2 + j]; +// +// krn = (krn - 1)*(dilation[j] + 1) + 1; +// int calc_dst = (src - krn + paddingL[j]) / stride[j] + 1; +// paddingR[j] = (dst - calc_dst) * stride[j]; +// } +// } +// } MKLDNNMemoryDesc in_candidate, out_candidate; if (canBeExecutedInInt8()) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, - getParentEdgeAt(0)->getDims().ndims() == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, ndims == 5 ? memory::format_tag::ndhwc + : memory::format_tag::nhwc); + out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, ndims == 5 ? memory::format_tag::ndhwc + : memory::format_tag::nhwc); createDescriptor({in_candidate}, {out_candidate}); } else { - inputDataType = (convLayer->input()->getPrecision() == Precision::BF16 - && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::data_type::bf16 : memory::data_type::f32; - outputDataType = (convLayer->outData[0]->getPrecision() == Precision::BF16 - && !(isGrouped && getParentEdgeAt(0)->getDims().ndims() == 5)) ? memory::data_type::bf16 : memory::data_type::f32; + inputDataType = (getOriginalInputPrecisions()[0] == Precision::BF16 && !(isGrouped && ndims == 5)) ? memory::data_type::bf16 + : memory::data_type::f32; + outputDataType = (getOriginalOutputPrecisions()[0] == Precision::BF16 && !(isGrouped && ndims == 5)) ? 
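// Worked example for the paddingR recalculation kept (commented out) in the dw-conv fusing block above,
// with illustrative numbers: src = 14, kernel = 3, dilation = 0, paddingL = 1, stride = 2 give an
// effective kernel of (3 - 1) * (0 + 1) + 1 = 3 and calc_dst = (14 - 3 + 1) / 2 + 1 = 7. If the graph
// actually expects dst = 8, paddingR is adjusted to (8 - 7) * 2 = 2 so that oneDNN reproduces the output
// size recorded in the model.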
memory::data_type::bf16 + : memory::data_type::f32; eltwisePrecision = Precision::FP32; for (int i = 0; i < fusedWith.size(); i++) { - auto *eltwiseNode = dynamic_cast(fusedWith[i].get()); - if (eltwiseNode && eltwiseNode->isSum()) { - eltwisePrecision = fusedEltwisePrecision(eltwiseNode, i); + if (fusedWith[i]->getAlgorithm() == EltwiseAdd) { + eltwisePrecision = fusedEltwisePrecision(fusedWith[i]); // TODO(amalyshe): there might be situation when convolution can be executed in BF16, // output is required in FP32 but eltwise inplace tensor would be in BF16 // currently we forcedly change output to the BF16 that will add reoreder after the node @@ -311,16 +254,13 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { eltwisePrecision = Precision::FP32; } - Layout layout = convLayer->input()->getLayout(); - - if (layout == NCHW || layout == NHWC) { + if (ndims == 4) { if (IC == 1 && groupOC == 1) { in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); createDescriptor({in_candidate}, {out_candidate}); } else if (IC == 3 || IC == 1) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - layout == NCHW ? memory::format_tag::nchw : memory::format_tag::nhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw16c); createDescriptor({in_candidate}, {out_candidate}); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nChw8c); @@ -334,19 +274,16 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { createDescriptor({in_candidate}, {out_candidate}); } - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - layout == NCHW ? memory::format_tag::nchw : memory::format_tag::nhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, - layout == NCHW ? memory::format_tag::nchw : memory::format_tag::nhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); + out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); createDescriptor({in_candidate}, {out_candidate}); - } else if (layout == NCDHW || layout == NDHWC) { + } else if (ndims == 5) { if (IC == 1 && groupOC == 1) { in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); createDescriptor({in_candidate}, {out_candidate}); } else if (IC == 3 || IC == 1) { - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - layout == NCDHW ? 
memory::format_tag::ncdhw : memory::format_tag::ndhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw16c); createDescriptor({in_candidate}, {out_candidate}); out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nCdhw8c); @@ -360,29 +297,26 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { createDescriptor({in_candidate}, {out_candidate}); } - in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, - layout == NCDHW ? memory::format_tag::ncdhw : memory::format_tag::ndhwc); - out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, - layout == NCDHW ? memory::format_tag::ncdhw : memory::format_tag::ndhwc); + in_candidate = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); + out_candidate = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); createDescriptor({in_candidate}, {out_candidate}); } } } void MKLDNNConvolutionNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) { - int blob_idx = 0; mkldnn::post_ops ops; for (auto &node : fusedWith) { if (node->getType() == Split || node->getType() == Concatenation) continue; - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode && eltwiseNode->isSum()) { - ops.append_sum(1.0, precisionToDataType(eltwisePrecision)); + if (node->getAlgorithm() == EltwiseAdd) { + ops.append_sum(1.0, MKLDNNExtensionUtils::IEPrecisionToDataType(eltwisePrecision)); continue; } + auto* eltwiseNode = dynamic_cast(node.get()); if (eltwiseNode) { eltwiseNode->appendPostOps(ops); continue; @@ -523,22 +457,23 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { config.inConfs.push_back(dataConfig); } - if (withDWConv && baseInputsNumber > 1) { - auto weightsPrc = precisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); - auto biasPrc = memory::data_type::f32; - - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - MKLDNNDims dwBiasesDims({dw_conv_oc}); - - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); - config.inConfs.push_back(dataConfig); - - dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); - config.inConfs.push_back(dataConfig); - } +// TODO: fusing with Convolution is not ported yet +// if (withDWConv) { +// auto weightsPrc = precisionToDataType(dw_conv_in_dt == mkldnn_u8 ? 
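// A minimal sketch of how the post_ops chain built in setPostOps is consumed (standard oneDNN usage;
// conv_desc and engine below are assumed names, not part of this patch):
//   mkldnn::post_ops ops;
//   ops.append_sum(1.0f, memory::data_type::f32);                          // fused Eltwise Add into dst
//   ops.append_eltwise(1.0f, mkldnn::algorithm::eltwise_relu, 0.0f, 0.0f); // fused activation
//   mkldnn::primitive_attr attr;
//   attr.set_post_ops(ops);
//   auto prim_desc = mkldnn::convolution_forward::primitive_desc(conv_desc, attr, engine);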
Precision::I8 : Precision::FP32); +// auto biasPrc = memory::data_type::f32; +// +// MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); +// MKLDNNDims dwBiasesDims({dw_conv_oc}); +// +// InferenceEngine::DataConfig dataConfig; +// dataConfig.inPlace = -1; +// dataConfig.constant = false; +// dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); +// config.inConfs.push_back(dataConfig); +// +// dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); +// config.inConfs.push_back(dataConfig); +// } for (size_t i = 0; i < descOutputNumbers(desc); i++) { InferenceEngine::DataConfig dataConfig; @@ -577,7 +512,6 @@ void MKLDNNConvolutionNode::createPrimitive() { mkldnn::primitive_attr attr; addZeroPoints(attr); setPostOps(attr, true); - addScaleToPrimitiveAttr(attr); auto prim_desc = createPrimitiveDescriptor(attr); @@ -585,11 +519,14 @@ void MKLDNNConvolutionNode::createPrimitive() { prim.reset(new convolution_forward(prim_desc)); auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + auto wei = getParentEdgesAtPort(1)[0]->getMemoryPtr()->GetPrimitive(); auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - if (withBiases) - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_BIAS, getBias()}, {DNNL_ARG_DST, dst}}; - else - primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, getWeights()}, {DNNL_ARG_DST, dst}}; + if (withBiases) { + auto bias = getParentEdgesAtPort(2)[0]->getMemoryPtr()->GetPrimitive(); + primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_BIAS, bias}, {DNNL_ARG_DST, dst}}; + } else { + primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, wei}, {DNNL_ARG_DST, dst}}; + } } bool MKLDNNConvolutionNode::created() const { @@ -600,36 +537,11 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector &outputDesc) { TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; - mkldnn::memory::data_type wdt = precisionToDataType(inDesc.getPrecision()); - mkldnn::memory::data_type bdt = precisionToDataType(inDesc.getPrecision()); - if (inDesc.getPrecision() == Precision::BF16) { - bdt = mkldnn::memory::data_type::f32; - } + memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); + memory::data_type bdt = memory::data_type::f32; if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { wdt = memory::data_type::s8; - bdt = baseInputsNumber == 3 ? precisionToDataType(getCnnLayer()->insData[2].lock()->getPrecision()) : memory::data_type::s32; - } - - if (baseInputsNumber == 1) { - Blob::Ptr weights = this->getCnnLayer()->blobs.find("weights")->second; - - if (weights->getTensorDesc().getPrecision() == Precision::I8) { - wdt = memory::data_type::s8; - bdt = memory::data_type::s32; - - Precision outPrec; - if (getCnnLayer()->outData[0]->getPrecision() == Precision::FP32) { - outPrec = Precision::FP32; - } else { - // define precision accordninly normalizer - // TODO(amalyshe) do we need to have separate flow for last in int8 chain or not? 
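// With weights and biases taken from input ports 1 and 2 instead of internal blobs, the argument map
// built in createPrimitive is presumably consumed by the base node at inference time roughly as
//   (*prim).execute(strm, primArgs);
// where DNNL_ARG_SRC / DNNL_ARG_WEIGHTS / DNNL_ARG_BIAS / DNNL_ARG_DST tell oneDNN which mkldnn::memory
// object plays which role for the convolution_forward primitive (a sketch, not code from this patch).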
- outPrec = outDesc.getPrecision(); - } - - inDesc = TensorDesc(inDesc.getPrecision(), inputDesc[0].getDims(), inputDesc[0].getBlockingDesc()); - outDesc = TensorDesc(outPrec, outputDesc[0].getDims(), outputDesc[0].getBlockingDesc()); - } } MKLDNNMemoryDesc in_candidate(inDesc); @@ -643,12 +555,11 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector algorithms; - // We cannot map wino_format on tensor descriptor for now - if (getBaseIntputsNumber() == 1) { - algorithms.push_back(algorithm::convolution_winograd); - } - algorithms.push_back(algorithm::convolution_direct); + std::vector algorithms; + + // TODO [NM]: We cannot map wino_format on tensor descriptor for now + // algorithms.push_back(algorithm::convolution_winograd); + algorithms.push_back(mkldnn::algorithm::convolution_direct); for (auto alg : algorithms) { try { @@ -690,27 +601,6 @@ void MKLDNNConvolutionNode::addZeroPoints(mkldnn::primitive_attr& attr) const { } } -void MKLDNNConvolutionNode::addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const { - if (wScale != nullptr) { - float* wScaleData = static_cast(wScale->buffer()); - - std::vector oScaleDataVector; - if (getCnnLayer()->precision == Precision::I8 && getCnnLayer()->outData[0]->getPrecision() != Precision::FP32) { - float *oScaleData = static_cast(oScale->buffer()); - - for (size_t c = 0; c < wScale->size(); c++) { - oScaleDataVector.push_back(wScaleData[c] / oScaleData[c]); - } - } else { - for (size_t c = 0; c < wScale->size(); c++) { - oScaleDataVector.push_back(wScaleData[c]); - } - } - - attr.set_output_scales(1 << 1 /*through C dim*/, oScaleDataVector); - } -} - void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& config) { auto* selectedPD = getSelectedPrimitiveDescriptor(); if (!selectedPD) { @@ -720,18 +610,15 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c // Strided blobs feature support. // Works only for FP32 convolutions for now. bool isStridedBlobsSupported = true; - for (auto &insData : getCnnLayer()->insData) { - if (insData.lock()->getPrecision() != InferenceEngine::Precision::FP32 - && insData.lock()->getPrecision() != InferenceEngine::Precision::BF16) { - isStridedBlobsSupported = false; - break; - } - } - // TODO: fix strided blobs feature support for dynamic weights - if (baseInputsNumber != 1) { + // TODO [NM]: refactor via using global executionPrecision. + if (canBeExecutedInInt8()) { isStridedBlobsSupported = false; } +// // TODO [NM]: fix strided blobs feature support for dynamic weights +// if (getOriginalInputsNumber() != 1) { +// isStridedBlobsSupported = false; +// } if (isStridedBlobsSupported) { createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc}); @@ -740,7 +627,6 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c mkldnn::primitive_attr attr; addZeroPoints(attr); setPostOps(attr); - addScaleToPrimitiveAttr(attr); InferenceEngine::LayerConfig rightConfig = selectedPD->getConfig(); size_t selected_count = 0; @@ -763,22 +649,23 @@ void MKLDNNConvolutionNode::initDescriptor(const InferenceEngine::LayerConfig& c cfg.inConfs.push_back(dataConfig); } - if (withDWConv && baseInputsNumber > 1) { - auto weightsPrc = precisionToDataType(dw_conv_in_dt == mkldnn_u8 ? 
Precision::I8 : Precision::FP32); - auto biasPrc = memory::data_type::f32; - - MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); - MKLDNNDims dwBiasesDims({dw_conv_oc}); - - InferenceEngine::DataConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); - cfg.inConfs.push_back(dataConfig); - - dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); - cfg.inConfs.push_back(dataConfig); - } + // TODO: fusing with Convolution is not ported yet +// if (withDWConv) { +// auto weightsPrc = precisionToDataType(dw_conv_in_dt == mkldnn_u8 ? Precision::I8 : Precision::FP32); +// auto biasPrc = memory::data_type::f32; +// +// MKLDNNDims dwWeightsDims({dw_conv_oc, (ptrdiff_t)1, (ptrdiff_t)1, dw_conv_kernel[Y_AXIS], dw_conv_kernel[X_AXIS]}); +// MKLDNNDims dwBiasesDims({dw_conv_oc}); +// +// InferenceEngine::DataConfig dataConfig; +// dataConfig.inPlace = -1; +// dataConfig.constant = false; +// dataConfig.desc = MKLDNNMemoryDesc(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); +// cfg.inConfs.push_back(dataConfig); +// +// dataConfig.desc = MKLDNNMemoryDesc(dwBiasesDims, biasPrc, memory::format_tag::x); +// cfg.inConfs.push_back(dataConfig); +// } for (size_t j = 0; j < descOutputNumbers(desc); j++) { InferenceEngine::DataConfig dataConfig; @@ -855,7 +742,7 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) { if (!inputMemoryFormatsFilter.empty() || !outputMemoryFormatsFilter.empty()) return false; - if (getCnnLayer()->params.find("PrimitivesPriority") != getCnnLayer()->params.end()) + if (isPrimitivesPriorityDefined) return false; // Here we check that we will not delete jit_planar_conv primitive by mistake. @@ -915,14 +802,6 @@ MKLDNNMemoryDesc MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_ite } } -const mkldnn::memory& MKLDNNConvolutionNode::getWeights() const { - return baseInputsNumber > 1 ? getParentEdgeAt(1)->getMemory().GetPrimitive() : internalBlobMemory[0]->GetPrimitive(); -} - -const mkldnn::memory& MKLDNNConvolutionNode::getBias() const { - return baseInputsNumber > 2 ? 
getParentEdgeAt(2)->getMemory().GetPrimitive() : internalBlobMemory[1]->GetPrimitive(); -} - InferenceEngine::Precision MKLDNNConvolutionNode::getRuntimePrecision() const { std::vector inputPrecisions; // Don't take bias precision into account diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 4c11b331f7582b..2dde482ee89fb7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -16,7 +16,7 @@ class MKLDNNEltwiseNode; class MKLDNNConvolutionNode : public MKLDNNNode { public: - MKLDNNConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNConvolutionNode() override = default; void getSupportedDescriptors() override; @@ -36,18 +36,11 @@ class MKLDNNConvolutionNode : public MKLDNNNode { void setPostOps(mkldnn::primitive_attr &attr, bool initWeights); size_t descInputNumbers(MKLDNNDescriptor desc) override { - return static_cast(baseInputsNumber); - } - - int getBaseIntputsNumber() { - return baseInputsNumber; + return static_cast(getOriginalInputsNumber()); } MKLDNNMemoryDesc getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - const mkldnn::memory& getWeights() const; - const mkldnn::memory& getBias() const; - bool canBeExecutedInInt8(); InferenceEngine::Precision getRuntimePrecision() const override; @@ -57,11 +50,9 @@ class MKLDNNConvolutionNode : public MKLDNNNode { std::vector outputCompensation; protected: - void addScaleToPrimitiveAttr(mkldnn::primitive_attr attr) const; - InferenceEngine::Precision fusedEltwisePrecision(MKLDNNEltwiseNode *eltwiseNode, int findex); + InferenceEngine::Precision fusedEltwisePrecision(const MKLDNNNodePtr& fusingNode) const; private: - mkldnn::memory::data_type precisionToDataType(InferenceEngine::Precision prec); void addZeroPoints(mkldnn::primitive_attr& attr) const; bool withBiases; @@ -70,6 +61,7 @@ class MKLDNNConvolutionNode : public MKLDNNNode { bool isDW; bool isMerged; bool isGrouped; + bool isPrimitivesPriorityDefined; std::vector stride; std::vector dilation; std::vector paddingL; @@ -77,18 +69,18 @@ class MKLDNNConvolutionNode : public MKLDNNNode { InferenceEngine::SizeVector weightDims; InferenceEngine::SizeVector biasesDims; - ptrdiff_t dw_conv_oc; - ptrdiff_t dw_conv_ih; - ptrdiff_t dw_conv_iw; - std::vector dw_conv_kernel; - std::vector dw_conv_strides; - mkldnn::memory::data_type dw_conv_in_dt; - std::vector PostOpsIntBlobMemory; - - InferenceEngine::Blob::Ptr wScale, oScale; +// TODO: fusing with Convolution is not ported yet +// ptrdiff_t dw_conv_oc; +// ptrdiff_t dw_conv_ih; +// ptrdiff_t dw_conv_iw; +// std::vector dw_conv_kernel; +// std::vector dw_conv_strides; +// mkldnn::memory::data_type dw_conv_in_dt; size_t groupNum; - int baseInputsNumber; + size_t IC; + size_t groupIC; + size_t groupOC; InferenceEngine::Precision eltwisePrecision; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 6f67d116ac1159..fb20aefe435608 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -13,8 +13,8 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using 
namespace InferenceEngine; -MKLDNNConvertNode::MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNConvertNode::MKLDNNConvertNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) {} void MKLDNNConvertNode::getSupportedDescriptors() { // if tensor descriptors are set via setDescs method we need to update the inDims/outDims data diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index 51313546b3f7e2..a36d5ea33bd0eb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -13,7 +13,7 @@ namespace MKLDNNPlugin { class MKLDNNConvertNode : public MKLDNNNode { public: - MKLDNNConvertNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNConvertNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNConvertNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp new file mode 100644 index 00000000000000..f7e534ac7a627c --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.cpp @@ -0,0 +1,198 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_crop_node.h" +#include +#include +#include +#include +#include +#include "ie_parallel.hpp" +#include "common/cpu_memcpy.h" +#include "utils/general_utils.h" + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +MKLDNNCropNode::MKLDNNCropNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) {} + +void MKLDNNCropNode::getSupportedDescriptors() { + IE_THROW() << "Not implemented"; + // TODO [NM]: reimplement w/o using CNNLayer +// CropLayer* cropLayer = dynamic_cast(getCnnLayer().get()); +// +// if (cropLayer == nullptr) +// IE_THROW() << "Cannot convert crop layer."; +// +// channelAxis = 1; +// if (getParentEdges().size() != 1 && getParentEdges().size() != 2) { +// IE_THROW() << "Incorrect number of input edges for layer " << getName(); +// } +// +// MKLDNNDims childDims = getChildEdgeAt(0)->getDims(); +// +// offsets.resize(static_cast(childDims.ndims())); // plus one dim for batch +// dims.resize(static_cast(childDims.ndims())); // plus one dim for batch +// for (int i = 0; i < childDims.ndims(); i++) +// dims[i] = childDims[i]; +// +// for (int i = 0; i < cropLayer->axis.size(); i++) { +// offsets[cropLayer->axis[i]] = cropLayer->offset[i]; +// } +// +// if (cropLayer->axis.size() == dims.size()) { +// for (size_t i = 0; i < cropLayer->axis.size(); i++) { +// if (cropLayer->axis[i] == 1) { +// channelAxis = static_cast(i); +// break; +// } +// } +// } +// +// if (!getChildEdges().size()) +// IE_THROW() << "Incorrect number of output edges for layer " << getName(); +} + +void MKLDNNCropNode::initSupportedPrimitiveDescriptors() { + IE_THROW() << "Not implemented"; + // TODO [NM]: reimplement w/o using CNNLayer +// if (!supportedPrimitiveDescriptors.empty()) +// return; +// +// InferenceEngine::Precision precision = 
getCnnLayer()->insData[0].lock()->getPrecision(); +// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +// precision = getCnnLayer()->outData[0]->getPrecision(); +// auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +// if (inputDataType != outputDataType) { +// outputDataType = inputDataType; // Crop doesn't convert precisions, only moves data +// } +// +// auto& inDims = getParentEdgeAt(0)->getDims(); +// if (inDims.ndims() != 2 && inDims.ndims() != 4 && inDims.ndims() != 5) { +// IE_THROW() << "Crop supports only 2d, 4d and 5d blobs."; +// } +// +// memory::format_tag fmt = memory::format_tag::undef; +// switch (inDims.ndims()) { +// case 2: fmt = memory::format_tag::nc; break; +// case 4: fmt = memory::format_tag::nchw; break; +// case 5: fmt = memory::format_tag::ncdhw; break; +// } +// +// InferenceEngine::LayerConfig config; +// config.dynBatchSupport = true; +// config.inConfs.resize(getParentEdges().size()); +// config.outConfs.resize(1); +// for (size_t i = 0; i < getParentEdges().size(); i++) { +// config.inConfs[i].inPlace = -1; +// config.inConfs[i].constant = i != 0; +// config.inConfs[i].desc = MKLDNNMemoryDesc(getParentEdgeAt(i)->getDims(), inputDataType, fmt); +// } +// config.outConfs[0].inPlace = -1; +// config.outConfs[0].constant = false; +// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt); +// +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, fmt); +// +// if ((inDims.ndims() == 4 || inDims.ndims() == 5) && channelAxis >= 0 && dims[channelAxis] % 8 == 0) { +// fmt = inDims.ndims() == 5 ? memory::format_tag::nCdhw8c : memory::format_tag::nChw8c; +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, fmt); +// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt); +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, fmt); +// if (dims[channelAxis] % 16 == 0) { +// fmt = inDims.ndims() == 5 ? 
memory::format_tag::nCdhw16c : memory::format_tag::nChw16c; +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, fmt); +// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, fmt); +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, fmt); +// } +// } +} + +void MKLDNNCropNode::createPrimitive() { + auto& dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); + auto& srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); + if (!dstMemPtr || !dstMemPtr->GetPrimitivePtr()) + IE_THROW() << "Destination memory didn't allocate."; + if (!srcMemPtr || !srcMemPtr->GetPrimitivePtr()) + IE_THROW() << "Input memory didn't allocate."; + if (getSelectedPrimitiveDescriptor() == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; +} + +void MKLDNNCropNode::execute(mkldnn::stream strm) { + auto& parentMem = getParentEdgeAt(0)->getMemory(); + + int m_block_size = 1; + if (!parentMem.GetDesc().isPlainFormat()) { + const auto &desc = parentMem.GetDescriptor().data; + const auto &blk = desc.format_desc.blocking; + IE_ASSERT(desc.format_kind == dnnl_blocked && + blk.inner_nblks == 1 && + blk.inner_idxs[0] == 1); + m_block_size = blk.inner_blks[0]; + } + const int m_inner_dim = dims[dims.size() - 1] * m_block_size; + + const auto &dst_mem = getChildEdgeAt(0)->getMemory(); + + const int dst_ndims = dst_mem.GetDesc().getDims().ndims(); + + // TODO: Rewrite it in general case. For every tensor + // and rank, without using letter N,C,D,H,W + const int OFFSET_N = (dst_ndims > 0) ? offsets[0] : 0; + const int OFFSET_C = (dst_ndims > 1) ? offsets[1] : 0; + const int OFFSET_D = (dst_ndims > 4) ? offsets[offsets.size() - 3] : 0; + const int OFFSET_H = (dst_ndims > 2) ? offsets[offsets.size() - 2] : 0; + const int OFFSET_W = (dst_ndims > 3) ? offsets[offsets.size() - 1] : 0; + + // TODO: Check applicability of dyn_batch_lim in early steps. + // crop of batch dimension doesn't support dyn batch. + const int ON = (dst_ndims > 0) ? std::min(batchToProcess(), getChildEdgeAt(0)->getDims()[0]) : 1; + const int OC = (dst_ndims > 1) ? dims[1] : 1; + const int OD = (dst_ndims > 4) ? dims[dims.size() - 3] : 1; + const int OH = (dst_ndims > 2) ? dims[dims.size() - 2] : 1; + const int OW = (dst_ndims > 3) ? dims[dims.size() - 1] : 1; + + memory::dims src_dims = parentMem.GetDims(); + int src_ndims = static_cast(src_dims.size()); + + const int IC = (src_ndims > 1) ? rnd_up(src_dims[1], m_block_size) : 1; + const int ID = (src_ndims > 4) ? src_dims[src_dims.size() - 3] : 1; + const int IH = (src_ndims > 2) ? src_dims[src_dims.size() - 2] : 1; + const int IW = (src_ndims > 3) ? 
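// Worked example for the copy loop just below (plain NCHW, m_block_size = 1; the numbers are only
// illustrative): src of shape 1x4x4x5 (IC = 4, IH = 4, IW = 5), dst of shape 1x2x2x3 and
// offsets = {0, 1, 1, 1}. For n = 0, c = 0, d = 0:
//   dst_ind = 0
//   src_ind = ((0 + 0) * 4 + (0 + 1)) * 4 * 5 + (1 * 5 + 1) = 26
// so the first output row copies src elements 26..28, then src_ind advances by IW = 5 and dst_ind by
// OW = 3, copying elements 31..33 for the second row, i.e. a window of source channel 1 starting at
// (h = 1, w = 1).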
src_dims[src_dims.size() - 1] : 1; + + const size_t itemSize = parentMem.GetDesc().GetElementSize(); + + const auto *src_data = reinterpret_cast(parentMem.GetPtr()); + auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemory().GetPtr()); + + if (OD == 1 && OH == 1 && OW == 1 && ID == 1 && IH == 1 && IW == 1) { + parallel_for(ON, [&](int n) { + cpu_memcpy(dst_data + itemSize * n * OC, src_data + itemSize *((n+OFFSET_N)*IC + OFFSET_C), OC * itemSize); + }); + } else { + parallel_for2d(ON, (OC / m_block_size), [&](int n, int c) { + for (int d = 0; d < OD; ++d) { + int dst_ind = (n*OC + c*m_block_size)*OD*OH*OW + d*m_block_size*OH*OW; + + int src_ind = ((n+OFFSET_N)*IC + (c*m_block_size+OFFSET_C))*ID*IH*IW + + ((d+OFFSET_D)*IH*IW + OFFSET_H*IW + OFFSET_W)*m_block_size; + + for (int h = 0; h < OH; ++h) { + cpu_memcpy(dst_data + itemSize * dst_ind, src_data + itemSize * src_ind, m_inner_dim * itemSize); + + src_ind += IW * m_block_size; + dst_ind += OW * m_block_size; + } + } + }); + } +} + +bool MKLDNNCropNode::created() const { + return getType() == Crop; +} +REG_MKLDNN_PRIM_FOR(MKLDNNCropNode, Crop); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.h new file mode 100644 index 00000000000000..56a9349888fe55 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_crop_node.h @@ -0,0 +1,35 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +namespace MKLDNNPlugin { + +class MKLDNNCropNode : public MKLDNNNode { +public: + MKLDNNCropNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNCropNode() override = default; + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override; + void execute(mkldnn::stream strm) override; + bool created() const override; + bool canBeInPlace() const override { + return false; + } + +private: + int channelAxis = 1; + std::vector offsets; + std::vector dims; +}; + +} // namespace MKLDNNPlugin + diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 2fbfedf75b81ef..d9408ba66bc2c6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -17,114 +17,115 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const InferenceEngine::CNNLayerPtr& layer, - const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(layer, eng, cache) { +MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptr& op, + const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); }); } void MKLDNNDeconvolutionNode::getSupportedDescriptors() { - if (!descs_fwd.empty() && !descs_bwd.empty()) - return; - - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) - precision = InferenceEngine::Precision::FP32; - auto inputDataType = 
MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - precision = getCnnLayer()->outData[0]->getPrecision(); - if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) - precision = InferenceEngine::Precision::FP32; - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16) - inputDataType = outputDataType = memory::data_type::bf16; - - if (getParentEdges().empty() || getParentEdges().size() > 3) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - auto * deconvLayer = dynamic_cast(getCnnLayer().get()); - if (deconvLayer == nullptr) - IE_THROW() << "Cannot convert deconvolution layer."; - if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) { - IE_THROW() << "Weights are empty for layer: " << deconvLayer->name - << " used in MKLDNN node: " << getName() << "\n" - << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" - << " to load them from .bin part of the IR"; - } - withGroups = (deconvLayer->_group > 1); - isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth && - deconvLayer->_group == deconvLayer->input()->getDims()[1]; - - bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3; - if (withBiases) { - Blob::Ptr biases; - - if (getParentEdges().size() == 3) { - auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer(); - if (biasLayer->type != "Const") - IE_THROW() << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases"; - biases = biasLayer->blobs["custom"]; - } else { - biases = deconvLayer->_biases; - } - - // WA: we add bias as depthwise post op - setBiasAsPostOp(biases); - } - - /* Original layout format for deconv weights is iohw (from Caffe). - * We specify oihw, but mean iohw, because there are no more - * suitable format in MKLDNN. - */ - SizeVector weightDims; - if (withGroups) { - weightDims = { - deconvLayer->_group, - deconvLayer->input()->getTensorDesc().getDims()[1] / deconvLayer->_group, - deconvLayer->_out_depth / deconvLayer->_group, - }; - groupNum = deconvLayer->_group; - } else { - weightDims = { - deconvLayer->input()->getTensorDesc().getDims()[1], - deconvLayer->_out_depth - }; - } - for (int i = 1; i <= deconvLayer->_kernel.size(); i++) { - weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]); - } - - if (getParentEdges().size() == 1) - internalBlobs.push_back(createInternalBlob(weightDims, true)); - - invertVectorCopyUtoI(deconvLayer->_stride, stride); - for (int i = 1; i <= deconvLayer->_dilation.size(); i++) { - dilation.push_back(static_cast(deconvLayer->_dilation[deconvLayer->_dilation.size() - i]) - 1); - } - auto allPads = getPaddings(*deconvLayer); - invertVectorCopyUtoI(allPads.begin, paddingL); - invertVectorCopyUtoI(allPads.end, paddingR); - - weightsDims = MKLDNNDims(weightDims); - - for (int i = 0; i < paddingR.size(); i++) { - int with_group = (withGroups) ? 
1 : 0; - int krn = weightsDims[with_group + 2 + i]; - int src = getChildEdgeAt(0)->getDims()[2 + i]; - int dst = getParentEdgeAt(0)->getDims()[2 + i]; - - krn = (krn - 1)*(dilation[i] + 1) + 1; - int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; - paddingR[i] = (dst - calc_dst) * stride[i]; - } - - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); - createDescriptor({in_candidate}, {out_candidate}); - } + IE_THROW() << "[NM] Not implemented"; +// if (!descs_fwd.empty() && !descs_bwd.empty()) +// return; +// +// InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); +// if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) +// precision = InferenceEngine::Precision::FP32; +// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +// precision = getCnnLayer()->outData[0]->getPrecision(); +// if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) +// precision = InferenceEngine::Precision::FP32; +// auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +// if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16) +// inputDataType = outputDataType = memory::data_type::bf16; +// +// if (getParentEdges().empty() || getParentEdges().size() > 3) +// IE_THROW() << "Incorrect number of input edges for layer " << getName(); +// if (getChildEdges().empty()) +// IE_THROW() << "Incorrect number of output edges for layer " << getName(); +// +// auto * deconvLayer = dynamic_cast(getCnnLayer().get()); +// if (deconvLayer == nullptr) +// IE_THROW() << "Cannot convert deconvolution layer."; +// if (getParentEdges().size() == 1 && deconvLayer->_weights == nullptr) { +// IE_THROW() << "Weights are empty for layer: " << deconvLayer->name +// << " used in MKLDNN node: " << getName() << "\n" +// << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" +// << " to load them from .bin part of the IR"; +// } +// withGroups = (deconvLayer->_group > 1); +// isDW = withGroups && deconvLayer->_group == deconvLayer->_out_depth && +// deconvLayer->_group == deconvLayer->input()->getDims()[1]; +// +// bool withBiases = (deconvLayer->_biases != nullptr && deconvLayer->_biases->size() != 0) || getParentEdges().size() == 3; +// if (withBiases) { +// Blob::Ptr biases; +// +// if (getParentEdges().size() == 3) { +// auto biasLayer = getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer(); +// if (biasLayer->type != "Const") +// IE_THROW() << "Deconvolution layer with name '" << getName() << "' doesn't support non-constant biases"; +// biases = biasLayer->blobs["custom"]; +// } else { +// biases = deconvLayer->_biases; +// } +// +// // WA: we add bias as depthwise post op +// setBiasAsPostOp(biases); +// } +// +// /* Original layout format for deconv weights is iohw (from Caffe). +// * We specify oihw, but mean iohw, because there are no more +// * suitable format in MKLDNN. 
+// */ +// SizeVector weightDims; +// if (withGroups) { +// weightDims = { +// deconvLayer->_group, +// deconvLayer->input()->getTensorDesc().getDims()[1] / deconvLayer->_group, +// deconvLayer->_out_depth / deconvLayer->_group, +// }; +// groupNum = deconvLayer->_group; +// } else { +// weightDims = { +// deconvLayer->input()->getTensorDesc().getDims()[1], +// deconvLayer->_out_depth +// }; +// } +// for (int i = 1; i <= deconvLayer->_kernel.size(); i++) { +// weightDims.push_back(deconvLayer->_kernel[deconvLayer->_kernel.size() - i]); +// } +// +// if (getParentEdges().size() == 1) +// internalBlobs.push_back(createInternalBlob(weightDims, true)); +// +// invertVectorCopyUtoI(deconvLayer->_stride, stride); +// for (int i = 1; i <= deconvLayer->_dilation.size(); i++) { +// dilation.push_back(static_cast(deconvLayer->_dilation[deconvLayer->_dilation.size() - i]) - 1); +// } +// auto allPads = getPaddings(*deconvLayer); +// invertVectorCopyUtoI(allPads.begin, paddingL); +// invertVectorCopyUtoI(allPads.end, paddingR); +// +// weightsDims = MKLDNNDims(weightDims); +// +// for (int i = 0; i < paddingR.size(); i++) { +// int with_group = (withGroups) ? 1 : 0; +// int krn = weightsDims[with_group + 2 + i]; +// int src = getChildEdgeAt(0)->getDims()[2 + i]; +// int dst = getParentEdgeAt(0)->getDims()[2 + i]; +// +// krn = (krn - 1)*(dilation[i] + 1) + 1; +// int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; +// paddingR[i] = (dst - calc_dst) * stride[i]; +// } +// +// for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { +// MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); +// MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); +// createDescriptor({in_candidate}, {out_candidate}); +// } } void MKLDNNDeconvolutionNode::setBiasAsPostOp(const InferenceEngine::Blob::Ptr& biases) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index 8caf837357927a..def36400e9801e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNDeconvolutionNode : public MKLDNNNode { public: - MKLDNNDeconvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNDeconvolutionNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp index 8b2ea38864e14e..5b6e8548ccc569 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp @@ -741,63 +741,65 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ } }; -MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, +MKLDNNDeformableConvolutionNode::MKLDNNDeformableConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} + : MKLDNNNode(op, eng, cache) {} void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - - auto * 
defConvLayer = dynamic_cast(getCnnLayer().get()); - if (defConvLayer == nullptr) - IE_THROW() << "Cannot convert deformable convolution layer."; - - std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' "; - - if (getParentEdges().size() != 3) - IE_THROW() << errorPrefix << "has incorrect number of input edges"; - if (getChildEdges().empty()) - IE_THROW() << errorPrefix << "has incorrect number of output edges"; - - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << "Deformable convolution layer. Unsupported mode. Only 4D blobs are supported as input."; - } - - if (getParentEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); - } - - if (getParentEdgeAt(1)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); - } - - if (getParentEdgeAt(2)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getDims().ndims(); - } - - if (getChildEdgeAt(0)->getDims().ndims() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); - } - - bool isMerged = (!getMergeWith().empty()); - bool isGrouped = defConvLayer->_group != 1; - if (isMerged && isGrouped) - IE_THROW() << errorPrefix << "cannot be initialized: group splitted mode are used together with direct group specification."; - - group = defConvLayer->_group; - if (isMerged) { - group = getMergeWith().size() + 1; - } - - invertVectorCopyUtoI(defConvLayer->_stride, stride); - deformable_group = defConvLayer->_deformable_group; - for (int i = 1; i <= defConvLayer->_dilation.size(); i++) { - dilation.push_back(static_cast(defConvLayer->_dilation[defConvLayer->_dilation.size() - i] - 1)); - } - - auto allPads = getPaddings(*defConvLayer); - invertVectorCopyUtoI(allPads.begin, paddingL); + IE_THROW() << "Not implemented"; + // TODO [NM]: reimplement w/o using CNNLayer +// if (!descs.empty()) +// return; +// +// auto * defConvLayer = dynamic_cast(getCnnLayer().get()); +// if (defConvLayer == nullptr) +// IE_THROW() << "Cannot convert deformable convolution layer."; +// +// std::string errorPrefix = "DeformableConvolution layer with name '" + getName() + "' "; +// +// if (getParentEdges().size() != 3) +// IE_THROW() << errorPrefix << "has incorrect number of input edges"; +// if (getChildEdges().empty()) +// IE_THROW() << errorPrefix << "has incorrect number of output edges"; +// +// if (getParentEdgeAt(0)->getDims().ndims() != 4) { +// IE_THROW() << "Deformable convolution layer. Unsupported mode. 
Only 4D blobs are supported as input."; +// } +// +// if (getParentEdgeAt(0)->getDims().ndims() != 4) { +// IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getDims().ndims(); +// } +// +// if (getParentEdgeAt(1)->getDims().ndims() != 4) { +// IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getDims().ndims(); +// } +// +// if (getParentEdgeAt(2)->getDims().ndims() != 4) { +// IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getDims().ndims(); +// } +// +// if (getChildEdgeAt(0)->getDims().ndims() != 4) { +// IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getDims().ndims(); +// } +// +// bool isMerged = (!getMergeWith().empty()); +// bool isGrouped = defConvLayer->_group != 1; +// if (isMerged && isGrouped) +// IE_THROW() << errorPrefix << "cannot be initialized: group splitted mode are used together with direct group specification."; +// +// group = defConvLayer->_group; +// if (isMerged) { +// group = getMergeWith().size() + 1; +// } +// +// invertVectorCopyUtoI(defConvLayer->_stride, stride); +// deformable_group = defConvLayer->_deformable_group; +// for (int i = 1; i <= defConvLayer->_dilation.size(); i++) { +// dilation.push_back(static_cast(defConvLayer->_dilation[defConvLayer->_dilation.size() - i] - 1)); +// } +// +// auto allPads = getPaddings(*defConvLayer); +// invertVectorCopyUtoI(allPads.begin, paddingL); } void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h index 9a1379ab2ebbb8..6d2f863ed8dcc7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.h @@ -65,7 +65,7 @@ struct jit_uni_def_conv_kernel { class MKLDNNDeformableConvolutionNode : public MKLDNNNode { public: - MKLDNNDeformableConvolutionNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNDeformableConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNDeformableConvolutionNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 3f7b02b9a4c3d3..7a5a02da8d680d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -4,7 +4,6 @@ #include "mkldnn_eltwise_node.h" -#include #include #include @@ -21,6 +20,9 @@ #include "emitters/jit_mkldnn_emitters.hpp" #include "emitters/jit_bf16_emitters.hpp" #include +#include "utils/general_utils.h" + +#include "ngraph/ngraph.hpp" #include #include @@ -360,54 +362,52 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu }; std::set get_supported_precisions(MKLDNNNode& node) { - auto& eltwiseNode = dynamic_cast(node); - std::set precisions; - OV_SWITCH(MKLDNNPlugin, SupportedPrecisions, precisions, eltwiseNode.getOpType(), - OV_CASE(Relu, jit_mkldnn_aux_emitter), - OV_CASE(Gelu, jit_mkldnn_aux_emitter), - OV_CASE(Elu, jit_mkldnn_aux_emitter), - OV_CASE(Tanh, jit_mkldnn_aux_emitter), - OV_CASE(Logistic, jit_mkldnn_aux_emitter), - OV_CASE(Square, 
jit_mkldnn_aux_emitter), - OV_CASE(Abs, jit_mkldnn_aux_emitter), - OV_CASE(Sqrt, jit_mkldnn_aux_emitter), - OV_CASE(Linear, jit_mkldnn_aux_emitter), - OV_CASE(BoundedRelu, jit_mkldnn_aux_emitter), - OV_CASE(SoftRelu, jit_mkldnn_aux_emitter), - OV_CASE(Relu6, jit_mkldnn_aux_emitter), - OV_CASE(Exp, jit_mkldnn_aux_emitter), - OV_CASE(Clamp, jit_mkldnn_aux_emitter), - OV_CASE(Swish, jit_mkldnn_aux_emitter), - OV_CASE(Hswish, jit_mkldnn_aux_emitter), - OV_CASE(Mish, jit_mkldnn_aux_emitter), - OV_CASE(Hsigmoid, jit_mkldnn_aux_emitter), - OV_CASE(Round, jit_mkldnn_aux_emitter), - OV_CASE(Add, jit_add_emitter), - OV_CASE(MulAdd, jit_mul_add_emitter), - OV_CASE(Subtract, jit_subtract_emitter), - OV_CASE(Multiply, jit_multiply_emitter), - OV_CASE(Divide, jit_divide_emitter), - OV_CASE(FloorMod, jit_floor_mod_emitter), - OV_CASE(Mod, jit_mod_emitter), - OV_CASE(Maximum, jit_maximum_emitter), - OV_CASE(Minimum, jit_minimum_emitter), - OV_CASE(SquaredDifference, jit_squared_difference_emitter), - OV_CASE(PowerDynamic, jit_power_dynamic_emitter), - OV_CASE(Equal, jit_equal_emitter), - OV_CASE(NotEqual, jit_not_equal_emitter), - OV_CASE(Greater, jit_greater_emitter), - OV_CASE(GreaterEqual, jit_greater_equal_emitter), - OV_CASE(Less, jit_less_emitter), - OV_CASE(LessEqual, jit_less_equal_emitter), - OV_CASE(LogicalAnd, jit_logical_and_emitter), - OV_CASE(LogicalOr, jit_logical_or_emitter), - OV_CASE(LogicalXor, jit_logical_xor_emitter), - OV_CASE(LogicalNot, jit_logical_not_emitter), - OV_CASE(PowerStatic, jit_power_static_emitter), - OV_CASE(Prelu, jit_prelu_emitter), - OV_CASE(Erf, jit_erf_emitter)); + OV_SWITCH(MKLDNNPlugin, SupportedPrecisions, precisions, node.getAlgorithm(), + OV_CASE(EltwiseRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseGelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseElu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseTanh, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSigmoid, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSquare, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseAbs, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSqrt, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseLinear, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseBoundedRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSoftRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRelu6, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseExp, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseClamp, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSwish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseHswish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseMish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseHsigmoid, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRoundHalfToEven, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRoundHalfAwayFromZero, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseAdd, jit_add_emitter), + OV_CASE(EltwiseMulAdd, jit_mul_add_emitter), + OV_CASE(EltwiseSubtract, jit_subtract_emitter), + OV_CASE(EltwiseMultiply, jit_multiply_emitter), + OV_CASE(EltwiseDivide, jit_divide_emitter), + OV_CASE(EltwiseFloorMod, jit_floor_mod_emitter), + OV_CASE(EltwiseMod, jit_mod_emitter), + OV_CASE(EltwiseMaximum, jit_maximum_emitter), + OV_CASE(EltwiseMinimum, jit_minimum_emitter), + OV_CASE(EltwiseSquaredDifference, jit_squared_difference_emitter), + OV_CASE(EltwisePowerDynamic, jit_power_dynamic_emitter), + OV_CASE(EltwiseEqual, jit_equal_emitter), + OV_CASE(EltwiseNotEqual, jit_not_equal_emitter), + OV_CASE(EltwiseGreater, jit_greater_emitter), + OV_CASE(EltwiseGreaterEqual, jit_greater_equal_emitter), + OV_CASE(EltwiseLess, jit_less_emitter), + OV_CASE(EltwiseLessEqual, jit_less_equal_emitter), + 
OV_CASE(EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(EltwiseLogicalOr, jit_logical_or_emitter), + OV_CASE(EltwiseLogicalXor, jit_logical_xor_emitter), + OV_CASE(EltwiseLogicalNot, jit_logical_not_emitter), + OV_CASE(EltwisePowerStatic, jit_power_static_emitter), + OV_CASE(EltwisePrelu, jit_prelu_emitter)); if (precisions.empty()) IE_THROW() << "Unsupported operation type for Eltwise emitter"; @@ -426,50 +426,50 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu exec_prec }; - OV_SWITCH(MKLDNNPlugin, EltwiseEmitter, ctx, eltwiseNode.getOpType(), - OV_CASE(Relu, jit_mkldnn_aux_emitter), - OV_CASE(Gelu, jit_mkldnn_aux_emitter), - OV_CASE(Elu, jit_mkldnn_aux_emitter), - OV_CASE(Tanh, jit_mkldnn_aux_emitter), - OV_CASE(Logistic, jit_mkldnn_aux_emitter), - OV_CASE(Square, jit_mkldnn_aux_emitter), - OV_CASE(Abs, jit_mkldnn_aux_emitter), - OV_CASE(Sqrt, jit_mkldnn_aux_emitter), - OV_CASE(Linear, jit_mkldnn_aux_emitter), - OV_CASE(BoundedRelu, jit_mkldnn_aux_emitter), - OV_CASE(SoftRelu, jit_mkldnn_aux_emitter), - OV_CASE(Relu6, jit_mkldnn_aux_emitter), - OV_CASE(Exp, jit_mkldnn_aux_emitter), - OV_CASE(Clamp, jit_mkldnn_aux_emitter), - OV_CASE(Swish, jit_mkldnn_aux_emitter), - OV_CASE(Hswish, jit_mkldnn_aux_emitter), - OV_CASE(Mish, jit_mkldnn_aux_emitter), - OV_CASE(Hsigmoid, jit_mkldnn_aux_emitter), - OV_CASE(Round, jit_mkldnn_aux_emitter), - OV_CASE(Add, jit_add_emitter), - OV_CASE(MulAdd, jit_mul_add_emitter), - OV_CASE(Subtract, jit_subtract_emitter), - OV_CASE(Multiply, jit_multiply_emitter), - OV_CASE(Divide, jit_divide_emitter), - OV_CASE(FloorMod, jit_floor_mod_emitter), - OV_CASE(Mod, jit_mod_emitter), - OV_CASE(Maximum, jit_maximum_emitter), - OV_CASE(Minimum, jit_minimum_emitter), - OV_CASE(SquaredDifference, jit_squared_difference_emitter), - OV_CASE(PowerDynamic, jit_power_dynamic_emitter), - OV_CASE(Equal, jit_equal_emitter), - OV_CASE(NotEqual, jit_not_equal_emitter), - OV_CASE(Greater, jit_greater_emitter), - OV_CASE(GreaterEqual, jit_greater_equal_emitter), - OV_CASE(Less, jit_less_emitter), - OV_CASE(LessEqual, jit_less_equal_emitter), - OV_CASE(LogicalAnd, jit_logical_and_emitter), - OV_CASE(LogicalOr, jit_logical_or_emitter), - OV_CASE(LogicalXor, jit_logical_xor_emitter), - OV_CASE(LogicalNot, jit_logical_not_emitter), - OV_CASE(PowerStatic, jit_power_static_emitter), - OV_CASE(Prelu, jit_prelu_emitter), - OV_CASE(Erf, jit_erf_emitter)); + OV_SWITCH(MKLDNNPlugin, EltwiseEmitter, ctx, eltwiseNode.getAlgorithm(), + OV_CASE(EltwiseRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseGelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseElu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseTanh, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSigmoid, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSquare, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseAbs, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSqrt, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseLinear, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseBoundedRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSoftRelu, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRelu6, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseExp, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseClamp, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseSwish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseHswish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseMish, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseHsigmoid, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRoundHalfToEven, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseRoundHalfAwayFromZero, jit_mkldnn_aux_emitter), + OV_CASE(EltwiseAdd, 
jit_add_emitter), + OV_CASE(EltwiseMulAdd, jit_mul_add_emitter), + OV_CASE(EltwiseSubtract, jit_subtract_emitter), + OV_CASE(EltwiseMultiply, jit_multiply_emitter), + OV_CASE(EltwiseDivide, jit_divide_emitter), + OV_CASE(EltwiseFloorMod, jit_floor_mod_emitter), + OV_CASE(EltwiseMod, jit_mod_emitter), + OV_CASE(EltwiseMaximum, jit_maximum_emitter), + OV_CASE(EltwiseMinimum, jit_minimum_emitter), + OV_CASE(EltwiseSquaredDifference, jit_squared_difference_emitter), + OV_CASE(EltwisePowerDynamic, jit_power_dynamic_emitter), + OV_CASE(EltwiseEqual, jit_equal_emitter), + OV_CASE(EltwiseNotEqual, jit_not_equal_emitter), + OV_CASE(EltwiseGreater, jit_greater_emitter), + OV_CASE(EltwiseGreaterEqual, jit_greater_equal_emitter), + OV_CASE(EltwiseLess, jit_less_emitter), + OV_CASE(EltwiseLessEqual, jit_less_equal_emitter), + OV_CASE(EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(EltwiseLogicalOr, jit_logical_or_emitter), + OV_CASE(EltwiseLogicalXor, jit_logical_xor_emitter), + OV_CASE(EltwiseLogicalNot, jit_logical_not_emitter), + OV_CASE(EltwisePowerStatic, jit_power_static_emitter), + OV_CASE(EltwisePrelu, jit_prelu_emitter)); if (!ctx.emitter) IE_THROW() << "Unsupported operation type for Eltwise emitter"; @@ -915,105 +915,77 @@ MKLDNNEltwiseNode::initializers = { }}, }; -void MKLDNNEltwiseNode::init() { - InferenceEngine::details::CaselessEq comparator; - auto layerType = getCnnLayer().get()->type; - - auto * eltwiseLayer = dynamic_cast(getCnnLayer().get()); - if (eltwiseLayer) { - if (!eltwiseLayer->coeff.empty()) - IE_THROW() << "Eltwise node with name `" << getName() << "` doesn't support input coefficients."; - - switch (eltwiseLayer->_operation) { - case EltwiseLayer::Sum: eltwiseOp = Add; break; - case EltwiseLayer::Prod: eltwiseOp = Multiply; break; - case EltwiseLayer::Max: eltwiseOp = Maximum; break; - case EltwiseLayer::Sub: eltwiseOp = Subtract; break; - case EltwiseLayer::Min: eltwiseOp = Minimum; break; - case EltwiseLayer::Div: eltwiseOp = Divide; break; - case EltwiseLayer::Squared_diff: eltwiseOp = SquaredDifference; break; - case EltwiseLayer::Floor_mod: eltwiseOp = FloorMod; break; - case EltwiseLayer::Pow: eltwiseOp = PowerDynamic; break; - case EltwiseLayer::Equal: eltwiseOp = Equal; break; - case EltwiseLayer::Not_equal: eltwiseOp = NotEqual; break; - case EltwiseLayer::Greater: eltwiseOp = Greater; break; - case EltwiseLayer::Greater_equal: eltwiseOp = GreaterEqual; break; - case EltwiseLayer::Less: eltwiseOp = Less; break; - case EltwiseLayer::Less_equal: eltwiseOp = LessEqual; break; - case EltwiseLayer::Logical_AND: eltwiseOp = LogicalAnd; break; - case EltwiseLayer::Logical_OR: eltwiseOp = LogicalOr; break; - case EltwiseLayer::Logical_XOR: eltwiseOp = LogicalXor; break; - default: IE_THROW() << "Unsupported algorithm for Eltwise node with name `" << getName() << "`."; - } - } else if (comparator(layerType, "mod")) { - eltwiseOp = Mod; - } else if (comparator(layerType, "power")) { - eltwiseOp = PowerStatic; - - auto *powerLayer = dynamic_cast(getCnnLayer().get()); - if (powerLayer == nullptr) - IE_THROW() << "Cannot convert power layer."; - - alpha = powerLayer->power; - beta = powerLayer->scale; - gamma = powerLayer->offset; - } else if (comparator(layerType, "scaleshift")) { - if (getCnnLayer().get()->blobs.size() == 2) { - eltwiseOp = MulAdd; - eltwiseAlgorithm = mkldnn::algorithm::depthwise_scale_shift; + node.alpha = alphaConstOp->cast_vector()[0]; } else { - eltwiseOp = Multiply; + node.alpha = 1.0f; + } + node.algorithm = EltwiseSwish; + 
node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_swish; + }}, + {ngraph::op::v4::HSwish::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseHswish; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_hswish; + }}, + {ngraph::op::v4::Mish::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseMish; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_mish; + }}, + {ngraph::op::v5::HSigmoid::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwiseHsigmoid; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_hsigmoid; + }}, + {ngraph::op::v5::Round::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + auto roundOp = getNgraphOpAs(op); + + switch (roundOp->get_mode()) { + case ngraph::op::v5::Round::RoundMode::HALF_TO_EVEN: + node.algorithm = EltwiseRoundHalfToEven; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_round_half_to_even; + break; + case ngraph::op::v5::Round::RoundMode::HALF_AWAY_FROM_ZERO: + node.algorithm = EltwiseRoundHalfAwayFromZero; + node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_round_half_away_from_zero; + break; } - } else if (comparator(layerType, "prelu")) { - eltwiseOp = Prelu; - eltwiseAlgorithm = mkldnn::algorithm::depthwise_prelu; - } else if (comparator(layerType, "activation") && initializers.find(getCnnLayer().get()->GetParamAsString("type")) != initializers.end()) { - initializers[getCnnLayer().get()->GetParamAsString("type")](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta); - } else if (comparator(layerType, "relu") || - comparator(layerType, "gelu") || - comparator(layerType, "elu") || - comparator(layerType, "sigmoid") || - comparator(layerType, "logistic") || - comparator(layerType, "tanh") || - comparator(layerType, "relu6") || - comparator(layerType, "exp") || - comparator(layerType, "not") || - comparator(layerType, "clamp") || - comparator(layerType, "swish") || - comparator(layerType, "hswish") || - comparator(layerType, "mish") || - comparator(layerType, "hsigmoid") || - comparator(layerType, "round")) { - initializers[layerType](getCnnLayer().get(), eltwiseOp, eltwiseAlgorithm, alpha, beta); - } else if (comparator(layerType, "erf")) { - eltwiseOp = Erf; + }}, + {ngraph::op::v0::PRelu::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { + node.algorithm = EltwisePrelu; + }}, + // TODO [NM]: we need to introduce custom MulAdd operation +// {ngraph::op::v0::MulAdd::type_info, [](const std::shared_ptr& op, MKLDNNEltwiseNode& node) { +// node.algorithm = EltwiseMish; +// node.mkldnnAlgorithm = mkldnn::algorithm::eltwise_mish; +// }}, +}; + +MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + if (initializers.find(op->get_type_info()) != initializers.end()) { + initializers[op->get_type_info()](op, *this); } else { - IE_THROW() << "Unsupported algorithm for Eltwise node with name `" << getName() << "`."; + IE_THROW(NotImplemented) + << "CPU Eltwise node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); } } size_t MKLDNNEltwiseNode::getOpInputsNum() const { - switch (getOpType()) { - case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: case PowerStatic: - case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: - case Mish: case Hsigmoid: 
case Round: - case LogicalNot: - case Erf: + switch (getAlgorithm()) { + case EltwiseRelu: case EltwiseGelu: case EltwiseElu: case EltwiseTanh: case EltwiseSigmoid: case EltwiseSquare: case EltwiseAbs: case EltwiseSqrt: + case EltwisePowerStatic: case EltwiseLinear: case EltwiseBoundedRelu: case EltwiseSoftRelu: case EltwiseRelu6: case EltwiseExp: case EltwiseClamp: + case EltwiseSwish: case EltwiseHswish: case EltwiseMish: case EltwiseHsigmoid: case EltwiseRoundHalfToEven: case EltwiseRoundHalfAwayFromZero: + case EltwiseLogicalNot: return 1; - case Add: case Subtract: case Multiply: case Divide: case FloorMod: case Mod: case Maximum: case Minimum: case SquaredDifference: - case PowerDynamic: case Equal: case NotEqual: case Greater: case GreaterEqual: case Less: case LessEqual: case LogicalAnd: - case LogicalOr: case LogicalXor: case Prelu: + case EltwiseAdd: case EltwiseSubtract: case EltwiseMultiply: case EltwiseDivide: case EltwiseFloorMod: case EltwiseMod: case EltwiseMaximum: + case EltwiseMinimum: case EltwiseSquaredDifference: case EltwisePowerDynamic: case EltwiseEqual: case EltwiseNotEqual: case EltwiseGreater: + case EltwiseGreaterEqual: case EltwiseLess: case EltwiseLessEqual: case EltwiseLogicalAnd: case EltwiseLogicalOr: case EltwiseLogicalXor: + case EltwisePrelu: return 2; - case MulAdd: + case EltwiseMulAdd: return 3; default: IE_THROW() << "Unsupported operation for Eltwise node with name `" << getName() << "`."; } } -bool MKLDNNEltwiseNode::isSum() { - return eltwiseOp == Add; -} - bool MKLDNNEltwiseNode::isWithBroadcast() { auto oDims = outDims[0].ToSizeVector(); for (size_t i = 0; i < inDims.size(); i++) { @@ -1064,14 +1036,14 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { << " (actual = " << getParentEdges().size() << ")"; std::vector inputPrecisions; - for (int i = 0; i < getCnnLayer()->insData.size(); i++) { - inputPrecisions.push_back(getCnnLayer()->insData[i].lock()->getPrecision()); + for (const auto &i : getOriginalInputPrecisions()) { + inputPrecisions.push_back(i); } for (auto& fusedNode : fusedWith) { if (fusedNode->getType() == Eltwise) { - for (int i = 1; i < fusedNode->getCnnLayer()->insData.size(); i++) { - inputPrecisions.push_back(fusedNode->getCnnLayer()->insData[i].lock()->getPrecision()); + for (int i = 1; i < fusedNode->getOriginalInputPrecisions().size(); i++) { + inputPrecisions.push_back(fusedNode->getOriginalInputPrecisions()[i]); } } } @@ -1079,12 +1051,9 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { if (inputPrecisions.size() != getParentEdges().size()) IE_THROW() << "Eltwise node with name `" << getName() << "` has invalid input precisions configuration."; - InferenceEngine::Precision outputPrecision = getCnnLayer()->outData[0]->getPrecision(); + InferenceEngine::Precision outputPrecision = getOriginalOutputPrecisions()[0]; if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } + outputPrecision = fusedWith[fusedWith.size() - 1]->getOriginalOutputPrecisions()[0]; } if (!mayiuse(avx512_core)) { @@ -1118,9 +1087,9 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { // TODO: delete after new LPT (ngraph based) is merged // WA is needed to handle bug in LPT that produces wrong precision after average pooling (I8/U8 instead of FP32) - if (eltwiseOp == MulAdd && (inputPrecisions[0] == Precision::U8 || inputPrecisions[0] == Precision::I8)) { - auto poolingLayer 
= dynamic_cast(getParentEdgesAtPort(0)[0]->getParent()->getCnnLayer().get()); - if (poolingLayer && poolingLayer->_type == PoolingLayer::AVG) { + if (getAlgorithm() == EltwiseMulAdd && (inputPrecisions[0] == Precision::U8 || inputPrecisions[0] == Precision::I8)) { + auto parentNode = getParentEdgesAtPort(0)[0]->getParent(); + if (getParentEdgesAtPort(0)[0]->getParent()->getAlgorithm() == PoolingAvg) { inputPrecisions[0] = Precision::FP32; } } @@ -1179,7 +1148,6 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { dataConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 0 : -1; dataConfig.constant = false; - dataConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); config.inConfs.push_back(dataConfig); @@ -1615,8 +1583,8 @@ void MKLDNNEltwiseNode::executeReference(const std::vector& src size_t inputNum = src_ptrs.size(); std::shared_ptr ref_eltwise_injector = nullptr; - if (eltwiseAlgorithm != mkldnn::algorithm::undef) { - ref_eltwise_injector = std::make_shared(static_cast(eltwiseAlgorithm), alpha, beta, 1.f); + if (getMKLDNNAlgorithm() != mkldnn::algorithm::undef) { + ref_eltwise_injector = std::make_shared(static_cast(getMKLDNNAlgorithm()), alpha, beta, 1.f); } parallel_nt(0, [&](const int ithr, const int nthr) { @@ -1651,34 +1619,34 @@ void MKLDNNEltwiseNode::executeReference(const std::vector& src } float* dst_ptr_f = reinterpret_cast(dst_ptr + index_out); - switch (getOpType()) { - case Relu: case Gelu: case Elu: case Tanh: case Logistic: case Square: case Abs: case Sqrt: - case Linear: case BoundedRelu: case SoftRelu: case Relu6: case Exp: case Clamp: case Swish: case Hswish: - case Mish: case Hsigmoid: case Round: + switch (getAlgorithm()) { + case EltwiseRelu: case EltwiseGelu: case EltwiseElu: case EltwiseTanh: case EltwiseSigmoid: case EltwiseSquare: case EltwiseAbs: + case EltwiseSqrt: case EltwiseLinear: case EltwiseBoundedRelu: case EltwiseSoftRelu: case EltwiseRelu6: case EltwiseExp: case EltwiseClamp: + case EltwiseSwish: case EltwiseHswish: case EltwiseMish: case EltwiseHsigmoid: case EltwiseRoundHalfToEven: case EltwiseRoundHalfAwayFromZero: *dst_ptr_f = ref_eltwise_injector->compute_scalar(src_f[0]); break; - case Add: *dst_ptr_f = src_f[0] + src_f[1]; break; - case MulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break; - case Subtract: *dst_ptr_f = src_f[0] - src_f[1]; break; - case Multiply: *dst_ptr_f = src_f[0] * src_f[1]; break; - case Divide: *dst_ptr_f = src_f[0] / src_f[1]; break; - case FloorMod: *dst_ptr_f = src_f[0] - floorf(src_f[0] / src_f[1]) * src_f[1]; break; - case Mod: *dst_ptr_f = src_f[0] - truncf(src_f[0] / src_f[1]) * src_f[1]; break; - case Maximum: *dst_ptr_f = std::max(src_f[0], src_f[1]); break; - case Minimum: *dst_ptr_f = std::min(src_f[0], src_f[1]); break; - case SquaredDifference: *dst_ptr_f = powf((src_f[0] - src_f[1]), 2.f); break; - case PowerDynamic: *dst_ptr_f = powf(src_f[0], src_f[1]); break; - case Equal: *dst_ptr_f = src_f[0] == src_f[1]; break; - case NotEqual: *dst_ptr_f = src_f[0] != src_f[1]; break; - case Greater: *dst_ptr_f = src_f[0] > src_f[1]; break; - case GreaterEqual: *dst_ptr_f = src_f[0] >= src_f[1]; break; - case Less: *dst_ptr_f = src_f[0] < src_f[1]; break; - case LessEqual: *dst_ptr_f = src_f[0] <= src_f[1]; break; - case LogicalAnd: *dst_ptr_f = src_f[0] && src_f[1]; break; - case LogicalOr: *dst_ptr_f = src_f[0] || src_f[1]; break; - case LogicalXor: *dst_ptr_f = (src_f[0] || src_f[1]) - (src_f[0] && src_f[1]); break; - case LogicalNot: 
*dst_ptr_f = !src_f[0]; break; - case PowerStatic: *dst_ptr_f = powf(beta * src_f[0] + gamma, alpha); break; - case Prelu: *dst_ptr_f = src_f[0] > 0 ? src_f[0] : src_f[0] * src_f[1]; break; + case EltwiseAdd: *dst_ptr_f = src_f[0] + src_f[1]; break; + case EltwiseMulAdd: *dst_ptr_f = src_f[0] * src_f[1] + src_f[2]; break; + case EltwiseSubtract: *dst_ptr_f = src_f[0] - src_f[1]; break; + case EltwiseMultiply: *dst_ptr_f = src_f[0] * src_f[1]; break; + case EltwiseDivide: *dst_ptr_f = src_f[0] / src_f[1]; break; + case EltwiseFloorMod: *dst_ptr_f = src_f[0] - floorf(src_f[0] / src_f[1]) * src_f[1]; break; + case EltwiseMod: *dst_ptr_f = src_f[0] - truncf(src_f[0] / src_f[1]) * src_f[1]; break; + case EltwiseMaximum: *dst_ptr_f = std::max(src_f[0], src_f[1]); break; + case EltwiseMinimum: *dst_ptr_f = std::min(src_f[0], src_f[1]); break; + case EltwiseSquaredDifference: *dst_ptr_f = powf((src_f[0] - src_f[1]), 2.f); break; + case EltwisePowerDynamic: *dst_ptr_f = powf(src_f[0], src_f[1]); break; + case EltwiseEqual: *dst_ptr_f = src_f[0] == src_f[1]; break; + case EltwiseNotEqual: *dst_ptr_f = src_f[0] != src_f[1]; break; + case EltwiseGreater: *dst_ptr_f = src_f[0] > src_f[1]; break; + case EltwiseGreaterEqual: *dst_ptr_f = src_f[0] >= src_f[1]; break; + case EltwiseLess: *dst_ptr_f = src_f[0] < src_f[1]; break; + case EltwiseLessEqual: *dst_ptr_f = src_f[0] <= src_f[1]; break; + case EltwiseLogicalAnd: *dst_ptr_f = src_f[0] && src_f[1]; break; + case EltwiseLogicalOr: *dst_ptr_f = src_f[0] || src_f[1]; break; + case EltwiseLogicalXor: *dst_ptr_f = (src_f[0] || src_f[1]) - (src_f[0] && src_f[1]); break; + case EltwiseLogicalNot: *dst_ptr_f = !src_f[0]; break; + case EltwisePowerStatic: *dst_ptr_f = powf(beta * src_f[0] + gamma, alpha); break; + case EltwisePrelu: *dst_ptr_f = src_f[0] > 0 ? src_f[0] : src_f[0] * src_f[1]; break; default: IE_THROW() << "Unsupported operation type for Eltwise node with name `" << getName() << "`"; } } @@ -1737,7 +1705,7 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { } void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) { - switch (getAlgorithm()) { + switch (getMKLDNNAlgorithm()) { case mkldnn::algorithm::eltwise_relu: case mkldnn::algorithm::eltwise_tanh: case mkldnn::algorithm::eltwise_elu: @@ -1758,57 +1726,49 @@ void MKLDNNEltwiseNode::appendPostOps(mkldnn::post_ops& ops) { case mkldnn::algorithm::eltwise_hsigmoid: case mkldnn::algorithm::eltwise_round_half_to_even: case mkldnn::algorithm::eltwise_round_half_away_from_zero: - ops.append_eltwise(1.0, getAlgorithm(), getAlpha(), getBeta()); + ops.append_eltwise(1.0, getMKLDNNAlgorithm(), getAlpha(), getBeta()); break; case mkldnn::algorithm::depthwise_scale_shift: case mkldnn::algorithm::depthwise_prelu: - if (scales.empty() && shifts.empty()) { - size_t bufferSize = static_cast(outDims[0][outDims[0].size() > 1 ? 1 : 0]); - size_t bufferSizeAligned = rnd_up(bufferSize, 16); - - Blob::Ptr scalesBlob = getCnnLayer()->blobs["weights"]; - if (scalesBlob == nullptr) - IE_THROW() << "Cannot get weights blob in Eltwise node with name `" << getName() << "`"; - scales.resize(bufferSizeAligned, 0); - const float *scalesBufferPtr = scalesBlob->buffer().as(); - for (int i = 0; i < bufferSize; i++) { - scales[i] = scalesBufferPtr[scalesBlob->size() == 1 ? 
0 : i]; - } - - Blob::Ptr shiftsBlob = getCnnLayer()->blobs["biases"]; - if (shiftsBlob != nullptr) { - shifts.resize(bufferSizeAligned, 0); - const float *shiftsBufferPtr = shiftsBlob->buffer().as(); - for (int i = 0; i < bufferSize; i++) { - shifts[i] = shiftsBufferPtr[shiftsBlob->size() == 1 ? 0 : i]; - } - } - } - - ops.append_depthwise(getAlgorithm(), &scales[0], shifts.empty() ? nullptr : &shifts[0]); + IE_THROW() << "[NM] Not implemented"; +// if (scales.empty() && shifts.empty()) { +// size_t bufferSize = static_cast(outDims[0][outDims[0].size() > 1 ? 1 : 0]); +// size_t bufferSizeAligned = rnd_up(bufferSize, 16); +// +// Blob::Ptr scalesBlob = getCnnLayer()->blobs["weights"]; +// if (scalesBlob == nullptr) +// IE_THROW() << "Cannot get weights blob in Eltwise node with name `" << getName() << "`"; +// scales.resize(bufferSizeAligned, 0); +// const float *scalesBufferPtr = scalesBlob->buffer().as(); +// for (int i = 0; i < bufferSize; i++) { +// scales[i] = scalesBufferPtr[scalesBlob->size() == 1 ? 0 : i]; +// } +// +// Blob::Ptr shiftsBlob = getCnnLayer()->blobs["biases"]; +// if (shiftsBlob != nullptr) { +// shifts.resize(bufferSizeAligned, 0); +// const float *shiftsBufferPtr = shiftsBlob->buffer().as(); +// for (int i = 0; i < bufferSize; i++) { +// shifts[i] = shiftsBufferPtr[shiftsBlob->size() == 1 ? 0 : i]; +// } +// } +// } +// +// ops.append_depthwise(getAlgorithm(), &scales[0], shifts.empty() ? nullptr : &shifts[0]); break; default: IE_THROW() << "Appending Eltwise node with name `" << getName() << "` as post operation is not supported"; } } bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { - auto isOneOf = [](EltwiseOpType alg, std::vector algs) { - for (auto a : algs) { - if (alg == a) { - return true; - } - } - return false; - }; - - auto isSuitableNode = [](const MKLDNNEltwiseNode* node) { + auto isSuitableNode = [this](const MKLDNNEltwiseNode* node) { // [WA] Since execution precision change from I32 to FP32 for Divide operation may lead to incorrect results // we disable its fusing otherwise there is no guarantee it will be executed it I32 // [TODO] We need to rewrite support for different precisions at all to avoid implicit conversions to FP32 // (all should be handled via explicit convert operations) - if (node->getOpType() == Divide) { - for (int i = 0; i < node->getCnnLayer()->insData.size(); i++) { - if (node->getCnnLayer()->insData[i].lock()->getPrecision() == Precision::I32) { + if (node->getAlgorithm() == EltwiseDivide) { + for (const auto &originalInputPrecision : getOriginalInputPrecisions()) { + if (originalInputPrecision == Precision::I32) { return false; } } @@ -1830,21 +1790,17 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { return false; if (node->getType() == Eltwise) { - auto eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode->getParentEdgesAtPort(0)[0]->getParent().get() != this) { - if (!isSuitableNode(this)) { - return false; - } - + if (node->getFusingPort() != 0) { // Eltwise jitter doesn't respect commutative property, so fusing is disabled in case it applied not for 0-th port. 
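// Order-sensitive algorithms (Subtract, Divide, FloorMod, Mod, PowerDynamic and the comparisons)
// give a different result when their operands are swapped, which is why they are listed
// explicitly in the check below. The one_of() helper brought in from utils/general_utils.h is
// used as a plain variadic membership test; a minimal sketch of such a helper (an assumption,
// not necessarily the exact implementation in that header):
//
//     template <typename T, typename U>
//     inline bool one_of(T val, U item) { return val == item; }
//
//     template <typename T, typename U, typename... Rest>
//     inline bool one_of(T val, U item, Rest... rest) {
//         return val == item || one_of(val, rest...);
//     }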
- if (isOneOf(eltwiseNode->getOpType(), {Subtract, Divide, FloorMod, Mod, PowerDynamic, Greater, GreaterEqual, Less, LessEqual})) { + if (one_of(node->getAlgorithm(), EltwiseSubtract, EltwiseDivide, EltwiseFloorMod, EltwiseMod, EltwisePowerDynamic, EltwiseGreater, + EltwiseGreaterEqual, EltwiseLess, EltwiseLessEqual)) { return false; } // Limitation: inputs precision definition inside Eltwise node assumes fusing is applied for 0-th port, // otherwise we need identical precision on all inputs of fused node - for (int i = 1; i < eltwiseNode->getCnnLayer()->insData.size(); i++) { - if (eltwiseNode->getCnnLayer()->insData[0].lock()->getPrecision() != eltwiseNode->getCnnLayer()->insData[i].lock()->getPrecision()) { + for (int i = 1; i < getOriginalInputPrecisions().size(); i++) { + if (getOriginalInputPrecisions()[0] != getOriginalInputPrecisions()[i]) { return false; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h index d5ab2430fc2116..1003067f2358a8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h @@ -15,55 +15,6 @@ namespace MKLDNNPlugin { #define MAX_ELTWISE_INPUTS 7 -enum EltwiseOpType { - Add = 0, - Multiply, - Subtract, - Divide, - FloorMod, - Mod, - Maximum, - Minimum, - SquaredDifference, - PowerDynamic, - PowerStatic, - MulAdd, - - Equal, - NotEqual, - Greater, - GreaterEqual, - Less, - LessEqual, - - LogicalAnd, - LogicalOr, - LogicalXor, - LogicalNot, - - Relu, - Gelu, - Elu, - Tanh, - Logistic, - Square, - Abs, - Sqrt, - Linear, - BoundedRelu, - SoftRelu, - Relu6, - Exp, - Clamp, - Swish, - Prelu, - Mish, - Hswish, - Hsigmoid, - Round, - Erf -}; - struct jit_eltwise_params { size_t inputs_number; size_t input_size; @@ -108,7 +59,7 @@ struct jit_uni_eltwise_kernel { class MKLDNNEltwiseNode : public MKLDNNNode { public: - MKLDNNEltwiseNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNEltwiseNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNEltwiseNode() override = default; void getSupportedDescriptors() override; @@ -120,14 +71,12 @@ class MKLDNNEltwiseNode : public MKLDNNNode { bool created() const override; bool canBeInPlace() const override; - bool isSum(); bool isWithBroadcast(); bool canFuse(const MKLDNNNodePtr& node) const; size_t getOpInputsNum() const; - EltwiseOpType getOpType() const { return eltwiseOp; } - mkldnn::algorithm getAlgorithm() const { return eltwiseAlgorithm; } + mkldnn::algorithm getMKLDNNAlgorithm() const { return mkldnnAlgorithm; } float getAlpha() const { return alpha; } float getBeta() const { return beta; } @@ -137,10 +86,7 @@ class MKLDNNEltwiseNode : public MKLDNNNode { InferenceEngine::Precision getRuntimePrecision() const override; private: - void init() override; - - EltwiseOpType eltwiseOp = Add; - mkldnn::algorithm eltwiseAlgorithm = mkldnn::algorithm::undef; + mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef; std::shared_ptr eltwise_kernel = nullptr; jit_eltwise_params jep = {}; @@ -174,8 +120,7 @@ class MKLDNNEltwiseNode : public MKLDNNNode { void offset_out_calc(std::vector& offset, std::vector& dims); void offset_in_calc(std::vector& offset, std::vector& dims_in, std::vector& dims_out); - static InferenceEngine::details::caseless_map> initializers; + static std::map&, MKLDNNEltwiseNode& node)>> initializers; }; } // namespace 
MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index 1d16892181c0ae..9439a6833c7f63 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -17,20 +17,20 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache), withBiases(false), baseInputsNumber(0) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); - }); - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - if (internalBlobs.size() <= 1) - return MKLDNNMemoryDesc(); - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1)); - }); - - if (getCnnLayer()->type == "FullyConnected" || getCnnLayer()->type == "InnerProduct") { - baseInputsNumber = getCnnLayer().get()->insData.size(); - } +MKLDNNFullyConnectedNode::MKLDNNFullyConnectedNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache), withBiases(false), baseInputsNumber(0) { +// internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { +// return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); +// }); +// internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { +// if (internalBlobs.size() <= 1) +// return MKLDNNMemoryDesc(); +// return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(1)); +// }); +// +// if (getCnnLayer()->type == "FullyConnected" || getCnnLayer()->type == "InnerProduct") { +// baseInputsNumber = getCnnLayer().get()->insData.size(); +// } } std::vector MKLDNNFullyConnectedNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { @@ -50,82 +50,84 @@ std::vector MKLDNNFullyConnectedNode::getAvailableFormatsFor } void MKLDNNFullyConnectedNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - precision = getCnnLayer()->outData[0]->getPrecision(); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - - if (inputDataType == memory::data_type::f32) { - outputDataType = memory::data_type::f32; - } - - if (baseInputsNumber > 1) { - if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(lastFusedLayer->outData[0]->getPrecision()); - } - } - auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->insData[1].lock()->getPrecision()); - - if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8) && - inputDataType != memory::data_type::bf16) { - inputDataType = memory::data_type::f32; - outputDataType = memory::data_type::f32; - } - } - - auto * fcLayer = dynamic_cast(getCnnLayer().get()); - if (fcLayer == nullptr) - IE_THROW() << 
"Cannot convert fully connected layer."; - if (fcLayer->_weights == nullptr && baseInputsNumber == 1) { - IE_THROW() << "Weights are empty for layer: " << fcLayer->name - << " used in MKLDNN node: " << getName() << "\n" - << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" - << " to load them from .bin part of the IR"; - } - - if (getParentEdges().size() != baseInputsNumber) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - MKLDNNDims inDims = getParentEdgeAt(0)->getDims(); - MKLDNNDims outDims = getChildEdgeAt(0)->getDims(); - - if (!one_of(inDims.ndims(), 2, 3, 4, 5)) { - IE_THROW() << "Unsupported source format for FC layer. Expected 5, 4, 3 or 2, got: " - << inDims.ndims() << " dims."; - } - - if (inDims.ndims() == 3) { - weightsDims = InferenceEngine::SizeVector({static_cast(outDims[2]), static_cast(inDims[2])}); - } else { - weightsDims.push_back(outDims[1]); - for (int i = 1; i < inDims.ndims(); i++) - weightsDims.push_back(inDims[i]); - } - biasesDims.push_back(weightsDims[0]); - - if (baseInputsNumber == 1) { - internalBlobs.push_back(createInternalBlob(weightsDims, true)); - } - - withBiases = (fcLayer->_biases != nullptr && fcLayer->_biases->size() != 0) || baseInputsNumber == 3; - - if (withBiases && baseInputsNumber == 1) { - internalBlobs.push_back(createInternalBlob(biasesDims, false)); - } - - for (auto format : getAvailableFormatsForDims(inDims)) { - MKLDNNMemoryDesc in_candidate(inDims, inputDataType, format); - MKLDNNMemoryDesc out_candidate(outDims, outputDataType, memory::format_tag::any); - - createDescriptor({in_candidate}, {out_candidate}); - } + IE_THROW() << "Not implemented"; + // TODO [NM]: reimplement w/o using CNNLayer +// if (!descs.empty()) +// return; +// +// InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); +// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +// precision = getCnnLayer()->outData[0]->getPrecision(); +// auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +// +// if (inputDataType == memory::data_type::f32) { +// outputDataType = memory::data_type::f32; +// } +// +// if (baseInputsNumber > 1) { +// if (!fusedWith.empty()) { +// auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); +// if (lastFusedLayer) { +// outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(lastFusedLayer->outData[0]->getPrecision()); +// } +// } +// auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->insData[1].lock()->getPrecision()); +// +// if ((!one_of(inputDataType , memory::data_type::u8, memory::data_type::s8) || weightsDataType != memory::data_type::s8) && +// inputDataType != memory::data_type::bf16) { +// inputDataType = memory::data_type::f32; +// outputDataType = memory::data_type::f32; +// } +// } +// +// auto * fcLayer = dynamic_cast(getCnnLayer().get()); +// if (fcLayer == nullptr) +// IE_THROW() << "Cannot convert fully connected layer."; +// if (fcLayer->_weights == nullptr && baseInputsNumber == 1) { +// IE_THROW() << "Weights are empty for layer: " << fcLayer->name +// << " used in MKLDNN node: " << getName() << "\n" +// << "Use the second argumemt of InferenceEngine::Core::ReadNetwork" +// << " to load them from .bin part of the IR"; +// } +// +// if (getParentEdges().size() != baseInputsNumber) +// IE_THROW() << "Incorrect number of input edges for 
layer " << getName(); +// if (getChildEdges().empty()) +// IE_THROW() << "Incorrect number of output edges for layer " << getName(); +// +// MKLDNNDims inDims = getParentEdgeAt(0)->getDims(); +// MKLDNNDims outDims = getChildEdgeAt(0)->getDims(); +// +// if (!one_of(inDims.ndims(), 2, 3, 4, 5)) { +// IE_THROW() << "Unsupported source format for FC layer. Expected 5, 4, 3 or 2, got: " +// << inDims.ndims() << " dims."; +// } +// +// if (inDims.ndims() == 3) { +// weightsDims = InferenceEngine::SizeVector({static_cast(outDims[2]), static_cast(inDims[2])}); +// } else { +// weightsDims.push_back(outDims[1]); +// for (int i = 1; i < inDims.ndims(); i++) +// weightsDims.push_back(inDims[i]); +// } +// biasesDims.push_back(weightsDims[0]); +// +// if (baseInputsNumber == 1) { +// internalBlobs.push_back(createInternalBlob(weightsDims, true)); +// } +// +// withBiases = (fcLayer->_biases != nullptr && fcLayer->_biases->size() != 0) || baseInputsNumber == 3; +// +// if (withBiases && baseInputsNumber == 1) { +// internalBlobs.push_back(createInternalBlob(biasesDims, false)); +// } +// +// for (auto format : getAvailableFormatsForDims(inDims)) { +// MKLDNNMemoryDesc in_candidate(inDims, inputDataType, format); +// MKLDNNMemoryDesc out_candidate(outDims, outputDataType, memory::format_tag::any); +// +// createDescriptor({in_candidate}, {out_candidate}); +// } } void MKLDNNFullyConnectedNode::createPrimitive() { @@ -171,85 +173,87 @@ void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { } void MKLDNNFullyConnectedNode::setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false) { - int blob_idx = 0; - mkldnn::post_ops ops; - - for (auto &node : fusedWith) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode) { - quantizeNode->appendPostOps(ops); - continue; - } - - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode && (eltwiseNode->getOpType() == MulAdd || eltwiseNode->getOpType() == Prelu)) { - if (initWeights) { - auto* depthwiseLayer = reinterpret_cast(eltwiseNode->getCnnLayer().get()); - int ndims = getParentEdgeAt(0)->getDims().ndims(); - MKLDNNDims depthwiseDims({static_cast(rnd_up(ndims == 3 ? 
getChildEdgeAt(0)->getDims()[2] : getChildEdgeAt(0)->getDims()[1], 16))}); - - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format_tag::x); - PostOpsIntBlobMemory[blob_idx]->FillZero(); - - // In case ndims == 3 graph optimizer allows fusing only if all weights values are the same - if (depthwiseLayer->blobs["weights"]->size() == 1 || ndims == 3) { - float broadcastValue = static_cast(depthwiseLayer->_weights->buffer())[0]; - for (int i = 0; i < PostOpsIntBlobMemory[blob_idx]->GetDesc().getDims()[0]; i++) { - static_cast(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue; - } - } else { - PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::format_tag::x, - depthwiseLayer->_weights->buffer(), - depthwiseLayer->_weights->size() * - MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); - } - - if (eltwiseNode->getAlgorithm() == algorithm::depthwise_scale_shift) { - PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); - PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format_tag::x); - PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); - - // In case ndims == 3 graph optimizer allows fusing only if all biases values are the same - if (depthwiseLayer->blobs["biases"]->size() == 1 || ndims == 3) { - float broadcastValue = static_cast(depthwiseLayer->_biases->buffer())[0]; - for (int i = 0; i < PostOpsIntBlobMemory[blob_idx + 1]->GetDesc().getDims()[0]; i++) { - static_cast(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue; - } - } else { - PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::format_tag::x, - depthwiseLayer->_biases->buffer(), - depthwiseLayer->_biases->size() * - MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); - } - - ops.append_depthwise(eltwiseNode->getAlgorithm(), - (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(), - (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData()); - - blob_idx += 2; - } else { - ops.append_depthwise(eltwiseNode->getAlgorithm(), - (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(), - nullptr); - - blob_idx += 1; - } - } else { - ops.append_depthwise(eltwiseNode->getAlgorithm(), - nullptr, - nullptr); - } - - continue; - } - - if (eltwiseNode) { - eltwiseNode->appendPostOps(ops); - } - } - - attr.set_post_ops(ops); + IE_THROW() << "Not implemented"; + // TODO [NM]: reimplement w/o using CNNLayer +// int blob_idx = 0; +// mkldnn::post_ops ops; +// +// for (auto &node : fusedWith) { +// auto* quantizeNode = dynamic_cast(node.get()); +// if (quantizeNode) { +// quantizeNode->appendPostOps(ops); +// continue; +// } +// +// auto* eltwiseNode = dynamic_cast(node.get()); +// if (eltwiseNode && (eltwiseNode->getOpType() == MulAdd || eltwiseNode->getOpType() == Prelu)) { +// if (initWeights) { +// auto* depthwiseLayer = reinterpret_cast(eltwiseNode->getCnnLayer().get()); +// int ndims = getParentEdgeAt(0)->getDims().ndims(); +// MKLDNNDims depthwiseDims({static_cast(rnd_up(ndims == 3 ? 
getChildEdgeAt(0)->getDims()[2] : getChildEdgeAt(0)->getDims()[1], 16))}); +// +// PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); +// PostOpsIntBlobMemory[blob_idx]->Create(depthwiseDims, memory::data_type::f32, memory::format_tag::x); +// PostOpsIntBlobMemory[blob_idx]->FillZero(); +// +// // In case ndims == 3 graph optimizer allows fusing only if all weights values are the same +// if (depthwiseLayer->blobs["weights"]->size() == 1 || ndims == 3) { +// float broadcastValue = static_cast(depthwiseLayer->_weights->buffer())[0]; +// for (int i = 0; i < PostOpsIntBlobMemory[blob_idx]->GetDesc().getDims()[0]; i++) { +// static_cast(PostOpsIntBlobMemory[blob_idx]->GetData())[i] = broadcastValue; +// } +// } else { +// PostOpsIntBlobMemory[blob_idx]->SetData(memory::data_type::f32, memory::format_tag::x, +// depthwiseLayer->_weights->buffer(), +// depthwiseLayer->_weights->size() * +// MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); +// } +// +// if (eltwiseNode->getAlgorithm() == algorithm::depthwise_scale_shift) { +// PostOpsIntBlobMemory.push_back(MKLDNNMemoryPtr(new MKLDNNMemory(getEngine()))); +// PostOpsIntBlobMemory[blob_idx + 1]->Create(depthwiseDims, memory::data_type::f32, memory::format_tag::x); +// PostOpsIntBlobMemory[blob_idx + 1]->FillZero(); +// +// // In case ndims == 3 graph optimizer allows fusing only if all biases values are the same +// if (depthwiseLayer->blobs["biases"]->size() == 1 || ndims == 3) { +// float broadcastValue = static_cast(depthwiseLayer->_biases->buffer())[0]; +// for (int i = 0; i < PostOpsIntBlobMemory[blob_idx + 1]->GetDesc().getDims()[0]; i++) { +// static_cast(PostOpsIntBlobMemory[blob_idx + 1]->GetData())[i] = broadcastValue; +// } +// } else { +// PostOpsIntBlobMemory[blob_idx + 1]->SetData(memory::data_type::f32, memory::format_tag::x, +// depthwiseLayer->_biases->buffer(), +// depthwiseLayer->_biases->size() * +// MKLDNNExtensionUtils::sizeOfDataType(memory::data_type::f32)); +// } +// +// ops.append_depthwise(eltwiseNode->getAlgorithm(), +// (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(), +// (const float *) PostOpsIntBlobMemory[blob_idx + 1]->GetData()); +// +// blob_idx += 2; +// } else { +// ops.append_depthwise(eltwiseNode->getAlgorithm(), +// (const float *) PostOpsIntBlobMemory[blob_idx]->GetData(), +// nullptr); +// +// blob_idx += 1; +// } +// } else { +// ops.append_depthwise(eltwiseNode->getAlgorithm(), +// nullptr, +// nullptr); +// } +// +// continue; +// } +// +// if (eltwiseNode) { +// eltwiseNode->appendPostOps(ops); +// } +// } +// +// attr.set_post_ops(ops); } bool MKLDNNFullyConnectedNode::created() const { @@ -301,42 +305,44 @@ std::shared_ptr MKLDNNFullyConnectedNode::initPrimitiveA void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { - TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; - - mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); - mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); - if (inDesc.getPrecision() == Precision::BF16) { - bdt = mkldnn::memory::data_type::f32; - } else if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { - wdt = memory::data_type::s8; - bdt = baseInputsNumber == 3 ? 
MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->insData[2].lock()->getPrecision()) : memory::data_type::f32; - } - - if (inDesc.getDims().size() == 3) { - auto inDims = inDesc.getDims(); - auto outDims = outDesc.getDims(); - InferenceEngine::SizeVector normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; - InferenceEngine::SizeVector normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; - inDesc = InferenceEngine::TensorDesc(inDesc.getPrecision(), normalizedInDims, TensorDesc::getLayoutByDims(normalizedInDims)); - outDesc = InferenceEngine::TensorDesc(outDesc.getPrecision(), normalizedOutDims, TensorDesc::getLayoutByDims(normalizedOutDims)); - } - - MKLDNNMemoryDesc in_candidate(inDesc); - MKLDNNMemoryDesc out_candidate(outDesc); - MKLDNNMemoryDesc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); - - if (withBiases) { - MKLDNNMemoryDesc bias_candidate(MKLDNNDims(biasesDims), bdt, memory::format_tag::any); - MKLDNNDescriptor desc(std::shared_ptr( - new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, - bias_candidate, out_candidate))); - descs.push_back(desc); - } else { - MKLDNNDescriptor desc(std::shared_ptr( - new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, - out_candidate))); - descs.push_back(desc); - } + IE_THROW() << "Not implemented"; + // TODO [NM]: reimplement w/o using CNNLayer +// TensorDesc inDesc = inputDesc[0], outDesc = outputDesc[0]; +// +// mkldnn::memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); +// mkldnn::memory::data_type bdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); +// if (inDesc.getPrecision() == Precision::BF16) { +// bdt = mkldnn::memory::data_type::f32; +// } else if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { +// wdt = memory::data_type::s8; +// bdt = baseInputsNumber == 3 ? 
MKLDNNExtensionUtils::IEPrecisionToDataType(getCnnLayer()->insData[2].lock()->getPrecision()) : memory::data_type::f32; +// } +// +// if (inDesc.getDims().size() == 3) { +// auto inDims = inDesc.getDims(); +// auto outDims = outDesc.getDims(); +// InferenceEngine::SizeVector normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; +// InferenceEngine::SizeVector normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; +// inDesc = InferenceEngine::TensorDesc(inDesc.getPrecision(), normalizedInDims, TensorDesc::getLayoutByDims(normalizedInDims)); +// outDesc = InferenceEngine::TensorDesc(outDesc.getPrecision(), normalizedOutDims, TensorDesc::getLayoutByDims(normalizedOutDims)); +// } +// +// MKLDNNMemoryDesc in_candidate(inDesc); +// MKLDNNMemoryDesc out_candidate(outDesc); +// MKLDNNMemoryDesc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); +// +// if (withBiases) { +// MKLDNNMemoryDesc bias_candidate(MKLDNNDims(biasesDims), bdt, memory::format_tag::any); +// MKLDNNDescriptor desc(std::shared_ptr( +// new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, +// bias_candidate, out_candidate))); +// descs.push_back(desc); +// } else { +// MKLDNNDescriptor desc(std::shared_ptr( +// new inner_product_forward::desc(prop_kind::forward_scoring, in_candidate, wgh_candidate, +// out_candidate))); +// descs.push_back(desc); +// } } MKLDNNMemoryDesc MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h index 7afcd016057064..fcad95c4430caa 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNFullyConnectedNode : public MKLDNNNode { public: - MKLDNNFullyConnectedNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNFullyConnectedNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNFullyConnectedNode() override = default; std::vector getAvailableFormatsForDims(const MKLDNNDims &dims) const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp index 9bf971f3190301..fcb84e0c040271 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.cpp @@ -3,7 +3,6 @@ // #include "mkldnn_gemm_node.h" -#include #include #include #include @@ -18,15 +17,21 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNGemmNode::MKLDNNGemmNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNGemmNode::MKLDNNGemmNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + auto matMulOp = ngraph::as_type_ptr(op); + if (matMulOp) { + alpha = 1; + beta = 1; + transposeA = matMulOp->get_transpose_a(); + transposeB = matMulOp->get_transpose_b(); + } else { + IE_THROW(NotImplemented) + << "CPU Gemm node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); + } +} void 
MKLDNNGemmNode::getSupportedDescriptors() { - auto* gemmLayer = dynamic_cast(getCnnLayer().get()); - - if (gemmLayer == nullptr) - IE_THROW() << "Cannot convert gemm layer."; - if (getParentEdges().size() != 2 && getParentEdges().size() != 3) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) @@ -36,11 +41,6 @@ void MKLDNNGemmNode::getSupportedDescriptors() { auto inDims1 = getParentEdgeAt(1)->getDims(); auto outDims = getChildEdgeAt(0)->getDims(); - alpha = gemmLayer->alpha; - beta = gemmLayer->beta; - transposeA = gemmLayer->transpose_a; - transposeB = gemmLayer->transpose_b; - if ((inDims0.ndims() < 2 || inDims0.ndims() > 4) || (inDims1.ndims() < 2 || inDims1.ndims() > 4)) IE_THROW() << "Unsupported input dims count for layer " << getName(); @@ -120,8 +120,8 @@ void MKLDNNGemmNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto inPrec0 = getCnnLayer()->insData[0].lock()->getPrecision(); - auto inPrec1 = getCnnLayer()->insData[1].lock()->getPrecision(); + auto inPrec0 = getOriginalInputPrecisions()[0]; + auto inPrec1 = getOriginalInputPrecisions()[1]; if ((inPrec0 != Precision::U8 && inPrec0 != Precision::I8) || inPrec1 != Precision::I8 || isThreeInputs) { if (inPrec0 == Precision::BF16 || inPrec1 == Precision::BF16) { inPrec0 = Precision::BF16; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h index 24c31bcddb5ec4..f5f901bc27ea22 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gemm_node.h @@ -13,7 +13,7 @@ namespace MKLDNNPlugin { class MKLDNNGemmNode : public MKLDNNNode { public: - MKLDNNGemmNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNGemmNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNGemmNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp index df7d9f4c37e7b2..40a4435c5927dc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp @@ -12,16 +12,15 @@ using namespace mkldnn; using namespace MKLDNNPlugin; -MKLDNNGenericNode::MKLDNNGenericNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) { - params = layer->params; - blobs = layer->blobs; +MKLDNNGenericNode::MKLDNNGenericNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache), ngraphOp(op) { +// params = layer->params; +// blobs = layer->blobs; } void MKLDNNGenericNode::getSupportedDescriptors() { if (!extFactory && impls.empty()) { - std::string type = getCnnLayer() ? 
getCnnLayer()->type : "Generic"; - IE_THROW() << "Cannot get generic primitive for layer: " << getName() << " with type: " << type; + IE_THROW() << "Cannot get generic primitive for layer: " << getName() << " with type: " << getTypeStr(); } } @@ -30,23 +29,6 @@ void MKLDNNGenericNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::ResponseDesc resp; - if (impls.empty()) { - if (!extFactory) - IE_THROW() << "Descriptor for generic primitive doesn't exist"; - - std::vector impls_no_exec; - - InferenceEngine::StatusCode rc = extFactory->getImplementations(impls_no_exec, &resp); - for (const auto& impl : impls_no_exec) { - if (auto exec_impl = std::dynamic_pointer_cast(impl)) { - impls.emplace_back(exec_impl); - } - } - if (rc != InferenceEngine::OK) { - IE_THROW() << resp.msg; - } - } - for (auto &impl : impls) { std::vector configs; auto rc = impl->getSupportedConfigurations(configs, &resp); @@ -84,16 +66,33 @@ bool MKLDNNGenericNode::created() const { } bool MKLDNNGenericNode::created(const MKLDNNExtensionManager::Ptr &extMgr) { - if (getCnnLayer() && extMgr) { + if (ngraphOp && extMgr) { // We should save extension manager in order to avoid situation when // it will destroyed before extensibility primitives - if (getCnnLayer()->getNode()) { - auto impl = extMgr->CreateImplementation(getCnnLayer()->getNode()); - if (auto execImpl = std::dynamic_pointer_cast(impl)) - impls.emplace_back(execImpl); - } + auto impl = extMgr->CreateImplementation(ngraphOp); + if (auto execImpl = std::dynamic_pointer_cast(impl)) + impls.emplace_back(execImpl); + if (impls.empty()) { - extFactory = extMgr->CreateExtensionFactory(getCnnLayer()); + extFactory = extMgr->CreateExtensionFactory(ngraphOp); + + if (!extFactory) + IE_THROW(NotImplemented) << "Descriptor for generic primitive doesn't exist"; + + std::vector impls_no_exec; + InferenceEngine::ResponseDesc resp; + InferenceEngine::StatusCode rc = extFactory->getImplementations(impls_no_exec, &resp); + if (rc == InferenceEngine::NOT_IMPLEMENTED) { + IE_THROW(NotImplemented) << resp.msg; + } else if (rc != InferenceEngine::OK) { + IE_THROW() << resp.msg; + } + + for (const auto& impl : impls_no_exec) { + if (auto exec_impl = std::dynamic_pointer_cast(impl)) { + impls.emplace_back(exec_impl); + } + } } if (extFactory || !impls.empty()) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h index 98160351cca6e7..6570d466965360 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.h @@ -16,7 +16,7 @@ namespace MKLDNNPlugin { class MKLDNNGenericNode : public MKLDNNNode { public: - MKLDNNGenericNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNGenericNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNGenericNode() = default; void getSupportedDescriptors() override; @@ -34,12 +34,14 @@ class MKLDNNGenericNode : public MKLDNNNode { void execLayer(); void cleanup() override; - protected: InferenceEngine::ILayerImplFactory::Ptr extFactory; std::vector impls; - std::map params; - std::map blobs; + + const std::shared_ptr ngraphOp; + +// std::map params; +// std::map blobs; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp index 
ea478185720575..b5c74959aae7f7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp @@ -3,28 +3,50 @@ // #include "mkldnn_input_node.h" -#include "../mkldnn_extension_utils.h" +#include "mkldnn_extension_utils.h" #include #include #include +#include +#include +#include +#include #include "caseless.hpp" #include "common/cpu_memcpy.h" #include "common/cpu_convert.h" using namespace mkldnn; using namespace MKLDNNPlugin; -using namespace InferenceEngine::details; +using namespace InferenceEngine; +using namespace details; +using namespace ngraph::op; + +MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + if (!one_of(op->get_type_info(), v0::Parameter::type_info, v0::Constant::type_info, v0::Result::type_info)) + IE_THROW() << "CPU Input node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); -MKLDNNInputNode::MKLDNNInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) { constant = ConstantType::NoConst; - if (layer && CaselessEq()(layer->type, "const")) { + constBlob = nullptr; + + auto constOp = ngraph::as_type_ptr(op); + if (constOp) { constant = ConstantType::Const; - if (layer->blobs.size() != 1 || getType() != Input || !layer->blobs.begin()->second) - IE_THROW() << "Incorrect const input " << getName(); - constBlob = layer->blobs.begin()->second; - } else { - constBlob = nullptr; + + auto dataPrecision = convertPrecision(op->get_element_type()); + + size_t shapeSize = ngraph::shape_size(op->get_shape()); + constexpr size_t byte_size{8}; + if (dataPrecision == Precision::BIN) { + shapeSize = (shapeSize + (byte_size - 1)) / byte_size; + } + + TensorDesc td(dataPrecision, {shapeSize}, Layout::C); + + auto blob = make_blob_with_precision(td, const_cast(constOp->get_data_ptr())); + blob->allocate(); + + constBlob = blob; } } @@ -46,28 +68,30 @@ void MKLDNNInputNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::LayerConfig config; + LayerConfig config; config.dynBatchSupport = true; if (getType() == Input || getType() == MemoryInput) { - precision = getCnnLayer()->outData[0]->getPrecision(); - if (precision == InferenceEngine::Precision::U16 || isMeanImage) { - precision = InferenceEngine::Precision::FP32; + precision = getOriginalOutputPrecisions()[0]; + if (precision == Precision::U16 || isMeanImage) { + precision = Precision::FP32; } - InferenceEngine::DataConfig dataConfig; + DataConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - auto mem_tdesc = MKLDNNMemoryDesc(getCnnLayer()->outData[0]->getTensorDesc()); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + auto mem_tdesc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); dataConfig.desc = mem_tdesc; config.outConfs.push_back(dataConfig); } else if (getType() == Output) { - precision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (precision == InferenceEngine::Precision::U16) precision = InferenceEngine::Precision::FP32; - InferenceEngine::DataConfig dataConfig; + precision = getOriginalInputPrecisions()[0]; + if (precision == Precision::U16) precision = Precision::FP32; + DataConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - auto 
mem_tdesc = MKLDNNMemoryDesc(getCnnLayer()->insData[0].lock()->getTensorDesc()); + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + auto mem_tdesc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); dataConfig.desc = mem_tdesc; config.inConfs.push_back(dataConfig); } @@ -98,13 +122,13 @@ bool MKLDNNInputNode::created() const { } namespace { - bool isDefaultOrder(const InferenceEngine::SizeVector &order) { + bool isDefaultOrder(const SizeVector &order) { return std::is_sorted(order.begin(), order.end(), [](size_t a, size_t b) { return a + 1 == b; }); } - std::tuple isDefaultStrides(const InferenceEngine::SizeVector &strides, - const InferenceEngine::SizeVector &dims) { + std::tuple isDefaultStrides(const SizeVector &strides, + const SizeVector &dims) { if (strides.size() != dims.size()) return std::make_tuple(false, 0); @@ -119,7 +143,7 @@ namespace { return std::make_tuple(true, dim); } - bool isCompatibleTensors(const InferenceEngine::TensorDesc &lhs, const InferenceEngine::TensorDesc &rhs, + bool isCompatibleTensors(const TensorDesc &lhs, const TensorDesc &rhs, bool isNeedPrecValid = true) { auto const &lhsBlockingDesc = lhs.getBlockingDesc(); auto const &rhsBlockingDesc = rhs.getBlockingDesc(); @@ -150,8 +174,8 @@ void MKLDNNInputNode::execute(mkldnn::stream strm) { int8_t *dstData = dstBlob->buffer(); cpu_memcpy_s(dstData, dstBlob->byteSize(), srcData, constBlob->byteSize()); - } else if (constBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::BIN || - dstBlob->getTensorDesc().getPrecision() == InferenceEngine::Precision::BIN) { + } else if (constBlob->getTensorDesc().getPrecision() == Precision::BIN || + dstBlob->getTensorDesc().getPrecision() == Precision::BIN) { size_t dstSize = dstBlob->size() / 8; if (constBlob->size() != dstSize) { IE_THROW() << "Incorrect blob sizes for node " << getName(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h index 7f7024371c29f4..b1dc432f31b662 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h @@ -12,7 +12,7 @@ namespace MKLDNNPlugin { class MKLDNNInputNode : public MKLDNNNode { public: - MKLDNNInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNInputNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index 7f12bb493d1d0a..625edf1f5d151f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -5,14 +5,12 @@ #include "mkldnn_interpolate_node.h" #include "mkldnn_quantize_node.h" -#include #include "mkldnn_eltwise_node.h" #include #include #include #include #include -#include #include "ie_parallel.hpp" #include @@ -1593,25 +1591,26 @@ struct jit_uni_interpolate_kernel_f32 : public jit_uni_interpolate_kernel, publi } }; -MKLDNNInterpolateNode::MKLDNNInterpolateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) { - std::string modeString = layer->GetParamAsString("mode"); - if 
(modeString == "nearest") { - mode = InterpolateMode::nearest; - } else if (modeString == "linear") { - size_t rank = layer->insData[0].lock()->getDims().size(); - if (rank < 5) { - mode = InterpolateMode::linear_onnx; - } else { - mode = InterpolateMode::linear; - } - } else if (modeString == "linear_onnx") { - mode = InterpolateMode::linear_onnx; - } else if (modeString == "cubic") { - mode = InterpolateMode::cubic; - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support interpolate mode:" << modeString; - } +MKLDNNInterpolateNode::MKLDNNInterpolateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + IE_THROW() << "[NM] Not implemented"; +// std::string modeString = layer->GetParamAsString("mode"); +// if (modeString == "nearest") { +// mode = InterpolateMode::nearest; +// } else if (modeString == "linear") { +// size_t rank = layer->insData[0].lock()->getDims().size(); +// if (rank < 5) { +// mode = InterpolateMode::linear_onnx; +// } else { +// mode = InterpolateMode::linear; +// } +// } else if (modeString == "linear_onnx") { +// mode = InterpolateMode::linear_onnx; +// } else if (modeString == "cubic") { +// mode = InterpolateMode::cubic; +// } else { +// IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support interpolate mode:" << modeString; +// } } // shapeND: n c d h w @@ -1650,270 +1649,272 @@ SizeVector to5Dim(SizeVector casesDim) { } void MKLDNNInterpolateNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - - if (getParentEdges().size() != 3 && getParentEdges().size() != 4) - // data, target_shape, scale, axis(optional). - IE_THROW() << "Interpolate layer with name '" << getName() << "' has incorrect number of input edges"; - isAxesSpecified = (getParentEdges().size() == 3) ? 
false : true; - if (getChildEdges().empty()) - IE_THROW() << "Interpolate layer with name '" << getName() << "' has incorrect number of output edges"; - - srcDim = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); - int dataRank = srcDim.size(); - switch (dataRank) { - case 1: - case 3: - spatialDimSize = 1; - break; - case 2: - case 4: - spatialDimSize = 2; - break; - case 5: - if (mode != InterpolateMode::cubic) { - spatialDimSize = 3; - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << - "' of 'cubic' mode only support input tensor of 2 or 4 rank"; - } - break; - default: - IE_THROW() << "Interpolate layer with name '" << getName() << - "' does not support input tensor of rank :" << dataRank; - break; - } - - auto *layer = getCnnLayer().get(); - std::string modeString = layer->GetParamAsString("coordinate_transformation_mode", "half_pixel"); - if (modeString == "half_pixel") { - coordTransMode = InterpolateCoordTransMode::half_pixel; - } else if (modeString == "pytorch_half_pixel") { - coordTransMode = InterpolateCoordTransMode::pytorch_half_pixel; - } else if (modeString == "asymmetric") { - coordTransMode = InterpolateCoordTransMode::asymmetric; - } else if (modeString == "tf_half_pixel_for_nn") { - coordTransMode = InterpolateCoordTransMode::tf_half_pixel_for_nn; - } else if (modeString == "align_corners") { - coordTransMode = InterpolateCoordTransMode::align_corners; - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support coordinate transformation mode: " << modeString; - } - - if (mode == InterpolateMode::nearest) { - modeString = layer->GetParamAsString("nearest_mode", "round_prefer_floor"); - if (modeString == "round_prefer_floor") { - nearestMode = InterpolateNearestMode::round_prefer_floor; - } else if (modeString == "round_prefer_ceil") { - nearestMode = InterpolateNearestMode::round_prefer_ceil; - } else if (modeString == "floor") { - nearestMode = InterpolateNearestMode::floor; - } else if (modeString == "ceil") { - nearestMode = InterpolateNearestMode::ceil; - } else if (modeString == "simple") { - nearestMode = InterpolateNearestMode::simple; - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support nearest round mode: " << modeString; - } - } else if (mode == InterpolateMode::cubic) { - cubeCoeff = layer->GetParamAsFloat("cube_coeff", -0.75); - } - antialias = layer->GetParamAsBool("antialias", false); - shapeInferMode = layer->GetParamAsString("shape_calculation_mode"); - - // get pad - std::vector defPad(dataRank, 0); - padBegin = layer->GetParamAsInts("pads_begin", defPad); - padEnd = layer->GetParamAsInts("pads_end", defPad); - for (int i = 0; i < padBegin.size(); i++) { - if (padBegin[i] != 0) { - hasPad = true; - break; - } - } - for (int i = 0; i < padEnd.size(); i++) { - if (padEnd[i] != 0) { - hasPad = true; - break; - } - } - //correct pad - if (hasPad) { - auto correctPad = [&](std::vector pad, int rank) { - int padLen = pad.size(); - if (padLen == rank) { - return pad; - } - std::vector result; - if (padLen > rank) { - result.insert(result.end(), pad.begin(), pad.begin() + rank); - } else { - result = pad; - result.insert(result.end(), rank - padLen, 0); - } - return result; - }; - - padBegin = correctPad(padBegin, dataRank); - padEnd = correctPad(padEnd, dataRank); - srcDimPad = getPaddedInputShape(); - } else { - srcDimPad = srcDim; - } - dstDim = getChildEdgeAt(0)->getDims().ToSizeVector(); - - // extract const buffer - auto scalesLayer = 
getParentEdgesAtPort(SCALES_ID)[0]->getParent()->getCnnLayer(); - if (scalesLayer->type == "Const") { - auto scalesBlob = dynamic_cast*>(scalesLayer->blobs["custom"].get()); - auto scalesData = scalesBlob->buffer().as(); - int scalesLen = getParentEdgeAt(SCALES_ID)->getDims()[0]; - scales.resize(scalesLen); - for (int i = 0; i < scalesLen; i++) { - scales[i] = scalesData[i]; - } - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' only supports const 'scales' input."; - } - - if (isAxesSpecified) { - auto axesLayer = getParentEdgesAtPort(AXES_ID)[0]->getParent()->getCnnLayer(); - if (axesLayer->type == "Const") { - auto axesBlob = dynamic_cast*>(axesLayer->blobs["custom"].get()); - auto axesData = axesBlob->buffer().as(); - int axesLen = getParentEdgeAt(AXES_ID)->getDims()[0]; - axes.resize(axesLen); - for (int i = 0; i < axesLen; i++) { - axes[i] = axesData[i]; - } - } else { - IE_THROW() << "Interpolate layer with name '" << getName() << "' only supports const 'axes' input."; - } - } else { - int dataRank = srcDim.size(); - axes.resize(dataRank); - for (int i = 0; i < dataRank; i++) { - axes[i] = i; - } - } - - if (scales.size() != axes.size()) { - IE_THROW() << "Interpolate layer with name '" << getName() << - "' does not have the same number elements in scales as in axis."; - } + IE_THROW() << "[NM] Not implemented"; +// if (!descs.empty()) +// return; +// +// if (getParentEdges().size() != 3 && getParentEdges().size() != 4) +// // data, target_shape, scale, axis(optional). +// IE_THROW() << "Interpolate layer with name '" << getName() << "' has incorrect number of input edges"; +// isAxesSpecified = (getParentEdges().size() == 3) ? false : true; +// if (getChildEdges().empty()) +// IE_THROW() << "Interpolate layer with name '" << getName() << "' has incorrect number of output edges"; +// +// srcDim = getParentEdgeAt(DATA_ID)->getDims().ToSizeVector(); +// int dataRank = srcDim.size(); +// switch (dataRank) { +// case 1: +// case 3: +// spatialDimSize = 1; +// break; +// case 2: +// case 4: +// spatialDimSize = 2; +// break; +// case 5: +// if (mode != InterpolateMode::cubic) { +// spatialDimSize = 3; +// } else { +// IE_THROW() << "Interpolate layer with name '" << getName() << +// "' of 'cubic' mode only support input tensor of 2 or 4 rank"; +// } +// break; +// default: +// IE_THROW() << "Interpolate layer with name '" << getName() << +// "' does not support input tensor of rank :" << dataRank; +// break; +// } +// +// auto *layer = getCnnLayer().get(); +// std::string modeString = layer->GetParamAsString("coordinate_transformation_mode", "half_pixel"); +// if (modeString == "half_pixel") { +// coordTransMode = InterpolateCoordTransMode::half_pixel; +// } else if (modeString == "pytorch_half_pixel") { +// coordTransMode = InterpolateCoordTransMode::pytorch_half_pixel; +// } else if (modeString == "asymmetric") { +// coordTransMode = InterpolateCoordTransMode::asymmetric; +// } else if (modeString == "tf_half_pixel_for_nn") { +// coordTransMode = InterpolateCoordTransMode::tf_half_pixel_for_nn; +// } else if (modeString == "align_corners") { +// coordTransMode = InterpolateCoordTransMode::align_corners; +// } else { +// IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support coordinate transformation mode: " << modeString; +// } +// +// if (mode == InterpolateMode::nearest) { +// modeString = layer->GetParamAsString("nearest_mode", "round_prefer_floor"); +// if (modeString == "round_prefer_floor") { +// nearestMode = 
InterpolateNearestMode::round_prefer_floor; +// } else if (modeString == "round_prefer_ceil") { +// nearestMode = InterpolateNearestMode::round_prefer_ceil; +// } else if (modeString == "floor") { +// nearestMode = InterpolateNearestMode::floor; +// } else if (modeString == "ceil") { +// nearestMode = InterpolateNearestMode::ceil; +// } else if (modeString == "simple") { +// nearestMode = InterpolateNearestMode::simple; +// } else { +// IE_THROW() << "Interpolate layer with name '" << getName() << "' does not support nearest round mode: " << modeString; +// } +// } else if (mode == InterpolateMode::cubic) { +// cubeCoeff = layer->GetParamAsFloat("cube_coeff", -0.75); +// } +// antialias = layer->GetParamAsBool("antialias", false); +// shapeInferMode = layer->GetParamAsString("shape_calculation_mode"); +// +// // get pad +// std::vector defPad(dataRank, 0); +// padBegin = layer->GetParamAsInts("pads_begin", defPad); +// padEnd = layer->GetParamAsInts("pads_end", defPad); +// for (int i = 0; i < padBegin.size(); i++) { +// if (padBegin[i] != 0) { +// hasPad = true; +// break; +// } +// } +// for (int i = 0; i < padEnd.size(); i++) { +// if (padEnd[i] != 0) { +// hasPad = true; +// break; +// } +// } +// //correct pad +// if (hasPad) { +// auto correctPad = [&](std::vector pad, int rank) { +// int padLen = pad.size(); +// if (padLen == rank) { +// return pad; +// } +// std::vector result; +// if (padLen > rank) { +// result.insert(result.end(), pad.begin(), pad.begin() + rank); +// } else { +// result = pad; +// result.insert(result.end(), rank - padLen, 0); +// } +// return result; +// }; +// +// padBegin = correctPad(padBegin, dataRank); +// padEnd = correctPad(padEnd, dataRank); +// srcDimPad = getPaddedInputShape(); +// } else { +// srcDimPad = srcDim; +// } +// dstDim = getChildEdgeAt(0)->getDims().ToSizeVector(); +// +// // extract const buffer +// auto scalesLayer = getParentEdgesAtPort(SCALES_ID)[0]->getParent()->getCnnLayer(); +// if (scalesLayer->type == "Const") { +// auto scalesBlob = dynamic_cast*>(scalesLayer->blobs["custom"].get()); +// auto scalesData = scalesBlob->buffer().as(); +// int scalesLen = getParentEdgeAt(SCALES_ID)->getDims()[0]; +// scales.resize(scalesLen); +// for (int i = 0; i < scalesLen; i++) { +// scales[i] = scalesData[i]; +// } +// } else { +// IE_THROW() << "Interpolate layer with name '" << getName() << "' only supports const 'scales' input."; +// } +// +// if (isAxesSpecified) { +// auto axesLayer = getParentEdgesAtPort(AXES_ID)[0]->getParent()->getCnnLayer(); +// if (axesLayer->type == "Const") { +// auto axesBlob = dynamic_cast*>(axesLayer->blobs["custom"].get()); +// auto axesData = axesBlob->buffer().as(); +// int axesLen = getParentEdgeAt(AXES_ID)->getDims()[0]; +// axes.resize(axesLen); +// for (int i = 0; i < axesLen; i++) { +// axes[i] = axesData[i]; +// } +// } else { +// IE_THROW() << "Interpolate layer with name '" << getName() << "' only supports const 'axes' input."; +// } +// } else { +// int dataRank = srcDim.size(); +// axes.resize(dataRank); +// for (int i = 0; i < dataRank; i++) { +// axes[i] = i; +// } +// } +// +// if (scales.size() != axes.size()) { +// IE_THROW() << "Interpolate layer with name '" << getName() << +// "' does not have the same number elements in scales as in axis."; +// } } void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) - return; - - setPostOps(attr, true); - - Precision inputPrecision = getCnnLayer()->insData[DATA_ID].lock()->getPrecision(); - if 
((inputPrecision != Precision::I8) && (inputPrecision != Precision::U8) && (inputPrecision != Precision::BF16)) { - inputPrecision = Precision::FP32; - } - if ((inputPrecision == Precision::BF16) && !mayiuse(avx512_core)) { - inputPrecision = Precision::FP32; - } - Precision outputPrecision = inputPrecision; - - if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } - } - - if (!mayiuse(cpu::x64::sse41)) { - inputPrecision = outputPrecision = Precision::FP32; - } - - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); - srcDataSize = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); - dstDataSize = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); - - inputPrec = inputPrecision; - outputPrec = outputPrecision; - - InferenceEngine::LayerConfig config; - config.dynBatchSupport = false; - if (isAxesSpecified) { - config.inConfs.resize(4); - } else { - config.inConfs.resize(3); - } - config.outConfs.resize(1); - config.inConfs[DATA_ID].constant = false; - config.inConfs[TARGET_SHAPE_ID].constant = false; - config.inConfs[SCALES_ID].constant = false; - config.outConfs[0].constant = false; - config.inConfs[DATA_ID].inPlace = -1; - config.inConfs[TARGET_SHAPE_ID].inPlace = -1; - config.inConfs[SCALES_ID].inPlace = -1; - config.outConfs[0].inPlace = -1; - if (isAxesSpecified) { - config.inConfs[AXES_ID].constant = false; - config.inConfs[AXES_ID].inPlace = -1; - } - - auto targetShapeType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); - auto scalesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::FP32); - auto axesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); - - auto pushDesc = [&](memory::format_tag dataFormat, impl_desc_type implDetail) { - config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), inputDataType, dataFormat); - config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(TARGET_SHAPE_ID)->getDims(), targetShapeType, memory::format_tag::x); - config.inConfs[SCALES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(SCALES_ID)->getDims(), scalesType, memory::format_tag::x); - if (isAxesSpecified) - config.inConfs[AXES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES_ID)->getDims(), axesType, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, dataFormat); - supportedPrimitiveDescriptors.push_back({config, implDetail, dataFormat}); - }; - - auto channels = getParentEdgeAt(DATA_ID)->getDims().ndims() > 1 ? 
getParentEdgeAt(DATA_ID)->getDims()[1] : 1; - - if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref); - } else { - // blk and by_channel JIT kernel on sse41 or above machine - if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 4) { - if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::nhwc, jit_avx512); - if (channels != 1) - pushDesc(memory::format_tag::nChw16c, jit_avx512); - } else if (mayiuse(cpu::x64::avx2)) { - pushDesc(memory::format_tag::nhwc, jit_avx2); - if (channels != 1) - pushDesc(memory::format_tag::nChw8c, jit_avx2); - } else { - pushDesc(memory::format_tag::nhwc, jit_sse42); - if (channels != 1) - pushDesc(memory::format_tag::nChw8c, jit_sse42); - } - } else if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 5 && mode != InterpolateMode::cubic) { - if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::ndhwc, jit_avx512); - if (channels != 1) - pushDesc(memory::format_tag::nCdhw16c, jit_avx512); - } else if (mayiuse(cpu::x64::avx2)) { - pushDesc(memory::format_tag::ndhwc, jit_avx2); - if (channels != 1) - pushDesc(memory::format_tag::nCdhw8c, jit_avx2); - } else { - pushDesc(memory::format_tag::ndhwc, jit_sse42); - if (channels != 1) - pushDesc(memory::format_tag::nCdhw8c, jit_sse42); - } - } - - // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) - if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { - pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), jit_avx2); - } - } + IE_THROW() << "[NM] Not implemented"; +// if (!supportedPrimitiveDescriptors.empty()) +// return; +// +// setPostOps(attr, true); +// +// Precision inputPrecision = getCnnLayer()->insData[DATA_ID].lock()->getPrecision(); +// if ((inputPrecision != Precision::I8) && (inputPrecision != Precision::U8) && (inputPrecision != Precision::BF16)) { +// inputPrecision = Precision::FP32; +// } +// if ((inputPrecision == Precision::BF16) && !mayiuse(avx512_core)) { +// inputPrecision = Precision::FP32; +// } +// Precision outputPrecision = inputPrecision; +// +// if (!fusedWith.empty()) { +// auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); +// if (lastFusedLayer) { +// outputPrecision = lastFusedLayer->outData[0]->getPrecision(); +// } +// } +// +// if (!mayiuse(cpu::x64::sse41)) { +// inputPrecision = outputPrecision = Precision::FP32; +// } +// +// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); +// auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); +// srcDataSize = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); +// dstDataSize = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); +// +// inputPrec = inputPrecision; +// outputPrec = outputPrecision; +// +// InferenceEngine::LayerConfig config; +// config.dynBatchSupport = false; +// if (isAxesSpecified) { +// config.inConfs.resize(4); +// } else { +// config.inConfs.resize(3); +// } +// config.outConfs.resize(1); +// config.inConfs[DATA_ID].constant = false; +// config.inConfs[TARGET_SHAPE_ID].constant = false; +// config.inConfs[SCALES_ID].constant = false; +// config.outConfs[0].constant = false; +// config.inConfs[DATA_ID].inPlace = -1; +// config.inConfs[TARGET_SHAPE_ID].inPlace = -1; +// config.inConfs[SCALES_ID].inPlace = -1; +// config.outConfs[0].inPlace = -1; +// if (isAxesSpecified) { +// 
config.inConfs[AXES_ID].constant = false; +// config.inConfs[AXES_ID].inPlace = -1; +// } +// +// auto targetShapeType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); +// auto scalesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::FP32); +// auto axesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); +// +// auto pushDesc = [&](memory::format_tag dataFormat, impl_desc_type implDetail) { +// config.inConfs[DATA_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(DATA_ID)->getDims(), inputDataType, dataFormat); +// config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(TARGET_SHAPE_ID)->getDims(), targetShapeType, memory::format_tag::x); +// config.inConfs[SCALES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(SCALES_ID)->getDims(), scalesType, memory::format_tag::x); +// if (isAxesSpecified) +// config.inConfs[AXES_ID].desc = MKLDNNMemoryDesc(getParentEdgeAt(AXES_ID)->getDims(), axesType, memory::format_tag::x); +// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, dataFormat); +// supportedPrimitiveDescriptors.push_back({config, implDetail, dataFormat}); +// }; +// +// auto channels = getParentEdgeAt(DATA_ID)->getDims().ndims() > 1 ? getParentEdgeAt(DATA_ID)->getDims()[1] : 1; +// +// if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { +// pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), ref); +// } else { +// // blk and by_channel JIT kernel on sse41 or above machine +// if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 4) { +// if (mayiuse(cpu::x64::avx512_common)) { +// pushDesc(memory::format_tag::nhwc, jit_avx512); +// if (channels != 1) +// pushDesc(memory::format_tag::nChw16c, jit_avx512); +// } else if (mayiuse(cpu::x64::avx2)) { +// pushDesc(memory::format_tag::nhwc, jit_avx2); +// if (channels != 1) +// pushDesc(memory::format_tag::nChw8c, jit_avx2); +// } else { +// pushDesc(memory::format_tag::nhwc, jit_sse42); +// if (channels != 1) +// pushDesc(memory::format_tag::nChw8c, jit_sse42); +// } +// } else if (getParentEdgeAt(DATA_ID)->getDims().ndims() == 5 && mode != InterpolateMode::cubic) { +// if (mayiuse(cpu::x64::avx512_common)) { +// pushDesc(memory::format_tag::ndhwc, jit_avx512); +// if (channels != 1) +// pushDesc(memory::format_tag::nCdhw16c, jit_avx512); +// } else if (mayiuse(cpu::x64::avx2)) { +// pushDesc(memory::format_tag::ndhwc, jit_avx2); +// if (channels != 1) +// pushDesc(memory::format_tag::nCdhw8c, jit_avx2); +// } else { +// pushDesc(memory::format_tag::ndhwc, jit_sse42); +// if (channels != 1) +// pushDesc(memory::format_tag::nCdhw8c, jit_sse42); +// } +// } +// +// // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 
2.JIT kernel for f32 && avx2(gather).(with fuse) +// if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { +// pushDesc(MKLDNNMemory::GetPlainFormat(getParentEdgeAt(DATA_ID)->getDims()), jit_avx2); +// } +// } } void MKLDNNInterpolateNode::createPrimitive() { @@ -3175,34 +3176,35 @@ inline int MKLDNNInterpolateNode::nearestRound(float originCoord, bool isDownsam } bool MKLDNNInterpolateNode::canFuse(const MKLDNNNodePtr& node) const { - auto isOneOf = [&](EltwiseOpType alg, std::vector algs) { - for (auto a : algs) { - if (alg == a) { - return true; - } - } - return false; - }; - - if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { - return false; - } - - if (node->getType() == Quantize) { - auto* quantizeNode = dynamic_cast(node.get()); - if (quantizeNode == nullptr) - IE_THROW() << "Cannot get quantize node " << node->getName(); - return !quantizeNode->isBinarization(); - } else if (node->getType() == Eltwise) { - auto* eltwiseNode = dynamic_cast(node.get()); - if (eltwiseNode == nullptr) - IE_THROW() << "Cannot get eltwise node " << node->getName(); - return isOneOf(eltwiseNode->getOpType(), {Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, - Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) || - (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2); - } - - return false; + IE_THROW() << "[NM] Not implemented"; +// auto isOneOf = [&](EltwiseOpType alg, std::vector algs) { +// for (auto a : algs) { +// if (alg == a) { +// return true; +// } +// } +// return false; +// }; +// +// if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { +// return false; +// } +// +// if (node->getType() == Quantize) { +// auto* quantizeNode = dynamic_cast(node.get()); +// if (quantizeNode == nullptr) +// IE_THROW() << "Cannot get quantize node " << node->getName(); +// return !quantizeNode->isBinarization(); +// } else if (node->getType() == Eltwise) { +// auto* eltwiseNode = dynamic_cast(node.get()); +// if (eltwiseNode == nullptr) +// IE_THROW() << "Cannot get eltwise node " << node->getName(); +// return isOneOf(eltwiseNode->getOpType(), {Prelu, Relu, Gelu, Elu, Logistic, BoundedRelu, Clamp, +// Tanh, Swish, Hswish, Mish, Hsigmoid, Round, Linear, Abs, Square, Sqrt}) || +// (eltwiseNode->getOpType() == MulAdd && eltwiseNode->getCnnLayer()->blobs.size() == 2); +// } +// +// return false; } bool MKLDNNInterpolateNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h index 369765538dc236..61d276210fdc4e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h @@ -85,7 +85,7 @@ struct jit_uni_interpolate_kernel { class MKLDNNInterpolateNode : public MKLDNNNode { public: - MKLDNNInterpolateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNInterpolateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNInterpolateNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp index b51e437923eaed..959a172e2b7565 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp @@ 
-12,38 +12,38 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNLrnNode::MKLDNNLrnNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNLrnNode::MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) {} void MKLDNNLrnNode::getSupportedDescriptors() { - if (!descs.empty()) - return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); - if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) - precision = InferenceEngine::Precision::FP32; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto * lrnLayer = dynamic_cast(getCnnLayer().get()); - - if (lrnLayer == nullptr) - IE_THROW() << "Cannot convert lrn layer."; - - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - isAcrossMaps = lrnLayer->_isAcrossMaps; - alpha = lrnLayer->_alpha; - beta = lrnLayer->_beta; - size = lrnLayer->_size; - k = lrnLayer->_k; - - auto parentDims = getParentEdgeAt(0)->getDims(); - - for (auto format : getAvailableFormatsForDims(parentDims)) { - MKLDNNMemoryDesc in_candidate(parentDims, inputDataType, format); - createDescriptor({in_candidate}, {}); - } +// if (!descs.empty()) +// return; +// InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); +// if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) +// precision = InferenceEngine::Precision::FP32; +// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); +// auto * lrnLayer = dynamic_cast(getCnnLayer().get()); +// +// if (lrnLayer == nullptr) +// IE_THROW() << "Cannot convert lrn layer."; +// +// if (getParentEdges().size() != 1) +// IE_THROW() << "Incorrect number of input edges for layer " << getName(); +// if (getChildEdges().empty()) +// IE_THROW() << "Incorrect number of output edges for layer " << getName(); +// +// isAcrossMaps = lrnLayer->_isAcrossMaps; +// alpha = lrnLayer->_alpha; +// beta = lrnLayer->_beta; +// size = lrnLayer->_size; +// k = lrnLayer->_k; +// +// auto parentDims = getParentEdgeAt(0)->getDims(); +// +// for (auto format : getAvailableFormatsForDims(parentDims)) { +// MKLDNNMemoryDesc in_candidate(parentDims, inputDataType, format); +// createDescriptor({in_candidate}, {}); +// } } void MKLDNNLrnNode::createPrimitive() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h index 68dc087ab59413..4a125584c5ccca 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNLrnNode : public MKLDNNNode { public: - MKLDNNLrnNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNLrnNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp index 895e2ccfc53a80..953c33331eb8e9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp @@ -14,8 +14,8 @@ using namespace InferenceEngine; std::mutex MKLDNNMemoryNodeVirtualEdge::holderMutex; -MKLDNNMemoryOutputNode::MKLDNNMemoryOutputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) , MKLDNNMemoryNode(layer) { +MKLDNNMemoryOutputNode::MKLDNNMemoryOutputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) , MKLDNNMemoryNode(op) { if (created()) { holder = MKLDNNMemoryNodeVirtualEdge::registerOutput(this); } @@ -50,8 +50,8 @@ void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) { inputMemoryNode->storeState(srcMemory); } -MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNInputNode(layer, eng, cache), MKLDNNMemoryNode(layer), dataStore(new MKLDNNMemory{eng}) { +MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNInputNode(op, eng, cache), MKLDNNMemoryNode(op), dataStore(new MKLDNNMemory{eng}) { if (created()) { holder = MKLDNNMemoryNodeVirtualEdge::registerInput(this); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp index fbc560f23d3000..33033dc401b49d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.hpp @@ -18,10 +18,10 @@ class MKLDNNMemoryNode { std::string _id; public: explicit MKLDNNMemoryNode(std::string id) : _id(id) {} - explicit MKLDNNMemoryNode(InferenceEngine::CNNLayerPtr lp) { - if (lp->params.find("id") != lp->params.end()) { - _id = lp->GetParamAsString("id"); - } + explicit MKLDNNMemoryNode(const std::shared_ptr& op) { +// if (lp->params.find("id") != lp->params.end()) { +// _id = lp->GetParamAsString("id"); +// } } virtual ~MKLDNNMemoryNode() = default; std::string getId() { @@ -61,7 +61,7 @@ class MKLDNNMemoryNodeVirtualEdge { class MKLDNNMemoryOutputNode : public MKLDNNNode, public MKLDNNMemoryNode { public: - MKLDNNMemoryOutputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNMemoryOutputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNMemoryOutputNode() override; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -85,7 +85,7 @@ class MKLDNNMemoryOutputNode : public MKLDNNNode, public MKLDNNMemoryNode { class MKLDNNMemoryInputNode : public MKLDNNInputNode, public MKLDNNMemoryNode { public: - MKLDNNMemoryInputNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNMemoryInputNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNMemoryInputNode() override; bool created() const override { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index 55bdb61a2106ae..4ce8ee313db899 100644 ---
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -605,8 +605,8 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator }; ////////////////////////////////////////////////////////////////////////////////// -MKLDNNMVNNode::MKLDNNMVNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache), epsMode_(insideSqrt) {} +MKLDNNMVNNode::MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache), epsMode_(insideSqrt) {} void MKLDNNMVNNode::getSupportedDescriptors() { if (!descs.empty()) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h index b28daa3f7e3eaf..e3dd4a0cf8bdf9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.h @@ -73,7 +73,7 @@ struct jit_uni_mvn_kernel { class MKLDNNMVNNode : public MKLDNNNode { public: - MKLDNNMVNNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNMVNNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNMVNNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h index fae0bd915dafd9..8d6f33d477754a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.h @@ -75,7 +75,7 @@ struct jit_uni_normalize_kernel { class MKLDNNNormalizeNode : public MKLDNNNode { public: - MKLDNNNormalizeNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNNormalizeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNNormalizeNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h index 1c598e497d048f..f363750ab1420d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h @@ -12,7 +12,7 @@ namespace MKLDNNPlugin { class MKLDNNPadNode : public MKLDNNNode { public: - MKLDNNPadNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNPadNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNPadNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp index 507cfa1d2385a9..2755df53756a71 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.cpp @@ -3,7 +3,6 @@ // #include "mkldnn_permute_node.h" -#include #include #include #include @@ -136,97 +135,99 @@ struct jit_uni_permute_kernel_f32 : public jit_uni_permute_kernel, public jit_ge Xbyak::Xmm xmm = Xbyak::Xmm(0); }; -MKLDNNPermuteNode::MKLDNNPermuteNode(const InferenceEngine::CNNLayerPtr& 
layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} +MKLDNNPermuteNode::MKLDNNPermuteNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) {} void MKLDNNPermuteNode::getSupportedDescriptors() { - if (getParentEdges().size() != 1) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (!getChildEdges().size()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - auto& layer = getCnnLayer(); - if (!layer) { - IE_THROW() << "Cannot get CNNLayer."; - } - - order.clear(); - std::vector layerOrder = layer->GetParamAsInts("order"); - for (auto ord : layerOrder) - order.push_back(static_cast(ord)); - - if (order.empty()) { - size_t rank = getParentEdgeAt(0)->getDims().ndims(); - for (size_t i = 1; i <= rank; ++i) { - order.emplace_back(rank - i); - } - } + IE_THROW() << "[NM] Not implemented"; +// if (getParentEdges().size() != 1) +// IE_THROW() << "Incorrect number of input edges for layer " << getName(); +// if (!getChildEdges().size()) +// IE_THROW() << "Incorrect number of output edges for layer " << getName(); +// +// auto& layer = getCnnLayer(); +// if (!layer) { +// IE_THROW() << "Cannot get CNNLayer."; +// } +// +// order.clear(); +// std::vector layerOrder = layer->GetParamAsInts("order"); +// for (auto ord : layerOrder) +// order.push_back(static_cast(ord)); +// +// if (order.empty()) { +// size_t rank = getParentEdgeAt(0)->getDims().ndims(); +// for (size_t i = 1; i <= rank; ++i) { +// order.emplace_back(rank - i); +// } +// } } void MKLDNNPermuteNode::initSupportedPrimitiveDescriptors() { - if (!supportedPrimitiveDescriptors.empty()) - return; - - prec = getCnnLayer()->insData[0].lock()->getPrecision(); - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); - - InferenceEngine::LayerConfig config; - config.dynBatchSupport = true; - config.inConfs.resize(1); - config.outConfs.resize(1); - config.inConfs[0].inPlace = -1; - config.inConfs[0].constant = false; - config.outConfs[0].inPlace = -1; - config.outConfs[0].constant = false; - if (getParentEdgeAt(0)->getDims().ndims() == 4) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nchw}); - - auto srcDims = getParentEdgeAt(0)->getDims(); - if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw8c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw8c}); - } - - if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw16c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw16c}); - } - - if (prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nhwc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); - supportedPrimitiveDescriptors.push_back({config, 
impl_desc_type::unknown, memory::format_tag::nhwc}); - } - } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ncdhw}); - - auto srcDims = getParentEdgeAt(0)->getDims(); - if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw8c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw8c}); - } - - if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw16c); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw16c}); - } - - if (prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ndhwc); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ndhwc); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ndhwc}); - } - } else { - // general plain case - config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); - config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); - } + IE_THROW() << "[NM] Not implemented"; +// if (!supportedPrimitiveDescriptors.empty()) +// return; +// +// prec = getCnnLayer()->insData[0].lock()->getPrecision(); +// auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); +// auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(prec); +// +// InferenceEngine::LayerConfig config; +// config.dynBatchSupport = true; +// config.inConfs.resize(1); +// config.outConfs.resize(1); +// config.inConfs[0].inPlace = -1; +// config.inConfs[0].constant = false; +// config.outConfs[0].inPlace = -1; +// config.outConfs[0].constant = false; +// if (getParentEdgeAt(0)->getDims().ndims() == 4) { +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nchw); +// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nchw); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nchw}); +// +// auto srcDims = getParentEdgeAt(0)->getDims(); +// if (srcDims[1] % 8 == 0) { +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw8c); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw8c}); +// } +// +// if (srcDims[1] % 16 == 0) { +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nChw16c); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nChw16c}); +// } +// +// if (prec == Precision::I8 || prec == Precision::U8) { +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nhwc); +// config.outConfs[0].desc = 
MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::nhwc); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nhwc}); +// } +// } else if (getParentEdgeAt(0)->getDims().ndims() == 5) { +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ncdhw); +// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ncdhw); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ncdhw}); +// +// auto srcDims = getParentEdgeAt(0)->getDims(); +// if (srcDims[1] % 8 == 0) { +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw8c); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw8c}); +// } +// +// if (srcDims[1] % 16 == 0) { +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::nCdhw16c); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::nCdhw16c}); +// } +// +// if (prec == Precision::I8 || prec == Precision::U8) { +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::ndhwc); +// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType, memory::format_tag::ndhwc); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown, memory::format_tag::ndhwc}); +// } +// } else { +// // general plain case +// config.inConfs[0].desc = MKLDNNMemoryDesc(getParentEdgeAt(0)->getDims(), inputDataType); +// config.outConfs[0].desc = MKLDNNMemoryDesc(getChildEdgeAt(0)->getDims(), outputDataType); +// supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); +// } } void MKLDNNPermuteNode::createPrimitive() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h index 2a591980b23e94..fbdb19c0d4c76e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_permute_node.h @@ -45,7 +45,7 @@ struct jit_uni_permute_kernel { class MKLDNNPermuteNode : public MKLDNNNode { public: - MKLDNNPermuteNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNPermuteNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNPermuteNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index 0693453ed73356..794815544ed236 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -7,22 +7,58 @@ #include "mkldnn_quantize_node.h" #include "mkldnn_conv_node.h" #include "mkldnn_concat_node.h" -#include #include #include #include #include #include -#include #include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNPoolingNode::MKLDNNPoolingNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, - MKLDNNWeightsSharing::Ptr &cache) - : MKLDNNNode(layer, eng, cache) {} 
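The replacement constructor that follows copies the pooling attributes (strides, kernel, pads begin/end) from the ngraph operation element by element. As a possible simplification, the four copy loops could share one small conversion helper; a minimal sketch is given here, assuming the ngraph attribute containers (Strides, Shape, CoordinateDiff) iterate over integral values. The helper name and its placement are illustrative only and not part of this patch.

#include <cstddef>
#include <vector>

// Hypothetical helper (illustrative, not in this patch): copy any integer-valued
// ngraph attribute container into the std::vector<ptrdiff_t> members used by the
// CPU pooling node.
template <typename Container>
std::vector<std::ptrdiff_t> toPtrdiffVector(const Container& c) {
    return std::vector<std::ptrdiff_t>(c.begin(), c.end());
}

// Possible usage inside the constructor below (illustrative only):
//   stride         = toPtrdiffVector(maxPoolOp->get_strides());
//   kernel         = toPtrdiffVector(maxPoolOp->get_kernel());
//   data_pad_begin = toPtrdiffVector(maxPoolOp->get_pads_begin());
//   data_pad_end   = toPtrdiffVector(maxPoolOp->get_pads_end());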
+MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) + : MKLDNNNode(op, eng, cache) { + auto maxPoolOp = ngraph::as_type_ptr<ngraph::opset1::MaxPool>(op); + auto avgPoolOp = ngraph::as_type_ptr<ngraph::opset1::AvgPool>(op); + if (maxPoolOp) { + algorithm = PoolingMax; + exclude_pad = false; + + for (int i = 0; i < maxPoolOp->get_strides().size(); i++) { + stride.push_back(static_cast<ptrdiff_t>(maxPoolOp->get_strides()[i])); + } + for (int i = 0; i < maxPoolOp->get_kernel().size(); i++) { + kernel.push_back(static_cast<ptrdiff_t>(maxPoolOp->get_kernel()[i])); + } + for (int i = 0; i < maxPoolOp->get_pads_begin().size(); i++) { + data_pad_begin.push_back(static_cast<ptrdiff_t>(maxPoolOp->get_pads_begin()[i])); + } + for (int i = 0; i < maxPoolOp->get_pads_end().size(); i++) { + data_pad_end.push_back(static_cast<ptrdiff_t>(maxPoolOp->get_pads_end()[i])); + } + } else if (avgPoolOp) { + algorithm = PoolingAvg; + exclude_pad = avgPoolOp->get_exclude_pad(); + + for (int i = 0; i < avgPoolOp->get_strides().size(); i++) { + stride.push_back(static_cast<ptrdiff_t>(avgPoolOp->get_strides()[i])); + } + for (int i = 0; i < avgPoolOp->get_kernel().size(); i++) { + kernel.push_back(static_cast<ptrdiff_t>(avgPoolOp->get_kernel()[i])); + } + for (int i = 0; i < avgPoolOp->get_pads_begin().size(); i++) { + data_pad_begin.push_back(static_cast<ptrdiff_t>(avgPoolOp->get_pads_begin()[i])); + } + for (int i = 0; i < avgPoolOp->get_pads_end().size(); i++) { + data_pad_end.push_back(static_cast<ptrdiff_t>(avgPoolOp->get_pads_end()[i])); + } + } else { + IE_THROW(NotImplemented) + << "CPU Pooling node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); + } +} std::vector<memory::format_tag> MKLDNNPoolingNode::getAvailableFormatsForDims(const MKLDNNDims &dims) const { if (dims.ndims() == 0) @@ -44,49 +80,26 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { if (!descs.empty()) return; - auto * poolingLayer = dynamic_cast<PoolingLayer*>(getCnnLayer().get()); - if (poolingLayer == nullptr) - IE_THROW() << "Cannot convert pooling layer."; - if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - type = poolingLayer->_type; - exclude_pad = poolingLayer->_exclude_pad; - - inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); - outputPrecision = getCnnLayer()->outData[0]->getPrecision(); - // Dirty WA to support stat based quantization approach - if (this->getCnnLayer()->precision != Precision::I8 - && inputPrecision != Precision::BF16) { - if (type == PoolingLayer::MAX) { - // MKLDNN supports only equal precisions for input and output - outputPrecision = inputPrecision; - } else if (type == PoolingLayer::AVG) { - outputPrecision = Precision::FP32; - } - } - if (inputPrecision == Precision::BF16) { + inputPrecision = getOriginalInputPrecisions()[0]; + outputPrecision = getOriginalOutputPrecisions()[0]; + + // MKLDNN supports only equal precisions for input and output + if (one_of(inputPrecision, Precision::FP32, Precision::BF16)) { outputPrecision = inputPrecision; } if (!fusedWith.empty()) { - auto lastFusedLayer = fusedWith[fusedWith.size() - 1].get()->getCnnLayer(); - if (lastFusedLayer) { - outputPrecision = lastFusedLayer->outData[0]->getPrecision(); - } + outputPrecision = fusedWith.back()->getOriginalOutputPrecisions()[0]; } auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); -
invertVectorCopyUtoI(poolingLayer->_stride, stride); - invertVectorCopyUtoI(poolingLayer->_kernel, kernel); - auto allPads = getPaddings(*poolingLayer); - invertVectorCopyUtoI(allPads.begin, data_pad_begin); - invertVectorCopyUtoI(allPads.end, data_pad_end); effective_pad_begin = data_pad_begin; effective_pad_end.resize(data_pad_end.size()); @@ -152,8 +165,8 @@ void MKLDNNPoolingNode::createDescriptor(const std::vector orig_dims) { return memory::dims(orig_dims.begin(), orig_dims.end()); @@ -190,7 +201,7 @@ void MKLDNNPoolingNode::createDescriptor(const std::vector& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNPoolingNode() override = default; void createDescriptor(const std::vector& inputDesc, @@ -32,7 +32,6 @@ class MKLDNNPoolingNode : public MKLDNNNode { private: void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false); - InferenceEngine::PoolingLayer::PoolType type = InferenceEngine::PoolingLayer::MAX; bool exclude_pad = false; std::vector stride; std::vector kernel; @@ -52,8 +51,6 @@ class MKLDNNPoolingNode : public MKLDNNNode { InferenceEngine::Precision inputPrecision = InferenceEngine::Precision::FP32; InferenceEngine::Precision outputPrecision = InferenceEngine::Precision::FP32; - - std::vector PostOpsIntBlobMemory; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp index 6af113381ed362..e5fe74a95e88d7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.cpp @@ -4,7 +4,6 @@ #include "mkldnn_quantize_node.h" -#include #include #include #include @@ -817,275 +816,275 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ } }; -MKLDNNQuantizeNode::MKLDNNQuantizeNode(CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNQuantizeNode::MKLDNNQuantizeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) {} void MKLDNNQuantizeNode::init() { - auto* quantizeLayer = dynamic_cast(getCnnLayer().get()); - if (quantizeLayer == nullptr) - IE_THROW() << "Cannot convert Quantize layer " << getName(); - - levels = quantizeLayer->levels; - if (levels <= 1) - IE_THROW() << "Quantize layer " << getName() << " supports only parameter levels > 1"; - - if (getParentEdges().size() != 5) - IE_THROW() << "Incorrect number of input edges for layer " << getName(); - if (getChildEdges().empty()) - IE_THROW() << "Incorrect number of output edges for layer " << getName(); - - for (size_t i = 0; i < getParentEdges().size(); i++) { - if (getParentEdgesAtPort(i).size() != 1) - IE_THROW() << "Quantize layer " << getName() << " has unsupported number of parent edges at port " << i; - } - - auto initAxisIdx = [&](size_t edgeIdx) { - auto edge = getParentEdgesAtPort(edgeIdx)[0]; - - size_t axisIdx = 0; - int numberOfNonUnit = 0; - if (edge->getDims().ndims() > 0) { - if (edge->getDims()[0] > 1) { - numberOfNonUnit++; - } - } - - for (int i = 1; i < edge->getDims().ndims(); i++) { - if (edge->getDims()[i] > 1) { - axisIdx = i; - numberOfNonUnit++; - } - } - if (numberOfNonUnit > 1) { - IE_THROW() << "Quantize layer " << getName() << " supports only per-tensor and per-channel quantizations"; - } - - return axisIdx; - }; - - axis = getParentEdgesAtPort(0)[0]->getDims().ndims() == 1 ? 
0 : 1; - - std::set quantizationParamsAxisesIdxs; - std::set quantizationParamsAxisesSizes; - - auto inputLowAxis = initAxisIdx(1); - isInputLowBroadcasted = getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis] == 1; - if (!isInputLowBroadcasted) { - quantizationParamsAxisesIdxs.insert(inputLowAxis); - quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis]); - } - - auto inputHighAxis = initAxisIdx(2); - isInputHighBroadcasted = getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis] == 1; - if (!isInputHighBroadcasted) { - quantizationParamsAxisesIdxs.insert(inputHighAxis); - quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis]); - } - - auto outputLowAxis = initAxisIdx(3); - isOutputLowBroadcasted = getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis] == 1; - if (!isOutputLowBroadcasted) { - quantizationParamsAxisesIdxs.insert(outputLowAxis); - quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis]); - } - - auto outputHighAxis = initAxisIdx(4); - isOutputHighBroadcasted = getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis] == 1; - if (!isOutputHighBroadcasted) { - quantizationParamsAxisesIdxs.insert(outputHighAxis); - quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis]); - } - - if (quantizationParamsAxisesIdxs.size() > 1 || quantizationParamsAxisesSizes.size() > 1) - IE_THROW() << "Unsupported input sizes for Quantize layer with name " << getName(); - - if (quantizationParamsAxisesIdxs.size() == 1) { - axis = *quantizationParamsAxisesIdxs.begin(); - } - - auto inputLowAxisSize = getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis]; - auto inputHighAxisSize = getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis]; - auto outputLowAxisSize = getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis]; - auto outputHighAxisSize = getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis]; - - size_t axisRealSize = static_cast(getParentEdgesAtPort(0)[0]->getDims()[axis]); - size_t axisPaddedSize = static_cast(rnd_up(getParentEdgesAtPort(0)[0]->getDims()[axis], 16)); - - if (quantizationParamsAxisesSizes.size() == 1) { - if (*quantizationParamsAxisesSizes.begin() != axisRealSize) - IE_THROW() << "Unsupported input sizes for Quantize layer with name " << getName(); - } - - for (size_t i = 1; i < getParentEdges().size(); i++) { - if (!getParentEdgesAtPort(i)[0]->getParent()->isConstant()) - IE_THROW() << "Quantize layer with name " << getName() << " has non const input on " << i << " port"; - auto prec = getCnnLayer()->insData[i].lock()->getPrecision(); - if (prec != Precision::FP32) - IE_THROW() << "Quantize layer with name " << getName() << " has unsupported precision " << prec << " on " << i << " port"; - } - - auto inputLowBlob = dynamic_cast*>(getParentEdgesAtPort(1)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); - auto inputLowData = inputLowBlob->buffer().as(); - - auto inputHighBlob = dynamic_cast*>(getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); - auto inputHighData = inputHighBlob->buffer().as(); - - auto outputLowBlob = dynamic_cast*>(getParentEdgesAtPort(3)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); - auto outputLowData = outputLowBlob->buffer().as(); - - auto outputHighBlob = dynamic_cast*>(getParentEdgesAtPort(4)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); - auto outputHighData = outputHighBlob->buffer().as(); - - bool binarization = levels == 2; - - if (binarization) { - for (int i = 0; 
i < outputLowAxisSize; i++) { - if (outputLowData[i] != 1.f && outputLowData[i] != 0.f) { - binarization = false; - break; - } - } - - for (int i = 0; i < outputHighAxisSize; i++) { - if (outputHighData[i] != 1.f && outputHighData[i] != 0.f) { - binarization = false; - break; - } - } - - for (ptrdiff_t i = 0; i < std::max(inputLowAxisSize, inputHighAxisSize); i++) { - if (inputLowData[isInputLowBroadcasted ? 0 : i] != inputHighData[isInputHighBroadcasted ? 0 : i]) { - binarization = false; - break; - } - } - } - - if (binarization) { - quantizeOpType = QuantizeOpType::Binarization; - - binarizationThresholds.resize(axisPaddedSize); - binarizationOutputMask.resize(axisPaddedSize); - - for (int i = 0; i < axisRealSize; i++) { - binarizationThresholds[i] = inputLowData[isInputLowBroadcasted ? 0 : i]; - binarizationOutputMask[i] = outputHighData[isOutputHighBroadcasted ? 0 : i] == 1.f ? 0xffffffff : 0x00000000; - } - } else { - auto allElementsAreEqual = [&](const float* data, size_t size) { - if (size == 0) - return true; - - auto first = data[0]; - for (int i = 1; i < size; i++) { - if (data[i] != first) - return false; - } - - return true; - }; - - if (allElementsAreEqual(inputLowData, inputLowAxisSize)) { - inputLowAxisSize = 1; - isInputLowBroadcasted = true; - } - - if (allElementsAreEqual(inputHighData, inputHighAxisSize)) { - inputHighAxisSize = 1; - isInputHighBroadcasted = true; - } - - if (allElementsAreEqual(outputLowData, outputLowAxisSize)) { - outputLowAxisSize = 1; - isOutputLowBroadcasted = true; - } - - if (allElementsAreEqual(outputHighData, outputHighAxisSize)) { - outputHighAxisSize = 1; - isOutputHighBroadcasted = true; - } - - cropLow.resize(inputLowAxisSize); - cropHigh.resize(inputHighAxisSize); - inputScale.resize(std::max(inputLowAxisSize, inputHighAxisSize)); - inputShift.resize(std::max(inputLowAxisSize, inputHighAxisSize)); - outputScale.resize(std::max(outputLowAxisSize, outputHighAxisSize)); - outputShift.resize(outputLowAxisSize); - - bool quantizationOnly = true; - - for (int i = 0; i < cropLow.size(); i++) { - float il = inputLowData[isInputLowBroadcasted ? 0 : i]; - - cropLow[i] = il; - } - - for (int i = 0; i < cropHigh.size(); i++) { - float ih = inputHighData[isInputHighBroadcasted ? 0 : i]; - - cropHigh[i] = ih; - } - - for (int i = 0; i < inputScale.size(); i++) { - float il = inputLowData[isInputLowBroadcasted ? 0 : i]; - float ih = inputHighData[isInputHighBroadcasted ? 0 : i]; - -#if defined(VALIDATE_QUANTIZATION_RANGES) - if ((il == ih && levels != 2) || il > ih || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) { - IE_THROW() << "Quantize layer with name '" << getName() << "' has invalid input quantize ranges: " - << "inputLow = " << il << ", inputHigh = " << ih; - } -#endif - - inputScale[i] = (levels - 1) / (ih - il); - inputShift[i] = -il * (levels - 1) / (ih - il); - } - - for (int i = 0; i < outputScale.size(); i++) { - float ol = outputLowData[isOutputLowBroadcasted ? 0 : i]; - float oh = outputHighData[isOutputHighBroadcasted ? 
0 : i]; - -#if defined(VALIDATE_QUANTIZATION_RANGES) - if (std::isnan(ol) || std::isnan(oh) || std::isinf(ol) || std::isinf(oh)) { - IE_THROW() << "Quantize layer with name '" << getName() << "' has wrong output quantize ranges: " - << "outputLow = " << ol << ", outputHigh = " << oh; - } -#endif - - outputScale[i] = (oh - ol) / (levels - 1); - - if (outputScale[i] != 1.f) - quantizationOnly = false; - } - - for (int i = 0; i < outputShift.size(); i++) { - float ol = outputLowData[isOutputLowBroadcasted ? 0 : i]; - - outputShift[i] = ol; - - if (outputShift[i] != 0.f) - quantizationOnly = false; - } - - quantizeOpType = quantizationOnly ? QuantizeOpType::Quantization : QuantizeOpType::FakeQuantization; - } - - if (binarization) { - inputPrecision = Precision::FP32; - outputPrecision = Precision::BIN; - } else { - inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); - outputPrecision = getCnnLayer()->outData[0]->getPrecision(); - - if (inputPrecision != Precision::FP32 && inputPrecision != Precision::U8 && inputPrecision != Precision::I8) - inputPrecision = Precision::FP32; - - if (outputPrecision != Precision::FP32 && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) - outputPrecision = Precision::FP32; - } + IE_THROW() << "[NM] Not implemented"; +// auto* quantizeLayer = dynamic_cast(getCnnLayer().get()); +// if (quantizeLayer == nullptr) +// IE_THROW() << "Cannot convert Quantize layer " << getName(); +// +// levels = quantizeLayer->levels; +// if (levels <= 1) +// IE_THROW() << "Quantize layer " << getName() << " supports only parameter levels > 1"; +// +// if (getParentEdges().size() != 5) +// IE_THROW() << "Incorrect number of input edges for layer " << getName(); +// if (getChildEdges().empty()) +// IE_THROW() << "Incorrect number of output edges for layer " << getName(); +// +// for (size_t i = 0; i < getParentEdges().size(); i++) { +// if (getParentEdgesAtPort(i).size() != 1) +// IE_THROW() << "Quantize layer " << getName() << " has unsupported number of parent edges at port " << i; +// } +// +// auto initAxisIdx = [&](size_t edgeIdx) { +// auto edge = getParentEdgesAtPort(edgeIdx)[0]; +// +// size_t axisIdx = 0; +// int numberOfNonUnit = 0; +// if (edge->getDims().ndims() > 0) { +// if (edge->getDims()[0] > 1) { +// numberOfNonUnit++; +// } +// } +// +// for (int i = 1; i < edge->getDims().ndims(); i++) { +// if (edge->getDims()[i] > 1) { +// axisIdx = i; +// numberOfNonUnit++; +// } +// } +// if (numberOfNonUnit > 1) { +// IE_THROW() << "Quantize layer " << getName() << " supports only per-tensor and per-channel quantizations"; +// } +// +// return axisIdx; +// }; +// +// axis = getParentEdgesAtPort(0)[0]->getDims().ndims() == 1 ? 
0 : 1; +// +// std::set quantizationParamsAxisesIdxs; +// std::set quantizationParamsAxisesSizes; +// +// auto inputLowAxis = initAxisIdx(1); +// isInputLowBroadcasted = getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis] == 1; +// if (!isInputLowBroadcasted) { +// quantizationParamsAxisesIdxs.insert(inputLowAxis); +// quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis]); +// } +// +// auto inputHighAxis = initAxisIdx(2); +// isInputHighBroadcasted = getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis] == 1; +// if (!isInputHighBroadcasted) { +// quantizationParamsAxisesIdxs.insert(inputHighAxis); +// quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis]); +// } +// +// auto outputLowAxis = initAxisIdx(3); +// isOutputLowBroadcasted = getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis] == 1; +// if (!isOutputLowBroadcasted) { +// quantizationParamsAxisesIdxs.insert(outputLowAxis); +// quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis]); +// } +// +// auto outputHighAxis = initAxisIdx(4); +// isOutputHighBroadcasted = getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis] == 1; +// if (!isOutputHighBroadcasted) { +// quantizationParamsAxisesIdxs.insert(outputHighAxis); +// quantizationParamsAxisesSizes.insert(getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis]); +// } +// +// if (quantizationParamsAxisesIdxs.size() > 1 || quantizationParamsAxisesSizes.size() > 1) +// IE_THROW() << "Unsupported input sizes for Quantize layer with name " << getName(); +// +// if (quantizationParamsAxisesIdxs.size() == 1) { +// axis = *quantizationParamsAxisesIdxs.begin(); +// } +// +// auto inputLowAxisSize = getParentEdgesAtPort(1)[0]->getDims()[inputLowAxis]; +// auto inputHighAxisSize = getParentEdgesAtPort(2)[0]->getDims()[inputHighAxis]; +// auto outputLowAxisSize = getParentEdgesAtPort(3)[0]->getDims()[outputLowAxis]; +// auto outputHighAxisSize = getParentEdgesAtPort(4)[0]->getDims()[outputHighAxis]; +// +// size_t axisRealSize = static_cast(getParentEdgesAtPort(0)[0]->getDims()[axis]); +// size_t axisPaddedSize = static_cast(rnd_up(getParentEdgesAtPort(0)[0]->getDims()[axis], 16)); +// +// if (quantizationParamsAxisesSizes.size() == 1) { +// if (*quantizationParamsAxisesSizes.begin() != axisRealSize) +// IE_THROW() << "Unsupported input sizes for Quantize layer with name " << getName(); +// } +// +// for (size_t i = 1; i < getParentEdges().size(); i++) { +// if (!getParentEdgesAtPort(i)[0]->getParent()->isConstant()) +// IE_THROW() << "Quantize layer with name " << getName() << " has non const input on " << i << " port"; +// auto prec = getCnnLayer()->insData[i].lock()->getPrecision(); +// if (prec != Precision::FP32) +// IE_THROW() << "Quantize layer with name " << getName() << " has unsupported precision " << prec << " on " << i << " port"; +// } +// +// auto inputLowBlob = dynamic_cast*>(getParentEdgesAtPort(1)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); +// auto inputLowData = inputLowBlob->buffer().as(); +// +// auto inputHighBlob = dynamic_cast*>(getParentEdgesAtPort(2)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); +// auto inputHighData = inputHighBlob->buffer().as(); +// +// auto outputLowBlob = dynamic_cast*>(getParentEdgesAtPort(3)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); +// auto outputLowData = outputLowBlob->buffer().as(); +// +// auto outputHighBlob = 
dynamic_cast*>(getParentEdgesAtPort(4)[0]->getParent()->getCnnLayer()->blobs["custom"].get()); +// auto outputHighData = outputHighBlob->buffer().as(); +// +// bool binarization = levels == 2; +// +// if (binarization) { +// for (int i = 0; i < outputLowAxisSize; i++) { +// if (outputLowData[i] != 1.f && outputLowData[i] != 0.f) { +// binarization = false; +// break; +// } +// } +// +// for (int i = 0; i < outputHighAxisSize; i++) { +// if (outputHighData[i] != 1.f && outputHighData[i] != 0.f) { +// binarization = false; +// break; +// } +// } +// +// for (ptrdiff_t i = 0; i < std::max(inputLowAxisSize, inputHighAxisSize); i++) { +// if (inputLowData[isInputLowBroadcasted ? 0 : i] != inputHighData[isInputHighBroadcasted ? 0 : i]) { +// binarization = false; +// break; +// } +// } +// } +// +// if (binarization) { +// quantizeOpType = QuantizeOpType::Binarization; +// +// binarizationThresholds.resize(axisPaddedSize); +// binarizationOutputMask.resize(axisPaddedSize); +// +// for (int i = 0; i < axisRealSize; i++) { +// binarizationThresholds[i] = inputLowData[isInputLowBroadcasted ? 0 : i]; +// binarizationOutputMask[i] = outputHighData[isOutputHighBroadcasted ? 0 : i] == 1.f ? 0xffffffff : 0x00000000; +// } +// } else { +// auto allElementsAreEqual = [&](const float* data, size_t size) { +// if (size == 0) +// return true; +// +// auto first = data[0]; +// for (int i = 1; i < size; i++) { +// if (data[i] != first) +// return false; +// } +// +// return true; +// }; +// +// if (allElementsAreEqual(inputLowData, inputLowAxisSize)) { +// inputLowAxisSize = 1; +// isInputLowBroadcasted = true; +// } +// +// if (allElementsAreEqual(inputHighData, inputHighAxisSize)) { +// inputHighAxisSize = 1; +// isInputHighBroadcasted = true; +// } +// +// if (allElementsAreEqual(outputLowData, outputLowAxisSize)) { +// outputLowAxisSize = 1; +// isOutputLowBroadcasted = true; +// } +// +// if (allElementsAreEqual(outputHighData, outputHighAxisSize)) { +// outputHighAxisSize = 1; +// isOutputHighBroadcasted = true; +// } +// +// cropLow.resize(inputLowAxisSize); +// cropHigh.resize(inputHighAxisSize); +// inputScale.resize(std::max(inputLowAxisSize, inputHighAxisSize)); +// inputShift.resize(std::max(inputLowAxisSize, inputHighAxisSize)); +// outputScale.resize(std::max(outputLowAxisSize, outputHighAxisSize)); +// outputShift.resize(outputLowAxisSize); +// +// bool quantizationOnly = true; +// +// for (int i = 0; i < cropLow.size(); i++) { +// float il = inputLowData[isInputLowBroadcasted ? 0 : i]; +// +// cropLow[i] = il; +// } +// +// for (int i = 0; i < cropHigh.size(); i++) { +// float ih = inputHighData[isInputHighBroadcasted ? 0 : i]; +// +// cropHigh[i] = ih; +// } +// +// for (int i = 0; i < inputScale.size(); i++) { +// float il = inputLowData[isInputLowBroadcasted ? 0 : i]; +// float ih = inputHighData[isInputHighBroadcasted ? 0 : i]; +// +//#if defined(VALIDATE_QUANTIZATION_RANGES) +// if ((il == ih && levels != 2) || il > ih || std::isnan(il) || std::isnan(ih) || std::isinf(il) || std::isinf(ih)) { +// IE_THROW() << "Quantize layer with name '" << getName() << "' has invalid input quantize ranges: " +// << "inputLow = " << il << ", inputHigh = " << ih; +// } +//#endif +// +// inputScale[i] = (levels - 1) / (ih - il); +// inputShift[i] = -il * (levels - 1) / (ih - il); +// } +// +// for (int i = 0; i < outputScale.size(); i++) { +// float ol = outputLowData[isOutputLowBroadcasted ? 0 : i]; +// float oh = outputHighData[isOutputHighBroadcasted ? 
0 : i]; +// +//#if defined(VALIDATE_QUANTIZATION_RANGES) +// if (std::isnan(ol) || std::isnan(oh) || std::isinf(ol) || std::isinf(oh)) { +// IE_THROW() << "Quantize layer with name '" << getName() << "' has wrong output quantize ranges: " +// << "outputLow = " << ol << ", outputHigh = " << oh; +// } +//#endif +// +// outputScale[i] = (oh - ol) / (levels - 1); +// +// if (outputScale[i] != 1.f) +// quantizationOnly = false; +// } +// +// for (int i = 0; i < outputShift.size(); i++) { +// float ol = outputLowData[isOutputLowBroadcasted ? 0 : i]; +// +// outputShift[i] = ol; +// +// if (outputShift[i] != 0.f) +// quantizationOnly = false; +// } +// +// quantizeOpType = quantizationOnly ? QuantizeOpType::Quantization : QuantizeOpType::FakeQuantization; +// } +// +// if (binarization) { +// inputPrecision = Precision::FP32; +// outputPrecision = Precision::BIN; +// } else { +// inputPrecision = getCnnLayer()->insData[0].lock()->getPrecision(); +// outputPrecision = getCnnLayer()->outData[0]->getPrecision(); +// +// if (inputPrecision != Precision::FP32 && inputPrecision != Precision::U8 && inputPrecision != Precision::I8) +// inputPrecision = Precision::FP32; +// +// if (outputPrecision != Precision::FP32 && outputPrecision != Precision::U8 && outputPrecision != Precision::I8) +// outputPrecision = Precision::FP32; +// } } - std::vector MKLDNNQuantizeNode::getDataFormats() const { // Special case for first FQ in the network if (getParentEdgesAtPort(0)[0]->getDims()[getAxis()] == 3) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h index 234fd103d8ae56..44df8f74fba549 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_quantize_node.h @@ -70,7 +70,7 @@ struct jit_uni_quantize_kernel { class MKLDNNQuantizeNode : public MKLDNNNode { public: - MKLDNNQuantizeNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNQuantizeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNQuantizeNode() override = default; void initSupportedPrimitiveDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h index 1005cf58a043d3..a714e1e5cdc341 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h @@ -79,7 +79,7 @@ struct jit_uni_reduce_post_kernel { class MKLDNNReduceNode : public MKLDNNNode { public: - MKLDNNReduceNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNReduceNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNReduceNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp new file mode 100644 index 00000000000000..cfe0a3c55b45d2 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp @@ -0,0 +1,78 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "mkldnn_reference_node.h" +#include +#include +#include + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; 
+using namespace InferenceEngine::details; + +MKLDNNReferenceNode::MKLDNNReferenceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache), ngraphOp(op) { + setType(Reference); +} + +void MKLDNNReferenceNode::getSupportedDescriptors() {} + +void MKLDNNReferenceNode::initSupportedPrimitiveDescriptors() { + if (!supportedPrimitiveDescriptors.empty()) + return; + + InferenceEngine::LayerConfig config; + for (size_t i = 0; i < inDims.size(); i++) { + InferenceEngine::DataConfig dataConfig; + dataConfig.inPlace = -1; + dataConfig.constant = false; + + dataConfig.desc = MKLDNNMemoryDesc(inDims[i], + MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_input_element_type(i))), + MKLDNNMemory::GetPlainFormat(inDims[i])); + + config.inConfs.push_back(dataConfig); + } + + for (size_t i = 0; i < outDims.size(); i++) { + InferenceEngine::DataConfig dataConfig; + dataConfig.inPlace = -1; + dataConfig.constant = false; + + dataConfig.desc = MKLDNNMemoryDesc(outDims[i], + MKLDNNExtensionUtils::IEPrecisionToDataType(convertPrecision(ngraphOp->get_output_element_type(i))), + MKLDNNMemory::GetPlainFormat(outDims[i])); + + config.outConfs.push_back(dataConfig); + } + + supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref, memory::format_tag::undef}); +} + +void MKLDNNReferenceNode::createPrimitive() {} + +void MKLDNNReferenceNode::execute(mkldnn::stream strm) { + ngraph::HostTensorVector inputs; + for (size_t i = 0; i < inDims.size(); i++) { + void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); + inputs.push_back(std::make_shared<ngraph::HostTensor>(ngraphOp->get_input_element_type(i), ngraphOp->get_input_shape(i), srcDataPtr)); + } + + ngraph::HostTensorVector outputs; + for (size_t i = 0; i < outDims.size(); i++) { + void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetPtr(); + outputs.push_back(std::make_shared<ngraph::HostTensor>(ngraphOp->get_output_element_type(i), ngraphOp->get_output_shape(i), dstDataPtr)); + } + + if (!ngraphOp->evaluate(outputs, inputs)) { + IE_THROW(NotImplemented) + << "Cannot find reference implementation for node " << ngraphOp->get_type_name() << " with name '" << ngraphOp->get_friendly_name() << "'."; + } +} + +bool MKLDNNReferenceNode::created() const { + return getType() == Reference; +} +REG_MKLDNN_PRIM_FOR(MKLDNNReferenceNode, Reference); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h new file mode 100644 index 00000000000000..71c5ed08db8695 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h @@ -0,0 +1,29 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +//#include +#include +//#include + +namespace MKLDNNPlugin { + +class MKLDNNReferenceNode : public MKLDNNNode { +public: + MKLDNNReferenceNode(const std::shared_ptr<ngraph::Node>& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNReferenceNode() override = default; + + void getSupportedDescriptors() override; + void initSupportedPrimitiveDescriptors() override; + void createPrimitive() override; + void execute(mkldnn::stream strm) override; + bool created() const override; + +private: + const std::shared_ptr<ngraph::Node> ngraphOp; +}; + +} // namespace MKLDNNPlugin + diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index 3cbe7ef2aee8f8..865513ec393383
100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -16,10 +16,12 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNReorderNode::MKLDNNReorderNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : - MKLDNNNode(layer, eng, w_cache) { -} +MKLDNNReorderNode::MKLDNNReorderNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : + MKLDNNNode(op, eng, w_cache) {} +MKLDNNReorderNode::MKLDNNReorderNode(const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &w_cache) : + MKLDNNNode("Reorder", name, eng, w_cache) { +} void MKLDNNReorderNode::getSupportedDescriptors() { if (outDims.empty() && output.getLayout() != InferenceEngine::Layout::ANY) outDims.push_back(MKLDNNDims(output.getDims())); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h index 41fc72797819d3..85112c36875c95 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h @@ -14,7 +14,10 @@ namespace MKLDNNPlugin { class MKLDNNReorderNode : public MKLDNNNode { public: - MKLDNNReorderNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + // TODO [NM]: do we need ngraph::Node based ctor at all? + MKLDNNReorderNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNReorderNode(const std::string& name, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + ~MKLDNNReorderNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp index 4893546499aa21..d0c4e5f0ed53f4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp @@ -3,7 +3,6 @@ // #include "mkldnn_reshape_node.h" -#include #include #include #include @@ -12,8 +11,8 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNReshapeNode::MKLDNNReshapeNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNReshapeNode::MKLDNNReshapeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) {} void MKLDNNReshapeNode::getSupportedDescriptors() { if (getParentEdges().size() != 1 && getParentEdges().size() != 2) @@ -26,9 +25,9 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisions()[0]; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - precision = getCnnLayer()->outData[0]->getPrecision(); + precision = getOriginalOutputPrecisions()[0]; auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); // Current reshape implementation is simple memory reinterpret, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h index b4776fd7332457..12f7009b45392f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNReshapeNode : public MKLDNNNode { public: - MKLDNNReshapeNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNReshapeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNReshapeNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index cb16a3d242d8ee..d1cd3e9fd6cc2e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -14,7 +14,7 @@ namespace MKLDNNPlugin { class MKLDNNRNN : public MKLDNNNode { public: - MKLDNNRNN(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNRNN(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNRNN() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h index 5e1901644bc831..ded8d6e043909d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h @@ -20,7 +20,7 @@ enum ROIAlignOpType { class MKLDNNROIAlignNode : public MKLDNNNode { public: - MKLDNNROIAlignNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNROIAlignNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNROIAlignNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h index f3b19aa2328aa2..292d73ed0babd8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h @@ -65,7 +65,7 @@ struct jit_uni_roi_pooling_kernel { class MKLDNNROIPoolingNode : public MKLDNNNode { public: - MKLDNNROIPoolingNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNROIPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNROIPoolingNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h index 720e3234fad024..9a65a1291440fb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h @@ -20,7 +20,7 @@ enum class ScatterUpdateMode { class MKLDNNScatterUpdateNode : public MKLDNNNode { public: - MKLDNNScatterUpdateNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNScatterUpdateNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); 
~MKLDNNScatterUpdateNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp index a247b09f936eae..42cd241f1a9e92 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp @@ -4,7 +4,6 @@ #include "mkldnn_softmax_node.h" -#include #include #include #include @@ -13,33 +12,31 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -MKLDNNSoftMaxNode::MKLDNNSoftMaxNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNSoftMaxNode::MKLDNNSoftMaxNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) { + auto softmaxOp = ngraph::as_type_ptr(op); + if (softmaxOp) { + axis = softmaxOp->get_axis(); + } else { + IE_THROW(NotImplemented) + << "CPU Softmax node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); + } +} void MKLDNNSoftMaxNode::getSupportedDescriptors() { if (descs.size()) return; - InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision(); + InferenceEngine::Precision precision = getOriginalInputPrecisions()[0]; if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16) precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - SoftMaxLayer* smLayer = dynamic_cast(getCnnLayer().get()); - if (smLayer == nullptr) - IE_THROW() << "Cannot convert softmax layer."; - if (getParentEdges().size() != 1) IE_THROW() << "Incorrect number of input edges for layer " << getName(); if (!getChildEdges().size()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - axis = smLayer->axis; - - if (axis >= getParentEdgeAt(0)->getDims().ndims()) { - IE_THROW() << "Incorrect axis!"; - } - if (getParentEdgeAt(0)->getDims().ndims() == 3) { MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, memory::format_tag::abc); createDescriptor({in_candidate}, {}); @@ -92,7 +89,7 @@ void MKLDNNSoftMaxNode::createPrimitive() { } bool MKLDNNSoftMaxNode::created() const { - return getType() == SoftMax; + return getType() == Softmax; } void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { @@ -127,4 +124,4 @@ void MKLDNNSoftMaxNode::createDescriptor(const std::vector& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNSoftMaxNode() override = default; void initOptimalPrimitiveDescriptor() override; @@ -25,7 +25,7 @@ class MKLDNNSoftMaxNode : public MKLDNNNode { bool created() const override; private: - int axis = 0; + size_t axis = 0; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp index 1336ffd16f22db..230eb062472679 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp @@ -4,7 +4,6 @@ #include "mkldnn_split_node.h" #include "common/cpu_memcpy.h" -#include #include #include #include @@ -57,176 +56,178 @@ static TensorDesc makeChannelBlockedTensorDesc(const Precision& precision, const return TensorDesc(precision, srcDims, 
{blkDims, order}); } -MKLDNNSplitNode::MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(layer, eng, cache) {} +MKLDNNSplitNode::MKLDNNSplitNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : + MKLDNNNode(op, eng, cache) {} void MKLDNNSplitNode::getSupportedDescriptors() { - auto splitLayer = dynamic_cast(getCnnLayer().get()); - - if (splitLayer == nullptr) - THROW_ERROR << "can not convert from CNN layer."; - - if (getParentEdges().size() != 1) - THROW_ERROR << "has incorrect number of input nodes."; - if (getChildEdges().empty()) - THROW_ERROR << "has incorrect number of output nodes."; - - axis = splitLayer->_axis; - if (axis >= getParentEdgeAt(0)->getDims().ndims()) - THROW_ERROR << "has invalid value of axis parameter."; + THROW_IE_EXCEPTION << "[NM] Not implemented"; +// auto splitLayer = dynamic_cast(getCnnLayer().get()); +// +// if (splitLayer == nullptr) +// THROW_ERROR << "can not convert from CNN layer."; +// +// if (getParentEdges().size() != 1) +// THROW_ERROR << "has incorrect number of input nodes."; +// if (getChildEdges().empty()) +// THROW_ERROR << "has incorrect number of output nodes."; +// +// axis = splitLayer->_axis; +// if (axis >= getParentEdgeAt(0)->getDims().ndims()) +// THROW_ERROR << "has invalid value of axis parameter."; } void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { - using TensorDescFactory = std::function; - constexpr size_t channelsPos = 1lu; - - if (!supportedPrimitiveDescriptors.empty()) - return; - - if (getCnnLayer()->insData.empty()) { - THROW_ERROR << "has an empty input in the CNN layer"; - } - - auto inpData = getCnnLayer()->insData[0].lock(); - if (!inpData) { - THROW_ERROR << "input data is empty"; - } - - auto srcDims = getParentEdgeAt(0)->getDims(); - auto axis_size = 0; - auto dstFirstDims = getChildEdgeAt(0)->getDims(); - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; - if (dstFirstDims.ndims() != o_Dims.ndims()) { - THROW_ERROR << "only supports output blobs with equal number of dimensions"; - } - - axis_size += o_Dims[axis]; - for (size_t j = 0; j < dstFirstDims.ndims(); j++) { - if (j == axis) - continue; - if (o_Dims[j] != dstFirstDims[j]) - THROW_ERROR << "has incorrect output dimensions"; - } - } - dstFirstDims[axis] = axis_size; - if (dstFirstDims.size() != srcDims.size()) - THROW_ERROR << "sizes of input blob and sum of output blobs are not equal."; - - - InferenceEngine::Precision inpPrecision = inpData->getPrecision(); - auto outPrecision = inpPrecision; // the split layer doesn't convert precisions - - // make primitive descriptor factory function for different configurations - bool dynBatchSupport = true; - if (axis < 1) { - dynBatchSupport = false; - } - auto makePdInfo = [dynBatchSupport](TensorDescFactory getTensorDesc, const Precision& precision, const MKLDNNDims& srcDims, - const std::vector& outDims, impl_desc_type type) -> PrimitiveDescInfo { - InferenceEngine::LayerConfig config; - - config.dynBatchSupport = dynBatchSupport; - config.inConfs.resize(1); - config.inConfs[0].inPlace = -1; - config.inConfs[0].constant = false; - config.inConfs[0].desc = getTensorDesc(precision, srcDims.ToSizeVector()); - config.outConfs.resize(outDims.size()); - - std::vector outFormats; - - for (size_t i = 0; i < outDims.size(); i++) { - auto o_Dims = outDims[i]; - - config.outConfs[i].inPlace = -1; - config.outConfs[i].constant = false; - config.outConfs[i].desc = 
getTensorDesc(precision, o_Dims.ToSizeVector()); - outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); - } - return {config, type, outFormats}; - }; - - //Set plain format - supportedPrimitiveDescriptors.push_back(makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref)); - - //Set per channel format. - supportedPrimitiveDescriptors.push_back(makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref)); - - //Support channel blocked format - std::vector blockedPdIndexes; - if (srcDims.ndims() > channelsPos) { - for (size_t sizeS : {8lu, 16lu}) { - SizeVector blkDims = srcDims.ToSizeVector(); - if (blkDims[channelsPos] % sizeS) - continue; - - bool blocked = true; - for (size_t i = 0; i < outDims.size(); i++) { - if (outDims[i].ToSizeVector()[channelsPos] % sizeS) { - blocked = false; - break; - } - } - if (blocked) { - using std::placeholders::_1; - using std::placeholders::_2; - supportedPrimitiveDescriptors.push_back(makePdInfo(std::bind(&makeChannelBlockedTensorDesc, _1, _2, sizeS), - inpPrecision, srcDims, outDims, impl_desc_type::ref)); - blockedPdIndexes.push_back(supportedPrimitiveDescriptors.size() - 1); - } - } - } - - // Optimized inplace case - std::vector pdIndexesToReuse(1, 0); // at least the first plain layout can be optimized inplace. - if (axis < 2) { - pdIndexesToReuse.insert(pdIndexesToReuse.end(), blockedPdIndexes.begin(), blockedPdIndexes.end()); - } - - for (auto refPdIndex : pdIndexesToReuse) { - const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); - auto config = refConfig; - - const auto& order = refConfig.inConfs[0].desc.getBlockingDesc().getOrder(); - const auto& blkDims = refConfig.inConfs[0].desc.getBlockingDesc().getBlockDims(); - auto numOfDim = blkDims.size(); - - std::vector outFormats; - SizeVector offsets(numOfDim, 0lu); - SizeVector strides(numOfDim); - strides.back() = 1lu; - size_t offset = (std::numeric_limits::max)(); - - for (size_t i = 2; i <= numOfDim; i++) { - if (numOfDim - i < axis) { - strides[numOfDim - i] = (std::numeric_limits::max)(); - } else { - strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1]; - } - } - - config.inConfs[0].desc = TensorDesc(inpPrecision, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); - - for (size_t i = 0; i < outDims.size(); i++) { - const auto& outBlkDims = refConfig.outConfs[i].desc.getBlockingDesc().getBlockDims(); - const auto& dims = refConfig.outConfs[i].desc.getDims(); - - config.outConfs[i].inPlace = 0; - config.outConfs[i].desc = TensorDesc(outPrecision, dims, {outBlkDims, order, offset, offsets, strides}); - outFormats.emplace_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); - } - supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats); - } - - // Special nspc -> ncsp case when splitting channels - if (axis == 1 && (dstFirstDims.ndims() == 4 || dstFirstDims.ndims() == 5)) { - auto plain = makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref); - auto perChannel = makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref); - - plain.getConfig().inConfs[0].desc = perChannel.getConfig().inConfs[0].desc; - - supportedPrimitiveDescriptors.push_back(plain); - } + THROW_IE_EXCEPTION << "[NM] Not implemented"; +// using TensorDescFactory = std::function; +// constexpr size_t channelsPos = 1lu; +// +// if (!supportedPrimitiveDescriptors.empty()) +// 
return; +// +// if (getCnnLayer()->insData.empty()) { +// THROW_ERROR << "has an empty input in the CNN layer"; +// } +// +// auto inpData = getCnnLayer()->insData[0].lock(); +// if (!inpData) { +// THROW_ERROR << "input data is empty"; +// } +// +// auto srcDims = getParentEdgeAt(0)->getDims(); +// auto axis_size = 0; +// auto dstFirstDims = getChildEdgeAt(0)->getDims(); +// for (size_t i = 0; i < outDims.size(); i++) { +// auto o_Dims = outDims[i]; +// if (dstFirstDims.ndims() != o_Dims.ndims()) { +// THROW_ERROR << "only supports output blobs with equal number of dimensions"; +// } +// +// axis_size += o_Dims[axis]; +// for (size_t j = 0; j < dstFirstDims.ndims(); j++) { +// if (j == axis) +// continue; +// if (o_Dims[j] != dstFirstDims[j]) +// THROW_ERROR << "has incorrect output dimensions"; +// } +// } +// dstFirstDims[axis] = axis_size; +// if (dstFirstDims.size() != srcDims.size()) +// THROW_ERROR << "sizes of input blob and sum of output blobs are not equal."; +// +// +// InferenceEngine::Precision inpPrecision = inpData->getPrecision(); +// auto outPrecision = inpPrecision; // the split layer doesn't convert precisions +// +// // make primitive descriptor factory function for different configurations +// bool dynBatchSupport = true; +// if (axis < 1) { +// dynBatchSupport = false; +// } +// auto makePdInfo = [dynBatchSupport](TensorDescFactory getTensorDesc, const Precision& precision, const MKLDNNDims& srcDims, +// const std::vector& outDims, impl_desc_type type) -> PrimitiveDescInfo { +// InferenceEngine::LayerConfig config; +// +// config.dynBatchSupport = dynBatchSupport; +// config.inConfs.resize(1); +// config.inConfs[0].inPlace = -1; +// config.inConfs[0].constant = false; +// config.inConfs[0].desc = getTensorDesc(precision, srcDims.ToSizeVector()); +// config.outConfs.resize(outDims.size()); +// +// std::vector outFormats; +// +// for (size_t i = 0; i < outDims.size(); i++) { +// auto o_Dims = outDims[i]; +// +// config.outConfs[i].inPlace = -1; +// config.outConfs[i].constant = false; +// config.outConfs[i].desc = getTensorDesc(precision, o_Dims.ToSizeVector()); +// outFormats.push_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); +// } +// return {config, type, outFormats}; +// }; +// +// //Set plain format +// supportedPrimitiveDescriptors.push_back(makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref)); +// +// //Set per channel format. +// supportedPrimitiveDescriptors.push_back(makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref)); +// +// //Support channel blocked format +// std::vector blockedPdIndexes; +// if (srcDims.ndims() > channelsPos) { +// for (size_t sizeS : {8lu, 16lu}) { +// SizeVector blkDims = srcDims.ToSizeVector(); +// if (blkDims[channelsPos] % sizeS) +// continue; +// +// bool blocked = true; +// for (size_t i = 0; i < outDims.size(); i++) { +// if (outDims[i].ToSizeVector()[channelsPos] % sizeS) { +// blocked = false; +// break; +// } +// } +// if (blocked) { +// using std::placeholders::_1; +// using std::placeholders::_2; +// supportedPrimitiveDescriptors.push_back(makePdInfo(std::bind(&makeChannelBlockedTensorDesc, _1, _2, sizeS), +// inpPrecision, srcDims, outDims, impl_desc_type::ref)); +// blockedPdIndexes.push_back(supportedPrimitiveDescriptors.size() - 1); +// } +// } +// } +// +// // Optimized inplace case +// std::vector pdIndexesToReuse(1, 0); // at least the first plain layout can be optimized inplace. 
+// if (axis < 2) { +// pdIndexesToReuse.insert(pdIndexesToReuse.end(), blockedPdIndexes.begin(), blockedPdIndexes.end()); +// } +// +// for (auto refPdIndex : pdIndexesToReuse) { +// const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); +// auto config = refConfig; +// +// const auto& order = refConfig.inConfs[0].desc.getBlockingDesc().getOrder(); +// const auto& blkDims = refConfig.inConfs[0].desc.getBlockingDesc().getBlockDims(); +// auto numOfDim = blkDims.size(); +// +// std::vector outFormats; +// SizeVector offsets(numOfDim, 0lu); +// SizeVector strides(numOfDim); +// strides.back() = 1lu; +// size_t offset = (std::numeric_limits::max)(); +// +// for (size_t i = 2; i <= numOfDim; i++) { +// if (numOfDim - i < axis) { +// strides[numOfDim - i] = (std::numeric_limits::max)(); +// } else { +// strides[numOfDim - i] = strides[numOfDim - i + 1] * blkDims[numOfDim - i + 1]; +// } +// } +// +// config.inConfs[0].desc = TensorDesc(inpPrecision, srcDims.ToSizeVector(), {blkDims, order, offset, offsets, strides}); +// +// for (size_t i = 0; i < outDims.size(); i++) { +// const auto& outBlkDims = refConfig.outConfs[i].desc.getBlockingDesc().getBlockDims(); +// const auto& dims = refConfig.outConfs[i].desc.getDims(); +// +// config.outConfs[i].inPlace = 0; +// config.outConfs[i].desc = TensorDesc(outPrecision, dims, {outBlkDims, order, offset, offsets, strides}); +// outFormats.emplace_back(MKLDNNMemoryDesc(config.outConfs[i].desc).getFormat()); +// } +// supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown, outFormats); +// } +// +// // Special nspc -> ncsp case when splitting channels +// if (axis == 1 && (dstFirstDims.ndims() == 4 || dstFirstDims.ndims() == 5)) { +// auto plain = makePdInfo(&makePlainTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref); +// auto perChannel = makePdInfo(&makePerChannelTensorDesc, inpPrecision, srcDims, outDims, impl_desc_type::ref); +// +// plain.getConfig().inConfs[0].desc = perChannel.getConfig().inConfs[0].desc; +// +// supportedPrimitiveDescriptors.push_back(plain); +// } } void MKLDNNSplitNode::createPrimitive() { @@ -299,66 +300,67 @@ bool MKLDNNSplitNode::isOptimized() { } void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { - if (!isOptimized()) { - MKLDNNNode::initOptimalPrimitiveDescriptor(); - return; - } - - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - THROW_ERROR << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - if (isInitConfig(config)) - return; - - for (size_t i = 0; i < config.inConfs.size(); i++) { - if (config.inConfs[i].desc.getLayout() == InferenceEngine::Layout::ANY || - !isUninitTensorDesc(config.inConfs[i].desc)) - continue; - - int num = getParentEdgeAt(i)->getOutputNum(); - if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) { - if (num >= 0) { - if (isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && - getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].inPlace >= 0) - getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor(); - if (!isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && - MKLDNNExtensionUtils::initTensorsAreEqual( - getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc, - config.inConfs[i].desc)) { - config.inConfs[i].desc = 
getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc; - continue; - } - } - } - config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(), - config.inConfs[i].desc.getDims(), { - config.inConfs[i].desc.getBlockingDesc().getBlockDims(), - config.inConfs[i].desc.getBlockingDesc().getOrder() - }); - } - const auto& cnnLayer = getCnnLayer(); - if (!cnnLayer) - THROW_ERROR << "cannot be created without CNNLayer!"; - if (config.outConfs.size() != outDims.size()) - THROW_ERROR << "has invalid config"; - size_t offset = 0; - for (size_t i = 0; i < cnnLayer->outData.size(); i++) { - config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), - config.outConfs[i].desc.getDims(), { - config.outConfs[i].desc.getBlockingDesc().getBlockDims(), - config.outConfs[i].desc.getBlockingDesc().getOrder(), - config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset, - config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(), - config.inConfs[0].desc.getBlockingDesc().getStrides() - }); - size_t axisSize = 1; - for (size_t j = axis; j < config.outConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { - axisSize *= config.outConfs[i].desc.getBlockingDesc().getBlockDims()[j]; - } - offset += axisSize; - } - initDescriptor(config); + THROW_IE_EXCEPTION << "[NM] Not implemented"; +// if (!isOptimized()) { +// MKLDNNNode::initOptimalPrimitiveDescriptor(); +// return; +// } +// +// auto selected_pd = getSelectedPrimitiveDescriptor(); +// if (selected_pd == nullptr) +// THROW_ERROR << "Preferable primitive descriptor is not set."; +// auto config = selected_pd->getConfig(); +// if (isInitConfig(config)) +// return; +// +// for (size_t i = 0; i < config.inConfs.size(); i++) { +// if (config.inConfs[i].desc.getLayout() == InferenceEngine::Layout::ANY || +// !isUninitTensorDesc(config.inConfs[i].desc)) +// continue; +// +// int num = getParentEdgeAt(i)->getOutputNum(); +// if (getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()) { +// if (num >= 0) { +// if (isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && +// getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].inPlace >= 0) +// getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor(); +// if (!isUninitTensorDesc(getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc) && +// MKLDNNExtensionUtils::initTensorsAreEqual( +// getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc, +// config.inConfs[i].desc)) { +// config.inConfs[i].desc = getParentEdgeAt(i)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num].desc; +// continue; +// } +// } +// } +// config.inConfs[i].desc = InferenceEngine::TensorDesc(config.inConfs[i].desc.getPrecision(), +// config.inConfs[i].desc.getDims(), { +// config.inConfs[i].desc.getBlockingDesc().getBlockDims(), +// config.inConfs[i].desc.getBlockingDesc().getOrder() +// }); +// } +// const auto& cnnLayer = getCnnLayer(); +// if (!cnnLayer) +// THROW_ERROR << "cannot be created without CNNLayer!"; +// if (config.outConfs.size() != outDims.size()) +// THROW_ERROR << "has invalid config"; +// size_t offset = 0; +// for (size_t i = 0; i < cnnLayer->outData.size(); i++) { +// config.outConfs[i].desc = InferenceEngine::TensorDesc(config.outConfs[i].desc.getPrecision(), +// 
config.outConfs[i].desc.getDims(), { +// config.outConfs[i].desc.getBlockingDesc().getBlockDims(), +// config.outConfs[i].desc.getBlockingDesc().getOrder(), +// config.inConfs[0].desc.getBlockingDesc().getOffsetPadding() + offset, +// config.inConfs[0].desc.getBlockingDesc().getOffsetPaddingToData(), +// config.inConfs[0].desc.getBlockingDesc().getStrides() +// }); +// size_t axisSize = 1; +// for (size_t j = axis; j < config.outConfs[i].desc.getBlockingDesc().getBlockDims().size(); j++) { +// axisSize *= config.outConfs[i].desc.getBlockingDesc().getBlockDims()[j]; +// } +// offset += axisSize; +// } +// initDescriptor(config); } void MKLDNNSplitNode::selectOptimalPrimitiveDescriptor() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h index af546860f39726..2743ec2a65997f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.h @@ -12,7 +12,7 @@ namespace MKLDNNPlugin { class MKLDNNSplitNode : public MKLDNNNode { public: - MKLDNNSplitNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNSplitNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNSplitNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h index a1ba870044d8b8..34821531659798 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.h @@ -45,7 +45,7 @@ class PortChecker { class MKLDNNTensorIteratorNode : public MKLDNNNode { public: - MKLDNNTensorIteratorNode(InferenceEngine::CNNLayerPtr layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNTensorIteratorNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNTensorIteratorNode() override = default; void initSupportedPrimitiveDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h index e3247c2bfbeabe..12631c43c6947a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h @@ -12,7 +12,7 @@ namespace MKLDNNPlugin { class MKLDNNTileNode : public MKLDNNNode { public: - MKLDNNTileNode(const InferenceEngine::CNNLayerPtr& layer, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); + MKLDNNTileNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); ~MKLDNNTileNode() override = default; void getSupportedDescriptors() override; diff --git a/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp new file mode 100644 index 00000000000000..05ce831fb11616 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp @@ -0,0 +1,30 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace MKLDNNPlugin { + +inline std::string getRTInfoValue(const std::map>& rtInfo, std::string paramName) { + auto it = rtInfo.find(paramName); + if (it != rtInfo.end()) { + auto value = 
std::dynamic_pointer_cast<ngraph::VariantImpl<std::string>>(it->second);
+        return value->get();
+    } else {
+        return "";
+    }
+};
+
+template <typename T>
+inline const std::shared_ptr<T> getNgraphOpAs(const std::shared_ptr<ngraph::Node>& op) {
+    auto typedOp = ngraph::as_type_ptr<T>(op);
+    if (!typedOp)
+        THROW_IE_EXCEPTION << "Can't get ngraph node " << op->get_type_name() << " with name " << op->get_friendly_name();
+    return typedOp;
+}
+
+} // namespace MKLDNNPlugin
\ No newline at end of file
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
index 0298734feaf9bd..0607b056ce6499 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp
@@ -21,8 +21,6 @@ std::vector<std::string> disabledTestPatterns() {
         // TODO: Issue 33886
         R"(.*(QuantGroupConv2D).*)",
         R"(.*(QuantGroupConv3D).*)",
-        // TODO: failed to downgrade to opset v0 in interpreter backend
-        R"(.*Gather.*axis=-1.*)",
         // TODO: Issue: 34518
         R"(.*RangeLayerTest.*)",
         R"(.*(RangeAddSubgraphTest).*Start=1.2.*Stop=(5.2|-5.2).*Step=(0.1|-0.1).*netPRC=FP16.*)",
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp
index 7e19ba866a592b..6e801aeb91fc57 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/single_layer_tests/pooling.cpp
@@ -92,17 +92,17 @@ const auto avgPoolExplicitPadCeilRoundingParams = ::testing::Combine(
         ::testing::Values(true, false)
 );
 
-INSTANTIATE_TEST_CASE_P(smoke_AvgPool_ExplicitPad_CeilRounding, PoolingLayerTest,
-                        ::testing::Combine(
-                            avgPoolExplicitPadCeilRoundingParams,
-                            ::testing::ValuesIn(netPrecisions),
-                            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
-                            ::testing::Values(InferenceEngine::Layout::ANY),
-                            ::testing::Values(InferenceEngine::Layout::ANY),
-                            ::testing::Values(std::vector<size_t>({1, 3, 30, 30})),
-                            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
-                        PoolingLayerTest::getTestCaseName);
+//INSTANTIATE_TEST_CASE_P(smoke_AvgPool_ExplicitPad_CeilRounding, PoolingLayerTest,
+//                        ::testing::Combine(
+//                            avgPoolExplicitPadCeilRoundingParams,
+//                            ::testing::ValuesIn(netPrecisions),
+//                            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+//                            ::testing::Values(InferenceEngine::Precision::UNSPECIFIED),
+//                            ::testing::Values(InferenceEngine::Layout::ANY),
+//                            ::testing::Values(InferenceEngine::Layout::ANY),
+//                            ::testing::Values(std::vector<size_t>({1, 3, 30, 30})),
+//                            ::testing::Values(CommonTestUtils::DEVICE_GPU)),
+//                        PoolingLayerTest::getTestCaseName);
 
 /* +========== Explicit Pad Floor Rounding ========== */
 const auto avgPoolExplicitPadFloorRoundingParams = ::testing::Combine(
diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp
index a6a3b737cde53b..0b9f3e881fe486 100644
--- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp
+++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/layers/internal/graph_leaks_test.cpp
@@ -254,7 +254,7 @@ TEST_F(MKLDNNGraphLeaksTests, MKLDNN_not_release_outputs_fp32) {
     InferenceEngine::IExecutableNetworkInternal::Ptr exeNetwork1;
     ASSERT_NO_THROW(exeNetwork1 = score_engine->LoadNetwork(network, {}));
 
-    size_t modified_outputs_size = getGraph(exeNetwork1).GetOutputNodes().size();
+    size_t modified_outputs_size = getGraph(exeNetwork1).GetOutputNodesMap().size();
 
     InferenceEngine::CNNNetwork network2;
     ASSERT_NO_THROW(network2 = core.ReadNetwork(model, weights_ptr));
@@ -263,7 +263,7 @@ TEST_F(MKLDNNGraphLeaksTests, MKLDNN_not_release_outputs_fp32) {
     InferenceEngine::IExecutableNetworkInternal::Ptr exeNetwork2;
     ASSERT_NO_THROW(exeNetwork2 = score_engine->LoadNetwork(network2, {}));
 
-    size_t original_outputs_size = getGraph(exeNetwork2).GetOutputNodes().size();
+    size_t original_outputs_size = getGraph(exeNetwork2).GetOutputNodesMap().size();
 
     ASSERT_NE(modified_outputs_size, original_outputs_size);
     ASSERT_EQ(1, original_outputs_size);
diff --git a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp
index 7a341f15f2263c..f6d0de68f0ccdc 100644
--- a/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp
+++ b/inference-engine/tests_deprecated/unit/engines/mkldnn/graph/test_graph.hpp
@@ -87,8 +87,8 @@ class MKLDNNGraphTestClass: public MKLDNNPlugin::MKLDNNGraph {
     void PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in, int batch) {
         if (!IsReady()) IE_THROW()<< "Wrong state. Topology not ready.";
-        auto input = inputNodes.find(name);
-        if (input != inputNodes.end()) {
+        auto input = inputNodesMap.find(name);
+        if (input != inputNodesMap.end()) {
             MKLDNNPlugin::MKLDNNDims outDims;
             if(input->second->getChildEdgeAt(0)->getDims().ndims() == 0 )
                 outDims = MKLDNNPlugin::MKLDNNDims(InferenceEngine::SizeVector(1,1));