From cef701b2dd29ed47858e565e36818bd950cc0622 Mon Sep 17 00:00:00 2001 From: Maxim Andronov Date: Thu, 19 Aug 2021 16:47:04 +0300 Subject: [PATCH] New descriptor hierarchy (#20) * New desc hierarchy started + conv updated * cleanup * cpplint * fix * cleanup * started reimplement via oneDnnDesc * small fixes * fixes + conv passed * moved desc to separate folder * fixes * oneDnn desc compare fix * review fixes * caching * code cleanup * clean up memory interface * getPaddedElementsCount added * review fixes * rename mkldnn_desc -> onednn_desc * applied comments for main PR * applied comments for new_desc_hierarchy * applied comments after second review * applied comments after third review * rename applyUndefinedOffset -> cloneWithUndefStridesAndOffset * applied comments fourth review * getPaddedElementsCount restore for blk desc * small fixes * restore unit test * applied comments fifth review * simple getPaddedElementsCount --- .../src/mkldnn_plugin/CMakeLists.txt | 101 +- .../mkldnn_plugin/cpu_memory_desc_utils.cpp | 285 --- .../src/mkldnn_plugin/cpu_memory_desc_utils.h | 95 - .../src/mkldnn_plugin/cpu_shape.cpp | 3 +- .../src/mkldnn_plugin/cpu_types.h | 5 + .../memory_desc/blocked_memory_desc.cpp | 34 + .../memory_desc/blocked_memory_desc.h | 83 + .../cpu_blocked_memory_desc.cpp | 118 +- .../cpu_blocked_memory_desc.h | 45 +- .../{ => memory_desc}/cpu_memory_desc.h | 45 +- .../memory_desc/cpu_memory_desc_utils.cpp | 138 ++ .../memory_desc/cpu_memory_desc_utils.h | 100 + .../memory_desc/dnnl_blocked_memory_desc.cpp | 805 ++++++++ .../memory_desc/dnnl_blocked_memory_desc.h | 74 + .../memory_desc/dnnl_memory_desc.cpp | 79 + .../memory_desc/dnnl_memory_desc.h | 69 + .../src/mkldnn_plugin/mkldnn_edge.h | 4 +- .../mkldnn_plugin/mkldnn_extension_utils.cpp | 58 +- .../mkldnn_plugin/mkldnn_extension_utils.h | 19 +- .../src/mkldnn_plugin/mkldnn_graph.cpp | 39 +- .../src/mkldnn_plugin/mkldnn_graph.h | 5 +- .../src/mkldnn_plugin/mkldnn_graph_dumper.cpp | 12 +- .../mkldnn_plugin/mkldnn_graph_optimizer.cpp | 1631 +++++++++++++---- .../mkldnn_plugin/mkldnn_infer_request.cpp | 26 +- .../src/mkldnn_plugin/mkldnn_memory.cpp | 1123 +----------- .../src/mkldnn_plugin/mkldnn_memory.h | 167 +- .../src/mkldnn_plugin/mkldnn_memory_state.h | 4 +- .../src/mkldnn_plugin/mkldnn_node.cpp | 104 +- .../src/mkldnn_plugin/mkldnn_node.h | 58 +- .../nodes/common/blocked_desc_creator.cpp | 12 +- .../nodes/common/blocked_desc_creator.h | 14 +- .../nodes/mkldnn_adaptive_pooling.cpp | 12 +- .../nodes/mkldnn_batch_to_space_node.cpp | 8 +- .../nodes/mkldnn_bin_conv_node.cpp | 18 +- .../nodes/mkldnn_broadcast_node.cpp | 4 +- .../nodes/mkldnn_concat_node.cpp | 60 +- .../mkldnn_plugin/nodes/mkldnn_conv_node.cpp | 108 +- .../mkldnn_plugin/nodes/mkldnn_conv_node.h | 2 +- .../nodes/mkldnn_convert_node.cpp | 30 +- .../mkldnn_plugin/nodes/mkldnn_convert_node.h | 6 +- .../nodes/mkldnn_cum_sum_node.cpp | 6 +- .../nodes/mkldnn_deconv_node.cpp | 64 +- .../mkldnn_plugin/nodes/mkldnn_deconv_node.h | 4 +- .../nodes/mkldnn_def_conv_node.cpp | 31 +- .../nodes/mkldnn_depth_to_space_node.cpp | 10 +- .../mkldnn_plugin/nodes/mkldnn_dft_node.cpp | 4 +- .../nodes/mkldnn_eltwise_node.cpp | 57 +- .../mkldnn_embedding_bag_offset_sum_node.cpp | 2 +- .../mkldnn_embedding_bag_packed_sum_node.cpp | 2 +- .../mkldnn_embedding_segments_sum_node.cpp | 2 +- .../mkldnn_extract_image_patches_node.cpp | 4 +- .../nodes/mkldnn_fake_quantize_node.cpp | 13 +- .../nodes/mkldnn_fullyconnected_node.cpp | 32 +- .../nodes/mkldnn_fullyconnected_node.h | 4 +- .../nodes/mkldnn_gather_nd_node.cpp | 6 +- .../nodes/mkldnn_gather_node.cpp | 2 +- .../nodes/mkldnn_gather_tree_node.cpp | 2 +- .../nodes/mkldnn_generic_node.cpp | 9 +- .../mkldnn_plugin/nodes/mkldnn_input_node.cpp | 7 +- .../mkldnn_plugin/nodes/mkldnn_input_node.h | 1 - .../nodes/mkldnn_interpolate_node.cpp | 20 +- .../mkldnn_plugin/nodes/mkldnn_lrn_node.cpp | 15 +- .../src/mkldnn_plugin/nodes/mkldnn_lrn_node.h | 2 +- .../nodes/mkldnn_matmul_node.cpp | 4 +- .../nodes/mkldnn_memory_node.cpp | 7 +- .../nodes/mkldnn_multiclass_nms.cpp | 4 +- .../mkldnn_plugin/nodes/mkldnn_mvn_node.cpp | 15 +- .../nodes/mkldnn_non_max_suppression_node.cpp | 8 +- .../nodes/mkldnn_normalize_node.cpp | 15 +- .../nodes/mkldnn_one_hot_node.cpp | 2 +- .../mkldnn_plugin/nodes/mkldnn_pad_node.cpp | 20 +- .../nodes/mkldnn_pooling_node.cpp | 34 +- .../nodes/mkldnn_psroi_pooling_node.cpp | 14 +- .../nodes/mkldnn_psroi_pooling_node.h | 6 +- .../mkldnn_plugin/nodes/mkldnn_range_node.cpp | 2 +- .../nodes/mkldnn_reduce_node.cpp | 18 +- .../nodes/mkldnn_reference_node.cpp | 2 +- .../nodes/mkldnn_region_yolo_node.cpp | 8 +- .../nodes/mkldnn_reorder_node.cpp | 96 +- .../mkldnn_plugin/nodes/mkldnn_reorder_node.h | 2 + .../nodes/mkldnn_reshape_node.cpp | 4 +- .../nodes/mkldnn_reverse_sequence_node.cpp | 4 +- .../src/mkldnn_plugin/nodes/mkldnn_rnn.cpp | 32 +- .../src/mkldnn_plugin/nodes/mkldnn_rnn.h | 12 +- .../nodes/mkldnn_roi_align_node.cpp | 12 +- .../nodes/mkldnn_roi_pooling_node.cpp | 12 +- .../mkldnn_plugin/nodes/mkldnn_roll_node.cpp | 6 +- .../nodes/mkldnn_scatter_update_node.cpp | 18 +- .../nodes/mkldnn_select_node.cpp | 10 +- .../nodes/mkldnn_shuffle_channels_node.cpp | 8 +- .../nodes/mkldnn_softmax_node.cpp | 10 +- .../nodes/mkldnn_space_to_batch_node.cpp | 16 +- .../nodes/mkldnn_space_to_depth_node.cpp | 10 +- .../mkldnn_plugin/nodes/mkldnn_split_node.cpp | 62 +- .../nodes/mkldnn_strided_slice_node.cpp | 22 +- .../nodes/mkldnn_tensoriterator_node.cpp | 12 +- .../mkldnn_plugin/nodes/mkldnn_tile_node.cpp | 6 +- .../nodes/mkldnn_transpose_node.cpp | 38 +- .../src/mkldnn_plugin/utils/blob_dump.cpp | 18 +- .../src/mkldnn_plugin/utils/blob_dump.h | 3 +- .../src/mkldnn_plugin/utils/cpu_utils.hpp | 2 +- .../src/mkldnn_plugin/utils/general_utils.h | 25 - .../src/mkldnn_plugin/utils/ngraph_utils.hpp | 8 + .../src/mkldnn_plugin/utils/node_dumper.cpp | 8 +- .../unit/cpu/mkldnn_memory_desc_test.cpp | 147 +- 105 files changed, 3905 insertions(+), 2759 deletions(-) delete mode 100644 inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp delete mode 100644 inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h create mode 100644 inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.cpp create mode 100644 inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.h rename inference-engine/src/mkldnn_plugin/{ => memory_desc}/cpu_blocked_memory_desc.cpp (67%) rename inference-engine/src/mkldnn_plugin/{ => memory_desc}/cpu_blocked_memory_desc.h (58%) rename inference-engine/src/mkldnn_plugin/{ => memory_desc}/cpu_memory_desc.h (76%) create mode 100644 inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.cpp create mode 100644 inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.h create mode 100644 inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp create mode 100644 inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h create mode 100644 inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.cpp create mode 100644 inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.h diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt index 6e066a4656c384..a1a69c11572b61 100644 --- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt +++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt @@ -16,9 +16,104 @@ if (ENABLE_CPU_DEBUG_CAPS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCPU_DEBUG_CAPS") endif() -file(GLOB_RECURSE SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) -file(GLOB_RECURSE HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h - ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) +## TODO +set(LAYERS + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_bin_conv_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_concat_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_conv_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_convert_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_strided_slice_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_deconv_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_def_conv_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_eltwise_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_fullyconnected_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_matmul_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_generic_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_input_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_lrn_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_memory_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pad_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_transpose_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_pooling_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_fake_quantize_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reorder_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reshape_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_rnn.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roi_align_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roi_pooling_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_psroi_pooling_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_softmax_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_split_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tensoriterator_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_tile_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_mvn_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_normalize_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_scatter_update_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_interpolate_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reduce_node.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_reference_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_depth_to_space_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_space_to_depth_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_batch_to_space_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_space_to_batch_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_broadcast_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_embedding_bag_sum_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_embedding_bag_offset_sum_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_embedding_bag_packed_sum_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_embedding_segments_sum_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_gather_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_gather_elements_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_gather_nd_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_one_hot_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_region_yolo_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_select_node.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/mkldnn_roll_node.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/list.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_greedy_decoder_seq_len.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/ctc_loss.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/detectionoutput_onnx.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/extract_image_patches.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/gather_tree.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/grn.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/non_max_suppression.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/log_softmax.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/math.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/priorgridgenerator_onnx.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_onnx.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/range.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/reorg_yolo.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/reverse_sequence.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/roifeatureextractor_onnx.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/shuffle_channels.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/bucketize.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topkrois_onnx.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/softmax.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/topk.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/proposal_imp.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/nodes/cum_sum.cpp +) + +file(GLOB SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/utils/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/utils/rt_info/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/nodes/common/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/emitters/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ngraph_transformations/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ngraph_transformations/op/*.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory_desc/*.cpp + ${LAYERS} + ${OS_SPECIFIC_SRC} +) + +# file(GLOB_RECURSE SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +# file(GLOB_RECURSE HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/*.h +# ${CMAKE_CURRENT_SOURCE_DIR}/*.hpp) addVersionDefines(mkldnn_plugin.cpp CI_BUILD_NUMBER) diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp deleted file mode 100644 index 9eaa1654466f97..00000000000000 --- a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "cpu_memory_desc.h" -#include "cpu_memory_desc_utils.h" -#include "mkldnn_memory.h" -#include "utils/general_utils.h" -#include "utils/cpu_utils.hpp" -#include -#include -#include -#include - -using namespace mkldnn; -using namespace MKLDNNPlugin; -using namespace InferenceEngine; - -namespace MKLDNNPlugin { - -/** - * Convert to BlockedDescriptor - * - * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} - * strides // the order of outer dims is encoded here - * inner_blks 4 16 4 - * inner_idxs 1 0 1 - * - * IE tensor desc has more expressive ability. Any oneDNN blocked tensor can be covreted. - * How to convert into IE representation: - * 0. Detect a new_outer_order of outer_dims via descending strides. - * 1. IE strides : concatenate strides in new_outer_order and inner strides. - * 2. IE dims : concatenate outer dims in new_outer_order with auto padding and inner blocks - * 3. IE order : concatenate new_outer_order and inner_idxs - */ -BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc) { - mkldnn::memory::desc desc = inpDesc; - const auto dims = desc.dims(); - - if (desc.data.format_kind != dnnl_blocked) - IE_THROW() << "Conversion is not possible"; - - if (desc.data.extra.flags != dnnl_memory_extra_flag_none) { - IE_THROW() << "Conversion is not possible"; - } - - const auto &blk_desc = desc.data.format_desc.blocking; - - const size_t inner_ndims = blk_desc.inner_nblks; - - // IE offset padded to data. Same as for oneDNN - SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; - size_t ie_blk_offset0 = desc.data.offset0; - - // TODO: The tensor desc implementation allow to specify offset_to_data for inner blocked dims. - // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will - // fill it with zero. - ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); - - BlockedMemoryDesc res(inpDesc.getPrecision(), inpDesc.getShape(), inpDesc.getBlockDims(), - inpDesc.getOrder(), ie_blk_offset0, ie_blk_offset_to_data, inpDesc.getStrides()); - return res; -} - - -InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) { - if (auto blockingDesc = dynamic_cast(&desc)) { - return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), - {blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(), - blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()}); - } else if (auto mkldnnDesc = dynamic_cast(&desc)) { - auto blockingDesc = convertToBlockedDescriptor(*mkldnnDesc); - return InferenceEngine::TensorDesc(blockingDesc.getPrecision(), blockingDesc.getShape().getStaticDims(), - {blockingDesc.getBlockDims(), blockingDesc.getOrder(), blockingDesc.getOffsetPadding(), - blockingDesc.getOffsetPaddingToData(), blockingDesc.getStrides()}); - } - - IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc"; - - return InferenceEngine::TensorDesc(); -} - -MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const MemoryDesc& desc) { - if (MemoryDescType::Blocked == desc.getType()) { - return convertToMKLDNNMemoryDesc(*(desc.as())); - } else if (MemoryDescType::Mkldnn == desc.getType()) { - return *(desc.as()); - } else { - IE_THROW() << "Cannot convert MemoryDesc to MKLDNNMemoryDesc"; - } -} - -MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc) { - return MKLDNNMemoryDesc(desc.getPrecision(), desc.getShape(), desc.getBlockDims(), - desc.getOrder(), desc.getOffsetPadding(), desc.getOffsetPaddingToData(), desc.getStrides()); -} - - -/** - * Construct from IE::TensorDesc - * @param tDesc - * - * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} - * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. - * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence - * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims - * - * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) - * - * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of - * real dims spliting. - * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] - * but not [0]<=>[4] because it breacke spliting original dims into internal blocked dims - * Normalization of representation: Make strides growing but keep layout same as original. Not all - * layout allow us to meet normalize form of tensor desc. - * - * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... N] - */ -MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& tDesc) { - mkldnn::memory::desc mkldnnDesc({}, mkldnn::memory::data_type::undef, mkldnn::memory::format_tag::undef); - auto dims = tDesc.getDims(); - - // TODO: implicit conversion of dims is no good... - if (tDesc.getLayout() == Layout::SCALAR) { - mkldnnDesc.data.format_kind = dnnl_blocked; - mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - mkldnnDesc.data.ndims = 1; - mkldnnDesc.data.dims[0] = 1; - mkldnnDesc.data.padded_dims[0] = 1; - mkldnnDesc.data.format_desc.blocking.strides[0] = 1; - mkldnnDesc.data.padded_offsets[0] = 0; - mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - return MKLDNNMemoryDesc(mkldnnDesc); - } - - if (tDesc.getLayout() == Layout::ANY) { - mkldnnDesc.data.format_kind = dnnl_format_kind_any; - mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - mkldnnDesc.data.ndims = dims.size(); - std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); - std::copy(dims.begin(), dims.end(), mkldnnDesc.data.padded_dims); - mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::fill(mkldnnDesc.data.padded_offsets, mkldnnDesc.data.padded_offsets + dims.size(), 0); - return MKLDNNMemoryDesc(mkldnnDesc); - } - - auto ie_blkdDims = tDesc.getBlockingDesc().getBlockDims(); - auto ie_order = tDesc.getBlockingDesc().getOrder(); - auto ie_offsetsToData = tDesc.getBlockingDesc().getOffsetPaddingToData(); - auto ie_strides = tDesc.getBlockingDesc().getStrides(); - - size_t outer_ndims = dims.size(); - size_t inner_ndims = ie_order.size() - dims.size(); - - bool is_descending_strides = true; - for (int i = 1; i < ie_strides.size(); i++) { - is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); - } - - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims - // and may be we can achieve correct "descending strides" form which allow conversion. - if (!is_descending_strides) - IE_THROW() << "Unsupported case for conversion"; - - std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension - for (size_t i = 0; i < outer_ndims; i++) { - outer_order[ie_order[i]] = i; - } - bool outer_is_correct_permutation_of_n = - std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); - - if (!outer_is_correct_permutation_of_n) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted - for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { - inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); - } - - if (!inner_block_are_dense) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), - [](size_t pad) { return pad == 0; }); - - if (!inner_pad_offsets_is_zero) - IE_THROW() << "Unsupported case for conversion"; - - // Fill general memory desc fields - mkldnnDesc.data.format_kind = dnnl_blocked; - mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - mkldnnDesc.data.ndims = dims.size(); - mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); - std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.data.padded_offsets); - std::fill(mkldnnDesc.data.padded_dims, mkldnnDesc.data.padded_dims + outer_ndims, 1); - for (size_t i = 0; i < ie_order.size(); i++) { - auto idx = ie_order[i]; - mkldnnDesc.data.padded_dims[idx] *= ie_blkdDims[i]; - } - - // Fill blocking desc - auto &dnn_blk_desc = mkldnnDesc.data.format_desc.blocking; - dnn_blk_desc.inner_nblks = inner_ndims; - std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); - std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); - for (size_t i = 0; i < outer_ndims; i++) { - dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; - } - - return MKLDNNMemoryDesc(mkldnnDesc); -} - -BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MemoryDesc &desc) { - if (desc.getType() == MemoryDescType::Blocked) { - return *(desc.as()); - } else if (desc.getType() == MemoryDescType::Mkldnn) { - return MemoryDescUtils::convertToBlockedDescriptor(*(desc.as())); - } else { - IE_THROW() << "Cannot convert to blocked memory descriptor. Unsupported memory desc type"; - } -} - -MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const MKLDNNMemoryDesc& desc) { - if (desc.getFormatKind() != dnnl_format_kind_t::dnnl_blocked) - IE_THROW() << "applyUndefinedOffset doesn't support not dnnl_blocked MKLDNNMemoryDesc"; - - std::vector strides; - std::vector offsetPaddingToData; - - strides.resize(desc.getBlockDims().size(), Shape::UNDEFINED_DIM); - offsetPaddingToData.resize(desc.getBlockDims().size(), 0); - size_t offsetPadding = Shape::UNDEFINED_DIM; - MKLDNNMemoryDesc retDesc(desc.getPrecision(), desc.getShape(), desc.getBlockDims(), - desc.getOrder(), offsetPadding, offsetPaddingToData, strides); - return MKLDNNPlugin::make_unique(std::move(retDesc)); -} - -MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const BlockedMemoryDesc &desc) { - std::vector strides; - std::vector offsetPaddingToData; - - strides.resize(desc.getBlockDims().size(), Shape::UNDEFINED_DIM); - offsetPaddingToData.resize(desc.getBlockDims().size(), 0); - size_t offsetPadding = Shape::UNDEFINED_DIM; - - return MKLDNNPlugin::make_unique(desc.getPrecision(), desc.getShape(), desc.getBlockDims(), - desc.getOrder(), offsetPadding, offsetPaddingToData, strides); -} - -MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc) { - if (MemoryDescType::Blocked == desc->getType()) { - auto blockedDesc = desc->as(); - return MKLDNNPlugin::make_unique(blockedDesc->getPrecision(), blockedDesc->getShape(), - blockedDesc->getBlockDims(), blockedDesc->getOrder()); - } else if (MemoryDescType::Mkldnn == desc->getType()) { - auto mkldnnDesc = desc->as(); - MKLDNNMemoryDesc retDesc(desc->getPrecision(), desc->getShape(), - mkldnnDesc->getBlockDims(), mkldnnDesc->getOrder()); - return MKLDNNPlugin::make_unique(std::move(retDesc)); - } else { - IE_THROW() << "resetOffset supports Blocked and Mkldnn descriptors only"; - } -} - -InferenceEngine::Blob::Ptr MemoryDescUtils::createBlob(const MemoryDesc &memDesc) { - // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor - InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); - - desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); - InferenceEngine::Blob::Ptr blob = make_blob_with_precision(desc); - blob->allocate(); - return blob; -} - -InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) { - // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor - auto& memDesc = mem.GetDesc(); - InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); - - desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); - return make_blob_with_precision(desc, mem.GetData()); -} - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h deleted file mode 100644 index 536448ce74b1fa..00000000000000 --- a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -namespace MKLDNNPlugin { -class MKLDNNMemoryDesc; -class BlockedMemoryDesc; -class MKLDNNMemory; - -class MemoryDescUtils { -public: - /** - * @brief Converts MemoryDesc to InferenceEngine::TensorDesc - * @param desc MemoryDesc to be converted - * @return converted InferenceEngine::TensorDesc - */ - static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc); - - /** - * @brief Converts MemoryDesc to MKLDNNMemoryDesc - * @param desc MemoryDesc to be converted - * @return converted MKLDNNMemoryDesc - */ - static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const MemoryDesc& desc); - - /** - * @brief Converts BlockedMemoryDesc to MKLDNNMemoryDesc - * @param desc BlockedMemoryDesc to be converted - * @return converted MKLDNNMemoryDesc - */ - static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc); - - /** - * @brief Converts InferenceEngine::TensorDesc to MKLDNNMemoryDesc - * @param desc InferenceEngine::TensorDesc to be converted - * @return converted MKLDNNMemoryDesc - */ - static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& desc); - - /** - * @brief Converts MemoryDesc to BlockedMemoryDesc - * @param desc MemoryDesc to be converted - * @return converted BlockedMemoryDesc - */ - static BlockedMemoryDesc convertToBlockedDescriptor(const MemoryDesc& desc); - - /** - * @brief Converts MKLDNNMemoryDesc to BlockedMemoryDesc - * @param desc MKLDNNMemoryDesc to be converted - * @return converted BlockedMemoryDesc - */ - static BlockedMemoryDesc convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc); - - /** - * @brief Creates MKLDNNMemoryDesc with offset0 of UNDEFINED_DIM size - * @param desc modifiable MKLDNNMemoryDesc - * @return pointer to MKLDNNMemoryDesc - */ - static MemoryDescPtr applyUndefinedOffset(const MKLDNNMemoryDesc& desc); - - /** - * @brief Creates BlockedMemoryDesc with offsetPadding, strides of UNDEFINED_DIM size and offsetPaddingToData of 0 size - * @param desc modifiable BlockedMemoryDesc - * @return pointer to BlockedMemoryDesc - */ - static MemoryDescPtr applyUndefinedOffset(const BlockedMemoryDesc& desc); - - /** - * @brief Creates MemoryDesc with offsetPadding of 0 size - * @param desc modifiable MemoryDesc - * @return pointer to MemoryDesc - */ - static MemoryDescPtr resetOffset(const MemoryDesc* desc); - - /** - * @brief Creates InferenceEngine::Blob from MemoryDesc - * @param desc MemoryDesc from which will be created InferenceEngine::Blob - * @return pointer to InferenceEngine::Blob - */ - static InferenceEngine::Blob::Ptr createBlob(const MemoryDesc& memDesc); - - /** - * @brief Creates InferenceEngine::Blob from MKLDNNMemory with the memory reuse - * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob - * @return pointer to InferenceEngine::Blob - */ - static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem); -}; - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_shape.cpp b/inference-engine/src/mkldnn_plugin/cpu_shape.cpp index bc31d90773c2e8..68b6dba1b404a6 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_shape.cpp +++ b/inference-engine/src/mkldnn_plugin/cpu_shape.cpp @@ -4,6 +4,7 @@ #include "cpu_shape.h" #include "utils/general_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" using namespace MKLDNNPlugin; @@ -37,7 +38,7 @@ std::string Shape::toString() const { size_t i = 0; do { if (dims[i] == Shape::UNDEFINED_DIM) { - output << dim2str(minDims[i]) << " - " << dim2str(maxDims[i]); + output << MemoryDescUtils::dim2str(minDims[i]) << " - " << MemoryDescUtils::dim2str(maxDims[i]); } else { output << dims[i]; } diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.h b/inference-engine/src/mkldnn_plugin/cpu_types.h index 7c820c4db50ccf..130fc142e980f5 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.h +++ b/inference-engine/src/mkldnn_plugin/cpu_types.h @@ -4,8 +4,13 @@ #pragma once +#include + namespace MKLDNNPlugin { +using Dim = std::size_t; +using VectorDims = std::vector; + enum Type { Unknown, Generic, diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.cpp new file mode 100644 index 00000000000000..a7b231c37af35e --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.cpp @@ -0,0 +1,34 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "blocked_memory_desc.h" +#include "utils/general_utils.h" + +using namespace MKLDNNPlugin; + +bool BlockedMemoryDesc::isCompatible(const BlockedMemoryDesc &rhs) const { + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) + return false; + + if (!dimsEqualWeak(this->getBlockDims(), rhs.getBlockDims())) { + return false; + } + + if (!dimsEqualWeak(this->getOffsetPaddingToData(), rhs.getOffsetPaddingToData())) { + return false; + } + + // this check needed to avoid inserting unnecessary reorders if the memory is used in place and the batch size is equal to 1 + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(this->getStrides(), rhs.getStrides(), skipAxis)) { + return false; + } + + if (!dimsEqualWeak(this->getOrder(), rhs.getOrder())) { + return false; + } + + return dimsEqualWeak(this->getOffsetPadding(), rhs.getOffsetPadding()); +} diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.h new file mode 100644 index 00000000000000..23dd182879a129 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.h @@ -0,0 +1,83 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_memory_desc.h" + +namespace MKLDNNPlugin { + +class BlockedMemoryDesc : public virtual MemoryDesc { +public: + BlockedMemoryDesc() {} + + /** + * @brief Returns the blocked dimensions + * + * @return blocked dimensions + */ + virtual const std::vector& getBlockDims() const = 0; + + /** + * @brief Returns the vector of order + * + * @return order + */ + virtual const std::vector& getOrder() const = 0; + + /** + * @brief Returns the per-dimension offset vector + * + * @return offsets + */ + virtual const std::vector& getOffsetPaddingToData() const = 0; + + /** + * @brief Returns the offset to the current memory block + * + * @return offset + */ + virtual size_t getOffsetPadding() const = 0; + + /** + * @brief Returns strides for each dimension + * + * @return strides + */ + virtual const std::vector& getStrides() const = 0; + + /** + * @brief Check that desc has padded dims + * + * @return true if exist padded dims, otherwise false + */ + virtual bool blocksExtended() const = 0; + + /** + * @brief Compute number of elements taking into account padded dims + * + * @return number of elements taking into account padded dims + */ + virtual size_t getPaddedElementsCount() const = 0; + +protected: + /** + * @brief Check descs on compatibility + * WARNING: Check only BlockedMemoryDesc specific attributes like: strides, order etc. + * Doesn't perform type check for descs + * Doesn't perform descs specific attributes check + * @return true if compatible, otherwise false + */ + bool isCompatible(const BlockedMemoryDesc &rhs) const; + + mutable VectorDims blockedDims; + mutable std::vector strides; + mutable std::vector order; + mutable std::vector offsetPaddingToData; +}; + +using BlockedMemoryDescPtr = std::unique_ptr; +using BlockedMemoryDescCPtr = std::unique_ptr; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.cpp similarity index 67% rename from inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp rename to inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.cpp index 96b32b52a75a88..401d5ecd1a0b10 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.cpp @@ -4,10 +4,11 @@ #include "cpu_blocked_memory_desc.h" #include "mkldnn_memory.h" +#include "dnnl_blocked_memory_desc.h" using namespace MKLDNNPlugin; -BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape) : MemoryDesc(shape, Blocked) , precision(prc) { +CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape) : MemoryDesc(shape, Blocked), precision(prc) { auto& dims = shape.getDims(); order.resize(dims.size()); std::iota(order.begin(), order.end(), 0); @@ -21,15 +22,15 @@ BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const Shape } } -BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const std::vector& blockedDims, +CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const std::vector& blockedDims, const std::vector& order, size_t offsetPadding, const std::vector& offsetPaddingToData, const std::vector& strides) : MemoryDesc(shape, Blocked), precision(prc) { if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { - IE_THROW() << "BlockedMemoryDesc do not support undefined order."; + IE_THROW() << "CpuBlockedMemoryDesc do not support undefined order."; } if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { - IE_THROW() << "BlockedMemoryDesc doesn't support undefined blockedDims."; + IE_THROW() << "CpuBlockedMemoryDesc doesn't support undefined blockedDims."; } this->order = order; @@ -65,7 +66,7 @@ BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const Shape } } -bool BlockedMemoryDesc::isDefinedImp() const { +bool CpuBlockedMemoryDesc::isDefinedImp() const { bool defined = true; defined = defined && std::none_of(blockedDims.cbegin(), blockedDims.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); defined = defined && std::none_of(strides.cbegin(), strides.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); @@ -76,48 +77,26 @@ bool BlockedMemoryDesc::isDefinedImp() const { return defined; } -bool BlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { +bool CpuBlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { const MemoryDesc* pRhs = &rhs; - if (auto blockingDesc = dynamic_cast(pRhs)) { - return isCompatible(*blockingDesc); - } else if (auto mkldnnDesc = dynamic_cast(pRhs)) { - return mkldnnDesc->isCompatible(*this); + if (auto cpuBlkDesc = dynamic_cast(pRhs)) { + return isCompatible(*cpuBlkDesc); + } else if (auto dnnlBlkDesc = dynamic_cast(pRhs)) { + return isCompatible(*dnnlBlkDesc); } else { return false; } } -bool BlockedMemoryDesc::isCompatible(const BlockedMemoryDesc& rhs) const { - if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) - return false; - - if (!dimsEqualWeak(this->getBlockDims(), rhs.getBlockDims())) { - return false; - } - - if (!dimsEqualWeak(this->getOffsetPaddingToData(), rhs.getOffsetPaddingToData())) { - return false; - } - - // this check needed to avoid inserting unnecessary reorders if the memory is used in place and the batch size is equal to 1 - size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : - Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 - if (!dimsEqualWeak(this->getStrides(), rhs.getStrides(), skipAxis)) { - return false; - } - - if (!dimsEqualWeak(this->getOrder(), rhs.getOrder())) { - return false; - } - - return dimsEqualWeak(this->getOffsetPadding(), rhs.getOffsetPadding()); +bool CpuBlockedMemoryDesc::isCompatible(const CpuBlockedMemoryDesc &rhs) const { + return BlockedMemoryDesc::isCompatible(rhs); } -bool BlockedMemoryDesc::isCompatible(const MKLDNNMemoryDesc& rhs) const { +bool CpuBlockedMemoryDesc::isCompatible(const DnnlBlockedMemoryDesc &rhs) const { return rhs.isCompatible(*this); } -size_t BlockedMemoryDesc::getMemSizeImp() const { +size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const { int64_t e_size = getOffsetPadding() + 1; // size in bytes (from begin of data to last element) for (int j = 0; j < getBlockDims().size(); j++) e_size += (getBlockDims()[j] - 1) * getStrides()[j]; @@ -128,9 +107,9 @@ size_t BlockedMemoryDesc::getMemSizeImp() const { return e_size; } -size_t BlockedMemoryDesc::getMaxMemSize() const { +size_t CpuBlockedMemoryDesc::getMaxMemSize() const { if (shape.isStatic()) { - return getCurrentSize(); + return getCurrentMemSize(); } auto& maxDims = shape.getMaxDims(); @@ -139,10 +118,10 @@ size_t BlockedMemoryDesc::getMaxMemSize() const { } auto maxDimsDesc = cloneWithNewDims(maxDims); - return maxDimsDesc->getCurrentSize(); + return maxDimsDesc->getCurrentMemSize(); } -size_t BlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const { +size_t CpuBlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const { InferenceEngine::SizeVector off_v = v; size_t n_blocked_dims = order.size(); @@ -162,7 +141,7 @@ size_t BlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const return offset; } -size_t BlockedMemoryDesc::getElementOffset(size_t elemNumber) const { +size_t CpuBlockedMemoryDesc::getElementOffset(size_t elemNumber) const { // TODO [DS]: rewrite to support dynamic shapes auto& dims = shape.getStaticDims(); size_t n_dims = dims.size(); @@ -176,7 +155,7 @@ size_t BlockedMemoryDesc::getElementOffset(size_t elemNumber) const { return getOffset(pos); } -bool BlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { +bool CpuBlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { switch (layoutType) { case LayoutType::ncsp: return isPlainFormat(); @@ -191,7 +170,7 @@ bool BlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { } } -bool BlockedMemoryDesc::isPlainFormat() const { +bool CpuBlockedMemoryDesc::isPlainFormat() const { if (shape.getRank() != order.size()) { return false; } @@ -203,7 +182,7 @@ bool BlockedMemoryDesc::isPlainFormat() const { return true; } -bool BlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { +bool CpuBlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { if ((order.size() - shape.getRank()) != 1) { return false; } @@ -221,7 +200,7 @@ bool BlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { return true; } -bool BlockedMemoryDesc::isTailCFormat() const { +bool CpuBlockedMemoryDesc::isTailCFormat() const { if (shape.getRank() < 3) { return false; } @@ -237,7 +216,7 @@ bool BlockedMemoryDesc::isTailCFormat() const { return true; } -std::string BlockedMemoryDesc::serializeFormat() const { +std::string CpuBlockedMemoryDesc::serializeFormat() const { std::stringstream result; char startLetter = 'a'; std::unordered_map mapAxisBlockSize; @@ -260,8 +239,21 @@ std::string BlockedMemoryDesc::serializeFormat() const { return result.str(); } -std::unique_ptr BlockedMemoryDesc::cloneWithNewDimsImp(const std::vector &dims) const { - std::vector newBlockedDims(order.size()); +std::unique_ptr CpuBlockedMemoryDesc::cloneWithNewDimsImp(const VectorDims &dims) const { + if (std::any_of(dims.begin(), dims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { + IE_THROW() << "Can't clone desc if new dims are undefined"; + } + + // TODO [DS]: add stride recalculation for strided blobs + for (int i = strides.size() - 2; i >= 0 ; i--) { + if (strides[i] == Shape::UNDEFINED_DIM) + break; + + if (strides[i] != strides[i + 1] * blockedDims[i + 1]) + IE_THROW(NotImplemented) << "Can't clone desc with new dims for not dense tensor"; + } + + VectorDims newBlockedDims(order.size()); for (size_t i = 0; i < dims.size(); ++i) { newBlockedDims[order[i]] = dims[i]; @@ -274,10 +266,36 @@ std::unique_ptr BlockedMemoryDesc::cloneWithNewDimsImp(const std::ve } } - std::vector newOffsetPaddingToData; + VectorDims newOffsetPaddingToData; if (std::none_of(offsetPaddingToData.begin(), offsetPaddingToData.end(), [](size_t x){ return x == Shape::UNDEFINED_DIM;})) { newOffsetPaddingToData = offsetPaddingToData; } - return MKLDNNPlugin::make_unique(precision, Shape(dims), newBlockedDims, order, offsetPadding, newOffsetPaddingToData); + return MKLDNNPlugin::make_unique(precision, Shape(dims), newBlockedDims, order, offsetPadding, newOffsetPaddingToData); +} + +bool CpuBlockedMemoryDesc::blocksExtended() const { + const size_t rank = shape.getRank(); + for (size_t i = rank; i < order.size(); i++) { + size_t idx = order[i]; + Dim paddedDim = 1; + for (size_t j = rank; j < order.size(); j++) { + if (order[j] == idx) + paddedDim *= blockedDims[j]; + } + if (blockedDims[idx] == Shape::UNDEFINED_DIM) { + paddedDim = Shape::UNDEFINED_DIM; + } else { + paddedDim *= blockedDims[idx]; + } + if (paddedDim != shape.getDims()[idx]) + return true; + } + return false; +} + +size_t CpuBlockedMemoryDesc::getPaddedElementsCount() const { + if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) + IE_THROW() << "Can't compute padded elements count for non undefined blocked dims"; + return std::accumulate(blockedDims.begin(), blockedDims.end(), size_t{1}, std::multiplies()); } diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.h similarity index 58% rename from inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h rename to inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.h index c85e8968be6da2..c2197aa96419ef 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.h @@ -4,29 +4,26 @@ #pragma once -#include "cpu_memory_desc.h" +#include "blocked_memory_desc.h" +#include "utils/general_utils.h" namespace MKLDNNPlugin { -class MKLDNNMemoryDesc; - -class BlockedMemoryDesc : public MemoryDesc { +class CpuBlockedMemoryDesc : public BlockedMemoryDesc { public: - BlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape); + CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape); - BlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const std::vector& blockedDims, - const std::vector& order, size_t offsetPadding = 0, const std::vector& offsetPaddingToData = {}, - const std::vector& strides = {}); + CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const std::vector& blockedDims, + const std::vector& order, size_t offsetPadding = 0, const std::vector& offsetPaddingToData = {}, + const std::vector& strides = {}); MemoryDescPtr clone() const override { - return MKLDNNPlugin::make_unique(*this); + return MKLDNNPlugin::make_unique(*this); } bool isCompatible(const MemoryDesc& rhs) const override; - - bool isCompatible(const BlockedMemoryDesc& rhs) const; - - bool isCompatible(const MKLDNNMemoryDesc& rhs) const; + bool isCompatible(const CpuBlockedMemoryDesc &rhs) const; + bool isCompatible(const DnnlBlockedMemoryDesc &rhs) const; InferenceEngine::Precision getPrecision() const override { return precision; @@ -36,7 +33,7 @@ class BlockedMemoryDesc : public MemoryDesc { precision = std::move(prc); } - const std::vector& getBlockDims() const { + const std::vector& getBlockDims() const override { return blockedDims; } @@ -45,7 +42,7 @@ class BlockedMemoryDesc : public MemoryDesc { * * @return order */ - const std::vector& getOrder() const { + const std::vector& getOrder() const override { return order; } @@ -54,7 +51,7 @@ class BlockedMemoryDesc : public MemoryDesc { * * @return offsets */ - const std::vector& getOffsetPaddingToData() const { + const std::vector& getOffsetPaddingToData() const override { return offsetPaddingToData; } /** @@ -62,7 +59,7 @@ class BlockedMemoryDesc : public MemoryDesc { * * @return offset */ - size_t getOffsetPadding() const { + size_t getOffsetPadding() const override { return offsetPadding; } @@ -71,19 +68,23 @@ class BlockedMemoryDesc : public MemoryDesc { * * @return strides */ - const std::vector& getStrides() const { + const std::vector& getStrides() const override { return strides; } + bool blocksExtended() const override; + bool hasLayoutType(LayoutType layoutType) const override; std::string serializeFormat() const override; size_t getMaxMemSize() const override; + size_t getPaddedElementsCount() const override; + private: size_t getElementOffset(size_t elemNumber) const override; - size_t getMemSizeImp() const override; + size_t getCurrentMemSizeImp() const override; size_t getOffset(const InferenceEngine::SizeVector& v) const; bool isPlainFormat() const; bool isBlockedCFormat(size_t blk_size) const; @@ -93,10 +94,8 @@ class BlockedMemoryDesc : public MemoryDesc { private: InferenceEngine::Precision precision; - std::vector blockedDims; - std::vector strides; - std::vector order; - std::vector offsetPaddingToData; size_t offsetPadding; + mutable VectorDims paddedDims; }; + } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc.h similarity index 76% rename from inference-engine/src/mkldnn_plugin/cpu_memory_desc.h rename to inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc.h index 66f7cd20c2d5b5..047e0e12527658 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc.h @@ -7,13 +7,32 @@ #include #include #include "cpu_shape.h" -#include "utils/general_utils.h" +#include "cpu_types.h" +#include "memory_desc/cpu_memory_desc_utils.h" + +/** + * @brief + * + * MemoryDesc - the descriptor of tensor representation in memory. Describes all required information + * for proper allocation and handling tensor in some buffer. The real memory is not present, just description. + * This object answers on question how and where data with logical index [x1, x2, .. xN] placed in real buffer. + * In the simplest case it describe a mapping between "logical offset" and "real offset". + * + */ namespace MKLDNNPlugin { +class MemoryDesc; + +using MemoryDescPtr = std::unique_ptr; +using MemoryDescConstPtr = std::unique_ptr; + enum MemoryDescType { - Blocked, - Mkldnn + Undef = 0, + Blocked = 1, + Mkldnn = 1 << 1, + + DnnlBlocked = Blocked | Mkldnn }; enum class LayoutType : unsigned { @@ -39,13 +58,13 @@ class MemoryDesc { virtual void setPrecision(InferenceEngine::Precision prc) = 0; - virtual std::unique_ptr clone() const = 0; + virtual MemoryDescPtr clone() const = 0; // clone descriptor with new dims. Throws an exception if some of the new dims conflicts with the internal shape (i.e. its defined dims ,rank, upper bounds) - std::unique_ptr cloneWithNewDims(const std::vector& dims) const { + MemoryDescPtr cloneWithNewDims(const VectorDims& dims) const { if (!getShape().isCompatible(dims)) { IE_THROW(ParameterMismatch) << "Can not clone with new dims. Descriptor's shape: " << getShape().toString() << - " is incompatible with provided dimensions: " << dims2str(dims) << "."; + " is incompatible with provided dimensions: " << MemoryDescUtils::dims2str(dims) << "."; } return cloneWithNewDimsImp(dims); @@ -72,10 +91,10 @@ class MemoryDesc { * @brief Get minimal required memory size in bytes. * @return return minimal required memory size in bytes or UNDEFINED_SIZE in case undefined descriptor */ - size_t getCurrentSize() const { + size_t getCurrentMemSize() const { size_t retVal = UNDEFINED_SIZE; if (isDefined()) { - retVal = getMemSizeImp(); + retVal = getCurrentMemSizeImp(); } return retVal; } @@ -103,20 +122,21 @@ class MemoryDesc { static constexpr size_t UNDEFINED_SIZE = std::numeric_limits::max(); protected: + MemoryDesc() : type(MemoryDescType::Undef) {} MemoryDesc(Shape shape, MemoryDescType type) : shape(std::move(shape)), type(type) {} - MemoryDesc(const std::vector& dims, MemoryDescType type) + MemoryDesc(const VectorDims& dims, MemoryDescType type) : shape(dims), type(type) {} - virtual size_t getMemSizeImp() const = 0; + virtual size_t getCurrentMemSizeImp() const = 0; // Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc. virtual size_t getElementOffset(size_t elemNumber) const = 0; virtual bool isDefinedImp() const = 0; - virtual std::unique_ptr cloneWithNewDimsImp(const std::vector& dims) const = 0; + virtual MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const = 0; MemoryDescType type; Shape shape; @@ -132,7 +152,4 @@ class MemoryDesc { friend class MKLDNNSplitNode; }; -using MemoryDescPtr = std::unique_ptr; -using MemoryDescConstPtr = std::unique_ptr; - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.cpp new file mode 100644 index 00000000000000..3c0ca444a7f03a --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.cpp @@ -0,0 +1,138 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc_utils.h" +#include "mkldnn_memory.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" +#include +#include +#include +#include +#include + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +namespace MKLDNNPlugin { + +std::unique_ptr MemoryDescUtils::convertToDnnlMemoryDesc(const MemoryDesc& desc) { + if (MemoryDescType::Blocked == desc.getType()) { + return convertToDnnlMemoryDesc(*(desc.as())); + } else if (MemoryDescType::Mkldnn & desc.getType()) { + return std::unique_ptr(dynamic_cast(desc.clone().release())); + } else { + IE_THROW() << "Cannot convert MemoryDesc to DnnlMemoryDesc"; + } +} + +std::unique_ptr MemoryDescUtils::convertToDnnlMemoryDesc(const CpuBlockedMemoryDesc& desc) { + return std::unique_ptr(new DnnlBlockedMemoryDesc(desc.getPrecision(), desc.getShape(), desc.getBlockDims(), + desc.getOrder(), desc.getOffsetPadding(), desc.getOffsetPaddingToData(), desc.getStrides())); +} + +std::unique_ptr MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) { + const auto &blkDesc = desc.getBlockingDesc(); + return std::unique_ptr(new DnnlBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), + blkDesc.getOrder(), blkDesc.getOffsetPadding(), + blkDesc.getOffsetPaddingToData(), blkDesc.getStrides())); +} + +std::unique_ptr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDesc& desc) { + if (desc.getType() & MemoryDescType::Blocked) { + return std::unique_ptr(dynamic_cast(desc.clone().release())); + } else { + IE_THROW() << "Can not convert unsupported memory descriptor"; + } +} + +MemoryDescPtr MemoryDescUtils::cloneWithUndefStridesAndOffset(const MemoryDesc& desc) { + if (desc.getType() == MemoryDescType::Mkldnn) { + IE_THROW() << "Can't apply undefined offset for mkldnn memory desc"; + } + + const auto blkMemDesc = desc.as(); + + std::vector strides; + std::vector offsetPaddingToData; + strides.resize(blkMemDesc->getBlockDims().size(), Shape::UNDEFINED_DIM); + offsetPaddingToData.resize(blkMemDesc->getBlockDims().size(), 0); + size_t offsetPadding = Shape::UNDEFINED_DIM; + + if (blkMemDesc->getType() == MemoryDescType::Blocked) { + return MKLDNNPlugin::make_unique(blkMemDesc->getPrecision(), blkMemDesc->getShape(), blkMemDesc->getBlockDims(), + blkMemDesc->getOrder(), offsetPadding, offsetPaddingToData, strides); + } else if (blkMemDesc->getType() == MemoryDescType::DnnlBlocked) { + return std::unique_ptr(new DnnlBlockedMemoryDesc(blkMemDesc->getPrecision(), blkMemDesc->getShape(), + blkMemDesc->getBlockDims(), blkMemDesc->getOrder(), + offsetPadding, offsetPaddingToData, strides)); + } else { + IE_THROW() << "Cannot apply undefined offset. Unsupported memory desc type"; + } +} + +MemoryDescPtr MemoryDescUtils::cloneWithDefaultStridesAndOffset(const MemoryDesc* desc) { + const auto blkMemDesc = desc->as(); + + if (MemoryDescType::Blocked == desc->getType()) { + return MKLDNNPlugin::make_unique(blkMemDesc->getPrecision(), blkMemDesc->getShape(), + blkMemDesc->getBlockDims(), blkMemDesc->getOrder()); + } else if (MemoryDescType::DnnlBlocked == desc->getType()) { + return std::unique_ptr(new DnnlBlockedMemoryDesc(blkMemDesc->getPrecision(), blkMemDesc->getShape(), + blkMemDesc->getBlockDims(), blkMemDesc->getOrder())); + } else { + IE_THROW() << "cloneWithDefaultStridesAndOffset supports Blocked descriptors only"; + } +} + +InferenceEngine::Blob::Ptr MemoryDescUtils::createBlob(const MemoryDesc &memDesc) { + // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor + InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); + + desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); + InferenceEngine::Blob::Ptr blob = make_blob_with_precision(desc); + blob->allocate(); + return blob; +} + +InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) { + // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor + auto& memDesc = mem.getDesc(); + InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); + + desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); + return make_blob_with_precision(desc, mem.GetData()); +} + +InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) { + if (auto blockingDesc = dynamic_cast(&desc)) { + return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), + {blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(), + blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()}); + } else { + IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc"; + } +} + +std::string MemoryDescUtils::dim2str(size_t dim) { + return dim == Shape::UNDEFINED_DIM ? "?" : std::to_string(dim); +} + +std::string MemoryDescUtils::dims2str(const std::vector& dims) { + std::stringstream output; + output << "{"; + + auto itr = dims.begin(); + do { + output << dim2str(*itr); + } while (++itr != dims.end() && output << ", "); + + output << "}"; + return output.str(); +} + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.h b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.h new file mode 100644 index 00000000000000..0b6938d7b2fab2 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.h @@ -0,0 +1,100 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "mkldnn/ie_mkldnn.h" + +namespace MKLDNNPlugin { + +class MemoryDesc; +class DnnlMemoryDesc; +class BlockedMemoryDesc; +class DnnlBlockedMemoryDesc; +class CpuBlockedMemoryDesc; +class MKLDNNMemory; + +class MemoryDescUtils { +public: + /** + * @brief Converts MemoryDesc to DnnlMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted DnnlMemoryDesc + */ + static std::unique_ptr convertToDnnlMemoryDesc(const MemoryDesc& desc); + + /** + * @brief Converts BlockedMemoryDesc to DnnlMemoryDesc + * @param desc BlockedMemoryDesc to be converted + * @return converted DnnlMemoryDesc + */ + static std::unique_ptr convertToDnnlMemoryDesc(const CpuBlockedMemoryDesc& desc); + + /** + * @brief Converts InferenceEngine::TensorDesc to DnnlBlockedMemoryDesc + * @param desc InferenceEngine::TensorDesc to be converted + * @return converted DnnlBlockedMemoryDesc + */ + static std::unique_ptr convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc); + + /** + * @brief Converts MemoryDesc to BlockedMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted BlockedMemoryDesc + */ + static std::unique_ptr convertToBlockedMemoryDesc(const MemoryDesc& desc); + + /** + * @brief Creates BlockedMemoryDesc with offsetPadding and strides of UNDEFINED_DIM size + * @param desc is the MemoryDesc to be cloned + * @return pointer to the new MemoryDesc + */ + static std::unique_ptr cloneWithUndefStridesAndOffset(const MemoryDesc& desc); + + /** + * @brief Creates MemoryDesc with offsetPadding of 0 size and default strides + * @param desc is the MemoryDesc to be cloned + * @return pointer to the new MemoryDesc + */ + static std::unique_ptr cloneWithDefaultStridesAndOffset(const MemoryDesc* desc); + + /** + * @brief Creates InferenceEngine::Blob from MemoryDesc + * @param desc MemoryDesc from which will be created InferenceEngine::Blob + * @return pointer to InferenceEngine::Blob + */ + static InferenceEngine::Blob::Ptr createBlob(const MemoryDesc& memDesc); + + /** + * @brief Creates InferenceEngine::Blob from MKLDNNMemory with the memory reuse + * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob + * @return pointer to InferenceEngine::Blob + */ + static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem); + + /** + * @brief Converts MemoryDesc to InferenceEngine::TensorDesc + * @param desc MemoryDesc to be converted + * @return converted InferenceEngine::TensorDesc + */ + static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc); + + /** + * @brief Converts dim to string, undefined dim represented as ? + * @param dim Dim to be converted + * @return dim as string + */ + static std::string dim2str(size_t dim); + + /** + * @brief Converts dims to string, undefined dim represented as ? + * @param dim Dims to be converted + * @return dims as string + */ + static std::string dims2str(const std::vector& dims); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp new file mode 100644 index 00000000000000..b59b1314e23ab4 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp @@ -0,0 +1,805 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape) : MemoryDesc(shape, DnnlBlocked) { + const auto ndims = shape.getRank(); + const auto &dims = shape.getDims(); + mkldnn::memory::dims plain_strides; + if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL); + } else { + plain_strides.resize(ndims, 1); + for (size_t i = 1; i < ndims; i++) { + plain_strides[ndims - i -1] = plain_strides[ndims - i] * dims[ndims - i]; + } + } + + desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims), MKLDNNExtensionUtils::IEPrecisionToDataType(prc), plain_strides}; + + order.resize(ndims); + std::iota(order.begin(), order.end(), 0); +} + +/** + * Construct from blocked parameters + * + * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} + * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. + * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence + * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims + * + * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) + * + * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of + * real dims spliting. + * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] + * but not [0]<=>[4] because it break splitting original dims into internal blocked dims + * Normalization of representation: Make strides growing but keep layout same as original. Not all + * layout allow us to meet normalize form of tensor desc. + * + * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... N] + */ +DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const std::vector& blockedDims, + const std::vector& order, size_t offsetPadding, const std::vector& offsetPaddingToData, + const std::vector& strides) : MemoryDesc(shape, DnnlBlocked) { + using namespace mkldnn; + // scalar case + if (shape.getRank() == 0) { + desc.data.format_kind = dnnl_blocked; + desc.data.data_type = memory::convert_to_c(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); + desc.data.ndims = 1; + desc.data.dims[0] = 1; + desc.data.padded_dims[0] = 1; + desc.data.format_desc.blocking.strides[0] = 1; + desc.data.padded_offsets[0] = 0; + desc.data.offset0 = MKLDNNExtensionUtils::convertToDnnlDim(offsetPadding); + return; + } + + if (order.size() != blockedDims.size()) { + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, order and blocked dims must have equals size"; + } + + if (!offsetPaddingToData.empty() && offsetPaddingToData.size() != order.size()) { + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, offsetPaddingToData must have equal size with order and blocked dims"; + } + + if (!strides.empty() && strides.size() != order.size()) { + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, strides must have equal size with order and blocked dims"; + } + + if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined order."; + } + + if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined blockedDims."; + } + + auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims()); + + size_t outer_ndims = dims.size(); + size_t inner_ndims = order.size() - dims.size(); + + if (!strides.empty()) { + bool is_descending_strides = true; + for (int i = 1; i < strides.size(); i++) { + is_descending_strides &= (strides[i - 1] >= strides[i]); + } + + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims + // and may be we can achieve correct "descending strides" form which allow conversion. + if (!is_descending_strides) + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc from strides: " << vec2str(strides); + } + + std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension + for (size_t i = 0; i < outer_ndims; i++) { + outer_order[order[i]] = i; + } + bool outer_is_correct_permutation_of_n = + std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); + + if (!outer_is_correct_permutation_of_n) + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc because of incorrect order: " << vec2str(order); + + if (!strides.empty() && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) { + bool inner_block_are_dense = one_of(strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted + for (int i = outer_ndims; i < strides.size() - 1; i++) { + inner_block_are_dense &= (strides[i] == strides[i + 1] * blockedDims[i + 1]); + } + + if (!inner_block_are_dense) + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc from strides: " << vec2str(strides) << " inner blocks are not dense."; + } + + // Fill general memory desc fields + desc.data.format_kind = dnnl_blocked; + desc.data.extra.flags = 0; + desc.data.data_type = memory::convert_to_c(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); + desc.data.ndims = dims.size(); + desc.data.offset0 = MKLDNNExtensionUtils::convertToDnnlDim(offsetPadding); + std::copy(dims.begin(), dims.end(), desc.data.dims); + + if (!offsetPaddingToData.empty()) { + bool inner_pad_offsets_is_zero = std::all_of(offsetPaddingToData.begin() + outer_ndims, offsetPaddingToData.end(), + [](size_t pad) { return pad == 0; }); + + if (!inner_pad_offsets_is_zero) + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, inner pad offsets is not zero: " << vec2str(offsetPaddingToData); + auto dnnlPaddedOffsets = MKLDNNExtensionUtils::convertToDnnlDims(offsetPaddingToData); + std::copy(dnnlPaddedOffsets.begin(), dnnlPaddedOffsets.begin() + outer_ndims, desc.data.padded_offsets); + } else { + std::fill(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + outer_ndims, 0); + } + + std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1); + auto dnnlBlkDims = MKLDNNExtensionUtils::convertToDnnlDims(blockedDims); + + for (size_t i = 0; i < order.size(); i++) { + auto idx = order[i]; + if (desc.data.padded_dims[idx] != DNNL_RUNTIME_DIM_VAL && dnnlBlkDims[i] != DNNL_RUNTIME_DIM_VAL) { + desc.data.padded_dims[idx] *= dnnlBlkDims[i]; + } else { + desc.data.padded_dims[idx] = DNNL_RUNTIME_DIM_VAL; + } + } + + // Fill blocking desc + auto &dnn_blk_desc = desc.data.format_desc.blocking; + dnn_blk_desc.inner_nblks = inner_ndims; + std::copy(dnnlBlkDims.end() - inner_ndims, dnnlBlkDims.end(), dnn_blk_desc.inner_blks); + std::copy(order.end() - inner_ndims, order.end(), dnn_blk_desc.inner_idxs); + + if (strides.empty()) { + if (std::any_of(dnnlBlkDims.begin(), dnnlBlkDims.end(), [](memory::dim val) { return val == DNNL_RUNTIME_DIM_VAL; })) { + std::fill(std::begin(dnn_blk_desc.strides), std::begin(dnn_blk_desc.strides) + outer_ndims, DNNL_RUNTIME_DIM_VAL); + } else { + //TODO [DS]: phase 2: refactor + std::vector tmpStrides(order.size()); + tmpStrides[order.size() - 1] = 1; + for (size_t i = 2; i <= order.size(); i++) { + tmpStrides[order.size() - i] = tmpStrides[order.size() - (i - 1)] * dnnlBlkDims[blockedDims.size() - (i - 1)]; + } + for (size_t i = 0; i < outer_ndims; i++) { + dnn_blk_desc.strides[i] = tmpStrides[outer_order[i]]; + } + } + } else { + for (size_t i = 0; i < outer_ndims; i++) { + auto dnnlStrides = MKLDNNExtensionUtils::convertToDnnlDims(strides); + dnn_blk_desc.strides[i] = dnnlStrides[outer_order[i]]; + } + } + + this->order = order; +} + +DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) : + MemoryDesc(shape, DnnlBlocked) { + using namespace mkldnn; + if (format == memory::format_tag::any || format == memory::format_tag::undef) + IE_THROW(Unexpected) << "Can't create mkldnn::desc with any or undef format"; + + const auto dims = shape.getDims(); + if (format == memory::format_tag::x && shape.getRank() == 0) { + desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); + } else { + desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dims), dataType, format); + } + + std::vector perm; + std::vector inner_blks; + std::vector inner_idxs; + + mkldnn::impl::memory_desc_wrapper::compute_blocking(mkldnn::memory::convert_to_c(format), perm, inner_blks, inner_idxs); + + order.swap(perm); + order.insert(order.end(), inner_idxs.begin(), inner_idxs.end()); +} + +const std::vector& DnnlBlockedMemoryDesc::getBlockDims() const { + if (blockedDims.empty()) { + const auto dims = desc.dims(); + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} + std::vector total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + // blocked dims + // [dims via new_outer_order with auto pad] U [inner_blk_dims] + std::vector outer_block_dims = MKLDNNExtensionUtils::convertToVectorDims(dims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + if (outer_block_dims[i] != Shape::UNDEFINED_DIM) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::copy(order.begin(), order.begin() + outer_ndims, outer_order.begin()); + + blockedDims.resize(total_ndims, 0); + std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, + blockedDims.end() - blk_desc.inner_nblks); + std::transform(outer_order.begin(), outer_order.end(), blockedDims.begin(), + [&] (size_t i) { return outer_block_dims[i]; }); + } + return blockedDims; +} + +const std::vector& DnnlBlockedMemoryDesc::getStrides() const { + if (strides.empty()) { + const auto dims = desc.dims(); + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} + std::vector inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::copy(order.begin(), order.begin() + outer_ndims, outer_order.begin()); + + // blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + strides.resize(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), strides.begin(), + [&](size_t i) { return blk_desc.strides[i] == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : blk_desc.strides[i]; }); + } + return strides; +} + +const std::vector& DnnlBlockedMemoryDesc::getOrder() const { + return order; +} + +const std::vector& DnnlBlockedMemoryDesc::getOffsetPaddingToData() const { + if (offsetPaddingToData.empty()) { + offsetPaddingToData = std::vector(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + getOrder().size()); + } + return offsetPaddingToData; +} + +size_t DnnlBlockedMemoryDesc::getOffsetPadding() const { + return MKLDNNExtensionUtils::convertToDim(desc.data.offset0); +} + +bool DnnlBlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { + if (auto desc = dynamic_cast(&rhs)) { + return isCompatible(*desc); + } else if (auto desc = dynamic_cast(&rhs)) { + return isCompatible(*desc); + } else { + return false; + } +} + +bool DnnlBlockedMemoryDesc::isCompatible(const CpuBlockedMemoryDesc& rhs) const { + return this->desc.data.extra.flags == dnnl_memory_extra_flag_none && BlockedMemoryDesc::isCompatible(rhs); +} + +bool DnnlBlockedMemoryDesc::isCompatible(const DnnlBlockedMemoryDesc& rhs) const { + using namespace dnnl; + using namespace impl; + using namespace impl::utils; + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { + return false; + } + + if (this->desc == rhs.desc) { + return true; + } + memory_desc_wrapper wrappedThis(this->desc.data); + memory_desc_wrapper wrappedRhs(rhs.desc.data); + if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) + return false; + + int stride_start = wrappedThis.ndims() > 0 && wrappedThis.dims()[0] == 1 ? 1 : 0; // ignore batch axis stride if batch size == 1 + + const auto thisExtra = this->desc.data.extra; + const auto rhsExtra = rhs.desc.data.extra; + return this->getOrder() == rhs.getOrder() && (thisExtra.flags == rhsExtra.flags && thisExtra.compensation_mask == rhsExtra.compensation_mask && + thisExtra.scale_adjust == rhsExtra.scale_adjust) && wrappedThis.similar_to(wrappedRhs, true, true, 0, stride_start, true, true); +} + +DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc) : + MemoryDesc(MKLDNNExtensionUtils::convertToVectorDims(mdesc.dims()), DnnlBlocked) { + desc = mdesc; + if (desc.data.format_kind == dnnl::impl::format_kind::any) + IE_THROW(Unexpected) << "Memory format any is prohibited!"; + + mkldnn::impl::memory_desc_wrapper descWrapped(desc.data); + if (!descWrapped.is_blocking_desc()) + IE_THROW(Unexpected) << "Can't create DnnlBlockedMemoryDesc from not blocking desc"; + + if (descWrapped.has_runtime_dims_or_strides()) { + IE_THROW(Unexpected) << "Cannot calculate order from undefined dims or strides"; + } + + const auto dims = desc.dims(); + + const auto &blk_desc = descWrapped.blocking_desc(); + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} + std::vector inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + } + + // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} + std::vector total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::iota(outer_order.begin(), outer_order.end(), 0); + std::sort(outer_order.begin(), outer_order.end(), + [&blk_desc, &outer_block_dims](size_t ind_l, size_t ind_r) { + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + + + // blocked order + // [new_outer_order] U [inner_idxs] + SizeVector blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); + order.swap(blk_order); +} + +bool DnnlBlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { + switch (layoutType) { + case LayoutType::ncsp: + return isPlainFormat(); + case LayoutType::nspc: + return isTailCFormat(); + case LayoutType::nCsp8c: + return isBlockedCFormat(8); + case LayoutType::nCsp16c: + return isBlockedCFormat(16); + default: + return false; + } +} + +bool DnnlBlockedMemoryDesc::isPlainFormat() const { + if (shape.getRank() != order.size()) { + return false; + } + for (size_t i = 0; i < order.size(); ++i) { + if (order[i] != i) { + return false; + } + } + return true; +} + +bool DnnlBlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { + const auto &blocking = desc.data.format_desc.blocking; + + if (desc.data.format_kind !=dnnl_blocked || + blocking.inner_nblks != 1 || + blocking.inner_idxs[0] != 1) + return false; + + if ((order.size() - shape.getRank()) != 1) { + return false; + } + for (size_t i = 0; i < order.size() - 1; ++i) { + if (order[i] != i) { + return false; + } + } + if (blk_size != UNREACHABLE_DIM && blk_size != blocking.inner_blks[0]) { + return false; + } + + return true; +} + +bool DnnlBlockedMemoryDesc::isTailCFormat() const { + if (shape.getRank() < 3) { + return false; + } + if (shape.getRank() != order.size()) { + return false; + } + if (!std::is_sorted(order.begin(), --order.end())) { + return false; + } + if (order.back() != 1) { + return false; + } + return true; +} + +std::unique_ptr DnnlBlockedMemoryDesc::cloneWithNewDimsImp(const VectorDims &dims) const { + if (std::any_of(dims.begin(), dims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { + IE_THROW() << "Can't clone desc if new dims are undefined"; + } + + // TODO [DS]: add stride recalculation for strided blobs + getStrides(); + getBlockDims(); + for (int i = strides.size() - 2; i >= 0 ; i--) { + if (strides[i] == Shape::UNDEFINED_DIM) + break; + + if (strides[i] != strides[i + 1] * blockedDims[i + 1]) + IE_THROW(NotImplemented) << "Can't clone desc with new dims for not dense tensor"; + } + + using namespace dnnl::impl::utils; + auto mklDims = MKLDNNExtensionUtils::convertToDnnlDims(dims); + mkldnn::memory::desc newMklDesc = desc; + array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size()); + std::vector perm(order.begin(), order.begin() + mklDims.size()); + auto& blockingDesc = newMklDesc.data.format_desc.blocking; + auto numInnerBlks = blockingDesc.inner_nblks; + std::vector innerBlks(std::begin(blockingDesc.inner_blks), std::begin(blockingDesc.inner_blks) + numInnerBlks); + std::vector innerIdxs(std::begin(blockingDesc.inner_idxs), std::begin(blockingDesc.inner_idxs) + numInnerBlks); + auto retCode = dnnl::impl::fill_blocked(newMklDesc.data, perm, innerBlks, innerIdxs); + if (retCode != dnnl::impl::status::success) { + IE_THROW() << "Can not clone DnnlBlockedMemoryDesc with dims: " << MemoryDescUtils::dims2str(dims); + } + return std::unique_ptr(new DnnlBlockedMemoryDesc(newMklDesc)); +} + +bool DnnlBlockedMemoryDesc::blocksExtended() const { + for (int i = 0; i < desc.data.ndims; i++) { + if (desc.data.dims[i] != desc.data.padded_dims[i]) + return true; + } + return false; +} + +static const std::map> form_tags_by_ndims { + {0, { + mkldnn::memory::format_tag::a // TODO :: really 1d layout for scalar?? + }}, {1, { + mkldnn::memory::format_tag::a + }}, {2, { + mkldnn::memory::format_tag::ab, + mkldnn::memory::format_tag::ba + }}, {3, { + mkldnn::memory::format_tag::abc, + mkldnn::memory::format_tag::acb, + mkldnn::memory::format_tag::bac, + mkldnn::memory::format_tag::bca, + mkldnn::memory::format_tag::cba, + + mkldnn::memory::format_tag::Abc16a, + mkldnn::memory::format_tag::ABc16a16b, + mkldnn::memory::format_tag::ABc4a4b, + mkldnn::memory::format_tag::aBc16b, + mkldnn::memory::format_tag::aBc32b, + mkldnn::memory::format_tag::ABc16b16a, + mkldnn::memory::format_tag::Abc4a, + mkldnn::memory::format_tag::aBc4b, + mkldnn::memory::format_tag::ABc4b16a4b, + mkldnn::memory::format_tag::ABc2b8a4b, + mkldnn::memory::format_tag::ABc16b16a4b, + mkldnn::memory::format_tag::ABc16b16a2b, + mkldnn::memory::format_tag::ABc4b4a, + mkldnn::memory::format_tag::ABc8a16b2a, + mkldnn::memory::format_tag::ABc8a8b, + mkldnn::memory::format_tag::ABc8a4b, + mkldnn::memory::format_tag::aBc8b, + mkldnn::memory::format_tag::ABc8b16a2b, + mkldnn::memory::format_tag::ABc8b8a, + mkldnn::memory::format_tag::Acb16a, + mkldnn::memory::format_tag::Acb4a, + mkldnn::memory::format_tag::Acb8a, + mkldnn::memory::format_tag::BAc16a16b, + mkldnn::memory::format_tag::BAc16b16a, + }}, {4, { // Popular + mkldnn::memory::format_tag::abcd, // plain + mkldnn::memory::format_tag::acdb, // tail_c + mkldnn::memory::format_tag::aBcd8b, // blocked 8c + mkldnn::memory::format_tag::aBcd16b, // blocked 16c + + mkldnn::memory::format_tag::abdc, + + mkldnn::memory::format_tag::bacd, + mkldnn::memory::format_tag::bcda, + mkldnn::memory::format_tag::cdba, + mkldnn::memory::format_tag::dcab, + + mkldnn::memory::format_tag::Abcd8a, + mkldnn::memory::format_tag::Abcd16a, + mkldnn::memory::format_tag::Abcd32a, + mkldnn::memory::format_tag::ABcd16a16b, + mkldnn::memory::format_tag::aBcd32b, + mkldnn::memory::format_tag::ABcd16b16a, + mkldnn::memory::format_tag::aBCd16b16c, + mkldnn::memory::format_tag::aBCd16c16b, + mkldnn::memory::format_tag::Abcd4a, + mkldnn::memory::format_tag::aBcd4b, + mkldnn::memory::format_tag::ABcd4b16a4b, + mkldnn::memory::format_tag::ABcd2b8a4b, + mkldnn::memory::format_tag::ABcd4b4a, + mkldnn::memory::format_tag::ABcd4a4b, + mkldnn::memory::format_tag::aBCd4c16b4c, + mkldnn::memory::format_tag::aBCd2c8b4c, + mkldnn::memory::format_tag::ABcd16b16a4b, + mkldnn::memory::format_tag::ABcd16b16a2b, + mkldnn::memory::format_tag::aBCd16c16b4c, + mkldnn::memory::format_tag::aBCd16c16b2c, + mkldnn::memory::format_tag::aBCd4c4b, + mkldnn::memory::format_tag::aBCd4b4c, + mkldnn::memory::format_tag::ABcd8a16b2a, + mkldnn::memory::format_tag::ABcd8a8b, + mkldnn::memory::format_tag::ABcd8a32b, + mkldnn::memory::format_tag::ABcd32a32b, + mkldnn::memory::format_tag::ABcd8a4b, + + mkldnn::memory::format_tag::ABcd8b16a2b, + mkldnn::memory::format_tag::aBCd8b16c2b, + mkldnn::memory::format_tag::ABcd8b8a, + mkldnn::memory::format_tag::aBCd8b8c, + mkldnn::memory::format_tag::aBCd8b4c, + mkldnn::memory::format_tag::aBCd8c16b2c, + mkldnn::memory::format_tag::aBCd8c8b, + + mkldnn::memory::format_tag::ABcd4a8b8a4b, + mkldnn::memory::format_tag::ABcd2a8b8a2b, + + mkldnn::memory::format_tag::aBdc16b, + mkldnn::memory::format_tag::aBdc4b, + mkldnn::memory::format_tag::aBdc8b, + mkldnn::memory::format_tag::aCBd16b16c, + mkldnn::memory::format_tag::aCBd16c16b, + mkldnn::memory::format_tag::Acdb16a, + mkldnn::memory::format_tag::Acdb4a, + mkldnn::memory::format_tag::Acdb8a, + mkldnn::memory::format_tag::BAcd16a16b, + mkldnn::memory::format_tag::BAcd16b16a, + mkldnn::memory::format_tag::ABcd32a32b, + mkldnn::memory::format_tag::Acdb32a, + mkldnn::memory::format_tag::aBCd2b4c2b, + mkldnn::memory::format_tag::aBCd2c4b2c, + mkldnn::memory::format_tag::aBCd4b8c2b, + mkldnn::memory::format_tag::aBCd4c8b2c, + }}, {5, { // Popular + mkldnn::memory::format_tag::abcde, // plain + mkldnn::memory::format_tag::acdeb, // tail_c + mkldnn::memory::format_tag::aBcde8b, // blocked 8c + mkldnn::memory::format_tag::aBcde16b, // blocked 16c + + mkldnn::memory::format_tag::abdec, + mkldnn::memory::format_tag::acbde, + mkldnn::memory::format_tag::bacde, + mkldnn::memory::format_tag::bcdea, + mkldnn::memory::format_tag::cdeba, + mkldnn::memory::format_tag::decab, + + mkldnn::memory::format_tag::Abcde16a, + mkldnn::memory::format_tag::Abcde32a, + mkldnn::memory::format_tag::ABcde16a16b, + mkldnn::memory::format_tag::aBcde32b, + mkldnn::memory::format_tag::ABcde16b16a, + mkldnn::memory::format_tag::aBCde16b16c, + mkldnn::memory::format_tag::aBCde16c16b, + mkldnn::memory::format_tag::aBCde2c8b4c, + mkldnn::memory::format_tag::Abcde4a, + mkldnn::memory::format_tag::aBcde4b, + mkldnn::memory::format_tag::ABcde4b4a, + mkldnn::memory::format_tag::ABcde4a4b, + mkldnn::memory::format_tag::aBCde4b4c, + mkldnn::memory::format_tag::aBCde4c16b4c, + mkldnn::memory::format_tag::aBCde16c16b4c, + mkldnn::memory::format_tag::aBCde16c16b2c, + mkldnn::memory::format_tag::aBCde4c4b, + mkldnn::memory::format_tag::Abcde8a, + mkldnn::memory::format_tag::ABcde8a8b, + mkldnn::memory::format_tag::ABcde8a4b, + mkldnn::memory::format_tag::ABcde8b16a2b, + mkldnn::memory::format_tag::ABcde4b16a4b, + mkldnn::memory::format_tag::ABcde2b8a4b, + mkldnn::memory::format_tag::aBCde8b16c2b, + mkldnn::memory::format_tag::ABcde8b8a, + mkldnn::memory::format_tag::aBCde8b8c, + mkldnn::memory::format_tag::aBCde8b4c, + mkldnn::memory::format_tag::aBCde4b8c8b4c, + mkldnn::memory::format_tag::aBCde2b8c8b2c, + mkldnn::memory::format_tag::aBCde8c16b2c, + mkldnn::memory::format_tag::aBCde8c8b, + mkldnn::memory::format_tag::aBdec16b, + mkldnn::memory::format_tag::aBdec4b, + mkldnn::memory::format_tag::aBdec8b, + mkldnn::memory::format_tag::aCBde16b16c, + mkldnn::memory::format_tag::aCBde16c16b, + mkldnn::memory::format_tag::Acdeb16a, + mkldnn::memory::format_tag::Acdeb4a, + mkldnn::memory::format_tag::Acdeb8a, + mkldnn::memory::format_tag::BAcde16b16a, + mkldnn::memory::format_tag::BAcde16a16b, + mkldnn::memory::format_tag::aBdec32b, + mkldnn::memory::format_tag::aBCde2b4c2b, + mkldnn::memory::format_tag::aBCde2c4b2c, + mkldnn::memory::format_tag::aBCde4b8c2b, + mkldnn::memory::format_tag::aBCde4c8b2c, + }}, {6, { // Popular + mkldnn::memory::format_tag::abcdef, // plain + mkldnn::memory::format_tag::acbdef, // permute + mkldnn::memory::format_tag::defcab, // permute + mkldnn::memory::format_tag::aBcdef16b, // blocked 16c + + mkldnn::memory::format_tag::aBCdef16b16c, + mkldnn::memory::format_tag::aBCdef16c16b, + mkldnn::memory::format_tag::aBcdef4b, + mkldnn::memory::format_tag::aBCdef2c8b4c, + mkldnn::memory::format_tag::aBCdef4c4b, + mkldnn::memory::format_tag::aBCdef4b4c, + mkldnn::memory::format_tag::aBCdef8b8c, + mkldnn::memory::format_tag::aBCdef8b4c, + mkldnn::memory::format_tag::aBCdef8c16b2c, + mkldnn::memory::format_tag::aBCdef4c16b4c, + mkldnn::memory::format_tag::aBCdef8c8b, + + mkldnn::memory::format_tag::aBdefc16b, + mkldnn::memory::format_tag::aCBdef16c16b, + mkldnn::memory::format_tag::aCBdef16b16c, + mkldnn::memory::format_tag::aBdefc4b, + mkldnn::memory::format_tag::aBdefc8b, + + mkldnn::memory::format_tag::Abcdef4a, + mkldnn::memory::format_tag::Abcdef8a, + mkldnn::memory::format_tag::Abcdef16a, + mkldnn::memory::format_tag::Abcdef32a, + mkldnn::memory::format_tag::aBCdef2b4c2b, + mkldnn::memory::format_tag::aBCdef2c4b2c, + mkldnn::memory::format_tag::aBCdef4b8c2b, + mkldnn::memory::format_tag::aBCdef4c8b2c, + }} +}; + +bool DnnlBlockedMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const { + mkldnn::memory::desc refDesc(desc.dims(), desc.data_type(), fmt); + + if (desc.data.ndims != refDesc.data.ndims) + return false; + + if (desc.data.format_kind != dnnl_blocked || refDesc.data.format_kind != dnnl_blocked) + IE_THROW() << "DnnlMemoryDesc::isSame is not implemented for non blocked memory format"; + + auto actualBlkDesc = desc.data.format_desc.blocking; + auto refBlkDesc = refDesc.data.format_desc.blocking; + if (actualBlkDesc.inner_nblks != refBlkDesc.inner_nblks) + return false; + + for (size_t i = 0; i < actualBlkDesc.inner_nblks; ++i) + if (actualBlkDesc.inner_blks[i] != refBlkDesc.inner_blks[i]) + return false; + + for (size_t i = 0; i < actualBlkDesc.inner_nblks; ++i) + if (actualBlkDesc.inner_idxs[i] != refBlkDesc.inner_idxs[i]) + return false; + + auto actualStrides = desc.data.format_desc.blocking.strides; + auto refStrides = refDesc.data.format_desc.blocking.strides; + + std::vector actualOrder(desc.data.ndims); + { + const auto dims = desc.dims(); + std::vector total_block_per_dim(dims.size(), 1); + const auto &blk_desc = desc.data.format_desc.blocking; + for (int i = 0; i < blk_desc.inner_nblks; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + std::iota(actualOrder.begin(), actualOrder.end(), 0); + std::sort(actualOrder.begin(), actualOrder.end(), + [&actualStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (actualStrides[ind_l] > actualStrides[ind_r]) || + (actualStrides[ind_l] == actualStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + } + + std::vector refOrder(refDesc.data.ndims); + { + const auto dims = refDesc.dims(); + std::vector total_block_per_dim(dims.size(), 1); + const auto &blk_desc = refDesc.data.format_desc.blocking; + for (int i = 0; i < blk_desc.inner_nblks; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + std::iota(refOrder.begin(), refOrder.end(), 0); + std::sort(refOrder.begin(), refOrder.end(), + [&refStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (refStrides[ind_l] > refStrides[ind_r]) || + (refStrides[ind_l] == refStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + } + + if (actualOrder != refOrder) { + return false; + } + + return true; +} + +mkldnn::memory::format_tag DnnlBlockedMemoryDesc::getFormat() const { + // TODO [OneDNN]: Previously it was a field of tdesc, but now the brute + // force search here. Please avoid of using this method. + const auto ndims = desc.dims().size(); + + // There are no suitable format_tag for this + if (ndims == 0 || ndims > 6) + return mkldnn::memory::format_tag::undef; + + for (const auto fmt : form_tags_by_ndims.at(ndims)) { + if (this->isSame(fmt)) + return fmt; + } + + return mkldnn::memory::format_tag::undef; +} + +std::string DnnlBlockedMemoryDesc::serializeFormat() const { + auto fmt = getFormat(); + return mkldnn::utils::fmt2str(fmt); +} + +size_t DnnlBlockedMemoryDesc::getMaxMemSize() const { + if (shape.isStatic()) { + return getCurrentMemSize(); + } + + auto& maxDims = shape.getMaxDims(); + if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { + return UNDEFINED_SIZE; + } + + auto maxDimsDesc = cloneWithNewDims(maxDims); + return maxDimsDesc->getCurrentMemSize(); +} + +size_t DnnlBlockedMemoryDesc::getPaddedElementsCount() const { + return std::accumulate(std::begin(desc.data.padded_dims), std::begin(desc.data.padded_dims) + desc.data.ndims, size_t{1}, + std::multiplies()); +} diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h new file mode 100644 index 00000000000000..860e770eefb644 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h @@ -0,0 +1,74 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "blocked_memory_desc.h" +#include "mkldnn_memory.h" +#include "mkldnn_extension_utils.h" + +namespace MKLDNNPlugin { + +class DnnlBlockedMemoryDesc : public BlockedMemoryDesc, public DnnlMemoryDesc { +public: + // Creates planar DnnlBlockedMemoryDesc + DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape); + + DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); + + MemoryDescPtr clone() const override { + return MKLDNNPlugin::make_unique(*this); + } + + bool isCompatible(const MemoryDesc& rhs) const override; + bool isCompatible(const DnnlBlockedMemoryDesc& rhs) const; + bool isCompatible(const CpuBlockedMemoryDesc& rhs) const; + + const std::vector& getBlockDims() const override; + + const std::vector& getOrder() const override; + + const std::vector& getOffsetPaddingToData() const override; + + size_t getOffsetPadding() const override; + + const std::vector& getStrides() const override; + + bool hasLayoutType(LayoutType layoutType) const override; + + bool blocksExtended() const override; + + bool isSame(mkldnn::memory::format_tag fmt) const override; + + std::string serializeFormat() const override; + + size_t getMaxMemSize() const override; + + size_t getPaddedElementsCount() const override; + +private: + DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const std::vector& blockedDims, + const std::vector& order, size_t offsetPadding = 0, const std::vector& offsetPaddingToData = {}, + const std::vector& strides = {}); + + DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc); + + std::unique_ptr cloneWithNewDimsImp(const std::vector& dims) const override; + + bool isPlainFormat() const; + bool isBlockedCFormat(size_t blk_size = UNREACHABLE_DIM) const; + bool isTailCFormat() const; + + /** + * Try to define original format tag use on creation + * + * @return format tag if was able to define it + */ + mkldnn::memory::format_tag getFormat() const; + + friend DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc); + friend class MemoryDescUtils; +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.cpp new file mode 100644 index 00000000000000..a5cf7742680e45 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.cpp @@ -0,0 +1,79 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "dnnl_memory_desc.h" +#include "mkldnn_extension_utils.h" +#include +#include "mkldnn/ie_mkldnn.h" + +namespace MKLDNNPlugin { + +DnnlMemoryDesc::DnnlMemoryDesc(const mkldnn::memory::desc& desc) : + MemoryDesc(Shape(MKLDNNExtensionUtils::convertToVectorDims(desc.dims())), Mkldnn), desc(desc) { + if (desc.data.format_kind == dnnl::impl::format_kind::any) + IE_THROW(Unexpected) << "Memory format any is prohibited!"; +} + +size_t DnnlMemoryDesc::getCurrentMemSizeImp() const { + return MKLDNNExtensionUtils::getMemSizeForOneDnnDesc(desc); +} + +size_t DnnlMemoryDesc::getElementOffset(size_t elemNumber) const { + mkldnn::impl::memory_desc_wrapper wrapped(desc.data); + return wrapped.off_l(elemNumber); +} + +bool DnnlMemoryDesc::isCompatible(const MemoryDesc &rhs) const { + if (MemoryDescType::Mkldnn == rhs.getType()) { + return this->desc == rhs.as()->desc; + } else { + return false; + } +} + +// TODO: add serialization for packed format +std::string DnnlMemoryDesc::serializeFormat() const { + if (desc.data.format_kind == dnnl_format_kind_wino) { + switch (desc.data.format_desc.wino_desc.wino_format) { + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOIoi: return "wino_aaOIoi"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOio: return "wino_aaOio"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOBiOo: return "wino_aaOBiOo"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_OBaaIBOIio: return "wino_OBaaIBOIio"; + default: return "wino_undef"; + } + } + return "undef"; +} + +bool DnnlMemoryDesc::isDefinedImp() const { + mkldnn::impl::memory_desc_wrapper wrappedThis(desc.data); + + if (wrappedThis.has_runtime_dims_or_strides()) { + return false; + } + + return wrappedThis.offset0() != DNNL_RUNTIME_DIM_VAL; +} + +InferenceEngine::Precision DnnlMemoryDesc::getPrecision() const { + return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type()); +} + +void DnnlMemoryDesc::setPrecision(InferenceEngine::Precision prc) { + desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); +} + +std::unique_ptr DnnlMemoryDesc::cloneWithNewDimsImp(const VectorDims &dims) const { + IE_THROW(Unexpected) << "Cannot clone non blocked oneDNN desc with new dims"; +} + +size_t DnnlMemoryDesc::getMaxMemSize() const { + if (shape.isDynamic()) { + IE_THROW() << "Can't compute max mem size for DnnlMemoryDesc with dynaimc shape"; + } + + return getCurrentMemSize(); +} + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.h new file mode 100644 index 00000000000000..cb8b1d6ae321c7 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.h @@ -0,0 +1,69 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_blocked_memory_desc.h" +#include "mkldnn_extension_utils.h" + +namespace MKLDNNPlugin { + +class DnnlMemoryDesc; + +using DnnlMemoryDescPtr = std::unique_ptr; +using DnnlMemoryDescCPtr = std::unique_ptr; + +class DnnlMemoryDesc : public virtual MemoryDesc { +public: + mkldnn::memory::data_type getDataType() const { + return static_cast(desc.data.data_type); + } + + dnnl_format_kind_t getFormatKind() const { + return desc.data.format_kind; + } + + std::unique_ptr clone() const override { + return MKLDNNPlugin::make_unique(*this); + } + + std::string serializeFormat() const override; + + InferenceEngine::Precision getPrecision() const override; + + void setPrecision(InferenceEngine::Precision prc) override; + + bool isCompatible(const MemoryDesc& rhs) const override; + + size_t getMaxMemSize() const override; + + const mkldnn::memory::desc& getDnnlDesc() const { + return desc; + } + + bool hasLayoutType(LayoutType layoutType) const override { return false; } + + virtual bool isSame(mkldnn::memory::format_tag fmt) const { return false; } + + bool hasEmptyExtraData() const { return desc.data.extra.flags == dnnl_memory_extra_flag_none; } + +protected: + DnnlMemoryDesc() {} + static constexpr size_t UNREACHABLE_DIM = std::numeric_limits::max(); + + mkldnn::memory::desc desc; + +private: + explicit DnnlMemoryDesc(const mkldnn::memory::desc& desc); + + size_t getElementOffset(size_t elemNumber) const override; + + size_t getCurrentMemSizeImp() const override; + bool isDefinedImp() const override; + std::unique_ptr cloneWithNewDimsImp(const std::vector& dims) const override; + + friend DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h index d2c2f6c6233cd9..ed8a59b34cc337 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h @@ -6,7 +6,7 @@ #include #include "cpu_shape.h" -#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc.h" #include "mkldnn_weights_cache.hpp" #include @@ -68,7 +68,7 @@ class MKLDNNEdge { MKLDNNEdgePtr getSharedEdge() const; MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const; - bool canProvideMaxSize() { + bool hasDefinedMaxSize() { return getDesc().getMaxMemSize() != MemoryDesc::UNDEFINED_SIZE; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp index 008b4edff9d729..5df5c605dcc480 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp @@ -5,6 +5,7 @@ #include "mkldnn_extension_utils.h" #include "utils/general_utils.h" #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -75,22 +76,63 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d } } -InferenceEngine::SizeVector MKLDNNExtensionUtils::convertToSizeVector(const memory::dims& dims) { +Dim MKLDNNExtensionUtils::convertToDim(const dnnl::memory::dim &dim) { + return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast(dim); +} +dnnl::memory::dim MKLDNNExtensionUtils::convertToDnnlDim(const Dim &dim) { + return dim == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast(dim); +} + +VectorDims MKLDNNExtensionUtils::convertToVectorDims(const memory::dims& dims) { std::vector vecResult; vecResult.reserve(dims.size()); std::back_insert_iterator> itr(vecResult); - std::transform(dims.begin(), dims.end(), itr, [](memory::dim x) { - return x == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast(x); - }); + std::transform(dims.begin(), dims.end(), itr, convertToDim); return vecResult; } -memory::dims MKLDNNExtensionUtils::convertToDnnlDims(const InferenceEngine::SizeVector& dims) { +memory::dims MKLDNNExtensionUtils::convertToDnnlDims(const VectorDims& dims) { memory::dims vecResult; vecResult.reserve(dims.size()); std::back_insert_iterator itr(vecResult); - std::transform(dims.begin(), dims.end(), itr, [](size_t x) { - return x == Shape::UNDEFINED_DIM ? DNNL_RUNTIME_DIM_VAL : static_cast(x); - }); + std::transform(dims.begin(), dims.end(), itr, convertToDnnlDim); return vecResult; } + +memory::format_tag MKLDNNExtensionUtils::GetPlainFormatByRank(size_t rank) { + switch (rank) { + case 0: + case 1: + return memory::format_tag::a; + case 2: + return memory::format_tag::ab; + case 3: + return memory::format_tag::abc; + case 4: + return memory::format_tag::abcd; + case 5: + return memory::format_tag::abcde; + case 6: + return memory::format_tag::abcdef; + default: + return memory::format_tag::undef; + } +} + +DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc) { + if (desc.data.format_kind == dnnl_blocked) { + return std::unique_ptr(new DnnlBlockedMemoryDesc(desc)); + } else { + return std::unique_ptr(new DnnlMemoryDesc(desc)); + } +} + +size_t MKLDNNExtensionUtils::getMemSizeForOneDnnDesc(mkldnn::memory::desc desc) { + const auto offset0 = desc.data.offset0; + desc.data.offset0 = 0; + size_t size = desc.get_size(); + if (size == DNNL_RUNTIME_SIZE_VAL) + return MemoryDesc::UNDEFINED_SIZE; + size += offset0 * sizeOfDataType(desc.data_type()); + return size; +} diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h index 8e7f9a1b3742e7..d610d2fcc593b0 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h @@ -11,17 +11,30 @@ #include #include "mkldnn.hpp" -#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc.h" namespace MKLDNNPlugin { +class DnnlMemoryDesc; + class MKLDNNExtensionUtils { public: static uint8_t sizeOfDataType(mkldnn::memory::data_type dataType); static mkldnn::memory::data_type IEPrecisionToDataType(const InferenceEngine::Precision& prec); static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType); - static InferenceEngine::SizeVector convertToSizeVector(const mkldnn::memory::dims& dims); - static std::vector convertToDnnlDims(const InferenceEngine::SizeVector& dims); + static Dim convertToDim(const dnnl::memory::dim &dim); + static dnnl::memory::dim convertToDnnlDim(const Dim &dim); + static VectorDims convertToVectorDims(const mkldnn::memory::dims& dims); + static std::vector convertToDnnlDims(const VectorDims& dims); + static mkldnn::memory::format_tag GetPlainFormatByRank(size_t rank); + + /** + * @brief Creates DnnlBlockedMemoryDesc if desc is blocked, otherwise DnnlMemoryDesc + * @param desc mkldnn::memory::desc from which one of the descriptors will be created + * @return pointer to DnnlBlockedMemoryDesc or DnnlMemoryDesc + */ + static std::unique_ptr makeDescriptor(const mkldnn::memory::desc &desc); + static size_t getMemSizeForOneDnnDesc(mkldnn::memory::desc desc); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index b8f4c3259ed459..d7a8fdc9029d24 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -39,7 +39,7 @@ #include "utils/node_dumper.h" #include "utils/ngraph_utils.hpp" #include "utils/cpu_utils.hpp" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include #include @@ -47,6 +47,7 @@ #include #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -439,8 +440,8 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { } static bool isReorderAvailable(const MemoryDesc& parentDesc, const MemoryDesc& childDesc, const mkldnn::engine& eng) { - memory::desc dstMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(childDesc); - memory::desc srcMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(parentDesc);; + memory::desc dstMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(childDesc)->getDnnlDesc(); + memory::desc srcMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(parentDesc)->getDnnlDesc(); mkldnn::primitive_attr attr; dnnl_primitive_desc_t result = nullptr; @@ -522,7 +523,7 @@ static edge_clusters_t findEdgeClusters(const std::vector & graph edge_cluster_idx_map_t edge_cluster_indices; for (auto &edge : graphEdges) { - if (!edge->canProvideMaxSize()) + if (!edge->hasDefinedMaxSize()) continue; auto edge_it = edge_cluster_indices.find(edge); @@ -642,7 +643,7 @@ void MKLDNNGraph::AllocateWithReuse() { size_t total_size = static_cast(memSolver.solve()) * alignment; memWorkspace = std::make_shared(eng); - memWorkspace->Create(MKLDNNMemoryDesc({total_size}, mkldnn::memory::data_type::s8)); + memWorkspace->Create(DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size}))); if (edge_clusters.empty()) return; @@ -682,10 +683,10 @@ void MKLDNNGraph::Allocate() { // Allocate memory space for all edges marked with NeedAllocation AllocateWithReuse(); - // Resolve all other edges with status NotAllocated or in-place - for (auto& node : graphNodes) node->resolveNotAllocatedEdges(); + // Resolve all other edges with status NotAllocated and in-place + for (auto& node : graphNodes) node->resolveInPlaceEdges(); - // Create dummy memory with undefined desc + // Create dummy memory with undefined desc for edges that are not allocated on the previous stages (memory solver and inPlace resolving) for (auto& edge : graphEdges) edge->allocate(); // Check all getters. Should work. @@ -709,7 +710,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData(); if (ext_data_ptr != inter_data_ptr) { - auto ext_tdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(in->getTensorDesc()); + auto ext_tdesc = *MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc()); auto ext_mem = MKLDNNMemory(eng); ext_mem.Create(ext_tdesc, ext_data_ptr, false); @@ -743,7 +744,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { // TODO [DS]: phase 2: remove this blob allocation when possible, i.e. when dynamic ie blob representation becomes available if (out.find(name) == out.end()) { - out[name] = MemoryDescUtils::createBlob(intr_blob.GetDesc()); + out[name] = MemoryDescUtils::createBlob(intr_blob.getDesc()); } // TODO [DS]: is it sill true for the new paradigm? @@ -751,11 +752,15 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { // IE_THROW(Unexpected) << "The network outputs do not contain mkldnn graph output node name: \"" << name << "\""; // } - if (out.at(name)->size() != intr_blob.GetElementsCount()) { + if (out[name]->getTensorDesc().getDims() != intr_blob.getStaticDims()) { // TODO [DS]: phase 2: rewrite when dynamic ie blob representation becomes available // IE_THROW() << "Output blob number of elements is not equal network output number of elements (" -// << ext_blob->size() << "!=" << intr_blob.GetElementsCount() << ")."; - out[name] = MemoryDescUtils::createBlob(intr_blob.GetDesc()); +// << ext_blob->size() << "!=" << intr_blob.GetShape().getElementsCount() << ")."; + if (out[name]->byteSize() >= intr_blob.GetSize()) { + out[name]->getTensorDesc().reshape(intr_blob.getStaticDims()); + } else { + out[name] = MemoryDescUtils::createBlob(intr_blob.getDesc()); + } } auto ext_blob = out.at(name); @@ -773,7 +778,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { // That is the same memory. No need to copy if (ext_blob_ptr == intr_blob_ptr) continue; - int MB = intr_blob.GetDims()[0]; + int MB = intr_blob.getStaticDims()[0]; int MB_to_process = MB; // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT??? // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm @@ -784,9 +789,9 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { MB_to_process = node->batchToProcess(); } - size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB; + size_t size_to_copy = intr_blob.GetDescWithType()->getPaddedElementsCount() * MB_to_process / MB; - const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getMemory().GetDesc()); + const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getMemory().getDesc()); const auto expectedDesc = ext_blob->getTensorDesc(); // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it @@ -801,7 +806,7 @@ void MKLDNNGraph::PullOutputData(BlobMap &out) { } if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) { - auto outBlobDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(expectedDesc); + auto outBlobDesc = *MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc); auto outBloMem = MKLDNNMemory(eng); outBloMem.Create(outBlobDesc, ext_blob_ptr, false); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index 5a51b07014278c..5c9f9a244ac932 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -86,14 +86,14 @@ class MKLDNNGraph { return outputNodesMap; } - MKLDNNNodePtr GetInputNodeByName(const std::string &name) { + MKLDNNNodePtr getInputNodeByName(const std::string &name) { auto input = inputNodesMap.find(name); if (input == inputNodesMap.end()) IE_THROW() << "CPU execution graph doesn't contain input node with name: " << name; return input->second; } - MKLDNNNodePtr GetOutputNodeByName(const std::string &name) { + MKLDNNNodePtr getOutputNodeByName(const std::string &name) { auto output = outputNodesMap.find(name); if (output == outputNodesMap.end()) IE_THROW() << "CPU execution graph doesn't contain output node with name: " << name; @@ -239,6 +239,7 @@ class MKLDNNGraph { friend std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); private: + // TODO: change std::map to std::unordered_map std::map inputNodesMap; std::map outputNodesMap; // these node pointers (from graphNodes) are to avoid regular checking for diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index 523d5dce81b424..4c790cebf83696 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -45,11 +45,11 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no std::string outputPrecisionsStr; if (!node->getChildEdges().empty()) { - outputPrecisionsStr = node->getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); + outputPrecisionsStr = node->getChildEdgeAt(0)->getMemory().getDesc().getPrecision().name(); bool isAllEqual = true; for (size_t i = 1; i < node->getChildEdges().size(); i++) { - if (node->getChildEdgeAt(i - 1)->getMemory().GetDesc().getPrecision() != node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision()) { + if (node->getChildEdgeAt(i - 1)->getMemory().getDesc().getPrecision() != node->getChildEdgeAt(i)->getMemory().getDesc().getPrecision()) { isAllEqual = false; break; } @@ -58,12 +58,12 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no // If all output precisions are the same, we store the name only once if (!isAllEqual) { for (size_t i = 1; i < node->getChildEdges().size(); i++) - outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision().name()); + outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getMemory().getDesc().getPrecision().name()); } } else { // Branch to correctly handle output nodes if (!node->getParentEdges().empty()) { - outputPrecisionsStr = node->getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); + outputPrecisionsStr = node->getParentEdgeAt(0)->getMemory().getDesc().getPrecision().name(); } } serialization_info[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = outputPrecisionsStr; @@ -160,7 +160,7 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph auto meta_data = extract_node_metadata(node); std::shared_ptr return_node; if (is_input) { - auto& desc = node->getChildEdgeAt(0)->getMemory().GetDesc(); + auto& desc = node->getChildEdgeAt(0)->getMemory().getDesc(); auto param = std::make_shared(details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); return_node = param; params.push_back(param); @@ -172,7 +172,7 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()); for (size_t port = 0; port < return_node->get_output_size(); ++port) { - auto& desc = node->getChildEdgeAt(port)->getMemory().GetDesc(); + auto& desc = node->getChildEdgeAt(port)->getMemory().getDesc(); return_node->set_output_type(port, details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); } } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index c450fc63ece68e..e45d76edde8c9c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -44,7 +44,7 @@ #include #include "mkldnn_itt.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -57,100 +57,105 @@ void MKLDNNGraphOptimizer::ApplyCommonGraphOptimizations(MKLDNNGraph &graph) { FuseConvolutionAndBias(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMultiplyAndAdd"); - FuseMultiplyAndAdd(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMultiplyAndAdd"); + // FuseMultiplyAndAdd(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseDeconvolutionAndSimpleOperation"); - FuseDeconvolutionAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseDeconvolutionAndSimpleOperation"); + // FuseDeconvolutionAndSimpleOperation(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBroadcastAndEltwise"); - FuseBroadcastAndEltwise(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseBroadcastAndEltwise"); + // FuseBroadcastAndEltwise(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseClampAndFakeQuantize"); - FuseClampAndFakeQuantize(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseClampAndFakeQuantize"); + // FuseClampAndFakeQuantize(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePerformedAsScaleShiftAndFakeQuantize"); - FusePerformedAsScaleShiftAndFakeQuantize(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePerformedAsScaleShiftAndFakeQuantize"); + // FusePerformedAsScaleShiftAndFakeQuantize(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndZeroPoints"); - FuseConvolutionAndZeroPoints(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndZeroPoints"); + // FuseConvolutionAndZeroPoints(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperationThroughMaxPool"); - FuseConvolutionAndSimpleOperationThroughMaxPool(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperationThroughMaxPool"); + // FuseConvolutionAndSimpleOperationThroughMaxPool(graph); + // graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperation"); FuseConvolutionAndSimpleOperation(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); - graph.SortTopologically(); - graph.RemoveDroppedEdges(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); + // graph.SortTopologically(); + // graph.RemoveDroppedEdges(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePoolingAndFakeQuantize"); - FusePoolingAndFakeQuantize(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FusePoolingAndFakeQuantize"); + // FusePoolingAndFakeQuantize(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); - graph.SortTopologically(); - graph.RemoveDroppedEdges(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); + // graph.SortTopologically(); + // graph.RemoveDroppedEdges(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDWConvolution"); - FuseConvolutionAndDWConvolution(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndDWConvolution"); + // FuseConvolutionAndDWConvolution(graph); + // graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionSumAndConvolutionSumActivation"); FuseConvolutionSumAndConvolutionSumActivation(graph); graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperation"); - FuseConvolutionAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndSimpleOperation"); + // FuseConvolutionAndSimpleOperation(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFullyConnectedAndSimpleOperation"); - FuseFullyConnectedAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFullyConnectedAndSimpleOperation"); + // FuseFullyConnectedAndSimpleOperation(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMVNAndSimpleOperation"); - FuseMVNAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseMVNAndSimpleOperation"); + // FuseMVNAndSimpleOperation(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseInterpolateAndSimpleOperation"); - FuseInterpolateAndSimpleOperation(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseInterpolateAndSimpleOperation"); + // FuseInterpolateAndSimpleOperation(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseNormalizeL2AndSimpleOperation"); - FuseNormalizeL2AndSimpleOperation(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseNormalizeL2AndSimpleOperation"); + // FuseNormalizeL2AndSimpleOperation(graph); + // graph.RemoveDroppedNodes(); - OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseEltwiseAndSimple"); - FuseEltwiseAndSimple(graph); - graph.RemoveDroppedNodes(); + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseEltwiseAndSimple"); + // FuseEltwiseAndSimple(graph); + // graph.RemoveDroppedNodes(); +<<<<<<< HEAD OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "reshapeRnnSeq"); reshapeRnnSeq(graph); graph.RemoveDroppedNodes(); OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); graph.RemoveDroppedEdges(); +======= + // OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "RemoveDroppedEdges"); + // graph.RemoveDroppedEdges(); +>>>>>>> New descriptor hierarchy (#20) } void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &graph) { - OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); + // OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations"); - DropDoubleReorders(graph); - graph.RemoveDroppedNodes(); + // DropDoubleReorders(graph); + // graph.RemoveDroppedNodes(); - MergeTransposeAndReorder(graph); - graph.RemoveDroppedNodes(); + // MergeTransposeAndReorder(graph); + // graph.RemoveDroppedNodes(); - graph.RemoveDroppedEdges(); + // graph.RemoveDroppedEdges(); } void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { @@ -266,6 +271,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { } } +<<<<<<< HEAD void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); @@ -594,6 +600,335 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { initializeOutputCompensation(conv); } } +======= +// void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSuitableParentNode = [](MKLDNNNodePtr node) { +// return node->getType() == Deconvolution && node->getChildEdges().size() == 1; +// }; + +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSuitableParentNode(parentNode)) { +// parent++; +// continue; +// } + +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!parentNode->canFuse(childNode)) { +// parent++; +// continue; +// } + +// childNode->fuseInto(parentNode); + +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == Deconvolution) +// continue; + +// graph.RemoveEdge(p_edge); +// } + +// graph.DropNode(childNode); +// } +// } + +// void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableSecondInput = [](MKLDNNNodePtr node, SizeVector dataDims) { +// if (node->getType() != Input || !node->isConstant()) +// return false; +// auto secondInputDims = node->outputShapes[0].getDims(); +// if (secondInputDims.size() != dataDims.size() || secondInputDims.size() < 2) +// return false; + +// if (secondInputDims[0] != 1 || !dimsEqualStrong(secondInputDims[1], dataDims[1])) +// return false; + +// for (size_t i = 2; i < secondInputDims.size(); i++) { +// if (secondInputDims[i] != 1) +// return false; +// } + +// return true; +// }; + +// auto isSutableParentNode = [&](MKLDNNNodePtr node) { +// if (node->getAlgorithm() != EltwiseMultiply || !node->getFusedWith().empty() || +// node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) +// return false; + +// return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getShape().getDims()); +// }; + +// auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { +// if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) +// return false; + +// return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getShape().getDims()); +// }; + +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } + +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(parentNode, childNode)) { +// parent++; +// continue; +// } + +// auto childs = childNode->childEdges; +// auto parents = childNode->parentEdges; + +// for (size_t i = 0; i < parents.size(); i++) { +// auto p_edge = parents[i].lock(); +// if (!p_edge) continue; +// auto parent = p_edge->getParent(); +// if (!parent) continue; + +// if (parent == parentNode) { +// for (size_t j = 0; j < childs.size(); j++) { +// if (!childs[j].lock()) +// continue; +// auto child = childs[j].lock()->getChild(); +// if (!child) +// continue; + +// MKLDNNEdgePtr &remEdge = p_edge; +// int inNum = 0; +// if (remEdge) { +// inNum = remEdge->getInputNum(); +// remEdge->drop(); +// graph.RemoveEdge(remEdge); +// } +// remEdge = childs[j].lock(); +// int outNum = 0; +// if (remEdge) { +// outNum = remEdge->getOutputNum(); +// remEdge->drop(); +// graph.RemoveEdge(remEdge); +// } +// MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); +// auto &graphEdges = graph.GetEdges(); +// graphEdges.push_back(newEdge); +// parent->addEdge(newEdge); +// } +// } else { +// MKLDNNEdgePtr &remEdge = p_edge; +// int inNum = 0; +// if (remEdge) { +// inNum = remEdge->getInputNum(); +// remEdge->drop(); +// graph.RemoveEdge(remEdge); +// } + +// auto parentEltwise = parentNode; +// MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentEltwise, inNum, parentEltwise->getParentEdges().size())); +// auto &graphEdges = graph.GetEdges(); +// graphEdges.push_back(newEdge); +// parent->addEdge(newEdge); + +// parentEltwise->inputShapes.push_back(parent->outputShapes[0]); +// } +// } + +// parentNode->addOriginalInputPrecision(childNode->getOriginalInputPrecisionAtPort(1)); +// parentNode->setAlgorithm(EltwiseMulAdd); +// parentNode->setTypeStr("MulAdd"); +// parentNode->addOriginalLayer(childNode->getOriginalLayers()); +// graph.DropNode(childNode); +// } +// } + +// void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableConvNode = [](MKLDNNNodePtr node) { +// bool retVal = false; +// if (node->getType() == Convolution) { +// if (auto convNode = std::dynamic_pointer_cast(node)) { +// auto rank = convNode->getParentEdgeAt(0)->getShape().getRank(); +// // int8 depthwise convolution does not support fusing zero points in 3D case +// if (implication(convNode->isDepthWise(), rank == 4)) { +// retVal = true; +// } +// } +// } +// return retVal; +// }; + +// auto initializeInputZeroPoints = [](MKLDNNNodePtr node, MKLDNNNodePtr parent0, MKLDNNNodePtr parent1) { +// auto* convNode = dynamic_cast(node.get()); +// if (convNode == nullptr) +// IE_THROW() << "Cannot get convolution node " << node->getName(); + +// int IC = node->getParentEdgesAtPort(0)[0]->getShape().getDims()[1]; +// int OC = node->getChildEdgesAtPort(0)[0]->getShape().getDims()[1]; + +// if (Shape::UNDEFINED_DIM == IC || Shape::UNDEFINED_DIM == OC) { +// return false; +// } + +// if (parent0->getType() == Eltwise) { +// if (!parent0->getFusedWith().empty() || !parent1->getFusedWith().empty()) +// return false; + +// // The plug-in doesn't support FP32 convolution with input/weights zero points. +// // In case weights are in FP32 (or we have zero points on weights which are not supported by INT8 convolution) we cannot use +// // INT8 implementation so we have to disable input zero points fusing as well. +// if (parent1->getType() != Input || !parent1->isConstant() || parent1->getOriginalOutputPrecisionAtPort(0) != Precision::I8) { +// return false; +// } + +// if (parent0->getAlgorithm() != Algorithm::EltwiseSubtract) +// return false; + +// if (parent0->getParentEdges().size() != 2) +// return false; + +// auto arg0 = parent0->getParentEdgesAtPort(1)[0]->getParent(); +// if (arg0->getType() == Input && arg0->isConstant()) { +// if (arg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8) +// return false; + +// if (parent0->getParentEdgesAtPort(1)[0]->getShape().getRank() < 2) { +// return false; +// } + +// auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims(); +// if (zpDims[0] != 1 || !dimsEqualStrong(zpDims[1], IC)) +// return false; + +// for (int i = 2; i < zpDims.size(); i++) { +// if (zpDims[i] != 1) +// return false; +// } + +// auto arg1 = parent0->getParentEdgesAtPort(0)[0]->getParent(); +// if (arg1->getOriginalOutputPrecisionAtPort(0) != Precision::U8) +// return false; + +// auto zeroPointsConstant = dynamic_cast(arg0.get()); +// if (zeroPointsConstant == nullptr) +// IE_THROW() << "Cannot cast to Input node"; + +// auto zeroPointsBlob = zeroPointsConstant->getMemoryPtr(); +// if (zeroPointsBlob == nullptr) +// IE_THROW() << "Cannot cast to TBlob internal zero points blob"; + +// auto zeroPointsData = static_cast(zeroPointsBlob->GetPtr()); +// if (zeroPointsData == nullptr) +// IE_THROW() << "zeroPointsBlob has not allocated buffer"; + +// auto zeroPointDataSize = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims()[1]; +// if (Shape::UNDEFINED_DIM == zeroPointDataSize) { +// return false; +// } + +// for (int j = 0; j < zeroPointDataSize; j++) { +// convNode->inputZeroPoints.push_back(zeroPointsData[j]); +// } +// } else { +// return false; +// } +// } else { +// return false; +// } + +// if (convNode->outputCompensation.empty()) { +// convNode->outputCompensation.resize(OC); +// } + +// return true; +// }; + +// auto initializeOutputCompensation = [](MKLDNNNodePtr node) { +// auto* convNode = dynamic_cast(node.get()); +// if (convNode == nullptr) +// IE_THROW() << "Cannot get convolution node " << node->getName(); + +// if (convNode->inputZeroPoints.empty()) +// return; + +// auto weightsConstant = dynamic_cast(convNode->getParentEdgesAtPort(1)[0]->getParent().get()); +// if (!weightsConstant || !weightsConstant->isConstant()) +// return; + +// auto weightsBlob = weightsConstant->getMemoryPtr(); +// if (weightsBlob == nullptr) +// IE_THROW() << "Cannot cast to TBlob internal weights blob"; + +// auto weightsPtr = static_cast(weightsBlob->GetPtr()); +// if (weightsPtr == nullptr) +// IE_THROW() << "weightsBlob has not allocated buffer"; + +// ptrdiff_t G = convNode->getGroupNum(); +// const int groupOffset = convNode->getAlgorithm() == ConvolutionGrouped ? 1 : 0; +// auto& weightsConstantDims = weightsConstant->outputShapes[0].getStaticDims(); + +// ptrdiff_t OC = weightsConstantDims[0 + groupOffset]; +// ptrdiff_t IC = weightsConstantDims[1 + groupOffset]; +// ptrdiff_t KD = weightsConstantDims.size() == (5 + groupOffset) ? weightsConstantDims[weightsConstantDims.size() - 3] : 1; +// ptrdiff_t KH = weightsConstantDims[weightsConstantDims.size() - 2]; +// ptrdiff_t KW = weightsConstantDims[weightsConstantDims.size() - 1]; + +// for (size_t g = 0; g < G; g++) { +// for (size_t oc = 0; oc < OC; oc++) { +// int32_t a = 0; +// for (size_t ic = 0; ic < IC; ic++) { +// for (size_t kd = 0; kd < KD; kd++) { +// for (size_t kh = 0; kh < KH; kh++) { +// for (size_t kw = 0; kw < KW; kw++) { +// size_t widx = g * OC * IC * KD * KH * KW + +// oc * IC * KD * KH * KW + +// ic * KD * KH * KW + +// kd * KH * KW + +// kh * KW + +// kw; + +// auto w = static_cast(weightsPtr[widx]); + +// auto izp = !convNode->inputZeroPoints.empty() ? static_cast(convNode->inputZeroPoints[g * IC + ic]) : 0; +// a += w * izp; + +// auto wzp = !convNode->weightsZeroPoints.empty() ? static_cast(convNode->weightsZeroPoints[g * OC + oc]) : 0; +// a -= wzp * izp; +// } +// } +// } +// } +// convNode->outputCompensation[g * OC + oc] = -a; +// } +// } +// }; + +// for (int i = 0; i < graphNodes.size(); i++) { +// auto conv = graphNodes[i]; +// if (!isSutableConvNode(conv)) continue; + +// auto dataEltwise = conv->getParentEdgesAtPort(0)[0]->getParent(); +// auto weightsEltwise = conv->getParentEdgesAtPort(1)[0]->getParent(); +// if (initializeInputZeroPoints(conv, dataEltwise, weightsEltwise)) { +// auto p_edge = dataEltwise->getParentEdgesAtPort(1)[0]; +// graph.RemoveEdge(p_edge); + +// graph.DropNode(dataEltwise); +// } + +// initializeOutputCompensation(conv); +// } +// } +>>>>>>> New descriptor hierarchy (#20) static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { return childNode->getType() == FakeQuantize && @@ -602,231 +937,231 @@ static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr child childNode->getOriginalOutputPrecisionAtPort(0)); } -void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getShape().getRank() != 3; - }; - - auto parent = graphNodes.begin(); - while (parent != graphNodes.end()) { - auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { - parent++; - continue; - } - - auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!parentNode->canFuse(childNode)) { - parent++; - continue; - } - - // BF16 Quantize Layer Fusing Disabling - if (BF16QuantizeNodeFusing(parentNode, childNode)) { - parent++; - continue; - } - - childNode->fuseInto(parentNode); - - if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { - auto parentEdges = childNode->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent()->getType() == FullyConnected) - continue; - - graph.RemoveEdge(p_edge); - } - } - - graph.DropNode(childNode); - } -} - -void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isConvolutionNode = [](const MKLDNNNodePtr &node) { - return node->getType() == Convolution; - }; - - auto is1x1Convolution = [](const std::shared_ptr &conv) { - const auto weightRank = conv->getWeightDims().size(); - return conv->getWeightDims()[weightRank - 1] == 1 && conv->getWeightDims()[weightRank - 2] == 1; - }; - - auto isSutableParentConvolution = [&](MKLDNNNodePtr node) { - if (node->isDropped()) - return false; - - const auto conv = std::dynamic_pointer_cast(node); - if (conv == nullptr) - IE_THROW() << "Cannot cast to convolution node " << node->getName(); - - if (!conv->weightsZeroPoints.empty()) - return false; - - const auto &strides = conv->getStride(); - const auto &paddings = conv->getPaddingL(); - const auto &inDims = node->getParentEdgeAt(0)->getShape().getDims(); - const auto &outDims = node->getChildEdgeAt(0)->getShape().getDims(); - bool isSupportedParams = conv->getGroupNum() == 1 && - inDims.size() == 4 && - dimsEqualStrong(inDims[inDims.size() - 1], outDims[outDims.size() - 1]) && - dimsEqualStrong(inDims[inDims.size() - 2], outDims[outDims.size() - 2]) && - is1x1Convolution(conv) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions - everyone_is(1, strides[strides.size() - 1], strides[strides.size() - 2]) && - everyone_is(0, paddings[paddings.size() - 1], paddings[paddings.size() - 2]) && - !conv->canBeExecutedInInt8(); - if (!isSupportedParams) return false; - - return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild()); - }; - - auto isSutableChildConvolution = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { - if (parentNode->isDropped() || childNode->isDropped()) - return false; - - const auto convChild = std::dynamic_pointer_cast(childNode); - if (convChild == nullptr) - IE_THROW() << "Cannot cast to convolution node " << childNode->getName(); - - const auto convParent = std::dynamic_pointer_cast(parentNode); - if (convParent == nullptr) - IE_THROW() << "Cannot cast to convolution node " << parentNode->getName(); - - if (!everyone_is(Precision::FP32, convParent->getOriginalOutputPrecisionAtPort(0), convChild->getOriginalInputPrecisionAtPort(0), - convChild->getOriginalOutputPrecisionAtPort(0))) - return false; - - auto parentOutputPrecision = !parentNode->fusedWith.empty() - ? parentNode->fusedWith[parentNode->fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0) - : parentNode->getOriginalOutputPrecisionAtPort(0); - - auto childOutputPrecision = !childNode->fusedWith.empty() - ? childNode->fusedWith[childNode->fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0) - : childNode->getOriginalOutputPrecisionAtPort(0); - - if (!everyone_is(Precision::FP32, parentOutputPrecision, childOutputPrecision)) - return false; - - if (!convChild->inputZeroPoints.empty() || !convChild->weightsZeroPoints.empty()) - return false; - - bool withBias = convChild->getOriginalInputPrecisions().size() == 3; - - const auto weightRank = convChild->getWeightDims().size(); - const auto stridesSize = convChild->getStride().size(); - bool isSupportedParams = dimsEqualStrong(convChild->outputShapes[0].getDims()[1], convChild->getGroupNum()) && - convChild->outputShapes[0].getDims()[1] != 1 && - everyone_is(3, convChild->getWeightDims()[weightRank - 1], convChild->getWeightDims()[weightRank - 2]) && - everyone_is(1, convChild->getPaddingL()[stridesSize - 1], convChild->getPaddingL()[stridesSize - 2]) && - everyone_is(1, convChild->getPaddingR()[stridesSize - 1], convChild->getPaddingR()[stridesSize - 2]) && - everyone_is(1, convChild->getDilation()[stridesSize - 1] + 1, convChild->getDilation()[stridesSize - 2] + 1) && - convChild->getStride()[stridesSize - 1] == convChild->getStride()[stridesSize - 2] && - withBias && - one_of(convChild->getStride()[stridesSize - 1], 1, 2) && - childNode->getChildEdgeAt(0)->getShape().getRank() == 4; - - return isSupportedParams; - }; - - auto isFusingWorthwhile = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { - if (!childNode->inputShapes[0].isStatic() || !childNode->outputShapes[0].isStatic()) { - return false; - } - - auto inDims = childNode->inputShapes[0].getStaticDims(); - auto outDims = childNode->outputShapes[0].getStaticDims(); - int elemSize = childNode->getOriginalOutputPrecisionAtPort(0).size(); - - int L3_cache_size = utils::get_cache_size(3, false); - int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize; - int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * elemSize; - - auto parentConvolutionNode = std::dynamic_pointer_cast(parentNode); - if (parentConvolutionNode == nullptr) - IE_THROW() << "Cannot get convolution node " << parentNode->getName(); - - if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) || impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) - return false; - - return (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2); - }; - - for (int i = 0; i < graphNodes.size(); i++) { - if (!isConvolutionNode(graphNodes[i])) continue; - - auto parentConvNode = graphNodes[i]; - if (!isSutableParentConvolution(parentConvNode)) continue; - - auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue; - - if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue; - - parentConvNode->addFusedNode(childConvNode); - - for (auto node : childConvNode->getFusedWith()) { - parentConvNode->addFusedNode(node); - } - childConvNode->clearFusedWith(); - - graph.DropDWConvNode(childConvNode); - } -} - -// TODO [NM]: unite with FuseConvolutionAndSimpleOperation -void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); - - auto isSutableParentNode = [](MKLDNNNodePtr node) { - return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1 && - node->getOriginalOutputPrecisionAtPort(0) == Precision::FP32; - }; - - auto parent = graphNodes.begin(); - while (parent != graphNodes.end()) { - auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { - parent++; - continue; - } - - auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (childNode->getAlgorithm() != PoolingMax || childNode->getChildEdges().size() != 1) { - parent++; - continue; - } - - auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild(); - if (parentNode->getType() == BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { - parent++; - continue; - } - - if (!one_of(fuseCandidate->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, - EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, - EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu)) { - parent++; - continue; - } - parentNode->addFusedNode(fuseCandidate); - parentNode->addOriginalLayer(fuseCandidate->getOriginalLayers()); - auto parentEdges = fuseCandidate->parentEdges; - for (auto &parentEdge : parentEdges) { - auto p_edge = parentEdge.lock(); - if (p_edge->getParent() == childNode) - continue; - - graph.RemoveEdge(p_edge); - } - graph.DropNode(fuseCandidate); - } -} +// void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getShape().getRank() != 3; +// }; + +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } + +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!parentNode->canFuse(childNode)) { +// parent++; +// continue; +// } + +// // BF16 Quantize Layer Fusing Disabling +// if (BF16QuantizeNodeFusing(parentNode, childNode)) { +// parent++; +// continue; +// } + +// childNode->fuseInto(parentNode); + +// if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == FullyConnected) +// continue; + +// graph.RemoveEdge(p_edge); +// } +// } + +// graph.DropNode(childNode); +// } +// } + +// void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isConvolutionNode = [](const MKLDNNNodePtr &node) { +// return node->getType() == Convolution; +// }; + +// auto is1x1Convolution = [](const std::shared_ptr &conv) { +// const auto weightRank = conv->getWeightDims().size(); +// return conv->getWeightDims()[weightRank - 1] == 1 && conv->getWeightDims()[weightRank - 2] == 1; +// }; + +// auto isSutableParentConvolution = [&](MKLDNNNodePtr node) { +// if (node->isDropped()) +// return false; + +// const auto conv = std::dynamic_pointer_cast(node); +// if (conv == nullptr) +// IE_THROW() << "Cannot cast to convolution node " << node->getName(); + +// if (!conv->weightsZeroPoints.empty()) +// return false; + +// const auto &strides = conv->getStride(); +// const auto &paddings = conv->getPaddingL(); +// const auto &inDims = node->getParentEdgeAt(0)->getShape().getDims(); +// const auto &outDims = node->getChildEdgeAt(0)->getShape().getDims(); +// bool isSupportedParams = conv->getGroupNum() == 1 && +// inDims.size() == 4 && +// dimsEqualStrong(inDims[inDims.size() - 1], outDims[outDims.size() - 1]) && +// dimsEqualStrong(inDims[inDims.size() - 2], outDims[outDims.size() - 2]) && +// is1x1Convolution(conv) && // TODO [oneDNN] : fusing is permitted only with 1x1 convolutions +// everyone_is(1, strides[strides.size() - 1], strides[strides.size() - 2]) && +// everyone_is(0, paddings[paddings.size() - 1], paddings[paddings.size() - 2]) && +// !conv->canBeExecutedInInt8(); +// if (!isSupportedParams) return false; + +// return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild()); +// }; + +// auto isSutableChildConvolution = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { +// if (parentNode->isDropped() || childNode->isDropped()) +// return false; + +// const auto convChild = std::dynamic_pointer_cast(childNode); +// if (convChild == nullptr) +// IE_THROW() << "Cannot cast to convolution node " << childNode->getName(); + +// const auto convParent = std::dynamic_pointer_cast(parentNode); +// if (convParent == nullptr) +// IE_THROW() << "Cannot cast to convolution node " << parentNode->getName(); + +// if (!everyone_is(Precision::FP32, convParent->getOriginalOutputPrecisionAtPort(0), convChild->getOriginalInputPrecisionAtPort(0), +// convChild->getOriginalOutputPrecisionAtPort(0))) +// return false; + +// auto parentOutputPrecision = !parentNode->fusedWith.empty() +// ? parentNode->fusedWith[parentNode->fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0) +// : parentNode->getOriginalOutputPrecisionAtPort(0); + +// auto childOutputPrecision = !childNode->fusedWith.empty() +// ? childNode->fusedWith[childNode->fusedWith.size() - 1]->getOriginalOutputPrecisionAtPort(0) +// : childNode->getOriginalOutputPrecisionAtPort(0); + +// if (!everyone_is(Precision::FP32, parentOutputPrecision, childOutputPrecision)) +// return false; + +// if (!convChild->inputZeroPoints.empty() || !convChild->weightsZeroPoints.empty()) +// return false; + +// bool withBias = convChild->getOriginalInputPrecisions().size() == 3; + +// const auto weightRank = convChild->getWeightDims().size(); +// const auto stridesSize = convChild->getStride().size(); +// bool isSupportedParams = dimsEqualStrong(convChild->outputShapes[0].getDims()[1], convChild->getGroupNum()) && +// convChild->outputShapes[0].getDims()[1] != 1 && +// everyone_is(3, convChild->getWeightDims()[weightRank - 1], convChild->getWeightDims()[weightRank - 2]) && +// everyone_is(1, convChild->getPaddingL()[stridesSize - 1], convChild->getPaddingL()[stridesSize - 2]) && +// everyone_is(1, convChild->getPaddingR()[stridesSize - 1], convChild->getPaddingR()[stridesSize - 2]) && +// everyone_is(1, convChild->getDilation()[stridesSize - 1] + 1, convChild->getDilation()[stridesSize - 2] + 1) && +// convChild->getStride()[stridesSize - 1] == convChild->getStride()[stridesSize - 2] && +// withBias && +// one_of(convChild->getStride()[stridesSize - 1], 1, 2) && +// childNode->getChildEdgeAt(0)->getShape().getRank() == 4; + +// return isSupportedParams; +// }; + +// auto isFusingWorthwhile = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { +// if (!childNode->inputShapes[0].isStatic() || !childNode->outputShapes[0].isStatic()) { +// return false; +// } + +// auto inDims = childNode->inputShapes[0].getStaticDims(); +// auto outDims = childNode->outputShapes[0].getStaticDims(); +// int elemSize = childNode->getOriginalOutputPrecisionAtPort(0).size(); + +// int L3_cache_size = utils::get_cache_size(3, false); +// int dw_conv_input_size = inDims[0] * inDims[1] * inDims[2] * inDims[3] * elemSize; +// int dw_conv_output_size = outDims[0] * outDims[1]* outDims[2] * outDims[3] * elemSize; + +// auto parentConvolutionNode = std::dynamic_pointer_cast(parentNode); +// if (parentConvolutionNode == nullptr) +// IE_THROW() << "Cannot get convolution node " << parentNode->getName(); + +// if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) || impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) +// return false; + +// return (dw_conv_input_size + dw_conv_output_size > L3_cache_size / 2); +// }; + +// for (int i = 0; i < graphNodes.size(); i++) { +// if (!isConvolutionNode(graphNodes[i])) continue; + +// auto parentConvNode = graphNodes[i]; +// if (!isSutableParentConvolution(parentConvNode)) continue; + +// auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue; + +// if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue; + +// parentConvNode->addFusedNode(childConvNode); + +// for (auto node : childConvNode->getFusedWith()) { +// parentConvNode->addFusedNode(node); +// } +// childConvNode->clearFusedWith(); + +// graph.DropDWConvNode(childConvNode); +// } +// } + +// // TODO [NM]: unite with FuseConvolutionAndSimpleOperation +// void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1 && +// node->getOriginalOutputPrecisionAtPort(0) == Precision::FP32; +// }; + +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } + +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (childNode->getAlgorithm() != PoolingMax || childNode->getChildEdges().size() != 1) { +// parent++; +// continue; +// } + +// auto fuseCandidate = childNode->getChildEdgeAt(0)->getChild(); +// if (parentNode->getType() == BinaryConvolution && !parentNode->canFuse(fuseCandidate)) { +// parent++; +// continue; +// } + +// if (!one_of(fuseCandidate->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, +// EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, +// EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu)) { +// parent++; +// continue; +// } +// parentNode->addFusedNode(fuseCandidate); +// parentNode->addOriginalLayer(fuseCandidate->getOriginalLayers()); +// auto parentEdges = fuseCandidate->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent() == childNode) +// continue; + +// graph.RemoveEdge(p_edge); +// } +// graph.DropNode(fuseCandidate); +// } +// } void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); @@ -873,43 +1208,43 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) } } -void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { - auto& graphNodes = graph.GetNodes(); +// void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { - if (node->getType() == Pooling) { - if (!one_of(node->getOriginalInputPrecisionAtPort(0), Precision::U8, Precision::I8)) - return false; - return node->getChildEdges().size() == 1 && node->getAlgorithm() == Algorithm::PoolingAvg; - } - return false; - }; +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// if (node->getType() == Pooling) { +// if (!one_of(node->getOriginalInputPrecisionAtPort(0), Precision::U8, Precision::I8)) +// return false; +// return node->getChildEdges().size() == 1 && node->getAlgorithm() == Algorithm::PoolingAvg; +// } +// return false; +// }; - auto isSutableChildNode = [](MKLDNNNodePtr node) { - return node->getType() == FakeQuantize && node->getAlgorithm() != Algorithm::FQBinarization; - }; +// auto isSutableChildNode = [](MKLDNNNodePtr node) { +// return node->getType() == FakeQuantize && node->getAlgorithm() != Algorithm::FQBinarization; +// }; - for (int i = 0; i < graphNodes.size(); i++) { - auto parent = graphNodes[i]; - if (!isSutableParentNode(parent)) continue; +// for (int i = 0; i < graphNodes.size(); i++) { +// auto parent = graphNodes[i]; +// if (!isSutableParentNode(parent)) continue; - auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(child)) continue; +// auto child = parent->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(child)) continue; - child->fuseInto(parent); +// child->fuseInto(parent); - auto parents = child->parentEdges; - for (size_t i = 0; i < parents.size(); i++) { - auto p_edge = parents[i].lock(); - if (p_edge->getParent()->getType() == Pooling) - continue; +// auto parents = child->parentEdges; +// for (size_t i = 0; i < parents.size(); i++) { +// auto p_edge = parents[i].lock(); +// if (p_edge->getParent()->getType() == Pooling) +// continue; - graph.RemoveEdge(p_edge); - } +// graph.RemoveEdge(p_edge); +// } - graph.DropNode(child); - } -} +// graph.DropNode(child); +// } +// } /** * Check if there is a data dependency between parent and child @@ -1151,6 +1486,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG } } +<<<<<<< HEAD void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); @@ -1861,3 +2197,674 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) { } } } +======= +// void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// return (node->getType() == MVN) && (node->getChildEdges().size() == 1); +// }; + +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } + +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!parentNode->canFuse(childNode)) { +// parent++; +// continue; +// } + +// childNode->fuseInto(parentNode); + +// if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == MVN) +// continue; + +// graph.RemoveEdge(p_edge); +// } +// } + +// graph.DropNode(childNode); +// } +// } + +// void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSuitableParentNode = [](MKLDNNNodePtr node) { +// return node->getType() == Interpolate && node->getChildEdges().size() == 1; +// }; + +// auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { +// // Avoid cycle dependencies +// for (auto &childParentEdge : childNode->getParentEdges()) { +// for (auto &parentParentEdge : parentNode->getParentEdges()) { +// if (childParentEdge.lock()->getParent() == parentParentEdge.lock()->getParent()) +// return false; +// } +// } +// if (!childNode->getFusedWith().empty()) +// return false; +// auto interpolateNode = dynamic_cast(parentNode.get()); +// return interpolateNode->canFuse(childNode); +// }; + +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSuitableParentNode(parentNode)) { +// parent++; +// continue; +// } + +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(parentNode, childNode)) { +// parent++; +// continue; +// } + +// childNode->fuseInto(parentNode); + +// if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == Interpolate) +// continue; + +// graph.RemoveEdge(p_edge); +// } +// } + +// graph.DropNode(childNode); +// } +// } + +// void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// return node->getType() == NormalizeL2 && node->getChildEdges().size() == 1; +// }; + +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } + +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!parentNode->canFuse(childNode)) { +// parent++; +// continue; +// } + +// childNode->fuseInto(parentNode); + +// if (childNode->getType() == FakeQuantize || childNode->getType() == Eltwise) { +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == NormalizeL2) +// continue; + +// graph.RemoveEdge(p_edge); +// } +// } + +// graph.DropNode(childNode); +// } +// } + +// void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// return node->getType() == Eltwise && node->getChildEdges().size() == 1; +// }; + +// auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { +// if (parentNode->isConstant() && !childNode->isConstant()) +// return false; +// for (auto &childParentEdge : childNode->getParentEdges()) { +// // WA to prevent unsupported reorder exception issue in some cases +// if (childParentEdge.lock()->getParent()->getType() == Split) { +// return false; +// } + +// // Avoid cycle dependencies +// for (auto &parentParentEdge : parentNode->getParentEdges()) { +// if (childParentEdge.lock()->getParent() == parentParentEdge.lock()->getParent()) +// return false; +// } +// } + +// if (!childNode->getFusedWith().empty()) +// return false; + +// auto eltwiseNode = dynamic_cast(parentNode.get()); +// return eltwiseNode->canFuse(childNode); +// }; + +// auto parent = graphNodes.begin(); +// while (parent != graphNodes.end()) { +// auto parentNode = *parent; +// if (!isSutableParentNode(parentNode)) { +// parent++; +// continue; +// } + +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(parentNode, childNode)) { +// parent++; +// continue; +// } + +// childNode->fuseInto(parentNode); + +// if (childNode->getType() == FakeQuantize) { +// auto parentEdges = childNode->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (p_edge->getParent()->getType() == Eltwise) +// continue; + +// graph.RemoveEdge(p_edge); +// } + +// graph.DropNode(childNode); +// } else if (childNode->getType() == Eltwise) { +// auto children = childNode->childEdges; +// auto parents = childNode->parentEdges; +// auto initialParentInNum = parentNode->getParentEdges().size(); + +// for (size_t i = 0; i < parents.size(); i++) { +// auto p_edge = parents[i].lock(); +// if (!p_edge) continue; +// auto parent = p_edge->getParent(); +// if (!parent) continue; + +// if (parent == parentNode) { +// for (size_t j = 0; j < children.size(); j++) { +// if (!children[j].lock()) +// continue; +// auto child = children[j].lock()->getChild(); +// if (!child) +// continue; + +// MKLDNNEdgePtr &remEdge = p_edge; +// int inNum = 0; +// if (remEdge) { +// inNum = remEdge->getInputNum(); +// remEdge->drop(); +// graph.RemoveEdge(remEdge); +// } +// remEdge = children[j].lock(); +// int outNum = 0; +// if (remEdge) { +// outNum = remEdge->getOutputNum(); +// remEdge->drop(); +// graph.RemoveEdge(remEdge); +// } +// MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, child, inNum, outNum)); +// auto &graphEdges = graph.GetEdges(); +// graphEdges.push_back(newEdge); +// parent->addEdge(newEdge); + +// parent->outputShapes[inNum] = child->inputShapes[outNum]; +// } +// } else { +// MKLDNNEdgePtr &remEdge = p_edge; +// int inNum = 0; +// int outNum = parentNode->getParentEdges().size(); +// if (remEdge) { +// inNum = remEdge->getInputNum(); +// // Need to keep order for MulAdd +// if (childNode->getAlgorithm() == EltwiseMulAdd) { +// outNum = initialParentInNum + remEdge->getOutputNum() - 1; +// } +// remEdge->drop(); +// graph.RemoveEdge(remEdge); +// } + +// MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentNode, inNum, outNum)); +// auto &graphEdges = graph.GetEdges(); +// graphEdges.push_back(newEdge); +// parent->addEdge(newEdge); + +// parentNode->inputShapes.push_back(parent->outputShapes[0]); +// } +// } + +// graph.DropNode(childNode); +// } else { +// graph.DropNode(childNode); +// } +// } +// } + +// void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { +// std::set processed; +// int graphNodesSize = graph.GetNodes().size(); +// for (int i = 0; i < graphNodesSize; i++) { +// MKLDNNNodePtr& node = graph.GetNodes()[i]; +// if (processed.find(node) == processed.end() && node->getType() == Reorder +// && node->getChildEdges().size() == 1 +// && node->getChildEdgeAt(0)->getChild()->getType() == Reorder ) { +// auto nextNode = node->getChildEdgeAt(0)->getChild(); +// MKLDNNReorderNode* n = dynamic_cast(node.get()); +// if (n == nullptr) +// IE_THROW() << "Cannot get reorder layer " << node->getName(); +// MKLDNNReorderNode* nn = dynamic_cast(nextNode.get()); +// if (nn == nullptr) +// IE_THROW() << "Cannot get reorder layer " << nextNode->getName(); + +// MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent(); +// MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild(); + +// auto oldEdgeNum = n->getParentEdgeAt(0)->getInputNum(); + +// graph.DropNode(node); +// graph.DropNode(nextNode); + +// processed.insert(node); +// processed.insert(nextNode); + +// MKLDNNEdgePtr edge; +// for (auto cur : p->getChildEdgesAtPort(oldEdgeNum)) { +// if (cur->getChild() == c) +// edge = cur; +// } +// if (!edge) IE_THROW() << "Inappropriate graph processing"; + + +// std::string layerName = edge->getParent()->getName() + "_ScaleReorder_" + edge->getChild()->getName(); +// graph.InsertReorder(edge, layerName, n->getInput(), nn->getOutput(), false); +// graph.GetEdges().erase(std::remove(graph.GetEdges().begin(), graph.GetEdges().end(), edge), graph.GetEdges().end()); +// } +// } +// } + +// void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { +// std::vector& graphNodes = graph.GetNodes(); + +// for (auto &graphNode : graphNodes) { +// if (graphNode->getType() != Generic +// || graphNode->getTypeStr() != "Broadcast" +// || graphNode->getChildEdges().size() != 1lu +// || graphNode->getChildEdgeAt(0)->getChild()->getType() != Eltwise) +// continue; + +// MKLDNNNodePtr& broadcastNode = graphNode; +// MKLDNNNodePtr eltwiseNode = broadcastNode->getChildEdgeAt(0)->getChild(); +// eltwiseNode->inputShapes[broadcastNode->getChildEdgeAt(0)->getOutputNum()] +// = broadcastNode->getParentEdgeAt(0)->getShape(); + +// auto& edges = graph.GetEdges(); +// for (size_t i = 1lu; i < broadcastNode->getParentEdges().size(); i++) { +// auto constParent = broadcastNode->getParentEdgeAt(i)->getParent(); +// for (auto it = edges.begin(); it != edges.end(); it++) { +// if ((*it) == constParent->getChildEdgeAt(0)) { +// edges.erase(it); +// constParent->remove(); +// break; +// } +// } +// } +// graph.DropNode(broadcastNode); +// } +// } + +// void MKLDNNGraphOptimizer::FuseClampAndFakeQuantize(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableClampNode = [](MKLDNNNodePtr node) { +// return node->getType() == Eltwise && node->getChildEdges().size() == 1 && node->getAlgorithm() == EltwiseClamp; +// }; + +// auto isSutableFakeQuantizeNode = [](MKLDNNNodePtr node) { +// return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; +// }; + +// auto fuseClampAndFakeQuantizeNodes = [](MKLDNNNodePtr parent, MKLDNNNodePtr child) { +// auto* eltwiseNode = dynamic_cast(parent.get()); +// if (eltwiseNode == nullptr) +// IE_THROW() << "Cannot cast " << parent->getName() << " to Eltwise node"; + +// auto* fakeQuantizeNode = dynamic_cast(child.get()); +// if (fakeQuantizeNode == nullptr) +// IE_THROW() << "Cannot cast " << child->getName() << " to FakeQuantize node"; + +// const std::vector& cropLowData = fakeQuantizeNode->getCropLow(); +// const std::vector& cropHighData = fakeQuantizeNode->getCropHigh(); + +// std::vector newCropLow(cropLowData.size()); +// std::vector newCropHigh(cropHighData.size()); +// for (int i = 0; i < cropLowData.size(); i++) +// newCropLow[i] = std::max(cropLowData[i], eltwiseNode->getAlpha()); +// for (int i = 0; i < cropHighData.size(); i++) +// newCropHigh[i] = std::min(cropHighData[i], eltwiseNode->getBeta()); + +// fakeQuantizeNode->setCropLow(newCropLow); +// fakeQuantizeNode->setCropHigh(newCropHigh); + +// return true; +// }; + +// for (int i = 0; i < graphNodes.size(); i++) { +// auto parent = graphNodes[i]; +// if (!isSutableClampNode(parent)) continue; + +// auto child = parent->getChildEdgeAt(0)->getChild(); +// if (!isSutableFakeQuantizeNode(child)) continue; + +// if (fuseClampAndFakeQuantizeNodes(parent, child)) { +// graph.DropNode(parent); +// } +// } +// } + +// void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto getConstPort = [](const MKLDNNNodePtr node) -> int { +// if (node->getParentEdgeAt(0)->getParent()->getType() == Input && node->getParentEdgeAt(0)->getParent()->isConstant()) { +// return 0; +// } else if (node->getParentEdgeAt(1)->getParent()->getType() == Input && node->getParentEdgeAt(1)->getParent()->isConstant()) { +// return 1; +// } else { +// return -1; +// } +// }; + +// auto isSutableScaleShiftNode = [getConstPort](MKLDNNNodePtr node) { +// if (one_of(node->getAlgorithm(), EltwiseAdd, EltwiseSubtract, EltwiseMultiply, EltwiseDivide, EltwiseMulAdd)) { +// MKLDNNNode *parent = nullptr; +// if (node->getAlgorithm() != EltwiseMulAdd) { +// const auto constPort = getConstPort(node); +// if (constPort == -1) { +// return false; +// } +// parent = node->getParentEdgeAt(1 - constPort)->getParent().get(); +// } +// return node->getType() == Eltwise && node->getChildEdges().size() == 1 && node->canBePerformedAsScaleShift(parent); +// } +// return false; +// }; + +// auto isSutableFakeQuantizeNode = [](MKLDNNNodePtr node) { +// return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; +// }; + +// auto fuseScaleShiftAndFakeQuantizeNodes = [getConstPort](MKLDNNNodePtr parent, MKLDNNNodePtr child) { +// auto fakeQuantizeNode = std::dynamic_pointer_cast(child); +// if (fakeQuantizeNode == nullptr) +// IE_THROW() << "Cannot cast " << child->getName() << " to FakeQuantize node"; + +// std::vector scalesBuffer; +// std::vector shiftsBuffer; +// parent->fillScalesAndShifts(parent->getParentEdgeAt(1 - getConstPort(parent))->getParent().get(), scalesBuffer, shiftsBuffer, 1); + +// for (int i = 0; i < scalesBuffer.size(); i++) +// if (scalesBuffer[i] == 0.f) +// return false; + +// const std::vector& cropLowData = fakeQuantizeNode->getCropLow(); +// const std::vector& cropHighData = fakeQuantizeNode->getCropHigh(); +// const std::vector& inputScaleData = fakeQuantizeNode->getInputScale(); +// const std::vector& inputShiftData = fakeQuantizeNode->getInputShift(); + +// std::vector newCropLow(scalesBuffer.size()); +// std::vector newCropHigh(scalesBuffer.size()); +// std::vector newInputScale(scalesBuffer.size()); +// std::vector newInputShift(scalesBuffer.size()); + +// for (int i = 0; i < newCropLow.size(); i++) { +// float cl = cropLowData.size() == 1 ? cropLowData[0] : cropLowData[i]; +// float ch = cropHighData.size() == 1 ? cropHighData[0] : cropHighData[i]; + +// float newCL = (cl - shiftsBuffer[i]) / scalesBuffer[i]; +// float newCH = (ch - shiftsBuffer[i]) / scalesBuffer[i]; + +// newCropLow[i] = std::min(newCL, newCH); +// newCropHigh[i] = std::max(newCL, newCH); +// if (std::isinf(newCropLow[i])) { +// newCropLow[i] = std::numeric_limits::lowest(); +// } +// if (std::isinf(newCropHigh[i])) { +// newCropHigh[i] = std::numeric_limits::max(); +// } +// } + +// std::vector zeroShift(newInputScale.size(), 0.f); + +// const auto isSubnormal = [](const float value) { +// const uint32_t *u32data = reinterpret_cast(&value); +// return (*u32data) && (((*u32data) & (0xFF << 23)) == 0); +// }; + +// for (int i = 0; i < newInputScale.size(); i++) { +// float isc = inputScaleData.size() == 1 ? inputScaleData[0] : inputScaleData[i]; + +// newInputScale[i] = isc * scalesBuffer[i]; +// if (isSubnormal(newInputScale[i])) { +// newInputScale[i] = 0.f; +// // zero value have to be shifted if it's not in input range +// float cl = cropLowData.size() == 1 ? cropLowData[0] : cropLowData[i]; +// float ch = cropHighData.size() == 1 ? cropHighData[0] : cropHighData[i]; +// if (0.f < cl) { +// zeroShift[i] = isc * cl; +// } +// if (ch < 0.f) { +// zeroShift[i] = isc * ch; +// } +// } +// } + +// for (int i = 0; i < newInputShift.size(); i++) { +// float isc = inputScaleData.size() == 1 ? inputScaleData[0] : inputScaleData[i]; +// float ish = inputShiftData.size() == 1 ? inputShiftData[0] : inputShiftData[i]; + +// newInputShift[i] = ish + shiftsBuffer[i] * isc + zeroShift[i]; +// if (isSubnormal(newInputShift[i])) { +// newInputShift[i] = 0.f; +// } +// } + +// fakeQuantizeNode->setCropLow(newCropLow); +// fakeQuantizeNode->setCropHigh(newCropHigh); +// fakeQuantizeNode->setInputScale(newInputScale); +// fakeQuantizeNode->setInputShift(newInputShift); + +// return true; +// }; + +// for (int i = 0; i < graphNodes.size(); i++) { +// auto parent = graphNodes[i]; +// if (!isSutableScaleShiftNode(parent)) continue; + +// auto child = parent->getChildEdgeAt(0)->getChild(); +// if (!isSutableFakeQuantizeNode(child)) continue; + +// if (fuseScaleShiftAndFakeQuantizeNodes(parent, child)) { +// auto parentEdges = parent->parentEdges; +// for (auto &parentEdge : parentEdges) { +// auto p_edge = parentEdge.lock(); +// if (!p_edge->getParent()->isConstant()) +// continue; + +// graph.RemoveEdge(p_edge); +// } + +// graph.DropNode(parent); +// } +// } +// } + +// void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { +// auto& graphNodes = graph.GetNodes(); + +// auto isSutableParentNode = [](MKLDNNNodePtr node) { +// return node->getType() == Transpose && node->getChildEdges().size() == 1; +// }; + +// auto isSutableChildNode = [](MKLDNNNodePtr node) { +// return node->getType() == Reorder && node->getChildEdges().size() == 1; +// }; + +// // Method checkAscendingSummaryOrder() checks that after the sequential execution of Transpose and Reorder nodes, +// // the order of the elements in the memory will not change. In other words, that Transpose+Reorder is identical permutation. +// auto checkAscendingSummaryOrder = [](std::shared_ptr &parentNode, std::shared_ptr &childNode) -> bool { +// auto* transposeNode = dynamic_cast(parentNode.get()); +// auto* reorderNode = dynamic_cast(childNode.get()); +// if (!transposeNode || !reorderNode) { +// return false; +// } + +// auto& transposeOrder = transposeNode->getOrder(); +// auto layoutOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc->as()->getOrder(); + +// auto inBlockedDesc = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->as(); +// auto outBlockedDesc = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc->as(); + +// auto& inOrder = inBlockedDesc->getOrder(); +// auto& outOrder = outBlockedDesc->getOrder(); + +// if (transposeOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { +// return false; +// } + +// // revLayoutOrder - reverse permutation for layoutOrder +// auto revLayoutOrder = SizeVector(layoutOrder.size()); +// for (int i = 0; i < revLayoutOrder.size(); i++) { +// revLayoutOrder[layoutOrder[i]] = i; +// } + +// // newTransposeOrder - Transpose layout-aware permutation +// auto newTransposeOrder = SizeVector(transposeOrder.size()); +// for (int i = 0; i < newTransposeOrder.size(); i++) { +// newTransposeOrder[i] = layoutOrder[transposeOrder[revLayoutOrder[i]]]; +// } + +// // reorderOrder - Reorder layout-aware permutation +// auto reorderOrder = SizeVector(outOrder.size()); +// for (int i = 0; i < reorderOrder.size(); i++) { +// for (int j = 0; j < reorderOrder.size(); j++) { +// if (outOrder[i] == inOrder[j]) { +// reorderOrder[i] = j; +// continue; +// } +// } +// } + +// // summaryOrder - resulting Transpose+Reorder permutation +// auto summaryOrder = SizeVector(transposeOrder.size()); +// for (int i = 0; i < summaryOrder.size(); i++) { +// summaryOrder[i] = reorderOrder[newTransposeOrder[i]]; +// } + +// // check that Transpose+Reorder is the identical permutation +// for (int i = 0; i < summaryOrder.size(); i++) { +// if (summaryOrder[i] != i) { +// return false; +// } +// } + +// return true; +// }; + +// // Transpose and Reorder do opposite permutation to each other. +// // Example: +// // chain [physical layout: NCHW, logical layout: NCHW] -> Transpose(order=0312) -> [physical layout: NWCH, logical layout: NCHW] -> +// // Reorder(nchw->nhwc) -> [physical layout: NCHW, logical layout: NHWC] can be replaced with Reorder(nchw->nhwc; isOptimized=true) +// // which will just reinterprets layout without physical change of the memory. +// // Two cases are possible: +// // 1) inPrec = outPrec +// // In this case, we replace Transpose+Reorder pattern with a new Reorder that does nothing. +// // 2) inPrec != outPrec +// // As in the first case, we also replace Transpose+Reorder pattern with a new Reorder. +// // Additionally, we insert another Reorder that performs the conversion from the input precision (inPrec) +// // to the output precision (outPrec) +// auto mergeTransposeAndReorder = [&](std::shared_ptr& parentNode, std::shared_ptr& childNode) { +// auto parentParentNode = parentNode->getParentEdgesAtPort(0)[0]->getParent(); +// auto parentParentConstNode = parentNode->getParentEdgesAtPort(1)[0]->getParent(); +// auto childChildNode = childNode->getChildEdgeAt(0)->getChild(); + +// auto &remEdge = parentParentConstNode->getChildEdgeAt(0); +// remEdge->drop(); +// auto& edges = graph.GetEdges(); +// for (auto it = edges.begin(); it != edges.end(); it++) { +// if ((*it) == remEdge) { +// edges.erase(it); +// parentParentConstNode->remove(); +// break; +// } +// } + +// graph.DropNode(parentNode); +// graph.DropNode(childNode); + +// auto& inDesc = parentNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; +// auto& outDesc = childNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; + +// auto inPrec = inDesc->getPrecision(); +// auto outPrec = outDesc->getPrecision(); + +// auto reorderInDesc = inDesc->clone(); +// auto reorderOutDesc = outDesc->clone(); +// reorderOutDesc->setPrecision(inPrec); + +// std::string reorderlayerName = parentParentNode->getName() + "_" + +// MKLDNNReorderNode::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake"; + +// MKLDNNEdgePtr edge; +// for (auto &childEdge : parentParentNode->getChildEdges()) { +// if (childEdge.lock()->getChild() == childChildNode) { +// edge = childEdge.lock(); +// break; +// } +// } +// if (!edge) { +// IE_THROW() << "Transpose node '" << parentNode->getName() << "' has invalid edges."; +// } + +// auto reorderNode = graph.InsertReorder(edge, reorderlayerName, *reorderInDesc, *reorderOutDesc, true); + +// // case 2 +// if (inPrec != outPrec) { +// auto reorderInDesc2 = reorderOutDesc->clone(); +// auto reorderOutDesc2 = outDesc->clone(); + +// std::string reorderLayerName2 = reorderNode->getName() + "_" + +// MKLDNNReorderNode::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + childChildNode->getName(); + +// graph.InsertReorder(reorderNode->getChildEdgeAt(0), reorderLayerName2, *reorderInDesc2, *reorderOutDesc2, false); +// } +// }; + +// for (int i = 0; i < graphNodes.size(); i++) { +// auto parentNode = graphNodes[i]; +// if (!isSutableParentNode(parentNode)) { +// continue; +// } +// auto childNode = parentNode->getChildEdgeAt(0)->getChild(); +// if (!isSutableChildNode(childNode)) { +// continue; +// } + +// if (checkAscendingSummaryOrder(parentNode, childNode)) { +// mergeTransposeAndReorder(parentNode, childNode); +// } +// } +// } +>>>>>>> New descriptor hierarchy (#20) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 03190828848fb3..b3a2603e1e8bc5 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -209,13 +209,13 @@ std::map MKLDNNPlugin: } void MKLDNNPlugin::MKLDNNInferRequest::createInputBlob(const std::string &name) { - MKLDNNNodeConstPtr inputNode = graph->GetInputNodeByName(name); + MKLDNNNodeConstPtr inputNode = graph->getInputNodeByName(name); if (inputNode->isDynamicNode() && !m_realShapes.count(name)) { IE_THROW() << "Cannot create blob " << name << " with dynamic shapes"; } - InferenceEngine::TensorDesc origDesc = MemoryDescUtils::convertToTensorDesc(inputNode->getChildEdgesAtPort(0)[0]->getMemory().GetDesc()); + InferenceEngine::TensorDesc origDesc = MemoryDescUtils::convertToTensorDesc(inputNode->getChildEdgesAtPort(0)[0]->getMemory().getDesc()); InferenceEngine::TensorDesc desc = origDesc; if (_networkInputs.find(name) != _networkInputs.end()) { @@ -256,14 +256,18 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: if (_inputs.find(name) == _inputs.end()) { createInputBlob(name); } - MKLDNNNodeConstPtr inputNode = graph->GetInputNodeByName(name); + MKLDNNNodeConstPtr inputNode = graph->getInputNodeByName(name); if (inputNode->isDynamicNode()) { if (!m_realShapes.count(name)) { IE_THROW() << "Cannot get blob " << name << " which contains dynamic shapes"; } if (_inputs[name]->getTensorDesc().getDims() != m_realShapes.at(name)) { - // TODO [DS]: reshape without reallocate? - createInputBlob(name); + if (_inputs[name]->size() >= + std::accumulate(m_realShapes.at(name).begin(), m_realShapes.at(name).end(), (size_t)1, std::multiplies())) { + _inputs[name]->getTensorDesc().reshape(m_realShapes.at(name)); + } else { + createInputBlob(name); + } } } data = _inputs[name]; @@ -287,7 +291,7 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: if (graph->hasOutputWithName(name)) { if (_outputs.find(name) == _outputs.end()) { - if (graph->GetOutputNodeByName(name)->isDynamicNode()) { + if (graph->getOutputNodeByName(name)->isDynamicNode()) { IE_THROW(NotImplemented) << "[DS] Can't get output blob for dynamic shapes before inference"; } @@ -384,7 +388,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In // pre-processing _preProcData[name]->setRoiBlob(data); } else { - auto inputNode = graph->GetInputNodeByName(name); + auto inputNode = graph->getInputNodeByName(name); if (foundInput->getInputData()->getPartialShape().rank().get_length() != data->getTensorDesc().getDims().size()) { IE_THROW(ParameterMismatch) << "Failed to set input blob. Rank mismatch."; } @@ -392,7 +396,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In if (foundInput->getInputData()->isDynamic()) { const auto &newShape = data->getTensorDesc().getDims(); m_realShapes[name] = newShape; - inputNode->resetOutputShape({newShape}); + inputNode->redefineOutputMemory({newShape}); } else { size_t inputSize = foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR ? InferenceEngine::details::product(foundInput->getTensorDesc().getDims()) : 1; @@ -414,7 +418,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In // TODO [DS]: enable inplace for dynamic input/output if (!inputNode->isDynamicNode() && - data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(inputNode->getChildEdgesAtPort(0)[0]->getMemory().GetDesc()) && + data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(inputNode->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -591,6 +595,6 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetShape(const std::string& name, const I m_realShapes[name] = dims; - auto inputNode = graph->GetInputNodeByName(name); - inputNode->resetOutputShape({dims}); + auto inputNode = graph->getInputNodeByName(name); + inputNode->redefineOutputMemory({dims}); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index 157643717c1b01..1a018ab3bc6158 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -18,14 +18,10 @@ #include "nodes/common/cpu_convert.h" #include "mkldnn/ie_mkldnn.h" #include "cpu_shape.h" - -namespace dnnl { -namespace impl { -extern status_t fill_blocked(memory_desc_t &md, std::vector &perm, - std::vector &inner_blks, - std::vector &inner_idxs); -} // namespace impl -} // namespace dnnl +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "utils/cpu_utils.hpp" +#include "nodes/mkldnn_reorder_node.h" +#include "memory_desc/cpu_memory_desc.h" using namespace InferenceEngine; using namespace mkldnn; @@ -45,15 +41,11 @@ namespace { MKLDNNMemory::MKLDNNMemory(const mkldnn::engine& eng) : eng(eng) {} size_t MKLDNNMemory::GetSize() const { - uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(GetDataType())); - return GetElementsCount() * itemSize; -} - -size_t MKLDNNMemory::GetElementsCount() const { - auto desc = GetDescriptor(); - std::vector dims(desc.data.padded_dims, - desc.data.padded_dims + desc.data.ndims); - return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies()); + auto size = getDesc().getCurrentMemSize(); + if (size == MemoryDesc::UNDEFINED_SIZE) { + IE_THROW() << "Can't get memory size for undefined shape"; + } + return size; } void MKLDNNMemory::Create(const memory::dims& dims, memory::data_type data_type, memory::format_tag format, const void* data) { @@ -61,7 +53,7 @@ void MKLDNNMemory::Create(const memory::dims& dims, memory::data_type data_type, format = memory::format_tag::any; } - memory::desc desc = MKLDNNMemoryDesc(MKLDNNExtensionUtils::convertToSizeVector(dims), data_type, format); + memory::desc desc = mkldnn::memory::desc(dims, data_type, format); Create(desc, data); } @@ -109,104 +101,22 @@ void MKLDNNMemory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroin } if (pMemDesc->isDefined()) { - Create(mkldnn::memory::desc(MemoryDescUtils::convertToMKLDNNMemoryDesc(*pMemDesc)), data, pads_zeroing); + Create(MemoryDescUtils::convertToDnnlMemoryDesc(*pMemDesc)->getDnnlDesc(), data, pads_zeroing); } else { //delayed dynamic allocation size_t maxMemSize = pMemDesc->getMaxMemSize(); - size_t dummySize = MemoryDesc::UNDEFINED_SIZE == maxMemSize ? 1 : maxMemSize; - MKLDNNMemoryDesc dummyDesc({dummySize}, mkldnn::memory::data_type::u8); - Create(mkldnn::memory::desc(dummyDesc), data, false); // no pads zeroing + VectorDims dummySize{MemoryDesc::UNDEFINED_SIZE == maxMemSize ? 1 : maxMemSize}; + DnnlBlockedMemoryDesc dummyDesc(InferenceEngine::Precision::U8, Shape(dummySize)); + Create(dummyDesc.getDnnlDesc(), data, false); // no pads zeroing } - size_t newUpperBound = prim->get_desc().get_size(); + size_t newUpperBound = MKLDNNExtensionUtils::getMemSizeForOneDnnDesc(prim->get_desc()); if (newUpperBound > memUpperBound) { memUpperBound = newUpperBound; } } - -void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) { - if (size != 0) - IE_ASSERT(size <= output.GetDescriptor().get_size()); - if (input.GetDescriptor() == output.GetDescriptor()) { - auto srcPtr = static_cast(input.GetPtr()); - auto dstPtr = static_cast(output.GetPtr()); - - auto copySize = size == 0 ? output.GetSize() : size; - cpu_memcpy(dstPtr, srcPtr, copySize); - } else { - std::unique_ptr pReorder; - std::shared_ptr srcMemoryPtr; - std::vector tmpBuff; - - try { - pReorder = std::unique_ptr(new mkldnn::reorder(input.GetPrimitive(), output.GetPrimitive())); - srcMemoryPtr = input.prim; - } - catch (const mkldnn::error& err) { - if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType()) { - //we probably could not make the reorder because there is no one supporting this precision conversion - //lets try to convert data first using cpu_convert - auto data = static_cast(input.GetPtr()); - tmpBuff.resize(input.GetSize()); - - cpu_convert(data, tmpBuff.data(), MKLDNNExtensionUtils::DataTypeToIEPrecision(input.GetDataType()), - MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()), input.GetElementsCount()); - - MKLDNNMemory tmpMem(output.eng); - tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetMKLDNNDesc().getFormat(), tmpBuff.data()); - - pReorder = std::unique_ptr(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive())); - srcMemoryPtr = tmpMem.prim; - } else { - throw; - } - } - if (pReorder) { - mkldnn::stream loc_stream(output.eng, stream::flags::default_order); - pReorder->execute(loc_stream, *srcMemoryPtr, *output.prim); - } else { - IE_THROW() << "Could not make mkldnn reorder."; - } - } -} - -// TODO: It should be done via wrap into Memory; -void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format, const void* data, size_t size, bool ftz) const { - IE_ASSERT(!one_of(format, memory::format_tag::undef, memory::format_tag::any)); - - auto dst_desc = GetDescriptor(); - memory::desc src_desc{dst_desc.dims(), dataType, format}; - - IE_ASSERT(size <= dst_desc.get_size()); - - if (dst_desc == src_desc) { - uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(dataType)); - uint8_t* dataPtr = static_cast(GetData()); - // We cannot support strides for i/o blobs because it affects performance. - dataPtr += itemSize * prim->get_desc().data.offset0; - cpu_memcpy(dataPtr, data, size); - } else { - auto memData = this->GetDescriptor().data; - memory::dims dims(memData.dims, memData.dims + memData.ndims); - - MKLDNNMemory src(this->eng); - src.Create(dims, dataType, format, data); - - reorderData(src, *this); - } - if (ftz - && dataType == memory::data_type::f32 - && prim->get_desc().data.format_kind != dnnl_format_kind_wino - && GetDataType() != memory::data_type::bf16) { - // Internal blobs haven't strides yet. - auto *memData = static_cast(GetData()); - memData += prim->get_desc().data.offset0; - setSubnormalsToZero(memData, GetSize() / sizeof(float)); - } -} - void MKLDNNMemory::SetData(const MKLDNNMemory& src, size_t size, bool ftz) const { - reorderData(src, *this, size); + MKLDNNReorderNode::reorderData(src, *this, size); if (ftz && src.GetDataType() == memory::data_type::f32 @@ -224,96 +134,14 @@ void MKLDNNMemory::FillZero() { memset(dataPtr, 0, GetSize()); } -memory::format_tag MKLDNNMemory::GetPlainFormatByRank(size_t rank) { - switch (rank) { - case 0: - case 1: - return memory::format_tag::a; - case 2: - return memory::format_tag::ab; - case 3: - return memory::format_tag::abc; - case 4: - return memory::format_tag::abcd; - case 5: - return memory::format_tag::abcde; - case 6: - return memory::format_tag::abcdef; - default: - return memory::format_tag::undef; - } -} - -InferenceEngine::Layout MKLDNNMemory::GetPlainLayout(const memory::dims& dims) { - switch (dims.size()) { - case 0: return Layout::SCALAR; - case 1: return Layout::C; - case 2: return Layout::NC; - case 3: return Layout::CHW; - case 4: return Layout::NCHW; - case 5: return Layout::NCDHW; - default: - return Layout::BLOCKED; - } -} - -Precision MKLDNNMemory::convertToIePrec(memory::data_type dataType) { - return MKLDNNExtensionUtils::DataTypeToIEPrecision(dataType); -} - -memory::data_type MKLDNNMemory::convertToDataType(const InferenceEngine::Precision &precision) { - return MKLDNNExtensionUtils::IEPrecisionToDataType(precision); -} - -memory::format_tag MKLDNNMemory::Convert(const InferenceEngine::Layout layout) { - switch (layout) { - case NCHW: - return memory::format_tag::nchw; - case NHWC: - return memory::format_tag::nhwc; - case NCDHW: - return memory::format_tag::ncdhw; - case NDHWC: - return memory::format_tag::ndhwc; - case CHW: - return memory::format_tag::tnc; - case NC: - return memory::format_tag::nc; - case C: - return memory::format_tag::x; - case SCALAR: - return memory::format_tag::x; - default: - return memory::format_tag::undef; - } -} - -std::string MKLDNNMemory::formatToString(memory::format_tag fmt) { - return mkldnn::utils::fmt2str(fmt); -} - void *MKLDNNMemory::GetPtr() const { auto ptr = static_cast(GetData()); - auto md = GetDescriptor().data; + auto md = prim->get_desc().data; mkldnn::impl::memory_desc_wrapper wrapper(md); ptr += wrapper.offset0() * wrapper.data_type_size(); return ptr; } -template<> -MKLDNNMemoryDesc MKLDNNMemory::GetDescWithType() const { - if (auto descPtr = dynamic_cast(pMemDesc.get())) { - return *descPtr; - } else { - switch (pMemDesc->getType()) { - case (MemoryDescType::Blocked): - return MemoryDescUtils::convertToMKLDNNMemoryDesc(*(pMemDesc->as())); - default: - IE_THROW() << "Can not convert unsupported memory descriptor"; - } - } -} - void MKLDNNMemory::redefineDesc(const MemoryDesc& desc) { redefineDesc(desc.clone()); } @@ -335,920 +163,13 @@ void MKLDNNMemory::redefineDesc(MemoryDescPtr desc) { } template<> -BlockedMemoryDesc MKLDNNMemory::GetDescWithType() const { - if (auto descPtr = dynamic_cast(pMemDesc.get())) { - return *descPtr; - } else { - switch (pMemDesc->getType()) { - case (MemoryDescType::Mkldnn): - return MemoryDescUtils::convertToBlockedDescriptor(*(pMemDesc->as())); - default: - IE_THROW() << "Can not convert unsupported memory descriptor"; - } - } -} - -bool MKLDNNMemoryDesc::operator==(const MKLDNNMemoryDesc &rhs) const { - return this->desc == rhs.desc && order == rhs.order; -} - -bool MKLDNNMemoryDesc::operator!=(const MKLDNNMemoryDesc &rhs) const { - return !(*this == rhs); -} - -MKLDNNMemoryDesc::operator mkldnn::memory::desc() const { - return desc; -} - -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::desc& desc) : - MemoryDesc(Shape(MKLDNNExtensionUtils::convertToSizeVector(desc.dims())), Mkldnn), desc(desc) { - if (desc.data.format_kind == dnnl::impl::format_kind::any) - IE_THROW(Unexpected) << "Memory format any is prohibited!"; - - mkldnn::impl::memory_desc_wrapper descWrapped(desc.data); - - if (descWrapped.is_blocking_desc()) { - if (descWrapped.has_runtime_dims_or_strides()) { - IE_THROW(Unexpected) << "Cannot calculate order from undefined dims or strides"; - } - - const auto dims = desc.dims(); - - const auto &blk_desc = descWrapped.blocking_desc(); - - const size_t outer_ndims = dims.size(); - const size_t inner_ndims = blk_desc.inner_nblks; - const size_t total_ndims = outer_ndims + inner_ndims; - - // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} - std::vector inner_strides(inner_ndims, 1); - for (size_t i = 1; i < blk_desc.inner_nblks; i++) { - inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; - } - - // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} - std::vector total_block_per_dim(outer_ndims, 1); - for (int i = 0; i < inner_ndims; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - - // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} - std::vector outer_order(outer_ndims); - std::iota(outer_order.begin(), outer_order.end(), 0); - std::sort(outer_order.begin(), outer_order.end(), - [&blk_desc, &outer_block_dims](size_t ind_l, size_t ind_r) { - return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || - (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - - - // blocked order - // [new_outer_order] U [inner_idxs] - SizeVector blk_order(total_ndims, 0); - std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); - std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); - order.swap(blk_order); - } -} - -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) : - MKLDNNMemoryDesc(Shape(_dims), dataType, format) {} - -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType) - : MemoryDesc(Shape(_dims), Mkldnn), desc() { - InitializePlain(_dims, dataType); -} - -void MKLDNNMemoryDesc::InitializePlain(const std::vector& _dims, mkldnn::memory::data_type dataType) { - const auto ndims = _dims.size(); - mkldnn::memory::dims plain_strides; - if (std::any_of(_dims.begin(), _dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { - plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL); - } else { - plain_strides.resize(ndims, 1); - for (size_t i = 1; i < ndims; i++) { - plain_strides[ndims - i -1] = plain_strides[ndims - i] * _dims[ndims - i]; - } - } - - order.resize(ndims); - std::iota(order.begin(), order.end(), 0); - - desc = {MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, plain_strides}; -} - -static const std::map> form_tags_by_ndims { - {0, { - mkldnn::memory::format_tag::a // TODO :: really 1d layout for scalar?? - }}, {1, { - mkldnn::memory::format_tag::a - }}, {2, { - mkldnn::memory::format_tag::ab, - mkldnn::memory::format_tag::ba - }}, {3, { - mkldnn::memory::format_tag::abc, - mkldnn::memory::format_tag::acb, - mkldnn::memory::format_tag::bac, - mkldnn::memory::format_tag::bca, - mkldnn::memory::format_tag::cba, - - mkldnn::memory::format_tag::Abc16a, - mkldnn::memory::format_tag::ABc16a16b, - mkldnn::memory::format_tag::ABc4a4b, - mkldnn::memory::format_tag::aBc16b, - mkldnn::memory::format_tag::aBc32b, - mkldnn::memory::format_tag::ABc16b16a, - mkldnn::memory::format_tag::Abc4a, - mkldnn::memory::format_tag::aBc4b, - mkldnn::memory::format_tag::ABc4b16a4b, - mkldnn::memory::format_tag::ABc2b8a4b, - mkldnn::memory::format_tag::ABc16b16a4b, - mkldnn::memory::format_tag::ABc16b16a2b, - mkldnn::memory::format_tag::ABc4b4a, - mkldnn::memory::format_tag::ABc8a16b2a, - mkldnn::memory::format_tag::ABc8a8b, - mkldnn::memory::format_tag::ABc8a4b, - mkldnn::memory::format_tag::aBc8b, - mkldnn::memory::format_tag::ABc8b16a2b, - mkldnn::memory::format_tag::ABc8b8a, - mkldnn::memory::format_tag::Acb16a, - mkldnn::memory::format_tag::Acb4a, - mkldnn::memory::format_tag::Acb8a, - mkldnn::memory::format_tag::BAc16a16b, - mkldnn::memory::format_tag::BAc16b16a, - }}, {4, { // Popular - mkldnn::memory::format_tag::abcd, // plain - mkldnn::memory::format_tag::acdb, // tail_c - mkldnn::memory::format_tag::aBcd8b, // blocked 8c - mkldnn::memory::format_tag::aBcd16b, // blocked 16c - - mkldnn::memory::format_tag::abdc, - - mkldnn::memory::format_tag::bacd, - mkldnn::memory::format_tag::bcda, - mkldnn::memory::format_tag::cdba, - mkldnn::memory::format_tag::dcab, - - mkldnn::memory::format_tag::Abcd8a, - mkldnn::memory::format_tag::Abcd16a, - mkldnn::memory::format_tag::Abcd32a, - mkldnn::memory::format_tag::ABcd16a16b, - mkldnn::memory::format_tag::aBcd32b, - mkldnn::memory::format_tag::ABcd16b16a, - mkldnn::memory::format_tag::aBCd16b16c, - mkldnn::memory::format_tag::aBCd16c16b, - mkldnn::memory::format_tag::Abcd4a, - mkldnn::memory::format_tag::aBcd4b, - mkldnn::memory::format_tag::ABcd4b16a4b, - mkldnn::memory::format_tag::ABcd2b8a4b, - mkldnn::memory::format_tag::ABcd4b4a, - mkldnn::memory::format_tag::ABcd4a4b, - mkldnn::memory::format_tag::aBCd4c16b4c, - mkldnn::memory::format_tag::aBCd2c8b4c, - mkldnn::memory::format_tag::ABcd16b16a4b, - mkldnn::memory::format_tag::ABcd16b16a2b, - mkldnn::memory::format_tag::aBCd16c16b4c, - mkldnn::memory::format_tag::aBCd16c16b2c, - mkldnn::memory::format_tag::aBCd4c4b, - mkldnn::memory::format_tag::aBCd4b4c, - mkldnn::memory::format_tag::ABcd8a16b2a, - mkldnn::memory::format_tag::ABcd8a8b, - mkldnn::memory::format_tag::ABcd8a32b, - mkldnn::memory::format_tag::ABcd32a32b, - mkldnn::memory::format_tag::ABcd8a4b, - - mkldnn::memory::format_tag::ABcd8b16a2b, - mkldnn::memory::format_tag::aBCd8b16c2b, - mkldnn::memory::format_tag::ABcd8b8a, - mkldnn::memory::format_tag::aBCd8b8c, - mkldnn::memory::format_tag::aBCd8b4c, - mkldnn::memory::format_tag::aBCd8c16b2c, - mkldnn::memory::format_tag::aBCd8c8b, - - mkldnn::memory::format_tag::ABcd4a8b8a4b, - mkldnn::memory::format_tag::ABcd2a8b8a2b, - - mkldnn::memory::format_tag::aBdc16b, - mkldnn::memory::format_tag::aBdc4b, - mkldnn::memory::format_tag::aBdc8b, - mkldnn::memory::format_tag::aCBd16b16c, - mkldnn::memory::format_tag::aCBd16c16b, - mkldnn::memory::format_tag::Acdb16a, - mkldnn::memory::format_tag::Acdb4a, - mkldnn::memory::format_tag::Acdb8a, - mkldnn::memory::format_tag::BAcd16a16b, - mkldnn::memory::format_tag::BAcd16b16a, - mkldnn::memory::format_tag::ABcd32a32b, - mkldnn::memory::format_tag::Acdb32a, - mkldnn::memory::format_tag::aBCd2b4c2b, - mkldnn::memory::format_tag::aBCd2c4b2c, - mkldnn::memory::format_tag::aBCd4b8c2b, - mkldnn::memory::format_tag::aBCd4c8b2c, - }}, {5, { // Popular - mkldnn::memory::format_tag::abcde, // plain - mkldnn::memory::format_tag::acdeb, // tail_c - mkldnn::memory::format_tag::aBcde8b, // blocked 8c - mkldnn::memory::format_tag::aBcde16b, // blocked 16c - - mkldnn::memory::format_tag::abdec, - mkldnn::memory::format_tag::acbde, - mkldnn::memory::format_tag::bacde, - mkldnn::memory::format_tag::bcdea, - mkldnn::memory::format_tag::cdeba, - mkldnn::memory::format_tag::decab, - - mkldnn::memory::format_tag::Abcde16a, - mkldnn::memory::format_tag::Abcde32a, - mkldnn::memory::format_tag::ABcde16a16b, - mkldnn::memory::format_tag::aBcde32b, - mkldnn::memory::format_tag::ABcde16b16a, - mkldnn::memory::format_tag::aBCde16b16c, - mkldnn::memory::format_tag::aBCde16c16b, - mkldnn::memory::format_tag::aBCde2c8b4c, - mkldnn::memory::format_tag::Abcde4a, - mkldnn::memory::format_tag::aBcde4b, - mkldnn::memory::format_tag::ABcde4b4a, - mkldnn::memory::format_tag::ABcde4a4b, - mkldnn::memory::format_tag::aBCde4b4c, - mkldnn::memory::format_tag::aBCde4c16b4c, - mkldnn::memory::format_tag::aBCde16c16b4c, - mkldnn::memory::format_tag::aBCde16c16b2c, - mkldnn::memory::format_tag::aBCde4c4b, - mkldnn::memory::format_tag::Abcde8a, - mkldnn::memory::format_tag::ABcde8a8b, - mkldnn::memory::format_tag::ABcde8a4b, - mkldnn::memory::format_tag::ABcde8b16a2b, - mkldnn::memory::format_tag::ABcde4b16a4b, - mkldnn::memory::format_tag::ABcde2b8a4b, - mkldnn::memory::format_tag::aBCde8b16c2b, - mkldnn::memory::format_tag::ABcde8b8a, - mkldnn::memory::format_tag::aBCde8b8c, - mkldnn::memory::format_tag::aBCde8b4c, - mkldnn::memory::format_tag::aBCde4b8c8b4c, - mkldnn::memory::format_tag::aBCde2b8c8b2c, - mkldnn::memory::format_tag::aBCde8c16b2c, - mkldnn::memory::format_tag::aBCde8c8b, - mkldnn::memory::format_tag::aBdec16b, - mkldnn::memory::format_tag::aBdec4b, - mkldnn::memory::format_tag::aBdec8b, - mkldnn::memory::format_tag::aCBde16b16c, - mkldnn::memory::format_tag::aCBde16c16b, - mkldnn::memory::format_tag::Acdeb16a, - mkldnn::memory::format_tag::Acdeb4a, - mkldnn::memory::format_tag::Acdeb8a, - mkldnn::memory::format_tag::BAcde16b16a, - mkldnn::memory::format_tag::BAcde16a16b, - mkldnn::memory::format_tag::aBdec32b, - mkldnn::memory::format_tag::aBCde2b4c2b, - mkldnn::memory::format_tag::aBCde2c4b2c, - mkldnn::memory::format_tag::aBCde4b8c2b, - mkldnn::memory::format_tag::aBCde4c8b2c, - }}, {6, { // Popular - mkldnn::memory::format_tag::abcdef, // plain - mkldnn::memory::format_tag::acbdef, // permute - mkldnn::memory::format_tag::defcab, // permute - mkldnn::memory::format_tag::aBcdef16b, // blocked 16c - - mkldnn::memory::format_tag::aBCdef16b16c, - mkldnn::memory::format_tag::aBCdef16c16b, - mkldnn::memory::format_tag::aBcdef4b, - mkldnn::memory::format_tag::aBCdef2c8b4c, - mkldnn::memory::format_tag::aBCdef4c4b, - mkldnn::memory::format_tag::aBCdef4b4c, - mkldnn::memory::format_tag::aBCdef8b8c, - mkldnn::memory::format_tag::aBCdef8b4c, - mkldnn::memory::format_tag::aBCdef8c16b2c, - mkldnn::memory::format_tag::aBCdef4c16b4c, - mkldnn::memory::format_tag::aBCdef8c8b, - - mkldnn::memory::format_tag::aBdefc16b, - mkldnn::memory::format_tag::aCBdef16c16b, - mkldnn::memory::format_tag::aCBdef16b16c, - mkldnn::memory::format_tag::aBdefc4b, - mkldnn::memory::format_tag::aBdefc8b, - - mkldnn::memory::format_tag::Abcdef4a, - mkldnn::memory::format_tag::Abcdef8a, - mkldnn::memory::format_tag::Abcdef16a, - mkldnn::memory::format_tag::Abcdef32a, - mkldnn::memory::format_tag::aBCdef2b4c2b, - mkldnn::memory::format_tag::aBCdef2c4b2c, - mkldnn::memory::format_tag::aBCdef4b8c2b, - mkldnn::memory::format_tag::aBCdef4c8b2c, - }} -}; - -mkldnn::memory::format_tag MKLDNNMemoryDesc::getFormat() const { - // TODO [OneDNN]: Previously it was a field of tdesc, but now the brute - // force search here. Please avoid of using this method. - const auto ndims = desc.dims().size(); - - // There are no suitable format_tag for this - if (ndims == 0 || ndims > 6) - return mkldnn::memory::format_tag::undef; - - for (const auto fmt : form_tags_by_ndims.at(ndims)) { - if (this->isSame(fmt)) - return fmt; - } - - return mkldnn::memory::format_tag::undef; -} - -bool MKLDNNMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const { - memory::desc refDesc(desc.dims(), desc.data_type(), fmt); - - if (desc.data.ndims != refDesc.data.ndims) - return false; - - if (desc.data.format_kind != dnnl_blocked || refDesc.data.format_kind != dnnl_blocked) - IE_THROW() << "MKLDNNMemoryDesc::isSame is not implemented for non blocked memory format"; - - auto actualBlkDesc = desc.data.format_desc.blocking; - auto refBlkDesc = refDesc.data.format_desc.blocking; - if (actualBlkDesc.inner_nblks != refBlkDesc.inner_nblks) - return false; - - for (size_t i = 0; i < actualBlkDesc.inner_nblks; ++i) - if (actualBlkDesc.inner_blks[i] != refBlkDesc.inner_blks[i]) - return false; - - for (size_t i = 0; i < actualBlkDesc.inner_nblks; ++i) - if (actualBlkDesc.inner_idxs[i] != refBlkDesc.inner_idxs[i]) - return false; - - auto actualStrides = desc.data.format_desc.blocking.strides; - auto refStrides = refDesc.data.format_desc.blocking.strides; - - std::vector actualOrder(desc.data.ndims); - { - const auto dims = desc.dims(); - std::vector total_block_per_dim(dims.size(), 1); - const auto &blk_desc = desc.data.format_desc.blocking; - for (int i = 0; i < blk_desc.inner_nblks; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - - std::iota(actualOrder.begin(), actualOrder.end(), 0); - std::sort(actualOrder.begin(), actualOrder.end(), - [&actualStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (actualStrides[ind_l] > actualStrides[ind_r]) || - (actualStrides[ind_l] == actualStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - } - - std::vector refOrder(refDesc.data.ndims); - { - const auto dims = refDesc.dims(); - std::vector total_block_per_dim(dims.size(), 1); - const auto &blk_desc = refDesc.data.format_desc.blocking; - for (int i = 0; i < blk_desc.inner_nblks; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - - std::iota(refOrder.begin(), refOrder.end(), 0); - std::sort(refOrder.begin(), refOrder.end(), - [&refStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (refStrides[ind_l] > refStrides[ind_r]) || - (refStrides[ind_l] == refStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - } - - if (actualOrder != refOrder) { - return false; - } - - return true; -} - -bool MKLDNNMemoryDesc::isPlainFormat() const { - if (desc.data.format_kind != dnnl_blocked) - return false; - - if (shape.getRank() != order.size()) { - return false; - } - for (size_t i = 0; i < order.size(); ++i) { - if (order[i] != i) { - return false; - } - } - return true; -} - -bool MKLDNNMemoryDesc::isBlockedCFormat(size_t blk_size) const { - const auto &blocking = desc.data.format_desc.blocking; - - if (desc.data.format_kind != dnnl_blocked || - blocking.inner_nblks != 1 || - blocking.inner_idxs[0] != 1) - return false; - - if ((order.size() - shape.getRank()) != 1) { - return false; - } - for (size_t i = 0; i < order.size() - 1; ++i) { - if (order[i] != i) { - return false; - } - } - if (blk_size != UNREACHABLE_DIM && blk_size != blocking.inner_blks[0]) { - return false; - } - - return true; -} - -bool MKLDNNMemoryDesc::isTailCFormat() const { - if (desc.data.format_kind != dnnl_blocked) - return false; - - if (shape.getRank() < 3) { - return false; - } - if (shape.getRank() != order.size()) { - return false; - } - if (!std::is_sorted(order.begin(), --order.end())) { - return false; - } - if (order.back() != 1) { - return false; - } - return true; -} - -bool MKLDNNMemoryDesc::blocksExtended() const { - for (int i = 0; i < desc.data.ndims; i++) { - if (desc.data.dims[i] != desc.data.padded_dims[i]) - return true; - } - return false; -} - -size_t MKLDNNMemoryDesc::getMemSizeImp() const { - return desc.get_size(); -} - -size_t MKLDNNMemoryDesc::getElementOffset(size_t elemNumber) const { - mkldnn::impl::memory_desc_wrapper wrapped(desc.data); - return wrapped.off_l(elemNumber); -} - -bool MKLDNNMemoryDesc::isCompatible(const MemoryDesc &rhs) const { - if (MemoryDescType::Blocked == rhs.getType()) { - return isCompatible(*(rhs.as())); - } else if (MemoryDescType::Mkldnn == rhs.getType()) { - return isCompatible(*(rhs.as())); - } else { - return false; - } -} - -static bool array_cmp_weak(const dnnl_dim_t *a1, const dnnl_dim_t *a2, size_t size) { - for (size_t i = 0; i < size; ++i) - if (a1[i] != a2[i] && a1[i] != DNNL_RUNTIME_DIM_VAL && a2[i] != DNNL_RUNTIME_DIM_VAL) return false; - return true; -} - -bool MKLDNNMemoryDesc::isCompatible(const MKLDNNMemoryDesc &rhs) const { - using namespace dnnl; - using namespace impl; - using namespace impl::utils; - if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { - return false; - } - - if (this->desc == rhs.desc) { - return true; - } - memory_desc_wrapper wrappedThis(this->desc.data); - memory_desc_wrapper wrappedRhs(rhs.desc.data); - if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) - return false; - if (wrappedThis.is_wino_desc() || wrappedThis.is_rnn_packed_desc()) return false; - - const auto &blk = wrappedThis.blocking_desc(); - const auto &r_blk = wrappedRhs.blocking_desc(); - - int stride_start = wrappedThis.ndims() >0 && wrappedThis.dims()[0] == 1 ? 1 : 0; //ignore batch axis stride if batch size == 1 - - // Here is a slightly modified version of mkldnn::impl::memory_desc_wrapper::similar_to() call able to skip specific strides check - // and use weak comparison - return wrappedThis.ndims() == wrappedRhs.ndims() - && wrappedThis.format_kind() == wrappedRhs.format_kind() - && wrappedThis.data_type() == wrappedRhs.data_type() - && array_cmp_weak(wrappedThis.dims(), wrappedRhs.dims(), wrappedThis.ndims()) - && array_cmp_weak(blk.strides + stride_start, r_blk.strides + stride_start, wrappedThis.ndims() - stride_start) - && blk.inner_nblks == r_blk.inner_nblks - && array_cmp(blk.inner_blks, r_blk.inner_blks, blk.inner_nblks) - && array_cmp(blk.inner_idxs, r_blk.inner_idxs, blk.inner_nblks) - && array_cmp_weak(wrappedThis.padded_dims(), wrappedRhs.padded_dims(), wrappedRhs.ndims()) - && array_cmp_weak(wrappedThis.padded_offsets(), wrappedRhs.padded_offsets(), wrappedThis.ndims()) - && dimsEqualWeak(wrappedThis.offset0(), wrappedRhs.offset0()); -} - - -/** - * Check compatibility with BlockedMemoryDesc - * - * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} - * strides // the order of outer dims is encoded here - * inner_blks 4 16 4 - * inner_idxs 1 0 1 - * - * BlockedMemoryDesc desc has more expressive ability. - * How to check compatibility with BlockedMemoryDesc representation: - * 0. Detect a new_outer_order of outer_dims via descending strides. - * 1. BlockedMemoryDesc strides : concatenate strides in new_outer_order and inner strides. - * 2. BlockedMemoryDesc dims : concatenate outer dims in new_outer_order with auto padding and inner blocks - * 3. BlockedMemoryDesc order : concatenate new_outer_order and inner_idxs - */ - -bool MKLDNNMemoryDesc::isCompatible(const BlockedMemoryDesc &rhs) const { - if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { - return false; - } - - if (desc.data.format_kind != dnnl_blocked) { - return false; - } - - if (desc.data.extra.flags != dnnl_memory_extra_flag_none) { - return false; - } - - const auto dims = desc.dims(); - const auto &blk_desc = desc.data.format_desc.blocking; - - const size_t inner_ndims = blk_desc.inner_nblks; - - if (!dimsEqualWeak(order, rhs.getOrder())) { - return false; - } - - size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : - Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 - if (!dimsEqualWeak(getStrides(), rhs.getStrides(), skipAxis)) { - return false; - } - - if (!dimsEqualWeak(getBlockDims(), rhs.getBlockDims())) { - return false; - } - - // offset padded to data. Same as for oneDNN - SizeVector blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; - // TODO: The BlockedMemoryDesc implementation allow to specify offset_to_data for inner blocked dims. - // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will - // fill it with zero. - blk_offset_to_data.insert(blk_offset_to_data.end(), inner_ndims, 0); - if (!dimsEqualWeak(blk_offset_to_data, rhs.getOffsetPaddingToData())) { - return false; - } - - return dimsEqualWeak(desc.data.offset0, rhs.getOffsetPadding()); -} - -bool MKLDNNMemoryDesc::hasLayoutType(LayoutType layoutType) const { - switch (layoutType) { - case LayoutType::ncsp: - return isPlainFormat(); - case LayoutType::nspc: - return isTailCFormat(); - case LayoutType::nCsp8c: - return isBlockedCFormat(8); - case LayoutType::nCsp16c: - return isBlockedCFormat(16); - default: - return false; - } -} - -std::string MKLDNNMemoryDesc::serializeFormat() const { - if (desc.data.format_kind == dnnl_format_kind_wino) { - switch (desc.data.format_desc.wino_desc.wino_format) { - case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOIoi: return "wino_aaOIoi"; - case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOio: return "wino_aaOio"; - case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOBiOo: return "wino_aaOBiOo"; - case dnnl_wino_memory_format_t::dnnl_wino_wei_OBaaIBOIio: return "wino_OBaaIBOIio"; - default: return "wino_undef"; - } - } - auto fmt = getFormat(); - return mkldnn::utils::fmt2str(fmt); +DnnlMemoryDescPtr MKLDNNMemory::GetDescWithType() const { + return MemoryDescUtils::convertToDnnlMemoryDesc(*pMemDesc); } -bool MKLDNNMemoryDesc::isDefinedImp() const { - mkldnn::impl::memory_desc_wrapper wrappedThis(desc.data); - if (!wrappedThis.is_blocking_desc()) { - return true; - } - - if (wrappedThis.has_runtime_dims_or_strides()) { - return false; - } - - return wrappedThis.offset0() != Shape::UNDEFINED_DIM; -} - -InferenceEngine::Precision MKLDNNMemoryDesc::getPrecision() const { - return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type()); -} - -void MKLDNNMemoryDesc::setPrecision(InferenceEngine::Precision prc) { - desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); -} - -std::unique_ptr MKLDNNMemoryDesc::cloneWithNewDimsImp(const std::vector &dims) const { - using namespace dnnl::impl::utils; - if (desc.data.format_kind != dnnl_blocked) { - IE_THROW(Unexpected) << "Cannot clone non blocked oneDNN desc with new dims"; - } - - auto mklDims = MKLDNNExtensionUtils::convertToDnnlDims(dims); - mkldnn::memory::desc newMklDesc = desc; - array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size()); - std::vector perm(order.begin(), order.begin() + mklDims.size()); - auto& blockingDesc = newMklDesc.data.format_desc.blocking; - auto numInnerBlks = blockingDesc.inner_nblks; - std::vector innerBlks(std::begin(blockingDesc.inner_blks), std::begin(blockingDesc.inner_blks) + numInnerBlks); - std::vector innerIdxs(std::begin(blockingDesc.inner_idxs), std::begin(blockingDesc.inner_idxs) + numInnerBlks); - auto retCode = dnnl::impl::fill_blocked(newMklDesc.data, perm, innerBlks, innerIdxs); - if (retCode != dnnl::impl::status::success) { - IE_THROW() << "Can not clone MKLDNNMemoryDesc with dims: " << dims2str(dims); - } - return MKLDNNPlugin::make_unique(newMklDesc); -} - -size_t MKLDNNMemoryDesc::getMaxMemSize() const { - if (desc.data.format_kind != dnnl_blocked || shape.isStatic()) { - return getCurrentSize(); - } - - auto& maxDims = shape.getMaxDims(); - if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { - return UNDEFINED_SIZE; - } - - auto maxDimsDesc = cloneWithNewDims(maxDims); - return maxDimsDesc->getCurrentSize(); -} - -std::vector MKLDNNMemoryDesc::getStrides() const { - const auto dims = desc.dims(); - - const auto &blk_desc = desc.data.format_desc.blocking; - - const size_t outer_ndims = dims.size(); - const size_t inner_ndims = blk_desc.inner_nblks; - const size_t total_ndims = outer_ndims + inner_ndims; - - // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} - std::vector inner_strides(inner_ndims, 1); - for (size_t i = 1; i < blk_desc.inner_nblks; i++) { - inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; - } - - // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} - std::vector outer_order(outer_ndims); - std::copy(order.begin(), order.begin() + outer_ndims, outer_order.begin()); - - // blocked strides - // [outer_strides via new_outer_order] U [inner_strides] - SizeVector blk_strides(total_ndims, 0); - std::copy(inner_strides.rbegin(), inner_strides.rend(), blk_strides.rbegin()); - std::transform(outer_order.begin(), outer_order.end(), blk_strides.begin(), - [&](size_t i) { return blk_desc.strides[i] == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : blk_desc.strides[i]; }); - return blk_strides; -} - -std::vector MKLDNNMemoryDesc::getBlockDims() const { - const auto dims = desc.dims(); - - const auto &blk_desc = desc.data.format_desc.blocking; - - const size_t outer_ndims = dims.size(); - const size_t inner_ndims = blk_desc.inner_nblks; - const size_t total_ndims = outer_ndims + inner_ndims; - - // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} - std::vector total_block_per_dim(outer_ndims, 1); - for (int i = 0; i < inner_ndims; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - // blocked dims - // [dims via new_outer_order with auto pad] U [inner_blk_dims] - std::vector outer_block_dims = MKLDNNExtensionUtils::convertToSizeVector(dims); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - if (outer_block_dims[i] != Shape::UNDEFINED_DIM) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - } - - // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} - std::vector outer_order(outer_ndims); - std::copy(order.begin(), order.begin() + outer_ndims, outer_order.begin()); - - SizeVector blk_dims(total_ndims, 0); - std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, - blk_dims.end() - blk_desc.inner_nblks); - std::transform(outer_order.begin(), outer_order.end(), blk_dims.begin(), - [&] (size_t i) { return outer_block_dims[i]; }); - return blk_dims; -} - -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const Shape &shape, dnnl::memory::data_type dataType, dnnl::memory::format_tag format) : MemoryDesc(shape, Mkldnn) { - auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims()); - if (format == memory::format_tag::any) - IE_THROW(Unexpected) << "Memory format any is prohibited!"; - if (format != memory::format_tag::undef) { - if (format == memory::format_tag::x && dims.size() == 0) { - desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); - } else { - desc = mkldnn::memory::desc(dims, dataType, format); - } - - std::vector perm; - std::vector inner_blks; - std::vector inner_idxs; - - mkldnn::impl::memory_desc_wrapper::compute_blocking(mkldnn::memory::convert_to_c(format), perm, inner_blks, inner_idxs); - - order.swap(perm); - order.insert(order.end(), inner_idxs.begin(), inner_idxs.end()); - } else { - // Trying to create plain descriptor - // This WA is needed since memory::format_tag doesn't contain plain tag for tensors with rank > 6D - InitializePlain(shape.getDims(), dataType); - } +template<> +BlockedMemoryDescPtr MKLDNNMemory::GetDescWithType() const { + return MemoryDescUtils::convertToBlockedMemoryDesc(*pMemDesc); } - -/** - * Construct from blocked parameters - * - * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} - * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. - * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence - * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims - * - * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) - * - * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of - * real dims spliting. - * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] - * but not [0]<=>[4] because it break splitting original dims into internal blocked dims - * Normalization of representation: Make strides growing but keep layout same as original. Not all - * layout allow us to meet normalize form of tensor desc. - * - * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... N] - */ - -MKLDNNMemoryDesc::MKLDNNMemoryDesc(InferenceEngine::Precision prc, const Shape &shape, const std::vector &blockedDims, - const std::vector &order, size_t offsetPadding, const std::vector &offsetPaddingToData, - const std::vector &strides) : MemoryDesc(shape, Mkldnn) { - // scalar case - if (shape.getRank() == 0) { - desc.data.format_kind = dnnl_blocked; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(prc)); - desc.data.ndims = 1; - desc.data.dims[0] = 1; - desc.data.padded_dims[0] = 1; - desc.data.format_desc.blocking.strides[0] = 1; - desc.data.padded_offsets[0] = 0; - desc.data.offset0 = offsetPadding; - return; - } - - if (order.size() != blockedDims.size()) { - IE_THROW() << "Can not construct MKLDNNMemoryDesc, order and blocked dims must have equals size"; - } - - if (!offsetPaddingToData.empty() && offsetPaddingToData.size() != order.size()) { - IE_THROW() << "Can not construct MKLDNNMemoryDesc, offsetPaddingToData must have equal size with order and blocked dims"; - } - - if (!strides.empty() && strides.size() != order.size()) { - IE_THROW() << "Can not construct MKLDNNMemoryDesc, strides must have equal size with order and blocked dims"; - } - - if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { - IE_THROW() << "MKLDNNMemoryDesc doesn't support undefined order."; - } - - if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { - IE_THROW() << "MKLDNNMemoryDesc doesn't support undefined blockedDims."; - } - - auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims()); - - size_t outer_ndims = dims.size(); - size_t inner_ndims = order.size() - dims.size(); - - if (!strides.empty()) { - bool is_descending_strides = true; - for (int i = 1; i < strides.size(); i++) { - is_descending_strides &= (strides[i - 1] >= strides[i]); - } - - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims - // and may be we can achieve correct "descending strides" form which allow conversion. - if (!is_descending_strides) - IE_THROW() << "Can not construct MKLDNNMemoryDesc from strides: " << vec2str(strides); - } - - std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension - for (size_t i = 0; i < outer_ndims; i++) { - outer_order[order[i]] = i; - } - bool outer_is_correct_permutation_of_n = - std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); - - if (!outer_is_correct_permutation_of_n) - IE_THROW() << "Can not construct MKLDNNMemoryDesc because of incorrect order: " << vec2str(order); - - if (!strides.empty() && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) { - bool inner_block_are_dense = one_of(strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted - for (int i = outer_ndims; i < strides.size() - 1; i++) { - inner_block_are_dense &= (strides[i] == strides[i + 1] * blockedDims[i + 1]); - } - - if (!inner_block_are_dense) - IE_THROW() << "Can not construct MKLDNNMemoryDesc from strides: " << vec2str(strides) << " inner blocks are not dense."; - } - - // Fill general memory desc fields - desc.data.format_kind = dnnl_blocked; - desc.data.extra.flags = 0; - desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(prc)); - desc.data.ndims = dims.size(); - desc.data.offset0 = offsetPadding; - std::copy(dims.begin(), dims.end(), desc.data.dims); - - if (!offsetPaddingToData.empty()) { - bool inner_pad_offsets_is_zero = std::all_of(offsetPaddingToData.begin() + outer_ndims, offsetPaddingToData.end(), - [](size_t pad) { return pad == 0; }); - - if (!inner_pad_offsets_is_zero) - IE_THROW() << "Can not construct MKLDNNMemoryDesc, inner pad offsets is not zero: " << vec2str(offsetPaddingToData); - auto dnnlPaddedOffsets = MKLDNNExtensionUtils::convertToDnnlDims(offsetPaddingToData); - std::copy(dnnlPaddedOffsets.begin(), dnnlPaddedOffsets.begin() + outer_ndims, desc.data.padded_offsets); - } else { - std::fill(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + outer_ndims, 0); - } - - std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1); - auto dnnlBlkDims = MKLDNNExtensionUtils::convertToDnnlDims(blockedDims); - - for (size_t i = 0; i < order.size(); i++) { - auto idx = order[i]; - if (desc.data.padded_dims[idx] != DNNL_RUNTIME_DIM_VAL && dnnlBlkDims[i] != DNNL_RUNTIME_DIM_VAL) { - desc.data.padded_dims[idx] *= dnnlBlkDims[i]; - } else { - desc.data.padded_dims[idx] = DNNL_RUNTIME_DIM_VAL; - } - } - - // Fill blocking desc - auto &dnn_blk_desc = desc.data.format_desc.blocking; - dnn_blk_desc.inner_nblks = inner_ndims; - std::copy(dnnlBlkDims.end() - inner_ndims, dnnlBlkDims.end(), dnn_blk_desc.inner_blks); - std::copy(order.end() - inner_ndims, order.end(), dnn_blk_desc.inner_idxs); - - if (strides.empty()) { - if (std::any_of(dnnlBlkDims.begin(), dnnlBlkDims.end(), [](memory::dim val) { return val == DNNL_RUNTIME_DIM_VAL; })) { - std::fill(std::begin(dnn_blk_desc.strides), std::begin(dnn_blk_desc.strides) + outer_ndims, DNNL_RUNTIME_DIM_VAL); - } else { - //TODO [DS]: phase 2: refactor - std::vector tmpStrides(order.size()); - tmpStrides[order.size() - 1] = 1; - for (size_t i = 2; i <= order.size(); i++) { - tmpStrides[order.size() - i] = tmpStrides[order.size() - (i - 1)] * dnnlBlkDims[blockedDims.size() - (i - 1)]; - } - for (size_t i = 0; i < outer_ndims; i++) { - dnn_blk_desc.strides[i] = tmpStrides[outer_order[i]]; - } - } - } else { - for (size_t i = 0; i < outer_ndims; i++) { - auto dnnlStrides = MKLDNNExtensionUtils::convertToDnnlDims(strides); - dnn_blk_desc.strides[i] = dnnlStrides[outer_order[i]]; - } - } - - this->order = order; -} } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h index cfaaae9121f8ff..07224a182756fe 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h @@ -6,13 +6,14 @@ #include "ie_layouts.h" #include "mkldnn_dims.h" -#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc.h" #include "mkldnn_extension_utils.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include #include #include -#include + +#include "memory_desc/dnnl_memory_desc.h" #include #include @@ -23,11 +24,6 @@ /** * @file contains a concept classes to work with memory/tensor/blob abstractions on plugin level. * - * MKLDNNMemoryDesc - the descriptor of tensor representation in memory. Describes all required information - * for proper allocation and handling tensor in some buffer. The real memory is not present, just description. - * This object answers on question how and where data with logical index [x1, x2, .. xN] placed in real buffer. - * In the simplest case it describe a mapping between "logical offset" and "real offset". - * * MKLDNNMemory is an abstraction of some real tensor which contains some data. As in short it's a pair of * memory descriptor and raw buffer handler to contains data. In case of system memory raw buffer it's simple * "void*" on some system memory buffer. @@ -36,110 +32,6 @@ namespace MKLDNNPlugin { -/** - * Represent internal plugin abstraction of tensor description - * - */ -class MKLDNNMemoryDesc : public MemoryDesc { -public: - /** Construct a tensor desc with plain layout format (like ND C array) */ - MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType); - - /** Construct a tensor desc with specified layout format tag. Any and Undef is not supported */ - MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); - - MKLDNNMemoryDesc(const Shape& shape, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); - - explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc); - - /** - * Try to define original format tag use on creation - * - * @return format tag if was able to define it - */ - mkldnn::memory::format_tag getFormat() const; // move to the private section - - mkldnn::memory::data_type getDataType() const { - return static_cast(desc.data.data_type); - } - - // TODO [DS]: phase 2: remove!!! - MKLDNNDims getDims() const { - return MKLDNNDims(desc.data.dims, desc.data.ndims); - } - - - // TODO [DS]: phase 2: move to the blocked desc interface - bool blocksExtended() const; - - // TODO [DS]: phase 2: remove - operator bool() const { - return getFormat() != mkldnn::memory::format_tag::any && getFormat() != mkldnn::memory::format_tag::undef; - } - - bool operator == (const MKLDNNMemoryDesc& rhs) const; - bool operator != (const MKLDNNMemoryDesc& rhs) const; - - operator mkldnn::memory::desc() const; - - bool isSame(mkldnn::memory::format_tag fmt) const; - - dnnl_format_kind_t getFormatKind() const { - return desc.data.format_kind; - } - - std::unique_ptr clone() const override { - return MKLDNNPlugin::make_unique(*this); - } - - bool hasLayoutType(LayoutType layoutType) const override; - - std::string serializeFormat() const override; - - InferenceEngine::Precision getPrecision() const override; - - void setPrecision(InferenceEngine::Precision prc) override; - - bool isCompatible(const MemoryDesc& rhs) const override; - bool isCompatible(const BlockedMemoryDesc& rhs) const; - bool isCompatible(const MKLDNNMemoryDesc& rhs) const; - - const std::vector& getOrder() const { - return order; - } - - size_t getMaxMemSize() const override; - -private: - size_t getElementOffset(size_t elemNumber) const override; - void InitializePlain(const std::vector& _dims, mkldnn::memory::data_type dataType); - - size_t getMemSizeImp() const override; - bool isPlainFormat() const; - bool isBlockedCFormat(size_t blk_size = UNREACHABLE_DIM) const; - bool isTailCFormat() const; - bool isDefinedImp() const override; - std::unique_ptr cloneWithNewDimsImp(const std::vector& dims) const override; - - std::vector getStrides() const; - std::vector getBlockDims() const; - -private: - MKLDNNMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const std::vector& blockedDims, - const std::vector& order, size_t offsetPadding = 0, const std::vector& offsetPaddingToData = {}, - const std::vector& strides = {}); - - static constexpr size_t UNREACHABLE_DIM = std::numeric_limits::max(); - mkldnn::memory::desc desc; - std::vector order; - - friend BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc); - friend MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc); - friend MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const MKLDNNMemoryDesc& desc); - friend MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc); -}; - - class MKLDNNMemory { public: explicit MKLDNNMemory(const mkldnn::engine& eng); @@ -158,19 +50,14 @@ class MKLDNNMemory { return prim; } - // TODO [DS]: phase 2: remove - mkldnn::memory::desc GetDescriptor() const { - return prim->get_desc(); - } - - const MemoryDesc& GetDesc() const { + const MemoryDesc& getDesc() const { return *pMemDesc; } template ::value && !std::is_reference::value, int>::type = 0, typename std::enable_if::value, int>::type = 0> - T GetDescWithType() const; + std::unique_ptr GetDescWithType() const; /** * Return handler of buffer. Real data may starts from some other offset @@ -190,22 +77,14 @@ class MKLDNNMemory { */ void* GetPtr() const; - //TODO [DS]: phase 2: change to get precision mkldnn::memory::data_type GetDataType() const { - return static_cast(GetDescriptor().data.data_type); + return MKLDNNExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision()); } - //TODO [DS]: phase 2: align with descriptors size methods (reuse them under the hood) size_t GetSize() const; - //TODO [DS]: phase 2: remove - size_t GetElementsCount() const; - - - //TODO [DS]: phase 2: change to getShape - mkldnn::memory::dims GetDims() const { - auto data = GetDescriptor().data; - return {std::begin(data.dims), std::begin(data.dims) + data.ndims}; + const Shape& GetShape() const { + return getDesc().getShape(); } void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); @@ -217,9 +96,6 @@ class MKLDNNMemory { void redefineDesc(const MemoryDesc& desc); void redefineDesc(MemoryDescPtr desc); - // Like a plain format - //TODO [DS]: phase 2: remove - void SetData(mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format, const void* data, size_t size, bool ftz = true) const; void SetData(const MKLDNNMemory& memory, size_t size = 0, bool ftz = true) const; void FillZero(); @@ -227,22 +103,12 @@ class MKLDNNMemory { return useExternalStorage; } - //TODO [DS]: phase 2: move to oneDNN utils - static mkldnn::memory::format_tag GetPlainFormatByRank(size_t rank); - //TODO [DS]: phase 2: remove - static InferenceEngine::Layout GetPlainLayout(const mkldnn::memory::dims& dims); - static mkldnn::memory::format_tag Convert(const InferenceEngine::Layout layout); - static InferenceEngine::Precision convertToIePrec(mkldnn::memory::data_type dataType); - static mkldnn::memory::data_type convertToDataType(const InferenceEngine::Precision &precision); - - static std::string formatToString(mkldnn::memory::format_tag fmt); - //TODO [DS]: end remove section - - //TODO [DS]: phase 2: move to reorder - static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0); - const std::vector& getStaticDims() const { - return GetDesc().getShape().getStaticDims(); + return getDesc().getShape().getStaticDims(); + } + + mkldnn::engine getEngine() const { + return eng; } private: @@ -251,11 +117,6 @@ class MKLDNNMemory { void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); - //TODO [DS]: phase 2: remove - const MKLDNNMemoryDesc GetMKLDNNDesc() const { - return MKLDNNMemoryDesc(prim->get_desc()); - } - private: MemoryDescPtr pMemDesc; std::shared_ptr prim; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h index 3cbe768370cd01..92f8cf4f2de42d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h @@ -8,7 +8,7 @@ #include "blob_factory.hpp" #include "mkldnn_memory.h" #include "nodes/common/cpu_memcpy.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include @@ -18,7 +18,7 @@ class MKLDNNVariableState : public InferenceEngine::IVariableStateInternal { public: MKLDNNVariableState(std::string name, MKLDNNMemoryPtr storage) : InferenceEngine::IVariableStateInternal{name} { - state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->GetDesc())); + state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->getDesc())); state->allocate(); cpu_memcpy(state->buffer(), storage->GetData(), storage->GetSize()); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 7de5470c30e6d6..78d7834ebe10f0 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -55,7 +55,8 @@ #include "utils/general_utils.h" #include "utils/cpu_utils.hpp" #include "nodes/common/cpu_convert.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -474,7 +475,7 @@ bool MKLDNNNode::canBeInPlace() const { return true; } -void MKLDNNNode::resolveNotAllocatedEdges() { +void MKLDNNNode::resolveInPlaceEdges() { const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (!selected_pd) IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); @@ -504,6 +505,48 @@ void MKLDNNNode::resolveNotAllocatedEdges() { } } +std::unique_ptr MKLDNNNode::getBaseMemDescAtInputPort(size_t portNum) const { + if (auto primDesc = getSelectedPrimitiveDescriptor()) { + const auto& inConfs = primDesc->getConfig().inConfs; + if (inConfs.size() < portNum) { + IE_THROW() << "Can't get input memory desc at port: " << portNum << ", incorrect port number"; + } + return inConfs[portNum].desc->clone(); + } + IE_THROW() << "Can't get input memory desc, primitive descriptor is not selected"; +} + +std::unique_ptr MKLDNNNode::getBaseMemDescAtOutputPort(size_t portNum) const { + if (auto primDesc = getSelectedPrimitiveDescriptor()) { + const auto& outConfs = primDesc->getConfig().outConfs; + if (outConfs.size() < portNum) { + IE_THROW() << "Can't get output memory desc at port: " << portNum << ", incorrect port number"; + } + return outConfs[portNum].desc->clone(); + } + IE_THROW() << "Can't get output memory desc, primitive descriptor is not selected"; +} + +template<> +DnnlMemoryDescPtr MKLDNNNode::getInputMemDescAtPort(size_t portNum) const { + return MemoryDescUtils::convertToDnnlMemoryDesc(*getBaseMemDescAtInputPort(portNum)); +} + +template<> +BlockedMemoryDescPtr MKLDNNNode::getInputMemDescAtPort(size_t portNum) const { + return MemoryDescUtils::convertToBlockedMemoryDesc(*getBaseMemDescAtInputPort(portNum)); +} + +template<> +DnnlMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort(size_t portNum) const { + return MemoryDescUtils::convertToDnnlMemoryDesc(*getBaseMemDescAtInputPort(portNum)); +} + +template<> +BlockedMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort(size_t portNum) const { + return MemoryDescUtils::convertToBlockedMemoryDesc(*getBaseMemDescAtInputPort(portNum)); +} + std::string MKLDNNNode::getPrimitiveDescriptorType() { auto selectedPrimitiveDesc = getSelectedPrimitiveDescriptor(); @@ -640,7 +683,8 @@ void MKLDNNNode::execute(mkldnn::stream strm) { } void MKLDNNNode::executeDynamic(mkldnn::stream strm) { - resetOutputShape(); + const auto newShapes = shapeInfer(); + redefineOutputMemory(newShapes); executeDynamicImpl(strm); } @@ -653,19 +697,10 @@ void MKLDNNNode::redefineOutputMemory(const std::vector> &ne IE_THROW() << "Number shapes mismatch with real outputs number for node with name: " << getName(); } for (size_t i = 0; i < getOriginalOutputsNumber(); i++) { - getChildEdgesAtPort(i)[0]->getMemoryPtr()->redefineDesc(getOutputMemDescAtPort(i)->cloneWithNewDims(newShapes[i])); + getChildEdgesAtPort(i)[0]->getMemoryPtr()->redefineDesc(getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newShapes[i])); } } -void MKLDNNNode::resetOutputShape() { - const auto newShapes = shapeInfer(); - redefineOutputMemory(newShapes); -} - -void MKLDNNNode::resetOutputShape(const std::vector> &newShapes) { - redefineOutputMemory(newShapes); -} - void MKLDNNNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -680,7 +715,12 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { PortConfig portConfig; portConfig.inPlace = -1; portConfig.constant = false; - portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); + auto desc = getSrcMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked) { + portConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + portConfig.desc = std::move(desc); + } config.inConfs.push_back(portConfig); } @@ -688,7 +728,12 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { PortConfig portConfig; portConfig.inPlace = canBeInPlace() ? 0 : -1; portConfig.constant = false; - portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); + auto desc = getDstMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked) { + portConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + portConfig.desc = std::move(desc); + } config.outConfs.push_back(portConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -703,10 +748,9 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { void MKLDNNNode::filterSupportedPrimitiveDescriptors() { // Compare by partial layout descriptor (without particular strides values) auto areCompatible = [](const MemoryDesc& desc, mkldnn::memory::format_tag fmt) -> bool { - MKLDNNMemoryDesc fmt_tdesc = MKLDNNMemoryDesc{desc.getShape().getStaticDims(), - MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), - fmt}; - + auto fmt_tdesc = DnnlBlockedMemoryDesc(desc.getShape(), + MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + fmt); return desc.isCompatible(fmt_tdesc); }; @@ -803,12 +847,12 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) { for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { if (!selectedConfig.inConfs[i].desc->isCompatible(*config.inConfs[i].desc)) - IE_THROW() << "Incorrect descriptor for node: " << getName(); + IE_THROW() << "Incorrect descriptor for node: " << getName() << " on " << i << " intput port"; } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { if (!selectedConfig.outConfs[i].desc->isCompatible(*config.outConfs[i].desc)) - IE_THROW() << "Incorrect descriptor for node: " << getName(); + IE_THROW() << "Incorrect descriptor for node: " << getName() << " on " << i << " output port"; } rightConfig = config; } @@ -829,7 +873,7 @@ void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_de IE_THROW() << "Destination memory didn't allocate for node " << getName() << " from node " << getParentEdgeAt(i)->getParent()->getName() << "."; } - std::vector intDescs; + std::vector intDescs; for (auto &it : internalBlobDesc) intDescs.push_back(it(itpd, 0)); @@ -839,7 +883,7 @@ void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_de auto create = [&] () { // TODO [DS]: internal blobs should be removed or rewritten using Memory object - auto newDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(internalBlob->getTensorDesc()); + auto newDesc = *MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlob->getTensorDesc()); MKLDNNMemory memory{ engine }; memory.Create(newDesc, internalBlob->buffer()); @@ -1005,7 +1049,7 @@ std::unique_ptr MKLDNNNode::getDefinedInputDesc(const NodeConfig &co } } - return MemoryDescUtils::resetOffset(config.inConfs[idx].desc.get()); + return MemoryDescUtils::cloneWithDefaultStridesAndOffset(config.inConfs[idx].desc.get()); } std::unique_ptr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t idx) const { @@ -1033,7 +1077,7 @@ std::unique_ptr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &c } } - return MemoryDescUtils::resetOffset(config.outConfs[idx].desc.get()); + return MemoryDescUtils::cloneWithDefaultStridesAndOffset(config.outConfs[idx].desc.get()); } void MKLDNNNode::initOptimalPrimitiveDescriptor() { @@ -1066,12 +1110,12 @@ bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const { return true; } -std::unique_ptr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - return MKLDNNPlugin::make_unique(primitive_desc_it.src_desc(idx)); +std::unique_ptr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.src_desc(idx)); } -std::unique_ptr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - return MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)); +std::unique_ptr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(idx)); } int MKLDNNNode::batchToProcess() { @@ -1326,7 +1370,7 @@ void MKLDNNNode::fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector& buffer) { auto *constInputNode = dynamic_cast(constInput.get()); auto constBlob = constInputNode->getMemoryPtr(); - auto const elementsCount = constBlob->GetElementsCount(); + const auto elementsCount = constBlob->GetDescWithType()->getPaddedElementsCount(); buffer.resize(elementsCount); cpu_convert(constBlob->GetPtr(), &buffer[0], diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index e6d086caa4e941..d093bc72b20f46 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -29,7 +29,7 @@ #include #include "cpu_types.h" #include "cpu_shape.h" -#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc.h" namespace MKLDNNPlugin { @@ -455,27 +455,27 @@ class MKLDNNNode { return &supportedPrimitiveDescriptors[selectedPrimitiveDescriptorIndex]; } - const MemoryDesc* getOutputMemDescAtPort(size_t portNum) const { - if (auto primDesc = getSelectedPrimitiveDescriptor()) { - const auto& outConfs = primDesc->getConfig().outConfs; - if (outConfs.size() < portNum) { - return nullptr; - } - return outConfs[portNum].desc.get(); - } - return nullptr; - } + /** + * @brief Returns input selected primitive descriptor on the specified port + * must be used after selectOptimalPrimitiveDescriptor stage + * @param portNum port number + * @return selected primitive descriptor with type T + */ + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + std::unique_ptr getInputMemDescAtPort(size_t portNum) const; - const MemoryDesc* getInputMemDescAtPort(size_t portNum) const { - if (auto primDesc = getSelectedPrimitiveDescriptor()) { - const auto& inConfs = primDesc->getConfig().inConfs; - if (inConfs.size() < portNum) { - return nullptr; - } - return inConfs[portNum].desc.get(); - } - return nullptr; - } + /** + * @brief Returns output selected primitive descriptor on the specified port + * must be used after selectOptimalPrimitiveDescriptor stage + * @param portNum port number + * @return selected primitive descriptor with type T + */ + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + std::unique_ptr getOutputMemDescAtPort(size_t portNum) const; void selectPrimitiveDescriptorByIndex(int index) { if (index < 0 || index >= supportedPrimitiveDescriptors.size()) @@ -490,7 +490,7 @@ class MKLDNNNode { virtual void setDynamicBatchLim(int lim); - void resolveNotAllocatedEdges(); + void resolveInPlaceEdges(); virtual void execute(mkldnn::stream strm); void executeDynamic(mkldnn::stream strm); @@ -518,8 +518,7 @@ class MKLDNNNode { return created(); } - virtual void resetOutputShape(); - virtual void resetOutputShape(const std::vector> &newShapes); + void redefineOutputMemory(const std::vector> &newShapes); /** * @brief Performs Node initialization based on graph context. @@ -696,7 +695,7 @@ class MKLDNNNode { protected: virtual std::vector> shapeInfer() const { - IE_THROW() << "MKLDNNNode::shapeInfer is not defined for node with type: " << getTypeStr(); + IE_THROW(NotImplemented) << "MKLDNNNode::shapeInfer is not defined for node with type: " << getTypeStr(); } virtual void executeDynamicImpl(mkldnn::stream strm); @@ -713,8 +712,8 @@ class MKLDNNNode { virtual std::unique_ptr getDefinedInputDesc(const NodeConfig &config, size_t idx) const; virtual std::unique_ptr getDefinedOutputDesc(const NodeConfig &config, size_t idx) const; - virtual std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); - virtual std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); /** * @brief Appends new item into ops list with the information on how the node should be executed as post operation. @@ -724,7 +723,7 @@ class MKLDNNNode { virtual void appendPostOps(mkldnn::post_ops& ops); virtual std::shared_ptr initPrimitiveAttr() const { return nullptr; } - typedef std::function + typedef std::function GetPrimitiveMemoryFormatFunc; std::vector internalBlobDesc; @@ -834,7 +833,8 @@ class MKLDNNNode { } private: - void redefineOutputMemory(const std::vector> &newShapes); + std::unique_ptr getBaseMemDescAtInputPort(size_t portNum) const; + std::unique_ptr getBaseMemDescAtOutputPort(size_t portNum) const; bool isDynamic = false; diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp index e060bc1aebe2d9..258207bf30e696 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp @@ -13,17 +13,17 @@ constexpr size_t channelsPos = 1lu; class PlainFormatCreator : public BlockedDescCreator { public: - BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const override { + CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const override { SizeVector order(srcShape.getRank()); std::iota(order.begin(), order.end(), 0); - return BlockedMemoryDesc(precision, srcShape, srcShape.getDims(), order); + return CpuBlockedMemoryDesc(precision, srcShape, srcShape.getDims(), order); } size_t getMinimalRank() const override { return 0lu; } }; class PerChannelCreator : public BlockedDescCreator { public: - BlockedMemoryDesc createDesc(const InferenceEngine::Precision &precision, const Shape& srcShape) const override { + CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision &precision, const Shape& srcShape) const override { SizeVector order(srcShape.getRank()); std::iota(order.begin(), order.end(), 0); SizeVector blkDims = srcShape.getDims(); @@ -37,7 +37,7 @@ class PerChannelCreator : public BlockedDescCreator { moveElementBack(blkDims, channelsPos); } - return BlockedMemoryDesc(precision, srcShape, blkDims, order); + return CpuBlockedMemoryDesc(precision, srcShape, blkDims, order); } size_t getMinimalRank() const override { return 3lu; } }; @@ -45,7 +45,7 @@ class PerChannelCreator : public BlockedDescCreator { class ChannelBlockedCreator : public BlockedDescCreator { public: ChannelBlockedCreator(size_t blockSize) : _blockSize(blockSize) {} - BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const override { + CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const override { if (srcShape.getRank() < 2) { IE_THROW() << "Can't create blocked tensor descriptor!"; } @@ -60,7 +60,7 @@ class ChannelBlockedCreator : public BlockedDescCreator { } blkDims.push_back(_blockSize); - return BlockedMemoryDesc(precision, srcShape, blkDims, order); + return CpuBlockedMemoryDesc(precision, srcShape, blkDims, order); } size_t getMinimalRank() const override { return 3lu; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h index 4d77c51148cc17..bac2a1a6914c85 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h @@ -6,7 +6,7 @@ #include #include "cpu_shape.h" -#include "cpu_blocked_memory_desc.h" +#include "memory_desc/cpu_blocked_memory_desc.h" namespace MKLDNNPlugin { @@ -27,17 +27,17 @@ class BlockedDescCreator { makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes); static std::pair makeFilteredRange(const CreatorsMap& map, Predicate predicate); - virtual BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const = 0; - std::unique_ptr createUniqueDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const { - return MKLDNNPlugin::make_unique(createDesc(precision, srcShape)); + virtual CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const = 0; + std::unique_ptr createUniqueDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const { + return MKLDNNPlugin::make_unique(createDesc(precision, srcShape)); } // TODO [DS]: phase 2 remove - virtual BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { + virtual CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { return createDesc(precision, Shape(srcDims)); } - std::unique_ptr createUniqueDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { - return MKLDNNPlugin::make_unique(createDesc(precision, srcDims)); + std::unique_ptr createUniqueDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { + return MKLDNNPlugin::make_unique(createDesc(precision, srcDims)); } virtual size_t getMinimalRank() const = 0; virtual ~BlockedDescCreator() = default; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp index 4bf60d6eb21f4a..34883afe2a15a3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp @@ -134,15 +134,15 @@ void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { auto srcBlockDesc = srcMemory0.GetDescriptor().data.format_desc.blocking; int blockSize = srcBlockDesc.inner_nblks > 0 ? srcBlockDesc.inner_blks[0] : 1; - auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); - auto isTailCFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); + auto isPlainFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::ncsp); + auto isTailCFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::nspc); const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - if (srcMemory1.GetElementsCount() != spatialDimsCount) - IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetElementsCount() + if (srcMemory1.GetShape().getElementsCount() != spatialDimsCount) + IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetShape().getElementsCount() << ") inconsistent with pooling vector size (" << spatialDimsCount << ")"; auto inputDimVector = srcMemory0.GetDims(); @@ -165,8 +165,8 @@ void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { if (!selectedPrimitiveDescriptor) IE_THROW() << errorPrefix << "doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto srcStrides = getParentEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); - auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + auto srcStrides = getParentEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); // unified strides array const size_t tailDimsOffset = (isTailCFmt ? -1 : 0); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp index 8700a70c5b6450..19778a4dbe6ecb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp @@ -112,7 +112,7 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); const bool blocked = srcDesc.hasLayoutType(LayoutType::nCsp8c) || srcDesc.hasLayoutType(LayoutType::nCsp16c); const auto dimsSize = inDims.size(); @@ -130,7 +130,7 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { blockShape.erase(blockShape.begin() + 1); } - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); const size_t blockSize = blocked ? dstDesc.getBlockDims().back() : 1lu; const size_t blockCountInput = srcDesc.getBlockDims()[1]; @@ -224,13 +224,13 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { } void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) { case 1: batchToSpaceKernel::value_type>(); break; case 2: batchToSpaceKernel::value_type>(); break; case 4: batchToSpaceKernel::value_type>(); break; default: IE_THROW() << "BatchToSpace layer does not support precision '" << - std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) << "'"; + std::string(getParentEdgeAt(0)->getMemory().getDesc().getPrecision().name()) << "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 6ebb985fc2c9fc..bd456bae3f22e3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -988,7 +988,7 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { weiDims[2], weiDims[3], weiFirstDimBlockSize, 32}; std::vector weiOrder = {0, 1, 2, 3, 0, 1}; - config.inConfs[1].desc = MKLDNNPlugin::make_unique(Precision::BIN, Shape(weiDims), weiBlockDims, weiOrder); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(Precision::BIN, Shape(weiDims), weiBlockDims, weiOrder); //result auto outputPrecision = withBinarization ? Precision::BIN : Precision::FP32; @@ -1071,8 +1071,8 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { jcp.nb_oc_blocking = nstl::min(implType == impl_desc_type::jit_sse42 ? 2 : implType == impl_desc_type::jit_avx2 ? 4 : 6, jcp.nb_oc); - auto srcPrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); - auto dstPrecision = getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(); + auto srcPrecision = getParentEdgeAt(0)->getMemory().getDesc().getPrecision(); + auto dstPrecision = getChildEdgeAt(0)->getMemory().getDesc().getPrecision(); jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(dstPrecision); jcp.typesize_in = srcPrecision == Precision::BIN ? 1 : srcPrecision.size(); @@ -1295,21 +1295,21 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) { auto dst = reinterpret_cast(dstMemory->GetPtr()); auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - std::vector srcStride(srcDesc.getStrides().size()); + std::vector srcStride(srcDesc->getStrides().size()); for (int i = 0; i < srcStride.size(); i++) { - srcStride[srcDesc.getOrder()[i]] = srcDesc.getStrides()[i]; + srcStride[srcDesc->getOrder()[i]] = srcDesc->getStrides()[i]; } auto weiDesc = getParentEdgeAt(1)->getMemory().GetDescWithType(); - std::vector weightsStride(weiDesc.getShape().getRank()); + std::vector weightsStride(weiDesc->getShape().getRank()); for (int i = 0; i < weightsStride.size(); i++) { - weightsStride[weiDesc.getOrder()[i]] = weiDesc.getStrides()[i]; + weightsStride[weiDesc->getOrder()[i]] = weiDesc->getStrides()[i]; } auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - std::vector dstStride(dstDesc.getStrides().size()); + std::vector dstStride(dstDesc->getStrides().size()); for (int i = 0; i < dstStride.size(); i++) { - dstStride[dstDesc.getOrder()[i]] = dstDesc.getStrides()[i]; + dstStride[dstDesc->getOrder()[i]] = dstDesc->getStrides()[i]; } auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp index ef1b5ac5d08f73..29ce5305d02d3c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp @@ -71,7 +71,7 @@ void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { SizeVector dst_dims = getChildEdgeAt(0)->getMemory().getStaticDims(); SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getMemory().getStaticDims(); - auto srcDesc = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDescWithType(); SizeVector srcStrides = srcDesc.getStrides(); size_t data_size = srcDesc.getPrecision().size(); @@ -88,7 +88,7 @@ void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { IE_THROW() << "Output tensor dimension is smaller then input tensor dimension"; } - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); InferenceEngine::SizeVector dstStrides = dstDesc.getStrides(); InferenceEngine::SizeVector src_aligned(dst_dims.size()); InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp index 47ada64ffc025e..9ca7a24b5b7e3a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp @@ -22,7 +22,7 @@ #include #include "common/cpu_memcpy.h" #include "common/blocked_desc_creator.h" -#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -143,8 +143,8 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { for (size_t i = 0; i < getParentEdges().size(); ++i) { config.inConfs[i].inPlace = -1; config.inConfs[i].constant = false; - config.inConfs[i].desc = MemoryDescUtils::applyUndefinedOffset( - itr->second->createDesc(inputPrecision, getParentEdgeAt(i)->getShape().getStaticDims())); + config.inConfs[i].desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(itr->second->createDesc( + inputPrecision, getParentEdgeAt(i)->getShape().getStaticDims())); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); if (itr->first != LayoutType::nspc) { @@ -167,8 +167,8 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - const auto &order = refConfig.outConfs[0].desc->as()->getOrder(); - const auto &blkDims = refConfig.outConfs[0].desc->as()->getBlockDims(); + const auto &order = refConfig.outConfs[0].desc->as()->getOrder(); + const auto &blkDims = refConfig.outConfs[0].desc->as()->getBlockDims(); auto numOfDim = blkDims.size(); SizeVector offsets(numOfDim, 0lu); @@ -184,14 +184,14 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { } } - config.outConfs[0].desc = MKLDNNPlugin::make_unique(outputPrecision, Shape(dstDims), blkDims, order, offset, offsets, strides); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(outputPrecision, Shape(dstDims), blkDims, order, offset, offsets, strides); for (size_t i = 0; i < getParentEdges().size(); i++) { - const auto& srcBlkDims = refConfig.inConfs[i].desc->as()->getBlockDims(); + const auto& srcBlkDims = refConfig.inConfs[i].desc->as()->getBlockDims(); const auto& shape = refConfig.inConfs[i].desc->getShape(); config.inConfs[i].inPlace = 0; - config.inConfs[i].desc = MKLDNNPlugin::make_unique(inputPrecision, shape, srcBlkDims, order, offset, offsets, strides); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(inputPrecision, shape, srcBlkDims, order, offset, offsets, strides); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -339,7 +339,7 @@ void MKLDNNConcatNode::createPrimitive() { IE_THROW() << "Preferable primitive descriptor is not set."; //check if selected Tensor descriptor has nspc layout and concat axis is C - if (axis == channelAxis && getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + if (axis == channelAxis && getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { canOptimizeNspc = true; return; } @@ -354,7 +354,7 @@ void MKLDNNConcatNode::createPrimitive() { << getName() << "."; } - auto desc = srcMemPtr->GetDescriptor(); + auto desc = srcMemPtr->GetDescWithType()->getDnnlDesc(); auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); for (size_t j = 0; j < dims.size(); j++) { desc.data.dims[j] = dims[j]; @@ -363,7 +363,7 @@ void MKLDNNConcatNode::createPrimitive() { srcs_d.emplace_back(desc); } - auto desc = getChildEdgeAt(0)->getMemory().GetDescriptor(); + auto desc = getChildEdgeAt(0)->getMemory().GetDescWithType()->getDnnlDesc(); auto& dims = getChildEdgeAt(0)->getShape().getStaticDims(); for (size_t i = 0; i < dims.size(); i++) { desc.data.dims[i] = dims[i]; @@ -432,33 +432,33 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() { } // reset undefined offsets - config.outConfs[i].desc = MemoryDescUtils::resetOffset(config.outConfs[i].desc.get()); + config.outConfs[i].desc = MemoryDescUtils::cloneWithDefaultStridesAndOffset(config.outConfs[i].desc.get()); } - auto firstOutBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[0].desc); + auto firstOutBlockingDesc = config.outConfs[0].desc->as(); size_t offset = 0; for (size_t i = 0; i < config.inConfs.size(); i++) { - auto inpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[i].desc); - config.inConfs[i].desc = MKLDNNPlugin::make_unique(inpBlockingDesc.getPrecision(), - inpBlockingDesc.getShape(), - inpBlockingDesc.getBlockDims(), - inpBlockingDesc.getOrder(), - firstOutBlockingDesc.getOffsetPadding() + offset, - firstOutBlockingDesc.getOffsetPaddingToData(), - firstOutBlockingDesc.getStrides()); + auto inpBlockingDesc = config.inConfs[i].desc->as(); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(inpBlockingDesc->getPrecision(), + inpBlockingDesc->getShape(), + inpBlockingDesc->getBlockDims(), + inpBlockingDesc->getOrder(), + firstOutBlockingDesc->getOffsetPadding() + offset, + firstOutBlockingDesc->getOffsetPaddingToData(), + firstOutBlockingDesc->getStrides()); size_t axisSize = 1; - auto firstInpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); - if (firstInpBlockingDesc.hasLayoutType(LayoutType::nspc)) { + auto firstInpBlockingDesc = config.inConfs[0].desc->as(); + if (firstInpBlockingDesc->hasLayoutType(LayoutType::nspc)) { // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for blocked - size_t realAxis = inverseOrder(firstInpBlockingDesc.getOrder(), axis); - for (size_t j = realAxis; j < inpBlockingDesc.getBlockDims().size(); j++) { - size_t jj = firstInpBlockingDesc.getOrder()[j]; - axisSize *= inpBlockingDesc.getBlockDims()[jj]; + size_t realAxis = inverseOrder(firstInpBlockingDesc->getOrder(), axis); + for (size_t j = realAxis; j < inpBlockingDesc->getBlockDims().size(); j++) { + size_t jj = firstInpBlockingDesc->getOrder()[j]; + axisSize *= inpBlockingDesc->getBlockDims()[jj]; } } else { // This works for nchw and nchw8c/nchw16c - for (size_t j = axis; j < inpBlockingDesc.getBlockDims().size(); j++) { - axisSize *= inpBlockingDesc.getBlockDims()[j]; + for (size_t j = axis; j < inpBlockingDesc->getBlockDims().size(); j++) { + axisSize *= inpBlockingDesc->getBlockDims()[j]; } } offset += axisSize; @@ -502,7 +502,7 @@ void MKLDNNConcatNode::execNspcSpecCase() { for (size_t i = 0; i < num_src; i++) { const MKLDNNMemory& src_mem = getParentEdgeAt(i)->getMemory(); - const size_t num_channels = src_mem.GetDims()[channelAxis]; + const size_t num_channels = src_mem.getStaticDims()[channelAxis]; channelsDataSize.push_back(num_channels * dataSize); src_ptrs.push_back(reinterpret_cast(src_mem.GetData())); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index 4bff8260c7900a..7dcb64b34e8fe9 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -18,7 +18,8 @@ #include #include #include "common/cpu_convert.h" -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -244,9 +245,9 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { outputDataType = memory::data_type::f32; if (eltwisePrecision == Precision::BF16) eltwisePrecision = Precision::FP32; - in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape(), inputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); - out_candidate = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + out_candidate = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape(), outputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { @@ -285,36 +286,36 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { memory::format_tag nCsp16c = ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c; memory::format_tag nCsp8c = ndims == 4 ? memory::format_tag::nChw8c : memory::format_tag::nCdhw8c; - auto inputDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto outputDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto inputShape = getParentEdgeAt(0)->getShape(); + auto outputShape = getChildEdgeAt(0)->getShape(); if (IC == 1 && groupOC == 1) { - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); + in_candidate = MKLDNNPlugin::make_unique(inputShape, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputShape, outputDataType, ncsp); createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else if (IC < 4) { - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); + in_candidate = MKLDNNPlugin::make_unique(inputShape, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputShape, outputDataType, nCsp16c); createDescriptor({ in_candidate.get() }, { out_candidate.get() }); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); + out_candidate = MKLDNNPlugin::make_unique(outputShape, outputDataType, nCsp8c); createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp16c); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp8c); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); + in_candidate = MKLDNNPlugin::make_unique(inputShape, inputDataType, nCsp16c); + out_candidate = MKLDNNPlugin::make_unique(outputShape, outputDataType, nCsp16c); createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = MKLDNNPlugin::make_unique(inputShape, inputDataType, nCsp8c); + out_candidate = MKLDNNPlugin::make_unique(outputShape, outputDataType, nCsp8c); + createDescriptor({ in_candidate.get() }, { out_candidate.get() });; } - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); + in_candidate = MKLDNNPlugin::make_unique(inputShape, inputDataType, ncsp); + out_candidate = MKLDNNPlugin::make_unique(outputShape, outputDataType, ncsp); createDescriptor({ in_candidate.get() }, { out_candidate.get() }); if (inputDataType != memory::data_type::bf16 && isNspcAvailable()) { - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nspc); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nspc); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = MKLDNNPlugin::make_unique(inputShape, inputDataType, nspc); + out_candidate = MKLDNNPlugin::make_unique(outputShape, outputDataType, nspc);; + createDescriptor({ in_candidate.get() }, { out_candidate.get() });; } } } @@ -395,11 +396,12 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - auto srcDesc = getSrcMemDesc(itpd, i); - if (isGrouped || srcDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) - dataConfig.desc = std::move(srcDesc); - else - dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*srcDesc); + auto desc = getSrcMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked && !isGrouped) { + dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + dataConfig.desc = std::move(desc); + } config.inConfs.push_back(dataConfig); } @@ -414,10 +416,10 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = MKLDNNPlugin::make_unique(Shape(dwWeightsDims), weightsPrc, memory::format_tag::Goihw8g); config.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = MKLDNNPlugin::make_unique(Shape(dwBiasesDims), biasPrc, memory::format_tag::x); config.inConfs.push_back(dataConfig); } @@ -428,12 +430,12 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { } dataConfig.constant = false; - - auto dstDesc = getDstMemDesc(itpd, i); - if (isGrouped || dstDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) - dataConfig.desc = std::move(dstDesc); - else - dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*dstDesc); + auto desc = getDstMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked && !isGrouped) { + dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + dataConfig.desc = std::move(desc); + } config.outConfs.push_back(dataConfig); @@ -482,13 +484,13 @@ bool MKLDNNConvolutionNode::created() const { void MKLDNNConvolutionNode::createDescriptor(const std::vector& inputDesc, const std::vector& outputDesc) { - auto inDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); - auto outDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); + const auto inDesc = MemoryDescUtils::convertToDnnlMemoryDesc(*inputDesc[0])->getDnnlDesc(); + const auto outDesc = MemoryDescUtils::convertToDnnlMemoryDesc(*outputDesc[0])->getDnnlDesc(); - memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); + memory::data_type wdt = static_cast(inDesc.data.data_type); memory::data_type bdt = memory::data_type::f32; - if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { + if (inDesc.data.data_type == mkldnn_s8 || inDesc.data.data_type == mkldnn_u8) { wdt = memory::data_type::s8; } @@ -556,10 +558,6 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { if (canBeExecutedInInt8()) { isStridedBlobsSupported = false; } - // TODO [NM]: fix strided blobs feature support for dynamic weights - // if (getOriginalInputsNumber() != 1) { - // isStridedBlobsSupported = false; - // } if (isStridedBlobsSupported) { createDescriptor({config.inConfs[0].desc.get()}, {config.outConfs[0].desc.get()}); @@ -600,10 +598,10 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = MKLDNNPlugin::make_unique(Shape(dwWeightsDims), weightsPrc, memory::format_tag::Goihw8g); cfg.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = MKLDNNPlugin::make_unique(Shape(dwBiasesDims), biasPrc, memory::format_tag::x); cfg.inConfs.push_back(dataConfig); } @@ -659,12 +657,12 @@ void MKLDNNConvolutionNode::filterSupportedDescriptors() { while (itd != descs.end()) { bool isSuitableDesc = true; if (!inputMemoryFormatsFilter.empty()) { - MKLDNNMemoryDesc src_tdesc(std::shared_ptr(*itd)->data.src_desc); - isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.src_desc); + isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]); } if (!outputMemoryFormatsFilter.empty()) { - MKLDNNMemoryDesc dst_tdesc(std::shared_ptr(*itd)->data.dst_desc); - isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.dst_desc); + isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]); } if (!isSuitableDesc) { itd = descs.erase(itd); @@ -698,21 +696,21 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c isPossibleJitPlanar = false; std::shared_ptr convDesc(desc); - auto srcMemDesc = MKLDNNMemoryDesc {convDesc->data.src_desc}; - auto dstMemDesc = MKLDNNMemoryDesc {convDesc->data.dst_desc}; + auto srcMemDesc = MKLDNNExtensionUtils::makeDescriptor(convDesc->data.src_desc); + auto dstMemDesc = MKLDNNExtensionUtils::makeDescriptor(convDesc->data.dst_desc); auto srcDataType = convDesc->data.src_desc.data_type; auto dstDataType = convDesc->data.dst_desc.data_type; - bool isPlanarFloatConv = srcMemDesc.hasLayoutType(LayoutType::ncsp) - && dstMemDesc.hasLayoutType(LayoutType::ncsp) + bool isPlanarFloatConv = srcMemDesc->hasLayoutType(LayoutType::ncsp) + && dstMemDesc->hasLayoutType(LayoutType::ncsp) && srcDataType == memory::data_type::f32 && dstDataType == memory::data_type::f32; return !isPossibleJitPlanar && isPlanarFloatConv; } -std::unique_ptr MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - return MKLDNNPlugin::make_unique(std::move(desc)); +std::unique_ptr MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : primitive_desc_it.src_desc(idx); + return MKLDNNExtensionUtils::makeDescriptor(desc); } bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { @@ -826,7 +824,7 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn if (blb == nullptr) IE_THROW() << "Cannot get const blob for node " << getName() << "."; - auto const elementsCount = blb->GetElementsCount(); + auto const elementsCount = blb->GetDescWithType()->getPaddedElementsCount(); InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, dims, getWeightsLayoutByDims(dims, isGrouped)); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 7fa5ed80bb8040..e18a53e4567d08 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -32,7 +32,7 @@ class MKLDNNConvolutionNode : public MKLDNNNode { return false; } InferenceEngine::Precision getRuntimePrecision() const override; - std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; const mkldnn::memory& getWeights() const; const mkldnn::memory& getBias() const; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 00a403c8bb6782..c6d07cd5903a63 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -59,6 +59,13 @@ void MKLDNNConvertNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << " has incorrect number of output edges"; } +bool MKLDNNConvertNode::isSupportedDesc(const MemoryDesc &desc) { + bool isSupported = desc.getType() & MemoryDescType::Blocked; + if (desc.getType() == MemoryDescType::DnnlBlocked) + isSupported &= desc.as()->hasEmptyExtraData(); + return isSupported; +} + void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -69,9 +76,16 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { config.dynBatchSupport = false; - // if input and output pointers are not null, then the inp/output tensor descriptors were set using setDescs method, so - // they should be used as the actual descriptors. + bool canInitExternalDesc = false; if (input && output) { + canInitExternalDesc = true; + canInitExternalDesc &= isSupportedDesc(*input); + canInitExternalDesc &= isSupportedDesc(*output); + } + + // if input and output pointers are not null and not contain extra data, then the inp/output tensor descriptors were set using setDescs method, so + // they should be used as the actual descriptors. + if (canInitExternalDesc) { dataIn.desc = input->clone(); config.inConfs.push_back(dataIn); @@ -93,8 +107,8 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { auto range = BlockedDescCreator::makeFilteredRange(creators, insShape.getRank()); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(insPrecision, insShape.getDims())); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(outPrecision, outputShape.getDims())); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(insPrecision, insShape.getDims())); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(outPrecision, outputShape.getDims())); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -117,12 +131,16 @@ void MKLDNNConvertNode::createPrimitive() { void MKLDNNConvertNode::execute(mkldnn::stream strm) { auto& parentMem = getParentEdgeAt(0)->getMemory(); auto& childMem = getChildEdgeAt(0)->getMemory(); - if (parentMem.GetElementsCount() != childMem.GetElementsCount()) + + const auto parentPaddElemCount = parentMem.GetDescWithType()->getPaddedElementsCount(); + const auto childPaddElemCount = childMem.GetDescWithType()->getPaddedElementsCount(); + + if (parentPaddElemCount != childPaddElemCount) IE_THROW() << errorPrefix << " has different elements number in input and output buffers"; void* srcPtr = parentMem.GetPtr(); void* dstPtr = childMem.GetPtr(); - cpu_convert(srcPtr, dstPtr, parentMem.GetDesc().getPrecision(), childMem.GetDesc().getPrecision(), parentMem.GetElementsCount()); + cpu_convert(srcPtr, dstPtr, parentMem.getDesc().getPrecision(), childMem.getDesc().getPrecision(), parentPaddElemCount); } bool MKLDNNConvertNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index 38707385f7a8ba..763de002ccc4cb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -40,9 +40,11 @@ class MKLDNNConvertNode : public MKLDNNNode { static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedDesc(const MemoryDesc &desc); + private: - std::unique_ptr input; - std::unique_ptr output; + MemoryDescPtr input; + MemoryDescPtr output; std::string errorPrefix; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp index 5124409cf8b9d8..85b2f50b8dc5a1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp @@ -133,7 +133,7 @@ template void MKLDNNCumSumNode::exec() { const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType().getStrides(); + const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType().getStrides(); if (reverse) { if (exclusive) { @@ -248,8 +248,8 @@ inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector &forSta } size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const { - const auto& axisPrecision = _axis.GetDesc().getPrecision(); - const int64_t dataShapeSize = static_cast(_data.GetDesc().getShape().getRank()); + const auto& axisPrecision = _axis.getDesc().getPrecision(); + const int64_t dataShapeSize = static_cast(_data.getDesc().getShape().getRank()); int64_t axisValueFromBlob; switch (axisPrecision) { case Precision::I32 : { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 62c173c72f5a29..7d9b4385e03ec5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -16,7 +16,8 @@ #include #include #include -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -42,8 +43,8 @@ bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); + internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> DnnlMemoryDesc { + return DnnlMemoryDesc(primitive_desc_it.weights_desc(0)); }); std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { @@ -244,13 +245,13 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]); internalBlobs.push_back(createWeiBlobAsIO(weightDims)); auto format = getParentEdgeAt(0)->getShape().getRank() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + DnnlMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + DnnlMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); createDescriptor({&in_candidate}, {&out_candidate}); } else { for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + DnnlMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); + DnnlMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); createDescriptor({&in_candidate}, {&out_candidate}); } } @@ -292,20 +293,32 @@ void MKLDNNDeconvolutionNode::filterSupportedDescriptors() { bool isSuitableDesc = true; if (!inputMemoryFormatsFilter.empty()) { if (isInt8) { - auto src_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.src_desc); - isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.src_desc); + const auto oneDnnDesc = dynamic_cast(src_tdesc.get()); + if (!oneDnnDesc) + isSuitableDesc = false; + isSuitableDesc &= oneDnnDesc->isSame(inputMemoryFormatsFilter[0]); } else { - auto src_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.diff_src_desc); - isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.diff_src_desc); + const auto oneDnnDesc = dynamic_cast(src_tdesc.get()); + if (!oneDnnDesc) + isSuitableDesc = false; + isSuitableDesc &= oneDnnDesc->isSame(inputMemoryFormatsFilter[0]); } } if (!outputMemoryFormatsFilter.empty()) { if (isInt8) { - auto dst_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.dst_desc); - isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.dst_desc); + const auto oneDnnDesc = dynamic_cast(src_tdesc.get()); + if (!oneDnnDesc) + isSuitableDesc = false; + isSuitableDesc &= oneDnnDesc->isSame(outputMemoryFormatsFilter[0]); } else { - auto dst_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.diff_dst_desc); - isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.diff_dst_desc); + const auto oneDnnDesc = dynamic_cast(src_tdesc.get()); + if (!oneDnnDesc) + isSuitableDesc = false; + isSuitableDesc &= oneDnnDesc->isSame(outputMemoryFormatsFilter[0]); } } if (!isSuitableDesc) { @@ -349,8 +362,8 @@ void MKLDNNDeconvolutionNode::createPrimitive() { void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { - const MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); - const MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); + const DnnlMemoryDesc in_candidate = MemoryDescUtils::convertToDnnlMemoryDesc(*inputDesc[0]); + const DnnlMemoryDesc out_candidate = MemoryDescUtils::convertToDnnlMemoryDesc(*outputDesc[0]); // grouping and autoblicking is not compatible if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) @@ -400,21 +413,18 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::unique_ptr MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx == 2) { - auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2)); - return MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), dataType, - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(2)->getShape().getRank())); + return MKLDNNPlugin::make_unique(getOriginalInputPrecisionAtPort(2), Shape(getParentEdgeAt(2)->getShape().getStaticDims())); } - MKLDNNMemoryDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : isInt8 ? MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)) : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_desc(idx)); - return MKLDNNPlugin::make_unique(std::move(desc)); + auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx); + return MKLDNNExtensionUtils::makeDescriptor(desc); } -std::unique_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - return isInt8 ? MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)) : - MKLDNNPlugin::make_unique(primitive_desc_it.diff_src_desc(idx)); +std::unique_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = isInt8 ? primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx); + return MKLDNNExtensionUtils::makeDescriptor(desc); } InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index 15ee71d6af74ac..68d0373db73252 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -31,8 +31,8 @@ class MKLDNNDeconvolutionNode : public MKLDNNNode { return static_cast(getParentEdges().size()); } - std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp index 370524be475d32..2758e60851b555 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp @@ -853,10 +853,11 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o : mayiuse(avx512_common) ? memory::format_tag::OIhw16i16o : memory::format_tag::OIhw8i8o; - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, dataFormat); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, offFormat); +<<<<<<< HEAD auto& wDims = getParentEdgeAt(2)->getShape().getStaticDims(); if (group > 1 && wDims.size() != 5) { @@ -877,21 +878,30 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { memory::data_type::f32, memory::format_tag::nchw); } config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), +======= + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), + memory::data_type::f32, weiFormat); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), +>>>>>>> New descriptor hierarchy (#20) memory::data_type::f32, dataFormat); supportedPrimitiveDescriptors.push_back({config, impl_type}); } else { // reference implementation - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32, + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32, memory::format_tag::oihw); +<<<<<<< HEAD if (inputsNumber > 3) { config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(3)->getShape().getStaticDims(), memory::data_type::f32, memory::format_tag::nchw); } config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, +======= + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, +>>>>>>> New descriptor hierarchy (#20) memory::format_tag::nchw); supportedPrimitiveDescriptors.push_back({config, impl_type}); } @@ -1130,23 +1140,28 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { float *dst = reinterpret_cast(dstMemory.GetPtr()); +<<<<<<< HEAD auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType(); +======= + auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType(); +>>>>>>> New descriptor hierarchy (#20) std::vector src_strides(src_block_desc.getStrides().size()); for (int i = 0; i < src_strides.size(); i++) { src_strides[src_block_desc.getOrder()[i]] = src_block_desc.getStrides()[i]; } - auto dst_block_desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dst_block_desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector dst_strides(dst_block_desc.getStrides().size()); for (int i = 0; i < dst_strides.size(); i++) { dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i]; } +<<<<<<< HEAD auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType().getStrides(); auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType().getStrides(); @@ -1155,6 +1170,10 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { modulation_strides = getParentEdgeAt(3)->getMemory().GetDescWithType().getStrides(); } +======= + auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType().getStrides(); + auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType().getStrides(); +>>>>>>> New descriptor hierarchy (#20) if (def_conv_kernel) { executeOptimized(src, offsets, weights, dst, src_strides, off_strides, dst_strides); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp index a117d3acbdcd4d..1a8cbca3951863 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp @@ -161,8 +161,8 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + const bool isBlocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; @@ -194,8 +194,8 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -224,7 +224,7 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { } reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, srcBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + } else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp index 1796d49989e9eb..c24121b44c4d4d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp @@ -247,8 +247,8 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { const auto *input = reinterpret_cast(inputDataEdge->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(outputDataEdge->getMemoryPtr()->GetPtr()); - auto inputStrides = inputDataEdge->getMemory().GetDescWithType().getStrides(); - auto outputStrides = outputDataEdge->getMemory().GetDescWithType().getStrides(); + auto inputStrides = inputDataEdge->getMemory().GetDescWithType().getStrides(); + auto outputStrides = outputDataEdge->getMemory().GetDescWithType().getStrides(); if (inputShape != outputShape) { copyDataToOutputWithSignalSize(input, inputShape, inputStrides, output, outputShape, outputStrides); } else { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index dc29b14777738f..44e314ebed5a80 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -37,6 +37,7 @@ #include #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -960,14 +961,15 @@ std::map& op, std::string& errorMessage) noexcept { try { - if (initializers.find(op->get_type_info()) == initializers.end()) { - errorMessage = "Doesn't support Eltwise algorithm: " + std::string(op->get_type_name()); - return false; - } if (isDynamicNgraphNode(op)) { errorMessage = "Doesn't support op with dynamic shapes"; return false; } + + if (initializers.find(op->get_type_info()) == initializers.end()) { + errorMessage = "Doesn't support Eltwise algorithm: " + std::string(op->get_type_name()); + return false; + } } catch (...) { return false; } @@ -980,6 +982,7 @@ MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr& op, co if (!isSupportedOperation(op, errorMessage)) { IE_THROW(NotImplemented) << errorMessage; } + initializers[op->get_type_info()](op, *this); } size_t MKLDNNEltwiseNode::getOpInputsNum() const { @@ -1117,7 +1120,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { }; auto initDesc = [&] (LayoutType lt) -> NodeDesc { - auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> std::unique_ptr { + auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> std::unique_ptr { if (lt == ChannelsFirst && edge->getShape().getRank() != 1) { auto dims = edge->getShape().getStaticDims(); auto ndims = dims.size(); @@ -1133,7 +1136,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks[i] = dims[order[i]]; } - return MKLDNNPlugin::make_unique(prc, edge->getShape(), blocks, order, offset); + return MKLDNNPlugin::make_unique(prc, edge->getShape(), blocks, order, offset); } else if (lt == Blocked && edge->getShape().getRank() != 1 && edge->getShape().getStaticDims()[1] != 1) { size_t blockSize = mayiuse(x64::avx512_common) ? 16 : 8; @@ -1145,13 +1148,13 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks.push_back(blockSize); order.push_back(1); - return MKLDNNPlugin::make_unique(prc, edge->getShape(), blocks, order, offset); + return MKLDNNPlugin::make_unique(prc, edge->getShape(), blocks, order, offset); } else { std::vector blocks = edge->getShape().getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); - return MKLDNNPlugin::make_unique(prc, edge->getShape(), blocks, order, offset); + return MKLDNNPlugin::make_unique(prc, edge->getShape(), blocks, order, offset); } }; @@ -1229,7 +1232,7 @@ void MKLDNNEltwiseNode::createPrimitive() { auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector order(maxInputSize); - auto outOrder = outBlockingDesc.getOrder(); + auto outOrder = outBlockingDesc->getOrder(); for (size_t i = 0; i < order.size(); i++) { if (i < order.size() - outOrder.size()) order[i] = i; @@ -1237,18 +1240,18 @@ void MKLDNNEltwiseNode::createPrimitive() { order[i] = outOrder[i - (order.size() - outOrder.size())] + (order.size() - outOrder.size()); } - size_t outRank = outBlockingDesc.getBlockDims().size(); + size_t outRank = outBlockingDesc->getBlockDims().size(); for (int i = 0; i < outRank; i++) { - dims_out[dims_out.size() - 1 - i] = outBlockingDesc.getBlockDims()[outRank - 1 - i]; + dims_out[dims_out.size() - 1 - i] = outBlockingDesc->getBlockDims()[outRank - 1 - i]; } for (int i = 0; i < inputNum; i++) { auto inBlockingDesc = getParentEdgeAt(i)->getMemory().GetDescWithType(); - size_t inRank = inBlockingDesc.getBlockDims().size(); + size_t inRank = inBlockingDesc->getBlockDims().size(); // WA to normalize blocked and planar layouts - auto inOrder = inBlockingDesc.getOrder(); - size_t startOff = outOrder.size() != outBlockingDesc.getShape().getRank() && + auto inOrder = inBlockingDesc->getOrder(); + size_t startOff = outOrder.size() != outBlockingDesc->getShape().getRank() && outOrder[outOrder.size() - 1] != inOrder[inOrder.size() - 1] ? 1 : 0; // WA to handle nspc layout with 1D tensors @@ -1257,7 +1260,7 @@ void MKLDNNEltwiseNode::createPrimitive() { } for (int j = 0; j < inRank; j++) { - dims_in[i][dims_in[i].size() - 1 - j - startOff] = inBlockingDesc.getBlockDims()[inRank - 1 - j]; + dims_in[i][dims_in[i].size() - 1 - j - startOff] = inBlockingDesc->getBlockDims()[inRank - 1 - j]; } } @@ -1273,7 +1276,7 @@ void MKLDNNEltwiseNode::createPrimitive() { offsets_out.resize(maxInputSize, 1); offset_out_calc(offsets_out, dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_out[j] *= getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); + offsets_out[j] *= getChildEdgeAt(0)->getMemory().getDesc().getPrecision().size(); } offsets_in.resize(inputNum); @@ -1281,17 +1284,17 @@ void MKLDNNEltwiseNode::createPrimitive() { offsets_in[i].resize(maxInputSize, 1); offset_in_calc(offsets_in[i], dims_in[i], dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_in[i][j] *= getParentEdgeAt(i)->getMemory().GetDesc().getPrecision().size(); + offsets_in[i][j] *= getParentEdgeAt(i)->getMemory().getDesc().getPrecision().size(); } } start_offset_in.resize(inputNum); for (size_t i = 0; i < inputNum; i++) { - start_offset_in[i] = getParentEdgeAt(i)->getMemory().GetDescriptor().data.offset0 * - MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(getParentEdgeAt(i)->getMemory().GetDescriptor().data.data_type)); + const auto desc = getParentEdgeAt(i)->getMemory().GetDescWithType(); + start_offset_in[i] = desc->getOffsetPadding() * desc->getPrecision().size(); } - start_offset_out = getChildEdgeAt(0)->getMemory().GetDescriptor().data.offset0 * - MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(getChildEdgeAt(0)->getMemory().GetDescriptor().data.data_type)); + const auto desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + start_offset_out = desc->getOffsetPadding() * desc->getPrecision().size(); }; auto collapseLastDims = [](std::vector& dims, int dimsToCollapse) { @@ -1326,10 +1329,10 @@ void MKLDNNEltwiseNode::createPrimitive() { }; auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - tensorRank = std::max(static_cast(optimalTensorRank), outBlockingDesc.getBlockDims().size()); + tensorRank = std::max(static_cast(optimalTensorRank), outBlockingDesc->getBlockDims().size()); initDims(tensorRank); - auto outOrder = outBlockingDesc.getOrder(); + auto outOrder = outBlockingDesc->getOrder(); size_t oc_size = 0; offsets_oc.resize(tensorRank, 0); if (isFusedWith(FakeQuantize)) { @@ -1359,7 +1362,7 @@ void MKLDNNEltwiseNode::createPrimitive() { bool hasDifferentDims = false; while (currentJitWorkAmount < minimalJitWorkAmount && currentJitWorkAmount < fullWorkAmount && // we shouldn't collapse batch dimension in case dynamic batch is enabled - (!isDynBatchEnabled || (outBlockingDesc.getBlockDims().size() - collapsedDims > 2))) { + (!isDynBatchEnabled || (outBlockingDesc->getBlockDims().size() - collapsedDims > 2))) { if (dims_out.size() - collapsedDims - 2 < 0) break; @@ -1411,7 +1414,7 @@ void MKLDNNEltwiseNode::createPrimitive() { } } - batchDimIdx = tensorRank - outBlockingDesc.getBlockDims().size() + collapsedDims; + batchDimIdx = tensorRank - outBlockingDesc->getBlockDims().size() + collapsedDims; schedulerWorkAmount = fullWorkAmount / dims_out[dims_out.size() - 1]; initOffsets(tensorRank); @@ -1428,10 +1431,10 @@ void MKLDNNEltwiseNode::createPrimitive() { for (int i = 0; i < inputNum; i++) { jep.src_size[i] = dims_in[i][dims_in[i].size() - 1]; - jep.src_prc[i] = getParentEdgesAtPort(i).front()->getMemory().GetDesc().getPrecision(); + jep.src_prc[i] = getParentEdgesAtPort(i).front()->getMemory().getDesc().getPrecision(); } jep.dst_size = dims_out[dims_out.size() - 1]; - jep.dst_prc = getChildEdgesAtPort(0).front()->getMemory().GetDesc().getPrecision(); + jep.dst_prc = getChildEdgesAtPort(0).front()->getMemory().getDesc().getPrecision(); jep.oc_size = oc_size; jep.work_amount = dims_out.back(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp index 4499e91dacb9bd..46f6b347099493 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp @@ -122,7 +122,7 @@ void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().getDesc().getPrecision(), getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp index f185d08588157d..a782b1df86c8ff 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp @@ -89,7 +89,7 @@ void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().getDesc().getPrecision(), getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp index 1cea74dc5fb886..852c3648b35f6f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp @@ -124,7 +124,7 @@ void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().getDesc().getPrecision(), getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp index 13ada3cf81dfa5..c248676da2b098 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp @@ -434,8 +434,8 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { const size_t RH = _rates[0], RW = _rates[1]; const size_t PT = _pad_top, PL = _pad_left; - const std::vector istrides = getParentEdgeAt(0)->getMemory().GetDescWithType().getStrides(); - const std::vector ostrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + const std::vector istrides = getParentEdgeAt(0)->getMemory().GetDescWithType().getStrides(); + const std::vector ostrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); const std::vector ostrides_partial = {ostrides[0], KW * IC * ostrides[1], IC * ostrides[1], ostrides[1]}; if (extract_image_patches_kernel) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp index 48a955c424651e..70bedffae1cdfc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp @@ -19,7 +19,8 @@ #include "ie_parallel.hpp" #include -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" // Quantization ranges validation is switched off by default in order to avoid regressions on user side // #define VALIDATE_QUANTIZATION_RANGES @@ -1222,12 +1223,12 @@ void MKLDNNFakeQuantizeNode::createPrimitive() { jqp.dst_prc = config.outConfs[0].desc->getPrecision(); auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - jqp.s_str = srcDesc.getStrides(); + jqp.s_str = srcDesc->getStrides(); auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - jqp.d_str = dstDesc.getStrides(); + jqp.d_str = dstDesc->getStrides(); - jqp.is_planar = srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5); + jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5); jqp.op_type = getAlgorithm(); @@ -1259,7 +1260,7 @@ void MKLDNNFakeQuantizeNode::createPrimitive() { size_t axisSize = getParentEdgeAt(0)->getShape().getStaticDims()[getAxis()]; size_t axisPaddedSize = rnd_up(axisSize, 16); - MKLDNNMemoryDesc weightsDataDesc = {{(uint32_t)axisPaddedSize}, memory::data_type::f32, memory::format_tag::x}; + DnnlBlockedMemoryDesc weightsDataDesc(Shape(InferenceEngine::SizeVector{axisPaddedSize}), memory::data_type::f32, memory::format_tag::x); if (isBinarization()) { auto binarizationThresholdsDataMem = std::make_shared(getEngine()); @@ -1459,7 +1460,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { auto output_scale = reinterpret_cast(internalBlobMemory[4]->GetData()); auto output_shift = reinterpret_cast(internalBlobMemory[5]->GetData()); - auto& srcDesc = srcMemory->GetDesc(); + auto& srcDesc = srcMemory->getDesc(); auto srcDims = srcDesc.getShape().getStaticDims(); bool is_blk_format = !srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index ee8dc1b730b911..7415e384f7aa5d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -12,7 +12,8 @@ #include #include #include "utils/general_utils.h" -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -237,7 +238,7 @@ std::shared_ptr MKLDNNFullyConnectedNode::initPrimitiveA return attr; } -// WA: creation MKLDNNMemoryDesc with format == any is prohibited +// WA: creation DnnlMemoryDesc with format == any is prohibited // so we create mkldnn::memory::desc directly // we need specific method and can't remove createDescriptor from base class because its used into initDescriptor void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::desc &inputDesc, @@ -261,9 +262,9 @@ void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::de auto normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; auto normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; in_candidate = mkldnn::memory::desc(normalizedInDims, in_candidate.data_type(), - MKLDNNMemory::GetPlainFormatByRank(normalizedInDims.size())); + MKLDNNExtensionUtils::GetPlainFormatByRank(normalizedInDims.size())); out_candidate = mkldnn::memory::desc(normalizedOutDims, out_candidate.data_type(), - MKLDNNMemory::GetPlainFormatByRank(normalizedOutDims.size())); + MKLDNNExtensionUtils::GetPlainFormatByRank(normalizedOutDims.size())); } mkldnn::memory::desc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); @@ -285,26 +286,27 @@ void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::de void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { - createDescriptorInternal(MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0])); + createDescriptorInternal(MemoryDescUtils::convertToDnnlMemoryDesc(*inputDesc[0]), MemoryDescUtils::convertToDnnlMemoryDesc(*outputDesc[0])); } -std::unique_ptr MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); +std::unique_ptr MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : primitive_desc_it.src_desc(idx); if (getParentEdgeAt(idx)->getShape().getRank() == 3) { - desc = MKLDNNMemoryDesc(getParentEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); + return MKLDNNPlugin::make_unique(MKLDNNExtensionUtils::DataTypeToIEPrecision( + static_cast(desc.data.data_type)), getParentEdgeAt(idx)->getShape()); } - return MKLDNNPlugin::make_unique(std::move(desc)); + return MKLDNNExtensionUtils::makeDescriptor(desc); } -std::unique_ptr MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - auto desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); +std::unique_ptr MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = primitive_desc_it.dst_desc(idx); + if (getChildEdgeAt(idx)->getShape().getRank() == 3) { - desc = MKLDNNMemoryDesc(getChildEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), - MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(idx)->getShape().getRank())); + return MKLDNNPlugin::make_unique(MKLDNNExtensionUtils::DataTypeToIEPrecision( + static_cast(desc.data.data_type)), getChildEdgeAt(idx)->getShape()); } - return MKLDNNPlugin::make_unique(std::move(desc)); + return MKLDNNExtensionUtils::makeDescriptor(desc); } InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h index 01820fdfcc39ea..72e45ac076088b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h @@ -34,8 +34,8 @@ class MKLDNNFullyConnectedNode : public MKLDNNNode { return static_cast(getOriginalInputsNumber()); } - std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp index 75ee34dbda5be4..2f299a91398f9f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp @@ -101,11 +101,11 @@ void MKLDNNGatherNDNode::gatherElementwise() { const auto *indices = reinterpret_cast(getParentEdgeAt(_indicesIndex)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto strides = getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides(); + auto strides = getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides(); const size_t* srcMultipliers = strides.data() + _batchDims; const size_t cycles = getChildEdgeAt(0)->getShape().getElementsCount() * - getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size() / (sizeof(dataType) * _batchNum); + getChildEdgeAt(0)->getMemory().getDesc().getPrecision().size() / (sizeof(dataType) * _batchNum); const size_t CS = cycles * _sliceRank; const size_t CB = cycles * _blockSize; const size_t workAmount = _batchNum * cycles; @@ -150,7 +150,7 @@ void MKLDNNGatherNDNode::gatherBlocks() { std::vector srcMultipliers(_sliceRank); for (size_t i = 0; i < _sliceRank ; i++) - srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides()[i + _batchDims]; + srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides()[i + _batchDims]; const size_t batchStep = _batchStep * _dataTypeSize; const size_t dataStep = _blockSize * _dataTypeSize; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp index f41a57730a57cc..323b2227bb9d04 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp @@ -95,7 +95,7 @@ void MKLDNNGatherNode::createPrimitive() { const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getShape().getStaticDims(); const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getShape().getStaticDims(); const SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); - dataSize = getParentEdgeAt(GATHER_DATA)->getMemory().GetDesc().getPrecision().size(); + dataSize = getParentEdgeAt(GATHER_DATA)->getMemory().getDesc().getPrecision().size(); indexRange = srcDims[axis]; batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, 1, std::multiplies()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp index 89fb6c08167f68..678556e5fbbe9f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp @@ -85,7 +85,7 @@ void MKLDNNGatherTreeNode::gatherTreeKernel() noexcept { const auto *step_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_STEP_IDX)->getMemoryPtr()->GetPtr()); const auto * const parent_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemoryPtr()->GetPtr()); const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getElementsCount() - - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetDescWithType().getOffsetPadding(); + - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetDescWithType().getOffsetPadding(); const auto *max_seq_len = reinterpret_cast(getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getMemoryPtr()->GetPtr()); auto end_token = (reinterpret_cast(getParentEdgeAt(GATHER_TREE_END_TOKEN)->getMemoryPtr()->GetPtr()))[0]; auto * final_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp index ef87345daae9a1..56655cf6ca4d19 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp @@ -8,7 +8,8 @@ #include #include #include -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -30,13 +31,13 @@ NodeConfig MKLDNNGenericNode::convertLayerToNodeConfig(const InferenceEngine::La for (size_t i = 0; i < layerConfig.inConfs.size(); i++) { config.inConfs[i].inPlace = layerConfig.inConfs[i].inPlace; config.inConfs[i].constant = layerConfig.inConfs[i].constant; - config.inConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.inConfs[i].desc).clone(); + config.inConfs[i].desc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(layerConfig.inConfs[i].desc)->clone(); } config.outConfs.resize(layerConfig.outConfs.size()); for (size_t i = 0; i < layerConfig.outConfs.size(); i++) { config.outConfs[i].inPlace = layerConfig.outConfs[i].inPlace; config.outConfs[i].constant = layerConfig.outConfs[i].constant; - config.outConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.outConfs[i].desc).clone(); + config.outConfs[i].desc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(layerConfig.outConfs[i].desc)->clone(); } return config; } @@ -177,7 +178,7 @@ void MKLDNNGenericNode::execLayer() { for (size_t i = 0; i < outputShapes.size(); i++) { if (isDynBatch) { auto out_edge = getChildEdgesAtPort(i)[0]; - auto td = MemoryDescUtils::convertToTensorDesc(out_edge->getMemory().GetDesc()); + auto td = MemoryDescUtils::convertToTensorDesc(out_edge->getMemory().getDesc()); td.setDims(execOutputShapes[i]); outputs.push_back(make_blob_with_precision(td, out_edge->getMemory().GetData())); } else { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp index ad5b3111f8e107..b6f8a046c71c0e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp @@ -20,6 +20,7 @@ #include "common/cpu_convert.h" #include "utils/cpu_utils.hpp" #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -246,10 +247,10 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const } void MKLDNNInputNode::cloneBlobIfRequired() { - std::vector dims(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape()); + Shape shape(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape()); const auto prec = convertPrecision(constOp->get_element_type()); - const size_t size = dims.size(); - MKLDNNMemoryDesc memDesc(dims, MKLDNNExtensionUtils::IEPrecisionToDataType(prec)); + const size_t size = shape.getRank(); + DnnlBlockedMemoryDesc memDesc(prec, shape); auto cloneBlob = [&, this] () { MKLDNNMemory memory{ getEngine() }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h index 172a84c53f0249..a20782e96ca84d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h @@ -25,7 +25,6 @@ class MKLDNNInputNode : public MKLDNNNode { void withMeanImage(); MKLDNNMemoryCPtr getMemoryPtr() const; - void resetOutputShape() override {} void executeDynamicImpl(mkldnn::stream strm) override {} private: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index df6e4930b54c73..da4a196207b4ed 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -1916,23 +1916,23 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { auto axesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); auto pushDesc = [&](memory::format_tag dataFormat, impl_desc_type implDetail) { - config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), + config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), inputDataType, dataFormat); - config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(TARGET_SHAPE_ID)->getShape().getStaticDims(), + config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(TARGET_SHAPE_ID)->getShape().getStaticDims(), targetShapeType, memory::format_tag::x); - config.inConfs[SCALES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(SCALES_ID)->getShape().getStaticDims(), scalesType, + config.inConfs[SCALES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(SCALES_ID)->getShape().getStaticDims(), scalesType, memory::format_tag::x); if (isAxesSpecified) - config.inConfs[AXES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES_ID)->getShape().getStaticDims(), axesType, + config.inConfs[AXES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES_ID)->getShape().getStaticDims(), axesType, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, dataFormat); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, dataFormat); supportedPrimitiveDescriptors.push_back({config, implDetail}); }; auto channels = getParentEdgeAt(DATA_ID)->getShape().getRank() > 1 ? getParentEdgeAt(DATA_ID)->getShape().getStaticDims()[1] : 1; if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), ref); + pushDesc(MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), ref); } else { // blk and by_channel JIT kernel on sse41 or above machine if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 4) { @@ -1967,7 +1967,7 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 2.JIT kernel for f32 && avx2(gather).(with fuse) if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), jit_avx2); + pushDesc(MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), jit_avx2); } } } @@ -2011,10 +2011,10 @@ void MKLDNNInterpolateNode::createPrimitive() { jcp.ID = srcDimPad5d[2]; jcp.spatial_dim_size = spatialDimSize; - if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { + if (getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) { jcp.layout = InterpolateLayoutType::planar; - } else if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c)) { + } else if (getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c)) { jcp.layout = InterpolateLayoutType::block; } else { jcp.layout = InterpolateLayoutType::by_channel; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp index b107fca78343b2..90acfde2b07ec0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp @@ -6,7 +6,8 @@ #include #include #include -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -93,18 +94,16 @@ void MKLDNNLrnNode::getSupportedDescriptors() { const auto parentStaticDims = parentShape.getStaticDims(); for (auto format : getAvailableFormatsForDims(parentShape)) { - auto in_candidate = MKLDNNPlugin::make_unique(parentStaticDims, inputDataType, format); + auto in_candidate = MKLDNNPlugin::make_unique(parentStaticDims, inputDataType, format); createDescriptor({in_candidate.get()}, {}); } } -std::unique_ptr MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::unique_ptr MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx > 0) { - return MKLDNNPlugin::make_unique(getParentEdgeAt(idx)->getShape().getStaticDims(), - MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[idx]), - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); + return MKLDNNPlugin::make_unique(getOriginalInputPrecisionAtPort(idx), getParentEdgeAt(idx)->getShape()); } else { - return MKLDNNNode::getSrcMemDesc(primitive_desc_it, idx); + return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(idx)); } } @@ -129,7 +128,7 @@ void MKLDNNLrnNode::createDescriptor(const std::vector &input const std::vector &outputDesc) { mkldnn::algorithm alg = isAcrossMaps ? mkldnn::algorithm::lrn_across_channels : mkldnn::algorithm::lrn_within_channel; MKLDNNDescriptor desc(std::shared_ptr( - new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), + new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, MemoryDescUtils::convertToDnnlMemoryDesc(*inputDesc[0]), size, alpha, beta, k))); descs.push_back(desc); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h index 295d16b369c191..dd14e461342495 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h @@ -22,7 +22,7 @@ class MKLDNNLrnNode : public MKLDNNNode { size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index db53e77c79a4f3..3107762a8ca718 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -142,7 +142,7 @@ void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNPlugin::make_unique(dims, dataType, MKLDNNMemory::GetPlainFormatByRank(dims.size())); + dataConfig.desc = MKLDNNPlugin::make_unique(dims, dataType, MKLDNNExtensionUtils::GetPlainFormatByRank(dims.size())); return dataConfig; }; @@ -207,7 +207,7 @@ void MKLDNNMatMulNode::createPrimitive() { params.shift1 = params.M * params.N * params.MB2; params.shift2 = params.M * params.N; - runtimePrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); + runtimePrecision = getParentEdgeAt(0)->getMemory().getDesc().getPrecision(); } inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const float *A, int lda, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp index 3218bc54eb0300..4ed029940287fd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp @@ -8,6 +8,7 @@ #include "mkldnn_memory_node.hpp" #include "common/cpu_memcpy.h" #include "utils/general_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -59,14 +60,12 @@ void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(precision, getParentEdgeAt(0)->getShape()); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -106,7 +105,7 @@ MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const std::shared_ptr void MKLDNNMemoryInputNode::createPrimitive() { MKLDNNInputNode::createPrimitive(); - dataStore->Create(getChildEdgeAt(0)->getMemory().GetDesc()); + dataStore->Create(getChildEdgeAt(0)->getMemory().getDesc()); // default memory state is zero filled dataStore->FillZero(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp index cd3eb8df3d6ef0..4394dfadfd8a76 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp @@ -146,8 +146,8 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { int* selected_num = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr()); - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); if ((nms_eta >= 0) && (nms_eta < 1)) { nmsWithEta(boxes, scores, boxesStrides, scoresStrides); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index f476aa8dec5231..4359f2617fd79b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -23,6 +23,7 @@ #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -742,14 +743,14 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 0 : -1; if (inputsNum == 2) { - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::s32, - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(1)->getShape().getRank())); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape(), memory::data_type::s32, + MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(1)->getShape().getRank())); config.inConfs[1].constant = true; } auto pushDesc = [&](memory::format_tag format, impl_desc_type impl_type) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape(), inputDataType, format); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape(), outputDataType, format); supportedPrimitiveDescriptors.push_back({config, impl_type}); }; @@ -790,7 +791,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { // planar if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), impl_type); + pushDesc(MKLDNNExtensionUtils::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), impl_type); } void MKLDNNMVNNode::createPrimitive() { @@ -916,7 +917,7 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) { IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform."; } - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) { mvn_pln(src_data, dst_data, dim); } else { mvn_blk(src_data, dst_data, dim); @@ -1170,7 +1171,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si size_t N = 1; size_t C = 1; size_t D = 1; size_t H = 1; size_t W = 1; std::tie(N, C, D, H, W) = shape5D; - bool is_nhwc = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); + bool is_nhwc = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc); size_t CB = div_up(C, blk_size); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp index b89dd896d075ff..d4948c2875a85b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp @@ -159,8 +159,8 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { scale = -0.5f / soft_nms_sigma; } - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); std::vector filtBoxes(max_output_boxes_per_class * num_batches * num_classes); @@ -197,7 +197,7 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { auto indicesMemPtr = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr(); auto scoresMemPtr = getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr(); - auto& maxOutputDims = indicesMemPtr->GetDesc().getShape().getMaxDims(); + auto& maxOutputDims = indicesMemPtr->getDesc().getShape().getMaxDims(); const size_t selectedBoxesNum = maxOutputDims[0]; const size_t validOutputs = std::min(filtBoxes.size(), selectedBoxesNum); @@ -206,7 +206,7 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { indicesMemPtr->redefineDesc(getOutputMemDescAtPort(NMS_SELECTEDINDICES)->cloneWithNewDims(newDims)); scoresMemPtr->redefineDesc(getOutputMemDescAtPort(NMS_SELECTEDSCORES)->cloneWithNewDims(newDims)); - int selectedIndicesStride = indicesMemPtr->GetDescWithType().getStrides()[0]; + int selectedIndicesStride = indicesMemPtr->GetDescWithType().getStrides()[0]; int *selectedIndicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); float *selectedScoresPtr = reinterpret_cast(scoresMemPtr->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index 10b1be0dac532d..da515f15597b3b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -21,6 +21,7 @@ #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -766,10 +767,10 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = canBeInplace ? 0 : -1; auto pushDesc = [&](memory::format_tag format) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), inputDataType, format); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES)->getShape().getStaticDims(), memory::data_type::s32, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape(), inputDataType, format); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES)->getShape(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), outputDataType, format); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape(), outputDataType, format); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; @@ -786,7 +787,7 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { } if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(DATA)->getShape().getRank())); + pushDesc(MKLDNNExtensionUtils::GetPlainFormatByRank(getChildEdgeAt(DATA)->getShape().getRank())); } bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const { @@ -833,10 +834,10 @@ void MKLDNNNormalizeL2Node::createPrimitive() { jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.is_nchw = jcp.is_nhwc = jcp.is_blk = false; - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) { jcp.is_nchw = true; - } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { + } else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c)) { jcp.is_blk = true; } else { jcp.is_nhwc = true; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp index 350e86e556e85d..9e8dab056e2e7a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp @@ -131,7 +131,7 @@ void MKLDNNOneHotNode::execute(mkldnn::stream strm) { for (size_t i = 0; i < actual_axis; ++i) prefix_size *= input_dims[i]; - std::size_t suffix_size = getParentEdgeAt(0)->getShape().getElementsCount() / prefix_size; + std::size_t suffix_size = getParentEdgeAt(0)->getShape().GetShape().getElementsCount() / prefix_size; OneHotContext ctx = {this, prefix_size, suffix_size}; OV_SWITCH(MKLDNNPlugin, OneHotExecute, ctx, output_precision.size(), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp index 584eb4bce79051..822f7e27444da7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp @@ -131,16 +131,16 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { config.outConfs.resize(1); auto pushSupportedPrimitiveDescriptor = [&](memory::format_tag memoryFormat) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, memoryFormat); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_BEGIN_ID)->getShape().getStaticDims(), + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_BEGIN_ID)->getShape().getStaticDims(), memory::data_type::s32, memory::format_tag::x); - config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_END_ID)->getShape().getStaticDims(), + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_END_ID)->getShape().getStaticDims(), memory::data_type::s32, memory::format_tag::x); if (isPadValueSpecified) - config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PAD_VALUE_ID)->getShape().getStaticDims(), + config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PAD_VALUE_ID)->getShape().getStaticDims(), memory::data_type::f32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, memoryFormat); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, memoryFormat); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); }; @@ -149,7 +149,7 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { else if (numOfDims == 5) pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::ndhwc); - pushSupportedPrimitiveDescriptor(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); + pushSupportedPrimitiveDescriptor(MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); auto canUseBlocked = [=](const size_t blockSize) { return (padMode == CONSTANT && padsBegin[1] % blockSize == 0 && padsEnd[1] % blockSize == 0) || @@ -181,9 +181,9 @@ void MKLDNNPadNode::createPrimitive() { params.sizeData = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); - const auto inBlkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + const auto inBlkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); params.srcDims = inBlkDesc.getBlockDims(); - params.dstDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + params.dstDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t nDims = params.srcDims.size(); params.srcStrides.resize(nDims, 1); @@ -193,8 +193,8 @@ void MKLDNNPadNode::createPrimitive() { params.dstStrides[i] = params.dstStrides[i + 1] * params.dstDims[i + 1]; } - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c)) { padsBegin[1] /= params.srcDims[params.srcDims.size() - 1]; padsEnd[1] /= params.srcDims[params.srcDims.size() - 1]; padsBegin.push_back(0); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index 5d6e900d75d9fd..8493399a0fba08 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -133,16 +133,16 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { if (outputDataType == memory::data_type::bf16) outputDataType = memory::data_type::f32; // i8 layers supports only ndhwc and nhwc layouts - const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); - const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else if ((inputRank == 4 || inputRank == 5) && parentDims[1] == 1) { // WA. We should force planar layout since it provides better performance - const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? memory::format_tag::ncdhw : memory::format_tag::nchw); - const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? memory::format_tag::ncdhw : memory::format_tag::nchw); createDescriptor({ in_candidate.get() }, { out_candidate.get() }); } else { @@ -152,8 +152,8 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { } // It doesn't support any format for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { - const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, format); - const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, format); + const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, format); + const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, format); createDescriptor({in_candidate.get()}, {out_candidate.get()}); } } @@ -181,8 +181,8 @@ bool MKLDNNPoolingNode::created() const { void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); - MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); + DnnlMemoryDesc in_candidate = MemoryDescUtils::convertToDnnlMemoryDesc(*inputDesc[0]); + DnnlMemoryDesc out_candidate = MemoryDescUtils::convertToDnnlMemoryDesc(*outputDesc[0]); mkldnn::algorithm alg; if (algorithm == PoolingAvg) { @@ -253,8 +253,12 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); - dataConfig.desc = getSrcMemDesc(itpd, i); + auto desc = getSrcMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked) { + dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + dataConfig.desc = std::move(desc); + } config.inConfs.push_back(dataConfig); } @@ -262,8 +266,12 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = canBeInPlace() ? 0 : -1; dataConfig.constant = false; - dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); - dataConfig.desc = getDstMemDesc(itpd, i); + auto desc = getDstMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked) { + dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + dataConfig.desc = std::move(desc); + } config.outConfs.push_back(dataConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp index e56d6d2c245563..c0a3b015793de1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp @@ -182,7 +182,7 @@ inline float bilinearInterp(const inputType* data, const float x, const float y, return value; } -void MKLDNNPSROIPoolingNode::unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, +void MKLDNNPSROIPoolingNode::unpackParams(const CpuBlockedMemoryDesc& srcDesc, const CpuBlockedMemoryDesc& dstDesc, int& hInputStride, int& wInputStride, int& hOutputStride, int& wOutputStride, int& inBlockSize, int& outBlockSize, @@ -227,7 +227,7 @@ void MKLDNNPSROIPoolingNode::unpackParams(const BlockedMemoryDesc& srcDesc, cons template void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, - const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { + const CpuBlockedMemoryDesc& srcDesc, const CpuBlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; unsigned long inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, @@ -310,7 +310,7 @@ void MKLDNNPSROIPoolingNode::executeAverage(const inputType *srcData, outputType template void MKLDNNPSROIPoolingNode::executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, const int currentRoi, const int roiBatchInd, - const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc) { + const CpuBlockedMemoryDesc& srcDesc, const CpuBlockedMemoryDesc& dstDesc) { int inBlockSize, outBlockSize, outBlockCount, hInputStride, wInputStride, hOutputStride, wOutputStride; unsigned long inputChannelsPadding, outputChannelsPadding; unpackParams(srcDesc, dstDesc, hInputStride, wInputStride, hOutputStride, wOutputStride, @@ -479,8 +479,8 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { const auto *bottomRoisBeginning = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); int realRois = 0; for (; realRois < nn; realRois++) { @@ -533,8 +533,8 @@ struct MKLDNNPSROIPoolingNode::PSROIPoolingExecute { }; void MKLDNNPSROIPoolingNode::execute(mkldnn::stream strm) { - auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); - auto outputPrec = getChildEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); + auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); + auto outputPrec = getChildEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); if (!((inputPrec == Precision::BF16 && outputPrec == Precision::BF16) || (inputPrec == Precision::FP32 && outputPrec == Precision::FP32))) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h index 45f275fe1ddff5..c74606be2ec4a8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h @@ -50,7 +50,7 @@ class MKLDNNPSROIPoolingNode : public MKLDNNNode { std::string errorPrefix; - void unpackParams(const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc, + void unpackParams(const CpuBlockedMemoryDesc& srcDesc, const CpuBlockedMemoryDesc& dstDesc, int& hInputStride, int& wInputStride, int& hOutputStride, int& wOutputStride, int& inBlockSize, int& outBlockSize, @@ -60,12 +60,12 @@ class MKLDNNPSROIPoolingNode : public MKLDNNNode { template void executeAverage(const inputType *srcData, outputType *dstData, const float *bottomRois, const int n, const int roiBatchInd, - const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc); + const CpuBlockedMemoryDesc& srcDesc, const CpuBlockedMemoryDesc& dstDesc); template void executeBilinear(const inputType *srcData, outputType *dstData, const float *bottomRois, const int currentRoi, const int roiBatchInd, - const BlockedMemoryDesc& srcDesc, const BlockedMemoryDesc& dstDesc); + const CpuBlockedMemoryDesc& srcDesc, const CpuBlockedMemoryDesc& dstDesc); template void executeBilinearDeformable(const inputType *srcData, outputType *dstData, const float *bottomRois, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp index 86818d36140967..ad12e8cb6ebcf4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp @@ -92,7 +92,7 @@ void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { void MKLDNNRangeNode::execute(mkldnn::stream strm) { StatusCode retcode = OK; - switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision()) { + switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision()) { case Precision::FP32: retcode = rangeKernel(); break; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp index c76156ec4ae464..581998b7eae398 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp @@ -1474,11 +1474,11 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag outFormat, memory::data_type inDataType, memory::data_type outDataType, impl_desc_type impl_type) { - config.inConfs[REDUCE_DATA].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(), + config.inConfs[REDUCE_DATA].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(), inDataType, inFormat); - config.inConfs[REDUCE_INDEXES].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims(), + config.inConfs[REDUCE_INDEXES].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outDataType, outFormat); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outDataType, outFormat); supportedPrimitiveDescriptors.push_back({config, impl_type}); }; @@ -1490,8 +1490,8 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::jit_avx2; } - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), inputDataType, outputDataType, impl_type); + pushDesc(MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), + MKLDNNExtensionUtils::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), inputDataType, outputDataType, impl_type); if (keep_dims) { if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 4 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { if (mayiuse(cpu::x64::avx512_common)) { @@ -1508,8 +1508,8 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { } } } else { - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), + pushDesc(MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), + MKLDNNExtensionUtils::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), memory::data_type::f32, memory::data_type::f32, impl_desc_type::ref); } } @@ -1526,7 +1526,7 @@ void MKLDNNReduceNode::createPrimitive() { IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor"; auto selectedPD = getSelectedPrimitiveDescriptor(); - planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp); + planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp); auto jcp = jit_reduce_config_params(); jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].desc->getPrecision()); @@ -1567,7 +1567,7 @@ void MKLDNNReduceNode::execute(mkldnn::stream strm) { const auto idx_data = reinterpret_cast(srcIndexesMemPtr->GetData()); size_t dst_size = dstMemPtr->GetSize(); src_dims = getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(); - src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDescWithType().getStrides(); + src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDescWithType().getStrides(); dims_size = src_dims.size(); calc_process_dst_dims(idx_data); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp index 336d334219ecaa..706d2fd69e9550 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp @@ -59,7 +59,7 @@ void MKLDNNReferenceNode::createPrimitive() {} std::vector> MKLDNNReferenceNode::shapeInfer() const { for (size_t i = 0; i < opToShapeInfer->get_input_size(); i++) { opToShapeInfer->get_input_tensor(i).set_partial_shape( - getParentEdgesAtPort(i)[0]->getMemory().GetDesc().getShape().toPartialShape()); + getParentEdgesAtPort(i)[0]->getMemory().getDesc().getShape().toPartialShape()); } opToShapeInfer->validate_and_infer_types(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp index c140baa88c533c..c9283acebba149 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp @@ -388,8 +388,8 @@ void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { output_size = B * IH * IW * mask_size * (classes + coords + 1); } - if (output_size != getChildEdgeAt(0)->getMemoryPtr()->GetElementsCount()) - IE_THROW() << "Incorrect layer configuration or output dimensions. " << output_size << " != " << getChildEdgeAt(0)->getMemoryPtr()->GetElementsCount(); + if (output_size != getChildEdgeAt(0)->getMemoryPtr()->GetShape().getElementsCount()) + IE_THROW() << "Incorrect layer configuration or output dimensions. " << output_size << " != " << getChildEdgeAt(0)->getMemoryPtr()->GetShape().getElementsCount(); size_t inputs_size = IH * IW * num_ * (classes + coords + 1); size_t total_size = 2 * IH * IW; @@ -397,8 +397,8 @@ void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - cpu_convert(src_data, dst_data, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), - getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(), output_size); + cpu_convert(src_data, dst_data, getParentEdgeAt(0)->getMemory().getDesc().getPrecision(), + getChildEdgeAt(0)->getMemory().getDesc().getPrecision(), output_size); for (int b = 0; b < B; b++) { for (int n = 0; n < num_; n++) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index c1701deacdeeb4..6b58253c9a6621 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -11,6 +11,9 @@ #include "ie_parallel.hpp" #include "utils/general_utils.h" #include +#include "nodes/common/cpu_memcpy.h" +#include "nodes/common/cpu_convert.h" +#include "mkldnn_convert_node.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -77,37 +80,38 @@ void MKLDNNReorderNode::createPrimitive() { auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (!isOptimized) { + const auto &parentMem = getParentEdgeAt(0)->getMemory(); if (MKLDNNPlugin::one_of(inDims.size(), 4, 5) && inDims[1] <= 64 && inDims[1] >= 16 && - (getParentEdgeAt(0)->getMemory().GetElementsCount() / inDims[1]) >= 128 && - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && - getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && - getParentEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32 && - getChildEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32) { + (parentMem.getDesc().hasLayoutType(LayoutType::nspc) && + parentMem.GetDescWithType()->getPaddedElementsCount() / inDims[1]) >= 128 && + getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) && + parentMem.getDesc().getPrecision() == Precision::FP32 && + getChildEdgeAt(0)->getMemory().getDesc().getPrecision() == Precision::FP32) { // oneDNN JIT reorder shows bad perf for nspc to ncsp reorder case so we fallback on simple c++ implementation canUseOptimizedNspc2Ncsp = true; } else if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) && MKLDNNPlugin::one_of(inDims.size(), 4, 5) && - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && - getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && - getParentEdgeAt(0)->getMemory().GetDataType() == getChildEdgeAt(0)->getMemory().GetDataType() && - MKLDNNExtensionUtils::sizeOfDataType(getParentEdgeAt(0)->getMemory().GetDataType()) == 1) { + parentMem.getDesc().hasLayoutType(LayoutType::ncsp) && + getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc) && + parentMem.GetDataType() == getChildEdgeAt(0)->getMemory().GetDataType() && + MKLDNNExtensionUtils::sizeOfDataType(parentMem.GetDataType()) == 1) { // oneDNN doesn't provide JIT reorder impl for non-avx2 targets so we fallback on simple c++ implementation which shows better perf canUseOptimizedNcsp2Nspc = true; } else { - createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(), - dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle()); + createReorderPrimitive(srcMemPtr->GetDescWithType()->getDnnlDesc(), srcMemPtr->GetPrimitive().get_data_handle(), + dstMemPtr->GetDescWithType()->getDnnlDesc(), dstMemPtr->GetPrimitive().get_data_handle()); } } } void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr) { src_blocked = std::make_shared(getEngine()); - src_blocked->Create(MKLDNNMemoryDesc(srcDesc), srcPtr, false); + src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); dst_blocked = std::make_shared(getEngine()); - dst_blocked->Create(MKLDNNMemoryDesc(dstDesc), dstPtr, false); + dst_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); mkldnn::primitive_attr attr; auto createReorder = [&]() -> bool { @@ -137,13 +141,13 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe // perform such conversion if the source tensor can be reshaped to the destination rank. This is // useful in situations when rank in IR does not much rank that is required by the oneDNN primitive, // but the input tensor can be reshaped (e.g. weights for grouped convolutions, biases etc.) - if (src_blocked->GetDesc().hasLayoutType(LayoutType::ncsp) && - src_blocked->GetDims().size() != dst_blocked->GetDims().size()) { - const auto newDims = dst_blocked->GetDims(); - const auto newFormat = MKLDNNMemory::GetPlainFormatByRank(newDims.size()); + if (src_blocked->getDesc().hasLayoutType(LayoutType::ncsp) && + src_blocked->GetShape().getRank() != dst_blocked->GetShape().getRank()) { + const auto newDims = dst_blocked->getStaticDims(); + const auto newFormat = MKLDNNExtensionUtils::GetPlainFormatByRank(newDims.size()); - auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat); - src_blocked->Create(MKLDNNMemoryDesc(newDesc), srcPtr, false); + auto newDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(newDims), src_blocked->GetDataType(), newFormat); + src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(newDesc), srcPtr, false); success = createReorder(); } @@ -247,8 +251,8 @@ void MKLDNNReorderNode::setDynamicBatchLim(int lim) { if (prim) { auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - memory::desc src_d = srcMemPtr->GetDescriptor(); - memory::desc dst_d = dstMemPtr->GetDescriptor(); + memory::desc src_d = srcMemPtr->GetDescWithType()->getDnnlDesc(); + memory::desc dst_d = dstMemPtr->GetDescWithType()->getDnnlDesc(); void *src_data_hdl = srcMemPtr->GetPrimitive().get_data_handle(); void *dst_data_hdl = dstMemPtr->GetPrimitive().get_data_handle(); @@ -277,4 +281,54 @@ std::string MKLDNNReorderNode::getReorderArgs(const MemoryDesc &parentDesc, cons return inArgs + "_" + outArgs; } +void MKLDNNReorderNode::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) { + if (size != 0) + IE_ASSERT(size <= output.GetSize()); + if (input.getDesc().isCompatible(output.getDesc())) { + auto srcPtr = static_cast(input.GetPtr()); + auto dstPtr = static_cast(output.GetPtr()); + + auto copySize = size == 0 ? output.GetSize() : size; + cpu_memcpy(dstPtr, srcPtr, copySize); + } else { + std::unique_ptr pReorder; + std::shared_ptr srcMemoryPtr; + std::vector tmpBuff; + + try { + pReorder = std::unique_ptr(new mkldnn::reorder(input.GetPrimitive(), output.GetPrimitive())); + srcMemoryPtr = input.GetPrimitivePtr(); + } + catch (const mkldnn::error& err) { + if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType() && MKLDNNConvertNode::isSupportedDesc(input.getDesc()) && + MKLDNNConvertNode::isSupportedDesc(output.getDesc())) { + //we probably could not make the reorder because there is no one supporting this precision conversion + //lets try to convert data first using cpu_convert + auto data = static_cast(input.GetPtr()); + tmpBuff.resize(input.GetSize()); + + const auto outPrc = MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()); + cpu_convert(data, tmpBuff.data(), MKLDNNExtensionUtils::DataTypeToIEPrecision(input.GetDataType()), + outPrc, input.GetSize() / input.getDesc().getPrecision().size()); + + MKLDNNMemory tmpMem(output.getEngine()); + auto tmpDesc = input.getDesc().clone(); + tmpDesc->setPrecision(outPrc); + tmpMem.Create(std::move(tmpDesc), tmpBuff.data()); + + pReorder = std::unique_ptr(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive())); + srcMemoryPtr = tmpMem.GetPrimitivePtr(); + } else { + throw; + } + } + if (pReorder) { + mkldnn::stream loc_stream(output.getEngine(), mkldnn::stream::flags::default_order); + pReorder->execute(loc_stream, *srcMemoryPtr, *output.GetPrimitivePtr()); + } else { + IE_THROW() << "Could not make mkldnn reorder."; + } + } +} + REG_MKLDNN_PRIM_FOR(MKLDNNReorderNode, Reorder); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h index da821878035e37..7a5e3ac4c8286e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h @@ -50,6 +50,8 @@ class MKLDNNReorderNode : public MKLDNNNode { static std::string getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc); + static void reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size = 0); + private: std::unique_ptr input; std::unique_ptr output; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp index f4256042379500..57abf379a0e5c1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp @@ -50,12 +50,12 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { for (size_t i = 0; i (getParentEdgeAt(i)->getShape().getStaticDims(), inputDataType); + config.inConfs[i].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(i)->getShape().getStaticDims(), inputDataType); } config.outConfs.resize(1); config.outConfs[0].inPlace = 0; config.outConfs[0].constant = false; - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp index ffa831a670db77..ea4a553aa9486a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp @@ -94,7 +94,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { const float *src_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_DATA)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision()) { + switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().getDesc().getPrecision()) { case Precision::FP32: { float *seq_lengths_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemoryPtr()->GetPtr()); for (i = 0; i < src_dims[batch_axis]; i++) { @@ -169,7 +169,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { break; default: IE_THROW() << "ReverseSequence layer does not support " - << getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision() << " precision"; + << getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().getDesc().getPrecision() << " precision"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp index 6e9086274d5a7d..451a6064e7c6f6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp @@ -299,17 +299,17 @@ void MKLDNNRNN::fillCellDesc() { out_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); - w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); + w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); + w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); // Add 5th input - w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); + w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); // Expected shapes std::vector D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; - std::vector in_candidate, out_candidate; + std::vector in_candidate, out_candidate; in_candidate.reserve(6); in_candidate.emplace_back(D_shape, dataType, memory::format_tag::nc); @@ -328,8 +328,8 @@ void MKLDNNRNN::fillCellDesc() { std::vector in_candidate_ptrs(in_candidate.size()); std::vector out_candidate_ptrs(out_candidate.size()); - std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); - std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const DnnlMemoryDesc& item) { return &item; }); + std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const DnnlMemoryDesc& item) { return &item; }); createDescriptor(in_candidate_ptrs, out_candidate_ptrs); } @@ -400,14 +400,14 @@ void MKLDNNRNN::fillSeqDesc() { out_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); - w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); + w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); + w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); - w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); + w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); - std::vector in_candidate; + std::vector in_candidate; in_candidate.reserve(7); if (nativeOrder) @@ -426,7 +426,7 @@ void MKLDNNRNN::fillSeqDesc() { in_candidate.emplace_back(std::vector{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc); // R in_candidate.emplace_back(std::vector{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc); // B - std::vector out_candidate; + std::vector out_candidate; out_candidate.reserve(3); if (nativeOrder) { @@ -444,8 +444,8 @@ void MKLDNNRNN::fillSeqDesc() { std::vector in_candidate_ptrs(in_candidate.size()); std::vector out_candidate_ptrs(out_candidate.size()); - std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); - std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const DnnlMemoryDesc& item) { return &item; }); + std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const DnnlMemoryDesc& item) { return &item; }); createDescriptor(in_candidate_ptrs, out_candidate_ptrs); } @@ -470,8 +470,8 @@ void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t w_state_mem->Create(*w_state_d); internalBlobMemory.push_back(w_state_mem); - const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getShape().getElementsCount(); - const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getShape().getElementsCount(); + const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getShape().GetShape().getElementsCount(); + const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getShape().GetShape().getElementsCount(); auto *wInputNode = dynamic_cast(getParentEdgesAtPort(wIdx)[0]->getParent().get()); auto wConstBlob = wInputNode->getMemoryPtr(); @@ -526,7 +526,7 @@ void MKLDNNRNN::fillBiases(const int *gate_map) { auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); auto constBlob = constInputNode->getMemoryPtr(); - auto const elementsCount = constBlob->GetElementsCount(); + auto const elementsCount = constBlob->GetShape().getElementsCount(); std::vector ie_b_vec(elementsCount); cpu_convert(constBlob->GetPtr(), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index 9e47637235f583..cc0ec058a66527 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -44,7 +44,7 @@ class MKLDNNRNN : public MKLDNNNode { void copyWeightsData(); private: - using MKLDNNMemoryDescPtr = std::unique_ptr; + using DnnlMemoryDescPtr = std::unique_ptr; InferenceEngine::Precision runtimePrecision; /** Specify mode Cell or Seq. true - Cell, false - Seq */ @@ -73,8 +73,8 @@ class MKLDNNRNN : public MKLDNNNode { const size_t L = 1; /**< What is it??. Constant for mkldnn impl */ const size_t D = 1; /**< Num of direction. 1 or 2 */ - std::vector in_data_d; - std::vector out_data_d; + std::vector in_data_d; + std::vector out_data_d; enum RNNInOutKind { Layer = 0, @@ -82,9 +82,9 @@ class MKLDNNRNN : public MKLDNNNode { CellState = 2 }; - MKLDNNMemoryDescPtr w_data_d; - MKLDNNMemoryDescPtr w_state_d; - MKLDNNMemoryDescPtr w_bias_d; + DnnlMemoryDescPtr w_data_d; + DnnlMemoryDescPtr w_state_d; + DnnlMemoryDescPtr w_bias_d; std::vector in_data_dims; std::vector out_data_dims; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp index 0517350e09c6c1..23b738a0c304d8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp @@ -129,12 +129,12 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { }; for (auto fmts : supportedFormats) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, fmts.first); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, fmts.first); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, memory::format_tag::nc); - config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::s32, + config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, fmts.second); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, fmts.second); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } @@ -180,8 +180,8 @@ void MKLDNNROIAlignNode::executeSpecified() { auto dstBlockDesc = dstMemory.GetDescriptor().data.format_desc.blocking; int blockSize = srcBlockDesc.inner_nblks > 0 ? srcBlockDesc.inner_blks[0] : 1; - auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); - auto isNhwcFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); + auto isPlainFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::ncsp); + auto isNhwcFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::nspc); const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index 23fd252ae2ba38..8b7df5b68672c1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -413,9 +413,9 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), dataType, format); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), dataType, memory::format_tag::nc); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, format); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), dataType, format); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), dataType, memory::format_tag::nc); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, format); supportedPrimitiveDescriptors.push_back({config, impl_type}); } @@ -481,9 +481,9 @@ void MKLDNNROIPoolingNode::execute() { IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto src_strides = srcMemory0.GetDescWithType().getStrides(); - auto dst_strides = dstMemory.GetDescWithType().getStrides(); - size_t src_roi_step = srcMemory1.GetDescWithType().getStrides()[0]; + auto src_strides = srcMemory0.GetDescWithType().getStrides(); + auto dst_strides = dstMemory.GetDescWithType().getStrides(); + size_t src_roi_step = srcMemory1.GetDescWithType().getStrides()[0]; int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking); int MB = jpp.mb; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp index 410051c7be4b78..1196faa08ec534 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp @@ -101,7 +101,7 @@ void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNPlugin::make_unique(dims.getStaticDims(), dataType, MKLDNNMemory::GetPlainFormatByRank(dims.getRank())); + dataConfig.desc = MKLDNNPlugin::make_unique(dims.getStaticDims(), dataType, MKLDNNExtensionUtils::GetPlainFormatByRank(dims.getRank())); return dataConfig; }; @@ -116,7 +116,7 @@ void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { void MKLDNNRollNode::execute(mkldnn::stream strm) { - const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().GetDesc().getPrecision(); + const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().getDesc().getPrecision(); const auto& dataTypeSize = dataPrecision.size(); switch (dataTypeSize) { case sizeof(PrecisionTrait::value_type): { @@ -170,7 +170,7 @@ void MKLDNNRollNode::rollImpl() { const size_t elementSize = sizeof(DataType); const size_t nIterations = totalElements / blockSize; - const auto strides = dataEdge->getMemory().GetDescWithType().getStrides(); + const auto strides = dataEdge->getMemory().GetDescWithType().getStrides(); parallel_for(nIterations, [&](size_t iter) { size_t start = iter * blockSize; size_t leftBlockStartOffset = start; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp index af7b36dd7f361b..7c1c77e1b493f0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp @@ -201,22 +201,22 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { } auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag idxFormat, memory::format_tag updateFormat, memory::format_tag outFormat) { - config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, inFormat); - config.inConfs[INDICES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(), indicesType, + config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, inFormat); + config.inConfs[INDICES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(), indicesType, idxFormat); - config.inConfs[UPDATE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(), dataType, + config.inConfs[UPDATE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(), dataType, updateFormat); if (axisRelaxed) - config.inConfs[AXIS_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXIS_ID)->getShape().getStaticDims(), + config.inConfs[AXIS_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXIS_ID)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec), memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, outFormat); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, outFormat); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(INDICES_ID)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(UPDATE_ID)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank())); + pushDesc(MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), + MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(INDICES_ID)->getShape().getRank()), + MKLDNNExtensionUtils::GetPlainFormatByRank(getParentEdgeAt(UPDATE_ID)->getShape().getRank()), + MKLDNNExtensionUtils::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank())); } void MKLDNNScatterUpdateNode::createPrimitive() { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp index 093ee7e82557b4..bdd2ef63ab5100 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp @@ -180,8 +180,8 @@ void MKLDNNSelectNode::execute_impl() { } void MKLDNNSelectNode::execute(mkldnn::stream strm) { - const size_t condPrecSize = getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().size(); - const size_t inputsPrecSize = getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().size(); + const size_t condPrecSize = getParentEdgeAt(CONDITION)->getMemory().getDesc().getPrecision().size(); + const size_t inputsPrecSize = getParentEdgeAt(THEN)->getMemory().getDesc().getPrecision().size(); switch (condPrecSize) { case 1: { @@ -192,7 +192,7 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().getDesc().getPrecision().name()); } break; } @@ -204,13 +204,13 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().getDesc().getPrecision().name()); } break; } default: { IE_THROW() << "Select layer doesn't support 'Condition' inputs' precision: " - + std::string(getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().name()); + + std::string(getParentEdgeAt(CONDITION)->getMemory().getDesc().getPrecision().name()); } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp index f83ddfed0d0a67..bb27f6f0d0bf0d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp @@ -127,8 +127,8 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_SHCH_ERROR << "has unidentified preferable primitive descriptor"; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + const bool isBlocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c); int batchRank = axis_; int spatialRank = dataRank_ - axis_ - 1; @@ -159,7 +159,7 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { const int channelDim = 1; if (isBlocked) { - const auto blkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + const auto blkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); size_t blkSize = blkDesc.getBlockDims().back(); size_t CB = div_up(inShape_[1], blkSize); SizeVector srcBlockedDims = blkDesc.getBlockDims(); @@ -181,7 +181,7 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { params.order[2] = 2; params.src_block_dims[2] = spatialShapeSize; } - } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + } else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { if (axis_ == channelDim) { // axis on channel params.order[0] = 0; params.src_block_dims[0] = inShape_[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp index 9fe05e475fc1dc..2d46bb3138f9e4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -39,17 +39,17 @@ void MKLDNNSoftMaxNode::getSupportedDescriptors() { IE_THROW() << "Incorrect number of output edges for layer " << getName(); if (getParentEdgeAt(0)->getShape().getRank() == 3) { - MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::abc); createDescriptor({in_candidate.get()}, {}); } for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { const auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); - if (MKLDNNMemoryDesc(dims, inputDataType, format).blocksExtended()) + if (DnnlMemoryDesc(dims, inputDataType, format).blocksExtended()) continue; - MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(dims, inputDataType, format); + MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(dims, inputDataType, format); createDescriptor({in_candidate.get()}, {}); } @@ -121,7 +121,7 @@ bool MKLDNNSoftMaxNode::created() const { void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); + DnnlMemoryDesc in_candidate = MemoryDescUtils::convertToDnnlMemoryDesc(*inputDesc[0]); MKLDNNDescriptor desc(std::shared_ptr( new softmax_forward::desc(prop_kind::forward_scoring, in_candidate, axis))); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp index 1861799f97c32b..8887adcb4d7410 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp @@ -112,15 +112,15 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const bool blocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c); + const bool blocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(outDims); auto outShape5D = getShape5D(inDims); auto blockShape = getShape5D(blockShapeIn); - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -129,10 +129,10 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { blockShape.erase(blockShape.begin() + 1); } - const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); const size_t blockSize = blocked ? outBlkDims.back() : 1lu; const size_t blockCountInput = outBlkDims[1]; - const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims()[1]; + const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -173,7 +173,7 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1]; - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -227,12 +227,12 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { } void MKLDNNSpaceToBatchNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) { case 1: SpaceToBatchKernel::value_type>(); break; case 2: SpaceToBatchKernel::value_type>(); break; case 4: SpaceToBatchKernel::value_type>(); break; default: - IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) + IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getMemory().getDesc().getPrecision().name()) + "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp index 25003088139af9..4c39a5e9fc9ecb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp @@ -159,8 +159,8 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + const bool isBlocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; @@ -191,8 +191,8 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -219,7 +219,7 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { } reshapeAndSetPermOrder(orderShiftForBlocks, orderShiftForDims, firstSpatialOrder, dstBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + } else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp index 07ad5bcc14c0a1..9ba24ecdb29b37 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp @@ -10,7 +10,7 @@ #include #include #include "utils/general_utils.h" -#include +#include #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " @@ -139,12 +139,12 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, srcShape.getStaticDims())); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, srcShape.getStaticDims())); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, Shape(SizeVector {1})); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, Shape(SizeVector {1})); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, Shape(SizeVector{outputShapes.size()})); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, Shape(SizeVector{outputShapes.size()})); config.inConfs[2].constant = true; } @@ -153,7 +153,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, outputShapes[i].getStaticDims())); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, outputShapes[i].getStaticDims())); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); @@ -171,7 +171,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { for (auto refPdIndex : pdIndexesToReuse) { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - const auto inBlockingDesc = refConfig.inConfs[0].desc->as(); + const auto inBlockingDesc = refConfig.inConfs[0].desc->as(); const auto& order = inBlockingDesc->getOrder(); const auto& blkDims = inBlockingDesc->getBlockDims(); auto numOfDim = blkDims.size(); @@ -189,15 +189,15 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } - config.inConfs[0].desc = MKLDNNPlugin::make_unique(inpPrecision, srcShape, blkDims, order, offset, offsets, strides); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(inpPrecision, srcShape, blkDims, order, offset, offsets, strides); for (size_t i = 0; i < outputShapes.size(); i++) { - auto outBlockingDesc = refConfig.outConfs[i].desc->as(); + auto outBlockingDesc = refConfig.outConfs[i].desc->as(); const auto& outBlkDims = outBlockingDesc->getBlockDims(); const auto& dims = outBlockingDesc->getShape().getStaticDims(); config.outConfs[i].inPlace = 0; - config.outConfs[i].desc = MKLDNNPlugin::make_unique(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, strides); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, strides); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -213,9 +213,9 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].desc = creatorsMap.at(LayoutType::nspc)->createUniqueDesc(inpPrecision, srcShape.getStaticDims()); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, Shape(SizeVector{1})); + config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, Shape(SizeVector{1})); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, Shape(SizeVector{outputShapes.size()})); + config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, Shape(SizeVector{outputShapes.size()})); config.inConfs[2].constant = true; } config.outConfs.resize(outputShapes.size()); @@ -240,13 +240,13 @@ void MKLDNNSplitNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; - auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->GetDesc(); + auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->getDesc(); canUseOptimizedNspc2Ncsp = false; if (axis == 1 && one_of(memDesc.getShape().getRank(), 4, 5) && memDesc.hasLayoutType(LayoutType::nspc)) { canUseOptimizedNspc2Ncsp = true; for (size_t i = 0; i < getChildEdges().size(); i++) { - auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->GetDesc(); + auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->getDesc(); if (!childMemDesc.hasLayoutType(LayoutType::ncsp)) canUseOptimizedNspc2Ncsp = false; } @@ -327,26 +327,26 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { } // reset undefined offsets - config.inConfs[i].desc = MemoryDescUtils::resetOffset(config.inConfs[i].desc.get()); + config.inConfs[i].desc = MemoryDescUtils::cloneWithDefaultStridesAndOffset(config.inConfs[i].desc.get()); } if (config.outConfs.size() != outputShapes.size()) THROW_ERROR << "has invalid config"; - auto firstInBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); + auto firstInBlockingDesc = config.inConfs[0].desc->as(); size_t offset = 0; for (size_t i = 0; i < outputShapes.size(); i++) { - auto outBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[i].desc); - config.outConfs[i].desc = MKLDNNPlugin::make_unique(outBlockingDesc.getPrecision(), - outBlockingDesc.getShape(), - outBlockingDesc.getBlockDims(), - outBlockingDesc.getOrder(), - firstInBlockingDesc.getOffsetPadding() + offset, - firstInBlockingDesc.getOffsetPaddingToData(), - firstInBlockingDesc.getStrides()); + auto outBlockingDesc = config.outConfs[i].desc->as(); + config.outConfs[i].desc = MKLDNNPlugin::make_unique(outBlockingDesc->getPrecision(), + outBlockingDesc->getShape(), + outBlockingDesc->getBlockDims(), + outBlockingDesc->getOrder(), + firstInBlockingDesc->getOffsetPadding() + offset, + firstInBlockingDesc->getOffsetPaddingToData(), + firstInBlockingDesc->getStrides()); size_t axisSize = 1; - for (size_t j = axis; j < outBlockingDesc.getBlockDims().size(); j++) { - axisSize *= outBlockingDesc.getBlockDims()[j]; + for (size_t j = axis; j < outBlockingDesc->getBlockDims().size(); j++) { + axisSize *= outBlockingDesc->getBlockDims()[j]; } offset += axisSize; } @@ -464,7 +464,7 @@ void MKLDNNSplitNode::prepareOptimizedParams() { const auto outputPortsCount = outputShapes.size(); //find axis order position - const auto& order = inpTensorDesc.getOrder(); + const auto& order = inpTensorDesc->getOrder(); unsigned axisOrderPos = std::numeric_limits::max(); for (size_t i = 0; i < order.size(); ++i) { if (order[i] == axis) { @@ -476,8 +476,8 @@ void MKLDNNSplitNode::prepareOptimizedParams() { THROW_ERROR << "Can't find the axis in the input tensor order list"; } - uint8_t srcDataSize = inpTensorDesc.getPrecision().size(); - const auto& srcDims = inpTensorDesc.getBlockDims(); + uint8_t srcDataSize = inpTensorDesc->getPrecision().size(); + const auto& srcDims = inpTensorDesc->getBlockDims(); const auto getRank = srcDims.size(); optimizedParams.countStrides = 1; @@ -491,7 +491,7 @@ void MKLDNNSplitNode::prepareOptimizedParams() { auto outputEdge = this->getChildEdgesAtPort(i).front(); optimizedParams.dataSize[i] = srcDataSize; - auto desc = outputEdge->getMemory().GetDesc().as(); + auto desc = outputEdge->getMemory().getDesc().as(); for (size_t j = axisOrderPos; j < getRank; j++) optimizedParams.dataSize[i] *= desc->getBlockDims()[j]; @@ -516,7 +516,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { auto& srcMem = parentEdge->getMemory(); auto srcData = reinterpret_cast(srcMem.GetData()); - const auto dataSize = srcMem.GetDesc().getPrecision().size(); + const auto dataSize = srcMem.getDesc().getPrecision().size(); const size_t DHW = D*H*W; const size_t strideIB = DHW * IC * dataSize; @@ -532,7 +532,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { for (size_t j = axis; j < dims.size(); j++) { innerSize *= dims[j]; } - auto srcPtr = srcData + srcMem.GetDesc().getElementOffset(sIdx) * dataSize; + auto srcPtr = srcData + srcMem.getDesc().getElementOffset(sIdx) * dataSize; const size_t OC = dims[1]; const size_t strideOB = OC * strideOC; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp index 8e87617d3692b4..4e2901dcd00e30 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp @@ -243,12 +243,12 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { for (auto itr = range.first; itr != range.second; ++itr) { config.inConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getParentEdgeAt(DATA_ID)->getShape().getStaticDims()); - config.inConfs[BEGIN_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(BEGIN_ID)->getShape().getStaticDims(), beginDataType, + config.inConfs[BEGIN_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(BEGIN_ID)->getShape().getStaticDims(), beginDataType, mkldnn::memory::format_tag::x); - config.inConfs[END_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(END_ID)->getShape().getStaticDims(), endDataType, + config.inConfs[END_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(END_ID)->getShape().getStaticDims(), endDataType, mkldnn::memory::format_tag::x); if (hasStrides) - config.inConfs[STRIDE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(STRIDE_ID)->getShape().getStaticDims(), + config.inConfs[STRIDE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(STRIDE_ID)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(stridePrecision), mkldnn::memory::format_tag::x); @@ -267,8 +267,8 @@ void MKLDNNStridedSliceNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor."; - auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType(); - auto dstBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType(); + auto dstBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); auto srcOrder = srcBlockingDesc.getOrder(); params.srcDims = srcBlockingDesc.getBlockDims(); params.dstDims = dstBlockingDesc.getBlockDims(); @@ -278,7 +278,7 @@ void MKLDNNStridedSliceNode::createPrimitive() { if (params.parametersAreConstant) { size_t realNDims = params.dstDims.size(); - if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) + if (!getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) orderParametersByLayouts(); SizeVector newSrcDims, newDstDims; @@ -289,10 +289,10 @@ void MKLDNNStridedSliceNode::createPrimitive() { } void MKLDNNStridedSliceNode::orderParametersByLayouts() { - const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); - const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); - auto srcOrder = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType().getOrder(); + const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::nspc); + const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c); + auto srcOrder = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType().getOrder(); if (isBlockedLayout) { const size_t blk = params.srcDims.back(); @@ -620,7 +620,7 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) { if (srcDims.size() > 3 && params.equalDims && ellipsisMaskCounter != 0) addHiddenDims(srcDims.size()); - if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) + if (!getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) orderParametersByLayouts(); SizeVector newSrcDims, newDstDims; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp index 2e1a9f426ef55a..0f4ab519ce5e7d 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp @@ -56,8 +56,8 @@ class PortIteratorHelper : public PortMapHelper { auto axis = slice_rule.axis; auto stride = slice_rule.stride; - auto full_dims = full_blob->GetDims(); - auto part_dims = part_blob->GetDims(); + auto full_dims = full_blob->GetShape().getStaticDims(); + auto part_dims = part_blob->GetShape().getStaticDims(); auto abs_stride = std::abs(stride); auto sign_of_stride = stride < 0.0f ? -1 : 1; @@ -68,7 +68,7 @@ class PortIteratorHelper : public PortMapHelper { IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port"; // make chunk view - auto chunk_desc = full_blob->GetDescriptor(); + auto chunk_desc = full_blob->GetDescWithType()->getDnnlDesc(); chunk_desc.data.dims[axis] = abs_stride; chunk_desc.data.padded_dims[axis] = abs_stride; // TODO: asamption that plain tensor @@ -132,7 +132,7 @@ class IterCountPortHelper : public PortMapHelper { IterCountPortHelper(const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) { // Only scalar I32 tensor is supported IE_ASSERT(to->GetDataType() == memory::data_type::s32); - IE_ASSERT(to->GetDims() == memory::dims{1}); + IE_ASSERT(to->GetShape() == Shape(InferenceEngine::SizeVector{1})); mem_holder_dst = to->GetPrimitive(); } @@ -150,7 +150,7 @@ class asBoolCheck : public PortChecker { public: asBoolCheck(const MKLDNNMemoryPtr &mem) { IE_ASSERT(mem->GetDataType() == memory::data_type::u8); - IE_ASSERT(mem->GetDims() == memory::dims{1}); + IE_ASSERT(mem->GetShape() == Shape(InferenceEngine::SizeVector{1})); mem_holder = mem->GetPrimitive(); } @@ -167,7 +167,7 @@ class asIntCheck : public PortChecker { public: asIntCheck(const MKLDNNMemoryPtr &mem) { IE_ASSERT(mem->GetDataType() == memory::data_type::s32); - IE_ASSERT(mem->GetDims() == memory::dims{1}); + IE_ASSERT(mem->GetShape() == Shape(InferenceEngine::SizeVector{1})); mem_holder = mem->GetPrimitive(); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp index c92193c6e927c0..728790b199f97b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp @@ -133,13 +133,13 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { m_inner_dim *= batchToProcess(); } - if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp8c)) { + if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.getDesc().hasLayoutType(LayoutType::nCsp8c)) { /* * We may enable tile processing directly to appropriate output format (nChw8c) */ m_inner_dim *= 8; m_outer_dim /= 8; - } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp16c)) { + } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.getDesc().hasLayoutType(LayoutType::nCsp16c)) { /* * We may enable tile processing directly to appropriate output format (nChw16c) */ @@ -147,7 +147,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) { m_outer_dim /= 16; } - m_inner_dim *= srcMemory.GetDesc().getPrecision().size(); + m_inner_dim *= srcMemory.getDesc().getPrecision().size(); for (int i = 0; i < m_outer_dim; ++i) { for (int t = 0; t < tiles; ++t) { cpu_memcpy(dst_ptr, src_ptr, m_inner_dim); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp index 5ea5b902e3e212..6913a6a9b29f22 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_transpose_node.cpp @@ -74,66 +74,66 @@ void MKLDNNTransposeNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].constant = false; config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), inputOrderDataType, + config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), inputOrderDataType, memory::format_tag::x); if (getParentEdgeAt(0)->getShape().getRank() == 4) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::nchw); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, memory::format_tag::nchw); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::nChw8c); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::nChw16c); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::nhwc); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, memory::format_tag::nhwc); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::ncdhw); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, memory::format_tag::ncdhw); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); if (srcDims[1] % 8 == 0) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::nCdhw8c); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (srcDims[1] % 16 == 0) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::nCdhw16c); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } if (prec == Precision::FP32 || prec == Precision::I8 || prec == Precision::U8) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, memory::format_tag::ndhwc); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, memory::format_tag::ndhwc); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } else { // general plain case - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); + config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType); + config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); } } @@ -148,7 +148,7 @@ void MKLDNNTransposeNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) && std::find(optimizedOrders.begin(), optimizedOrders.end(), order) != optimizedOrders.end()) { isOptimized = true; return; @@ -157,11 +157,11 @@ void MKLDNNTransposeNode::createPrimitive() { PermuteParams params; params.data_size = getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); params.order = order; - auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); + auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); params.src_block_dims = srcDesc.getBlockDims(); params.src_block_order = srcDesc.getOrder(); - auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); params.dst_block_dims = dstDesc.getBlockDims(); params.dst_block_order = dstDesc.getOrder(); @@ -275,7 +275,7 @@ void MKLDNNTransposeNode::execute(mkldnn::stream strm) { int MB = batchToProcess(); if (isOptimized) { - const size_t dataSize = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); + const size_t dataSize = getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size(); TransposeContext ctx = {this, srcMemPtr, dstMemPtr, MB}; OV_SWITCH(MKLDNNPlugin, TransposeOptimizedEmitter, ctx, dataSize, OV_CASE(1, PrecisionTrait::value_type), diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp index 1272183c68beea..fb5d285de8192a 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.cpp @@ -11,7 +11,7 @@ #include "common/memory_desc_wrapper.hpp" #include -#include +#include using namespace InferenceEngine; @@ -65,7 +65,7 @@ static IEB_HEADER prepare_header(const MemoryDesc& desc) { return header; } -static MKLDNNMemoryDesc parse_header(IEB_HEADER &header) { +static DnnlBlockedMemoryDesc parse_header(IEB_HEADER &header) { if (header.magic[0] != IEB_MAGIC[0] || header.magic[1] != IEB_MAGIC[1] || header.magic[2] != IEB_MAGIC[2] || @@ -76,16 +76,16 @@ static MKLDNNMemoryDesc parse_header(IEB_HEADER &header) { header.ver[1] != 1) IE_THROW() << "Dumper cannot parse file. Unsupported IEB format version."; - const auto prc = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision(static_cast(header.precision))); + const auto prc = Precision(static_cast(header.precision)); SizeVector dims(header.ndims); for (int i = 0; i < header.ndims; i++) dims[i] = header.dims[i]; - return MKLDNNMemoryDesc{dims, prc, MKLDNNMemory::GetPlainFormatByRank(dims.size()) }; + return DnnlBlockedMemoryDesc{prc, Shape(dims)}; } void BlobDumper::prepare_plain_data(const MKLDNNMemoryPtr &memory, std::vector &data) const { - const auto &desc = memory->GetDesc(); + const auto &desc = memory->getDesc(); size_t data_size = desc.getShape().getElementsCount(); const auto size = data_size * desc.getPrecision().size(); data.resize(size); @@ -132,7 +132,7 @@ void BlobDumper::dump(std::ostream &stream) const { if (memory == nullptr) IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - IEB_HEADER header = prepare_header(memory->GetDesc()); + IEB_HEADER header = prepare_header(memory->getDesc()); std::vector data; prepare_plain_data(this->memory, data); @@ -149,12 +149,12 @@ void BlobDumper::dumpAsTxt(std::ostream &stream) const { if (memory == nullptr) IE_THROW() << "Dumper cannot dump. Memory is not allocated."; - const auto dims = memory->GetDims(); - const auto &desc = memory->GetDesc(); + const auto &desc = memory->getDesc(); + const auto dims = desc.getShape().getStaticDims(); size_t data_size = desc.getShape().getElementsCount(); // Header like "U8 4D shape: 2 3 224 224 () - stream << memory->GetDesc().getPrecision().name() << " " + stream << memory->getDesc().getPrecision().name() << " " << dims.size() << "D " << "shape: "; for (size_t d : dims) stream << d << " "; diff --git a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h index 5271f351d6b492..5d079e797ef57a 100644 --- a/inference-engine/src/mkldnn_plugin/utils/blob_dump.h +++ b/inference-engine/src/mkldnn_plugin/utils/blob_dump.h @@ -5,6 +5,7 @@ #pragma once #include "mkldnn_memory.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" #include @@ -25,7 +26,7 @@ class BlobDumper { public: BlobDumper() = default; - BlobDumper(const MKLDNNMemoryDesc &desc) { + BlobDumper(const DnnlBlockedMemoryDesc &desc) { mkldnn::engine eng(mkldnn::engine::kind::cpu, 0); memory = std::make_shared(eng); memory->Create(desc); diff --git a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp index 0cd3975c39a88c..38a4f7ac0d7a8b 100644 --- a/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp +++ b/inference-engine/src/mkldnn_plugin/utils/cpu_utils.hpp @@ -4,7 +4,6 @@ #pragma once - namespace MKLDNNPlugin { /** @@ -90,4 +89,5 @@ inline InferenceEngine::Precision normalizeToSupportedPrecision(InferenceEngine: } return precision; } + } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/utils/general_utils.h b/inference-engine/src/mkldnn_plugin/utils/general_utils.h index a7cf69f43f8b15..35640212a5555b 100644 --- a/inference-engine/src/mkldnn_plugin/utils/general_utils.h +++ b/inference-engine/src/mkldnn_plugin/utils/general_utils.h @@ -134,29 +134,4 @@ inline InferenceEngine::Precision getMaxPrecision(std::vector& dims) { - std::stringstream output; - output << "{"; - - auto itr = dims.begin(); - do { - output << dim2str(*itr); - } while (++itr != dims.end() && output << ", "); - - output << "}"; - return output.str(); -} - -inline bool isDynamicNgraphNode(const std::shared_ptr& op) { - bool ret = op->is_dynamic(); - for (size_t i = 0; i < op->get_output_size(); i++) { - ret |= op->get_output_partial_shape(i).is_dynamic(); - } - return ret; -} - } // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp index c6b54febc5a6ec..27b8da19a8df89 100644 --- a/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp +++ b/inference-engine/src/mkldnn_plugin/utils/ngraph_utils.hpp @@ -39,4 +39,12 @@ inline const std::shared_ptr getNgraphOpAs(const std::shared_ptr& op) { + bool ret = op->is_dynamic(); + for (size_t i = 0; i < op->get_output_size(); i++) { + ret |= op->get_output_partial_shape(i).is_dynamic(); + } + return ret; +} + } // namespace MKLDNNPlugin \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp index 2e0b06c0e4d362..256a55a7710646 100644 --- a/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/utils/node_dumper.cpp @@ -9,7 +9,7 @@ #include "ie_common.h" #include "utils/blob_dump.h" #include "utils/debug_capabilities.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include #include @@ -66,7 +66,7 @@ void NodeDumper::dumpInputBlobs(const MKLDNNNodePtr& node) const { auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; std::cout << "Dump inputs: " << dump_file << std::endl; - auto& desc = prEdge->getMemory().GetDesc(); + auto& desc = prEdge->getMemory().getDesc(); if (desc.getPrecision() == Precision::BIN) continue; @@ -99,7 +99,7 @@ void NodeDumper::dumpOutputBlobs(const MKLDNNNodePtr& node) const { auto dump_file = dumpDirName + "/#" + exec_order + "_" + file_name; std::cout << "Dump outputs: " << dump_file << std::endl; - auto& desc = childEdge->getMemory().GetDesc(); + auto& desc = childEdge->getMemory().getDesc(); if (desc.getPrecision() == Precision::BIN) continue; @@ -122,7 +122,7 @@ void NodeDumper::dumpInternalBlobs(const MKLDNNNodePtr& node) const { continue; MKLDNNMemoryPtr memory = std::make_shared(node->getEngine()); - memory->Create(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc), blb->buffer()); + memory->Create(MemoryDescUtils::convertToDnnlBlockedMemoryDesc(desc), blb->buffer()); BlobDumper dumper(memory); dump(dumper, dump_file); } diff --git a/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp b/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp index 42325b0c88dd0f..85c1375bff5072 100644 --- a/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp +++ b/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp @@ -7,8 +7,10 @@ #include #include "mkldnn_memory.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include "nodes/common/blocked_desc_creator.h" +#include "mkldnn_extension_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -16,15 +18,18 @@ using namespace testing; TEST(MemDescTest, Conversion) { // Check if conversion keep desc structure - // dnnl::memory::desc -> MKLDNNMemoryDesc -> BlockedMemoryDesc -> MKLDNNMemoryDesc -> dnnl::memory::desc + // dnnl::memory::desc -> DnnlBlockedMemoryDesc -> CpuBlockedMemoryDesc -> DnnlBlockedMemoryDesc -> dnnl::memory::desc auto converted_correctly = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) { dnnl::memory::desc orig_tdesc {dims, dnnl::memory::data_type::u8, fmt}; - MKLDNNMemoryDesc plg_tdesc {orig_tdesc}; - BlockedMemoryDesc blk_tdesc = MemoryDescUtils::convertToBlockedDescriptor(plg_tdesc); - MKLDNNMemoryDesc plg_tdesc_after = MemoryDescUtils::convertToMKLDNNMemoryDesc(blk_tdesc); - dnnl::memory::desc after_tdesc(plg_tdesc_after); - - return orig_tdesc == after_tdesc; + DnnlMemoryDescPtr plg_tdesc = MKLDNNExtensionUtils::makeDescriptor(orig_tdesc); + BlockedMemoryDescPtr blk_tdesc = MemoryDescUtils::convertToBlockedMemoryDesc(*plg_tdesc); + CpuBlockedMemoryDesc cpu_blk_tdesc = CpuBlockedMemoryDesc(blk_tdesc->getPrecision(), blk_tdesc->getShape(), blk_tdesc->getBlockDims(), + blk_tdesc->getOrder(), blk_tdesc->getOffsetPadding(), blk_tdesc->getOffsetPaddingToData(), + blk_tdesc->getStrides()); + DnnlMemoryDescPtr plg_tdesc_after = MemoryDescUtils::convertToDnnlMemoryDesc(cpu_blk_tdesc); + dnnl::memory::desc after_tdesc = plg_tdesc_after->getDnnlDesc(); + + return orig_tdesc == after_tdesc; }; std::pair payload[] { @@ -53,27 +58,30 @@ TEST(MemDescTest, UndefinedStateConversion) { }; for (auto tag : vecTags) { - MKLDNNMemoryDesc mkldnnDesc(cpuShape, mkldnn::memory::data_type::f32, tag); + DnnlBlockedMemoryDesc mkldnnDesc(cpuShape, mkldnn::memory::data_type::f32, tag); ASSERT_FALSE(mkldnnDesc.isDefined()); - auto blockedDesc = MemoryDescUtils::convertToBlockedDescriptor(mkldnnDesc); + auto blockedDesc = MemoryDescUtils::convertToBlockedMemoryDesc(mkldnnDesc); + CpuBlockedMemoryDesc cpuBlockedDesc = CpuBlockedMemoryDesc(blockedDesc->getPrecision(), blockedDesc->getShape(), blockedDesc->getBlockDims(), + blockedDesc->getOrder(), blockedDesc->getOffsetPadding(), + blockedDesc->getOffsetPaddingToData(), blockedDesc->getStrides()); - ASSERT_TRUE(mkldnnDesc.isCompatible(blockedDesc)); - ASSERT_TRUE(blockedDesc.isCompatible(mkldnnDesc)); + ASSERT_TRUE(mkldnnDesc.isCompatible(cpuBlockedDesc)); + ASSERT_TRUE(cpuBlockedDesc.isCompatible(mkldnnDesc)); - auto reconstructedDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(blockedDesc); + auto reconstructedDesc = MemoryDescUtils::convertToDnnlMemoryDesc(cpuBlockedDesc); - ASSERT_TRUE(mkldnnDesc.isCompatible(reconstructedDesc)); - ASSERT_TRUE(blockedDesc.isCompatible(reconstructedDesc)); + ASSERT_TRUE(mkldnnDesc.isCompatible(*reconstructedDesc)); + ASSERT_TRUE(cpuBlockedDesc.isCompatible(*reconstructedDesc)); - mkldnn::memory::desc dnnlDesc = mkldnnDesc; - mkldnn::memory::desc reconstDnnlDesc = reconstructedDesc; + mkldnn::memory::desc dnnlDesc = mkldnnDesc.getDnnlDesc(); + mkldnn::memory::desc reconstDnnlDesc = reconstructedDesc->getDnnlDesc(); ASSERT_EQ(dnnlDesc, reconstDnnlDesc); auto definedMemDesc = mkldnnDesc.cloneWithNewDims({16, 10, 15, 3}); - auto definedReconstructedMkldnnDesc = reconstructedDesc.cloneWithNewDims({16, 10, 15, 3}); + auto definedReconstructedMkldnnDesc = reconstructedDesc->cloneWithNewDims({16, 10, 15, 3}); ASSERT_TRUE(definedMemDesc->isCompatible(*definedReconstructedMkldnnDesc)); } @@ -88,22 +96,23 @@ TEST(MemDescTest, TurnToUninit) { auto creator = item.second; auto blockedDesc = creator->createDesc(Precision::FP32, cpuShape); - auto mkldnnDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(blockedDesc); + auto mkldnnDesc = MemoryDescUtils::convertToDnnlMemoryDesc(blockedDesc); - auto uninitMkldnnDesc = MemoryDescUtils::applyUndefinedOffset(mkldnnDesc); + auto uninitMkldnnDesc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*mkldnnDesc); - ASSERT_TRUE(uninitMkldnnDesc->isCompatible(mkldnnDesc)); + ASSERT_TRUE(uninitMkldnnDesc->isCompatible(*mkldnnDesc)); auto strides = blockedDesc.getStrides(); std::transform(strides.begin(), strides.begin() + cpuShape.getRank(), strides.begin(), [](size_t x) { return x * 3; }); - auto stridedBlockedDesc = BlockedMemoryDesc(blockedDesc.getPrecision(), blockedDesc.getShape(), blockedDesc.getBlockDims(), blockedDesc.getOrder(), + auto stridedBlockedDesc = CpuBlockedMemoryDesc(blockedDesc.getPrecision(), blockedDesc.getShape(), blockedDesc.getBlockDims(), + blockedDesc.getOrder(), 100500, blockedDesc.getOffsetPaddingToData(), strides); ASSERT_FALSE(blockedDesc.isCompatible(stridedBlockedDesc)); ASSERT_TRUE(uninitMkldnnDesc->isCompatible(stridedBlockedDesc)); - auto initMkldnnDesc = MemoryDescUtils::resetOffset(uninitMkldnnDesc.get()); + auto initMkldnnDesc = MemoryDescUtils::cloneWithDefaultStridesAndOffset(uninitMkldnnDesc.get()); ASSERT_TRUE(initMkldnnDesc->isCompatible(blockedDesc)); ASSERT_FALSE(initMkldnnDesc->isCompatible(stridedBlockedDesc)); @@ -113,12 +122,12 @@ TEST(MemDescTest, TurnToUninit) { TEST(MemDescTest, CompareWithTensorDescRecomputedStrides) { auto converted_correctly = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) { dnnl::memory::desc orig_tdesc {dims, dnnl::memory::data_type::u8, fmt}; - MKLDNNMemoryDesc plg_tdesc {orig_tdesc}; - BlockedMemoryDesc blk_tdesc = MemoryDescUtils::convertToBlockedDescriptor(plg_tdesc); + DnnlMemoryDescPtr plg_tdesc = MKLDNNExtensionUtils::makeDescriptor(orig_tdesc); + BlockedMemoryDescPtr blk_tdesc = MemoryDescUtils::convertToBlockedMemoryDesc(*plg_tdesc); - BlockedMemoryDesc recomputed_blk_tdesc(blk_tdesc.getPrecision(), blk_tdesc.getShape(), blk_tdesc.getBlockDims(), blk_tdesc.getOrder()); + CpuBlockedMemoryDesc recomputed_blk_tdesc(blk_tdesc->getPrecision(), blk_tdesc->getShape(), blk_tdesc->getBlockDims(), blk_tdesc->getOrder()); - return blk_tdesc.isCompatible(recomputed_blk_tdesc); + return plg_tdesc->isCompatible(recomputed_blk_tdesc); }; std::pair payload[] { @@ -141,9 +150,9 @@ TEST(MemDescTest, isPlainCheck) { dnnl::memory::desc permt_tdesc {dims, type, dnnl::memory::format_tag::acdb}; dnnl::memory::desc blckd_tdesc {dims, type, dnnl::memory::format_tag::aBcd8b}; - ASSERT_TRUE(MKLDNNMemoryDesc(plain_tdesc).hasLayoutType(LayoutType::ncsp)); - ASSERT_FALSE(MKLDNNMemoryDesc(permt_tdesc).hasLayoutType(LayoutType::ncsp)); - ASSERT_FALSE(MKLDNNMemoryDesc(blckd_tdesc).hasLayoutType(LayoutType::ncsp)); + ASSERT_TRUE(MKLDNNExtensionUtils::makeDescriptor(plain_tdesc)->hasLayoutType(LayoutType::ncsp)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(permt_tdesc)->hasLayoutType(LayoutType::ncsp)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blckd_tdesc)->hasLayoutType(LayoutType::ncsp)); } TEST(MemDescTest, isBlockedCCheck) { @@ -154,19 +163,19 @@ TEST(MemDescTest, isBlockedCCheck) { dnnl::memory::desc tailc_tdesc {dims, type, dnnl::memory::format_tag::acdb}; dnnl::memory::desc blck8_tdesc {dims, type, dnnl::memory::format_tag::aBcd8b}; dnnl::memory::desc blck8_permCD_tdesc {dims, type, dnnl::memory::format_tag::aBdc16b}; - const MKLDNNMemoryDesc plain_mdesc(plain_tdesc); - const MKLDNNMemoryDesc tailc_mdesc(tailc_tdesc); - ASSERT_FALSE(plain_mdesc.hasLayoutType(LayoutType::nCsp8c) || plain_mdesc.hasLayoutType(LayoutType::nCsp16c)); - ASSERT_FALSE(tailc_mdesc.hasLayoutType(LayoutType::nCsp8c) || tailc_mdesc.hasLayoutType(LayoutType::nCsp16c)); - ASSERT_TRUE(MKLDNNMemoryDesc(blck8_tdesc).hasLayoutType(LayoutType::nCsp8c)); - ASSERT_FALSE(MKLDNNMemoryDesc(blck8_permCD_tdesc).hasLayoutType(LayoutType::nCsp16c)); + auto plain_mdesc = MKLDNNExtensionUtils::makeDescriptor(plain_tdesc); + auto tailc_mdesc = MKLDNNExtensionUtils::makeDescriptor(tailc_tdesc); + ASSERT_FALSE(plain_mdesc->hasLayoutType(LayoutType::nCsp8c) || plain_mdesc->hasLayoutType(LayoutType::nCsp16c)); + ASSERT_FALSE(tailc_mdesc->hasLayoutType(LayoutType::nCsp8c) || tailc_mdesc->hasLayoutType(LayoutType::nCsp16c)); + ASSERT_TRUE(MKLDNNExtensionUtils::makeDescriptor(blck8_tdesc)->hasLayoutType(LayoutType::nCsp8c)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blck8_permCD_tdesc)->hasLayoutType(LayoutType::nCsp16c)); const auto crop_dims = dnnl::memory::dims {2, 1, 5, 7}; const auto crop_off = dnnl::memory::dims {1, 0, 0, 0}; dnnl::memory::desc blck8_crop_tdesc = blck8_tdesc.submemory_desc(crop_dims, crop_off); dnnl::memory::desc blck8_permCD_crop_tdesc = blck8_permCD_tdesc.submemory_desc(crop_dims, crop_off); - ASSERT_TRUE(MKLDNNMemoryDesc(blck8_crop_tdesc).hasLayoutType(LayoutType::nCsp8c)); - ASSERT_FALSE(MKLDNNMemoryDesc(blck8_permCD_crop_tdesc).hasLayoutType(LayoutType::nCsp8c)); + ASSERT_TRUE(MKLDNNExtensionUtils::makeDescriptor(blck8_crop_tdesc)->hasLayoutType(LayoutType::nCsp8c)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blck8_permCD_crop_tdesc)->hasLayoutType(LayoutType::nCsp8c)); } TEST(MemDescTest, isTailCCheck) { @@ -177,18 +186,18 @@ TEST(MemDescTest, isTailCCheck) { dnnl::memory::desc tailc_tdesc {dims, type, dnnl::memory::format_tag::acdb}; dnnl::memory::desc permt_tdesc {dims, type, dnnl::memory::format_tag::bcda}; dnnl::memory::desc blck8_tdesc {dims, type, dnnl::memory::format_tag::aBcd8b}; - ASSERT_FALSE(MKLDNNMemoryDesc(plain_tdesc).hasLayoutType(LayoutType::nspc)); - ASSERT_FALSE(MKLDNNMemoryDesc(permt_tdesc).hasLayoutType(LayoutType::nspc)); - ASSERT_TRUE(MKLDNNMemoryDesc(tailc_tdesc).hasLayoutType(LayoutType::nspc)); - ASSERT_FALSE(MKLDNNMemoryDesc(blck8_tdesc).hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(plain_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(permt_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_TRUE(MKLDNNExtensionUtils::makeDescriptor(tailc_tdesc)->hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blck8_tdesc)->hasLayoutType(LayoutType::nspc)); dnnl::memory::desc blck8_permCD_tdesc {dims, type, dnnl::memory::format_tag::aBdc16b}; - ASSERT_FALSE(MKLDNNMemoryDesc(blck8_permCD_tdesc).hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(blck8_permCD_tdesc)->hasLayoutType(LayoutType::nspc)); const auto crop_dims = dnnl::memory::dims {2, 1, 5, 7}; const auto crop_off = dnnl::memory::dims {1, 0, 0, 0}; dnnl::memory::desc tailc_crop_tdesc = blck8_tdesc.submemory_desc(crop_dims, crop_off); - ASSERT_FALSE(MKLDNNMemoryDesc(tailc_crop_tdesc).hasLayoutType(LayoutType::nspc)); + ASSERT_FALSE(MKLDNNExtensionUtils::makeDescriptor(tailc_crop_tdesc)->hasLayoutType(LayoutType::nspc)); } TEST(MemDescTest, constructWithPlainFormat) { @@ -213,39 +222,39 @@ TEST(MemDescTest, ComaptibleWithFormat) { TEST(MKLDNNMemDescTest, KeepOrder) { using mkldnn::memory; - std::vector dims = {7, 3, 1, 5}; + Shape dims(VectorDims{7, 3, 1, 5}); memory::data_type dataType = memory::data_type::u8; - MKLDNNMemoryDesc descPalanar(dims, dataType); + DnnlBlockedMemoryDesc descPalanar(MKLDNNExtensionUtils::DataTypeToIEPrecision(dataType), dims); ASSERT_THAT(descPalanar.getOrder(), ElementsAre(0, 1, 2, 3)); - MKLDNNMemoryDesc descTailC(dims, dataType, memory::format_tag::acdb); + DnnlBlockedMemoryDesc descTailC(dims, dataType, memory::format_tag::acdb); ASSERT_THAT(descTailC.getOrder(), ElementsAre(0, 2, 3, 1)); - MKLDNNMemoryDesc descBlockedC(dims, dataType, memory::format_tag::aBcd16b); + DnnlBlockedMemoryDesc descBlockedC(dims, dataType, memory::format_tag::aBcd16b); ASSERT_THAT(descBlockedC.getOrder(), ElementsAre(0, 1, 2, 3, 1)); - MKLDNNMemoryDesc descWeightBlocked(dims, dataType, memory::format_tag::ABcd16b16a2b); + DnnlBlockedMemoryDesc descWeightBlocked(dims, dataType, memory::format_tag::ABcd16b16a2b); ASSERT_THAT(descWeightBlocked.getOrder(), ElementsAre(0, 1, 2, 3, 1, 0, 1)); - auto dnnDims = MKLDNNExtensionUtils::convertToDnnlDims(dims); + auto dnnDims = MKLDNNExtensionUtils::convertToDnnlDims(dims.getStaticDims()); memory::desc mkldnnDescPlanar(dnnDims, dataType, memory::format_tag::abcd); - ASSERT_THAT(MKLDNNMemoryDesc(mkldnnDescPlanar).getOrder(), ElementsAre(0, 1, 2, 3)); + ASSERT_THAT(MKLDNNExtensionUtils::makeDescriptor(mkldnnDescPlanar)->as()->getOrder(), ElementsAre(0, 1, 2, 3)); memory::desc mkldnnDescTailC(dnnDims, dataType, memory::format_tag::acdb); - ASSERT_THAT(MKLDNNMemoryDesc(mkldnnDescTailC).getOrder(), ElementsAre(0, 2, 3, 1)); + ASSERT_THAT(MKLDNNExtensionUtils::makeDescriptor(mkldnnDescTailC)->as()->getOrder(), ElementsAre(0, 2, 3, 1)); memory::desc mkldnnDescBlockedC(dnnDims, dataType, memory::format_tag::aBcd16b); - ASSERT_THAT(MKLDNNMemoryDesc(mkldnnDescBlockedC).getOrder(), ElementsAre(0, 1, 2, 3, 1)); + ASSERT_THAT(MKLDNNExtensionUtils::makeDescriptor(mkldnnDescBlockedC)->as()->getOrder(), ElementsAre(0, 1, 2, 3, 1)); memory::desc mkldnnDescWeightBlocked(dnnDims, dataType, memory::format_tag::ABcd16b16a2b); - ASSERT_THAT(MKLDNNMemoryDesc(mkldnnDescWeightBlocked).getOrder(), ElementsAre(0, 1, 2, 3, 1, 0, 1)); + ASSERT_THAT(MKLDNNExtensionUtils::makeDescriptor(mkldnnDescWeightBlocked)->as()->getOrder(), ElementsAre(0, 1, 2, 3, 1, 0, 1)); } TEST(MemDescTest, UndefinedState) { ngraph::PartialShape ngraphShape({{16}, {-1, -1}, {20, 30}, {7}}); MKLDNNPlugin::Shape pluginShape(ngraphShape); - MKLDNNMemoryDesc memDesc(pluginShape, mkldnn::memory::data_type::f32, mkldnn::memory::format_tag::nChw8c); + DnnlBlockedMemoryDesc memDesc(pluginShape, mkldnn::memory::data_type::f32, mkldnn::memory::format_tag::nChw8c); ASSERT_FALSE(memDesc.isDefined()); @@ -290,12 +299,12 @@ TEST(MemDescTest, MemSize) { auto creator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); auto blockedDescUndef = creator->createDesc(iePrc, pluginShapeUndef); - ASSERT_EQ(blockedDescUndef.getCurrentSize(), undefSize); + ASSERT_EQ(blockedDescUndef.getCurrentMemSize(), undefSize); ASSERT_EQ(blockedDescUndef.getMaxMemSize(), undefSize); - MKLDNNMemoryDesc memDescUndef(pluginShapeUndef, dnnlDataType, mkldnn::memory::format_tag::nhwc); + DnnlBlockedMemoryDesc memDescUndef(pluginShapeUndef, dnnlDataType, mkldnn::memory::format_tag::nhwc); - ASSERT_EQ(memDescUndef.getCurrentSize(), undefSize); + ASSERT_EQ(memDescUndef.getCurrentMemSize(), undefSize); ASSERT_EQ(memDescUndef.getMaxMemSize(), undefSize); ngraph::PartialShape ngraphShapeDefUpperBound({{16}, {7, 14}, {20, 30}, {7}}); @@ -303,15 +312,15 @@ TEST(MemDescTest, MemSize) { auto blockedDescDefUpper = creator->createDesc(iePrc, pluginShapeDefUpperBound); - ASSERT_EQ(blockedDescDefUpper.getCurrentSize(), undefSize); + ASSERT_EQ(blockedDescDefUpper.getCurrentMemSize(), undefSize); auto maxElementsCount = std::accumulate(pluginShapeDefUpperBound.getMaxDims().begin(), pluginShapeDefUpperBound.getMaxDims().end(), 1, std::multiplies()); ASSERT_EQ(blockedDescDefUpper.getMaxMemSize(), maxElementsCount * iePrc.size()); - MKLDNNMemoryDesc memDescDefUpper(pluginShapeDefUpperBound, dnnlDataType, mkldnn::memory::format_tag::nhwc); + DnnlBlockedMemoryDesc memDescDefUpper(pluginShapeDefUpperBound, dnnlDataType, mkldnn::memory::format_tag::nhwc); - ASSERT_EQ(memDescDefUpper.getCurrentSize(), undefSize); + ASSERT_EQ(memDescDefUpper.getCurrentMemSize(), undefSize); ASSERT_EQ(memDescDefUpper.getMaxMemSize(), maxElementsCount * MKLDNNExtensionUtils::sizeOfDataType(dnnlDataType)); ngraph::PartialShape ngraphShapeDefined({{16}, {16}, {10}, {7}}); @@ -319,24 +328,24 @@ TEST(MemDescTest, MemSize) { auto blockedDescDefined = creator->createDesc(iePrc, pluginShapeDefined); - ASSERT_NE(blockedDescDefined.getCurrentSize(), undefSize); + ASSERT_NE(blockedDescDefined.getCurrentMemSize(), undefSize); ASSERT_NE(blockedDescDefined.getMaxMemSize(), undefSize); - ASSERT_EQ(blockedDescDefined.getCurrentSize(), blockedDescDefined.getMaxMemSize()); + ASSERT_EQ(blockedDescDefined.getCurrentMemSize(), blockedDescDefined.getMaxMemSize()); - MKLDNNMemoryDesc memDescDefined(pluginShapeDefined, dnnlDataType, mkldnn::memory::format_tag::nhwc); + DnnlBlockedMemoryDesc memDescDefined(pluginShapeDefined, dnnlDataType, mkldnn::memory::format_tag::nhwc); - ASSERT_NE(memDescDefined.getCurrentSize(), undefSize); + ASSERT_NE(memDescDefined.getCurrentMemSize(), undefSize); ASSERT_NE(memDescDefined.getMaxMemSize(), undefSize); - ASSERT_EQ(memDescDefined.getCurrentSize(), memDescDefined.getMaxMemSize()); - ASSERT_EQ(blockedDescDefined.getCurrentSize(), memDescDefined.getCurrentSize()); + ASSERT_EQ(memDescDefined.getCurrentMemSize(), memDescDefined.getMaxMemSize()); + ASSERT_EQ(blockedDescDefined.getCurrentMemSize(), memDescDefined.getCurrentMemSize()); } TEST(isSameMethodTest, CheckTensorWithSameStrides) { auto isSameDataFormat = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) { dnnl::memory::desc oneDnnDesc {dims, dnnl::memory::data_type::u8, fmt}; - MKLDNNMemoryDesc pluginDesc {oneDnnDesc}; - return pluginDesc.getFormat() == fmt; + auto pluginDesc = MKLDNNExtensionUtils::makeDescriptor(oneDnnDesc); + return pluginDesc->isSame(fmt); }; std::pair testCases[] {