diff --git a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/infer_request_dynamic.cpp b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/infer_request_dynamic.cpp index 221fe0851581e3..27af8e5ccfbfb8 100644 --- a/docs/template_plugin/tests/functional/shared_tests_instances/behavior/infer_request_dynamic.cpp +++ b/docs/template_plugin/tests/functional/shared_tests_instances/behavior/infer_request_dynamic.cpp @@ -21,10 +21,11 @@ const std::vector> configs = { INSTANTIATE_TEST_CASE_P(smoke_BehaviorTests, InferRequestDynamicTests, ::testing::Combine( - ::testing::ValuesIn(netPrecisions), + ::testing::Values(ngraph::builder::subgraph::makeSplitConvConcat()), + ::testing::Values(std::vector, std::vector>>{{{1, 4, 20, 20}, {1, 10, 18, 18}}, + {{2, 4, 20, 20}, {2, 10, 18, 18}}}), ::testing::Values(CommonTestUtils::DEVICE_TEMPLATE), ::testing::ValuesIn(configs)), InferRequestDynamicTests::getTestCaseName); } // namespace - diff --git a/inference-engine/src/cldnn_engine/cldnn_engine.cpp b/inference-engine/src/cldnn_engine/cldnn_engine.cpp index 90e42510eaebe3..ae6ad04daa1f61 100644 --- a/inference-engine/src/cldnn_engine/cldnn_engine.cpp +++ b/inference-engine/src/cldnn_engine/cldnn_engine.cpp @@ -308,18 +308,6 @@ InferenceEngine::CNNNetwork clDNNEngine::CloneAndTransformNetwork(const Inferenc return true; }); - pass_config->set_callback( - [](const_node_ptr &node) -> bool { - return node->input_value(0).get_shape().back() == 4lu && - node->input_value(0).get_shape().front() == node->input_value(1).get_shape().front() && - node->input_value(0).get_shape()[1] == node->input_value(1).get_shape().back() && - node->input_value(0).get_shape().size() == 3lu && - node->input_value(1).get_shape().size() == 3lu; - }); - pass_config->set_callback( [](const_node_ptr &node) -> bool { const auto mvn = std::dynamic_pointer_cast(node); diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp deleted file mode 100644 index cc04db7f26f0a6..00000000000000 --- a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp +++ /dev/null @@ -1,395 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "cpu_memory_desc.h" -#include "cpu_memory_desc_utils.h" -#include "mkldnn_memory.h" -#include "utils/general_utils.h" -#include "utils/cpu_utils.hpp" -#include -#include -#include -#include - -using namespace mkldnn; -using namespace MKLDNNPlugin; -using namespace InferenceEngine; - -namespace MKLDNNPlugin { - -/** - * Convert to BlockedDescriptor - * - * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} - * strides // the order of outer dims is encoded here - * inner_blks 4 16 4 - * inner_idxs 1 0 1 - * - * IE tensor desc has more expressive ability. Any oneDNN blocked tensor can be covreted. - * How to convert into IE representation: - * 0. Detect a new_outer_order of outer_dims via descending strides. - * 1. IE strides : concatenate strides in new_outer_order and inner strides. - * 2. IE dims : concatenate outer dims in new_outer_order with auto padding and inner blocks - * 3. 
IE order : concatenate new_outer_order and inner_idxs - */ -BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc) { - mkldnn::memory::desc desc = inpDesc; - const auto dims = desc.dims(); - - if (desc.data.format_kind != dnnl_blocked) - IE_THROW() << "Conversion is not possible"; - - const auto &blk_desc = desc.data.format_desc.blocking; - - const size_t outer_ndims = dims.size(); - const size_t inner_ndims = blk_desc.inner_nblks; - const size_t total_ndims = outer_ndims + inner_ndims; - - // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} - std::vector inner_strides(inner_ndims, 1); - for (size_t i = 1; i < blk_desc.inner_nblks; i++) { - inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; - } - - // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} - std::vector total_block_per_dim(outer_ndims, 1); - for (int i = 0; i < inner_ndims; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - - // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} - std::vector outer_order(outer_ndims); - std::iota(outer_order.begin(), outer_order.end(), 0); - std::sort(outer_order.begin(), outer_order.end(), - [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || - (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - - // IE blocked order - // [new_outer_order] U [inner_idxs] - SizeVector ie_blk_order(total_ndims, 0); - std::copy(outer_order.begin(), outer_order.end(), ie_blk_order.begin()); - std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, ie_blk_order.begin() + dims.size()); - - // IE blocked strides - // [outer_strides via new_outer_order] U [inner_strides] - SizeVector ie_blk_strides(total_ndims, 0); - std::copy(inner_strides.rbegin(), inner_strides.rend(), ie_blk_strides.rbegin()); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_strides.begin(), - [&] (size_t i) { return blk_desc.strides[i]; }); - - // IE blocked dims - // [dims via new_outer_order with auto pad] U [inner_blk_dims] - SizeVector ie_blk_dims(total_ndims, 0); - std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, - ie_blk_dims.end() - blk_desc.inner_nblks); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), - [&] (size_t i) { return outer_block_dims[i]; }); - - // IE offset padded to data. Same as for oneDNN - SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; - size_t ie_blk_offset0 = desc.data.offset0; - - // TODO: The tensor desc implementation allow to specify offset_to_data for inner blocked dims. - // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will - // fill it with zero. 
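For reference, the conversion steps described in the comment above can be traced on the IOhw_4i16o4i example it mentions. The snippet below is only an illustration of the expected output vectors, not part of the patch: for dims {32, 64, 128, 128} with inner blocks 4i16o4i, the descending-stride outer order is I, O, h, w.

    #include <cstddef>
    #include <vector>

    int main() {
        // order: outer order {1, 0, 2, 3} (I, O, h, w) followed by inner_idxs {1, 0, 1}
        std::vector<std::size_t> order   = {1, 0, 2, 3, 1, 0, 1};
        // blocked dims: outer dims divided by their total block size (I: 64 / 16 = 4, O: 32 / 16 = 2)
        // taken in the new order, followed by the inner blocks {4, 16, 4}
        std::vector<std::size_t> blkDims = {4, 2, 128, 128, 4, 16, 4};
        // strides: outer strides in the new order followed by the inner strides {64, 4, 1}
        std::vector<std::size_t> strides = {8388608, 4194304, 32768, 256, 64, 4, 1};
        return 0;
    }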
- ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); - - BlockedMemoryDesc res(MKLDNNMemory::convertToIePrec(desc.data_type()), SizeVector {begin(dims), end(dims)}, ie_blk_dims, - ie_blk_order, ie_blk_offset0, ie_blk_offset_to_data, ie_blk_strides); - return res; -} - - -InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) { - if (auto blockingDesc = dynamic_cast(&desc)) { - return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), - {blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(), - blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()}); - } else if (auto mkldnnDesc = dynamic_cast(&desc)) { - auto blockingDesc = convertToBlockedDescriptor(*mkldnnDesc); - return InferenceEngine::TensorDesc(blockingDesc.getPrecision(), blockingDesc.getShape().getStaticDims(), - {blockingDesc.getBlockDims(), blockingDesc.getOrder(), blockingDesc.getOffsetPadding(), - blockingDesc.getOffsetPaddingToData(), blockingDesc.getStrides()}); - } - - IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc"; - - return InferenceEngine::TensorDesc(); -} - -MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const MemoryDesc& desc) { - if (MemoryDescType::Blocked == desc.getType()) { - return convertToMKLDNNMemoryDesc(*(desc.as())); - } else if (MemoryDescType::Mkldnn == desc.getType()) { - return *(desc.as()); - } else { - IE_THROW() << "Cannot convert MemoryDesc to MKLDNNMemoryDesc"; - } -} - -MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc) { - dnnl_memory_desc_t mkldnnDesc; - - // scalar case - if (desc.getShape().getRank() == 0) { - mkldnn::memory::desc convertedDesc; - convertedDesc.data.format_kind = dnnl_blocked; - convertedDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); - convertedDesc.data.ndims = 1; - convertedDesc.data.dims[0] = 1; - convertedDesc.data.padded_dims[0] = 1; - convertedDesc.data.format_desc.blocking.strides[0] = 1; - convertedDesc.data.padded_offsets[0] = 0; - convertedDesc.data.offset0 = desc.getOffsetPadding(); - return MKLDNNMemoryDesc(convertedDesc); - } - - auto dims = desc.getShape().getStaticDims(); - - auto ie_blkdDims = desc.getBlockDims(); - auto ie_order = desc.getOrder(); - auto ie_offsetsToData = desc.getOffsetPaddingToData(); - auto ie_strides = desc.getStrides(); - - size_t outer_ndims = dims.size(); - size_t inner_ndims = ie_order.size() - dims.size(); - - bool is_descending_strides = true; - for (int i = 1; i < ie_strides.size(); i++) { - is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); - } - - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims - // and may be we can achieve correct "descending strides" form which allow conversion. 
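The descending-strides requirement computed just above (and enforced right after) can be illustrated with a small standalone helper; this is a hypothetical sketch, not code from the patch. Strides listed in blocked order must be non-increasing, so a plain dense layout such as {16, 4, 1} passes while a transposed one such as {1, 8, 2} is rejected.

    #include <cstddef>
    #include <vector>

    // Mirrors the is_descending_strides computation above: true when strides never grow
    // from one blocked dimension to the next.
    static bool stridesDescending(const std::vector<std::size_t>& strides) {
        for (std::size_t i = 1; i < strides.size(); ++i)
            if (strides[i - 1] < strides[i])
                return false;
        return true;
    }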
- if (!is_descending_strides) - IE_THROW() << "Unsupported case for conversion"; - - std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension - for (size_t i = 0; i < outer_ndims; i++) { - outer_order[ie_order[i]] = i; - } - bool outer_is_correct_permutation_of_n = - std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); - - if (!outer_is_correct_permutation_of_n) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted - for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { - inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); - } - - if (!inner_block_are_dense) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), - [](size_t pad) { return pad == 0; }); - - if (!inner_pad_offsets_is_zero) - IE_THROW() << "Unsupported case for conversion"; - - // Fill general memory desc fields - mkldnnDesc.format_kind = dnnl_blocked; - mkldnnDesc.extra.flags = 0; - mkldnnDesc.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); - mkldnnDesc.ndims = dims.size(); - mkldnnDesc.offset0 = desc.getOffsetPadding(); - std::copy(dims.begin(), dims.end(), mkldnnDesc.dims); - std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.padded_offsets); - std::fill(mkldnnDesc.padded_dims, mkldnnDesc.padded_dims + outer_ndims, 1); - for (size_t i = 0; i < ie_order.size(); i++) { - auto idx = ie_order[i]; - mkldnnDesc.padded_dims[idx] *= ie_blkdDims[i]; - } - - // Fill blocking desc - auto &dnn_blk_desc = mkldnnDesc.format_desc.blocking; - dnn_blk_desc.inner_nblks = inner_ndims; - std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); - std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); - for (size_t i = 0; i < outer_ndims; i++) { - dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; - } - - return MKLDNNMemoryDesc(mkldnnDesc); -} - - -/** - * Construct from IE::TensorDesc - * @param tDesc - * - * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} - * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. - * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence - * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims - * - * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) - * - * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of - * real dims spliting. - * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] - * but not [0]<=>[4] because it breacke spliting original dims into internal blocked dims - * Normalization of representation: Make strides growing but keep layout same as original. Not all - * layout allow us to meet normalize form of tensor desc. - * - * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... 
N] - */ -MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& tDesc) { - mkldnn::memory::desc mkldnnDesc({}, mkldnn::memory::data_type::undef, mkldnn::memory::format_tag::undef); - auto dims = tDesc.getDims(); - - // TODO: implicit conversion of dims is no good... - if (tDesc.getLayout() == Layout::SCALAR) { - mkldnnDesc.data.format_kind = dnnl_blocked; - mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - mkldnnDesc.data.ndims = 1; - mkldnnDesc.data.dims[0] = 1; - mkldnnDesc.data.padded_dims[0] = 1; - mkldnnDesc.data.format_desc.blocking.strides[0] = 1; - mkldnnDesc.data.padded_offsets[0] = 0; - mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - return MKLDNNMemoryDesc(mkldnnDesc); - } - - if (tDesc.getLayout() == Layout::ANY) { - mkldnnDesc.data.format_kind = dnnl_format_kind_any; - mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - mkldnnDesc.data.ndims = dims.size(); - std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); - std::copy(dims.begin(), dims.end(), mkldnnDesc.data.padded_dims); - mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::fill(mkldnnDesc.data.padded_offsets, mkldnnDesc.data.padded_offsets + dims.size(), 0); - return MKLDNNMemoryDesc(mkldnnDesc); - } - - auto ie_blkdDims = tDesc.getBlockingDesc().getBlockDims(); - auto ie_order = tDesc.getBlockingDesc().getOrder(); - auto ie_offsetsToData = tDesc.getBlockingDesc().getOffsetPaddingToData(); - auto ie_strides = tDesc.getBlockingDesc().getStrides(); - - size_t outer_ndims = dims.size(); - size_t inner_ndims = ie_order.size() - dims.size(); - - bool is_descending_strides = true; - for (int i = 1; i < ie_strides.size(); i++) { - is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); - } - - // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims - // and may be we can achieve correct "descending strides" form which allow conversion. 
- if (!is_descending_strides) - IE_THROW() << "Unsupported case for conversion"; - - std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension - for (size_t i = 0; i < outer_ndims; i++) { - outer_order[ie_order[i]] = i; - } - bool outer_is_correct_permutation_of_n = - std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); - - if (!outer_is_correct_permutation_of_n) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted - for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { - inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); - } - - if (!inner_block_are_dense) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), - [](size_t pad) { return pad == 0; }); - - if (!inner_pad_offsets_is_zero) - IE_THROW() << "Unsupported case for conversion"; - - // Fill general memory desc fields - mkldnnDesc.data.format_kind = dnnl_blocked; - mkldnnDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(tDesc.getPrecision())); - mkldnnDesc.data.ndims = dims.size(); - mkldnnDesc.data.offset0 = tDesc.getBlockingDesc().getOffsetPadding(); - std::copy(dims.begin(), dims.end(), mkldnnDesc.data.dims); - std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.data.padded_offsets); - std::fill(mkldnnDesc.data.padded_dims, mkldnnDesc.data.padded_dims + outer_ndims, 1); - for (size_t i = 0; i < ie_order.size(); i++) { - auto idx = ie_order[i]; - mkldnnDesc.data.padded_dims[idx] *= ie_blkdDims[i]; - } - - // Fill blocking desc - auto &dnn_blk_desc = mkldnnDesc.data.format_desc.blocking; - dnn_blk_desc.inner_nblks = inner_ndims; - std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); - std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); - for (size_t i = 0; i < outer_ndims; i++) { - dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; - } - - return MKLDNNMemoryDesc(mkldnnDesc); -} - -BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MemoryDesc &desc) { - if (desc.getType() == MemoryDescType::Blocked) { - return *(desc.as()); - } else if (desc.getType() == MemoryDescType::Mkldnn) { - return MemoryDescUtils::convertToBlockedDescriptor(*(desc.as())); - } else { - IE_THROW() << "Cannot convert to blocked memory descriptor. 
Unsupported memory desc type"; - } -} - -MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const MKLDNNMemoryDesc& desc) { - if (desc.getFormatKind() != dnnl_format_kind_t::dnnl_blocked) - IE_THROW() << "applyUndefinedOffset doesn't support not dnnl_blocked MKLDNNMemoryDesc"; - - mkldnn::memory::desc retDesc = desc; - retDesc.data.offset0 = Shape::UNDEFINED_DIM; - return MKLDNNPlugin::make_unique(retDesc); -} - -MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const BlockedMemoryDesc &desc) { - std::vector strides; - std::vector offsetPaddingToData; - - strides.resize(desc.getBlockDims().size(), Shape::UNDEFINED_DIM); - offsetPaddingToData.resize(desc.getBlockDims().size(), 0); - size_t offsetPadding = Shape::UNDEFINED_DIM; - - return MKLDNNPlugin::make_unique(desc.getPrecision(), desc.getShape().getDims(), desc.getBlockDims(), - desc.getOrder(), offsetPadding, offsetPaddingToData, strides); -} - -MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc) { - if (MemoryDescType::Blocked == desc->getType()) { - auto blockedDesc = desc->as(); - return MKLDNNPlugin::make_unique(blockedDesc->getPrecision(), blockedDesc->getShape().getDims(), - blockedDesc->getBlockDims(), blockedDesc->getOrder()); - } else if (MemoryDescType::Mkldnn == desc->getType()) { - auto mkldnnDesc = desc->as(); - mkldnn::memory::desc retDesc = *mkldnnDesc; - retDesc.data.offset0 = 0; - return MKLDNNPlugin::make_unique(retDesc); - } else { - IE_THROW() << "resetOffset support Blocked and Mkldnn descpriptors only"; - } -} - -InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) { - // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor - auto& memDesc = mem.GetDesc(); - InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); - - desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); - return MKLDNNPlugin::isEmptyTensorDesc(desc) ? 
make_blob_with_precision(desc) : make_blob_with_precision(desc, mem.GetData()); -} - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h deleted file mode 100644 index 5cc6b0fc1038c7..00000000000000 --- a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -namespace MKLDNNPlugin { -class MKLDNNMemoryDesc; -class BlockedMemoryDesc; -class MKLDNNMemory; - -class MemoryDescUtils { -public: - /** - * @brief Converts MemoryDesc to InferenceEngine::TensorDesc - * @param desc MemoryDesc to be converted - * @return converted InferenceEngine::TensorDesc - */ - static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc); - - /** - * @brief Converts MemoryDesc to MKLDNNMemoryDesc - * @param desc MemoryDesc to be converted - * @return converted MKLDNNMemoryDesc - */ - static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const MemoryDesc& desc); - - /** - * @brief Converts BlockedMemoryDesc to MKLDNNMemoryDesc - * @param desc BlockedMemoryDesc to be converted - * @return converted MKLDNNMemoryDesc - */ - static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc); - - /** - * @brief Converts InferenceEngine::TensorDesc to MKLDNNMemoryDesc - * @param desc InferenceEngine::TensorDesc to be converted - * @return converted MKLDNNMemoryDesc - */ - static MKLDNNMemoryDesc convertToMKLDNNMemoryDesc(const InferenceEngine::TensorDesc& desc); - - /** - * @brief Converts MemoryDesc to BlockedMemoryDesc - * @param desc MemoryDesc to be converted - * @return converted BlockedMemoryDesc - */ - static BlockedMemoryDesc convertToBlockedDescriptor(const MemoryDesc& desc); - - /** - * @brief Converts MKLDNNMemoryDesc to BlockedMemoryDesc - * @param desc MKLDNNMemoryDesc to be converted - * @return converted BlockedMemoryDesc - */ - static BlockedMemoryDesc convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc); - - /** - * @brief Creates MKLDNNMemoryDesc with offset0 of UNDEFINED_DIM size - * @param desc modifiable MKLDNNMemoryDesc - * @return pointer to MKLDNNMemoryDesc - */ - static MemoryDescPtr applyUndefinedOffset(const MKLDNNMemoryDesc& desc); - - /** - * @brief Creates BlockedMemoryDesc with offsetPadding, strides of UNDEFINED_DIM size and offsetPaddingToData of 0 size - * @param desc modifiable BlockedMemoryDesc - * @return pointer to BlockedMemoryDesc - */ - static MemoryDescPtr applyUndefinedOffset(const BlockedMemoryDesc& desc); - - /** - * @brief Creates MemoryDesc with offsetPadding of 0 size - * @param desc modifiable MemoryDesc - * @return pointer to MemoryDesc - */ - static MemoryDescPtr resetOffset(const MemoryDesc* desc); - - /** - * @brief Creates InferenceEngine::Blob from MKLDNNMemory - * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob - * @return pointer to InferenceEngine::Blob - */ - static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem); -}; - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_shape.cpp b/inference-engine/src/mkldnn_plugin/cpu_shape.cpp new file mode 100644 index 00000000000000..2e0ab9e67e2b32 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/cpu_shape.cpp @@ -0,0 +1,49 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_shape.h" 
+#include "utils/general_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" + +using namespace MKLDNNPlugin; + +bool Shape::isCompatible(const VectorDims &vecDims) const { + if (getRank() != vecDims.size()) { + return false; + } + + auto comparator = [](Dim lhs, Dim rhs) { + return (lhs == rhs) || (lhs == Shape::UNDEFINED_DIM); + }; + + if (!std::equal(getDims().begin(), getDims().end(), vecDims.begin(), comparator)) { + return false; + } + + if (!std::equal(getMaxDims().begin(), getMaxDims().end(), vecDims.begin(), [](Dim lhs, Dim rhs) { return lhs >= rhs; })) { + return false; + } + + if (!std::equal(getMinDims().begin(), getMinDims().end(), vecDims.begin(), [](Dim lhs, Dim rhs) { return lhs <= rhs; })) { + return false; + } + return true; +} + +std::string Shape::toString() const { + std::stringstream output; + output << "{"; + + size_t i = 0; + do { + if (dims[i] == Shape::UNDEFINED_DIM) { + output << MemoryDescUtils::dim2str(minDims[i]) << " - " << MemoryDescUtils::dim2str(maxDims[i]); + } else { + output << dims[i]; + } + } while (++i < dims.size() && output << ", "); + + output << "}"; + return output.str(); +} diff --git a/inference-engine/src/mkldnn_plugin/cpu_shape.h b/inference-engine/src/mkldnn_plugin/cpu_shape.h index fd063c2dc18c13..0972ebd227fa48 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_shape.h +++ b/inference-engine/src/mkldnn_plugin/cpu_shape.h @@ -9,7 +9,7 @@ #include #include #include -#include "mkldnn_dims.h" +#include "cpu_types.h" namespace MKLDNNPlugin { @@ -19,7 +19,9 @@ class Shape { explicit Shape(const ngraph::PartialShape& shape) { minDims = shape.get_min_shape(); + std::transform(minDims.begin(), minDims.end(), minDims.begin(), [](Dim x){ return ngraph::Interval::s_max == x ? UNDEFINED_DIM : x;}); maxDims = shape.get_max_shape(); + std::transform(maxDims.begin(), maxDims.end(), maxDims.begin(), [](Dim x){ return ngraph::Interval::s_max == x ? UNDEFINED_DIM : x;}); type = shape.is_static() ? 
ShapeType::Static : ShapeType::Dynamic; initDims(); @@ -34,7 +36,7 @@ class Shape { } /** - * @brief + * @brief * for static shape * maxDims = [2, 3, 4, 5] * minDims = [2, 3, 4, 5] @@ -46,12 +48,12 @@ class Shape { * dims = [UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM] * @return return lower bound of shape = [1, 1, 1, 1] */ - const std::vector& getMinDims() const { + const VectorDims& getMinDims() const { return minDims; } /** - * @brief + * @brief * for static shape * maxDims = [2, 3, 4, 5] * minDims = [2, 3, 4, 5] @@ -63,15 +65,15 @@ class Shape { * dims = [UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM, UNDEFINED_DIM] * @return return upper bound of shape = [6, 6, 6, 6] */ - const std::vector& getMaxDims() const { + const VectorDims& getMaxDims() const { return maxDims; } /** - * @brief return defined shape or throw exception for dynamic case + * @brief return defined shape or throw exception for dynamic case * @return return shape */ - const std::vector& getStaticDims() const { + const VectorDims& getStaticDims() const { if (type != ShapeType::Static) { IE_THROW() << "Cannot get dims for non static shape"; } @@ -80,7 +82,7 @@ class Shape { } /** - * @brief + * @brief * for static shape * maxDims = [2, 3, 4, 5] * minDims = [2, 3, 4, 5] @@ -92,13 +94,18 @@ class Shape { * dims = [2, 3, UNDEFINED_DIM, UNDEFINED_DIM] * @return return shape with defined and undefined dims = [2, 3, UNDEFINED_DIM, UNDEFINED_DIM] */ - const std::vector& getDims() const { + const VectorDims& getDims() const { return dims; } + bool isStatic() const { return type == ShapeType::Static; } + bool isDynamic() const { + return type == ShapeType::Dynamic; + } + size_t getRank() const { return minDims.size(); } @@ -118,14 +125,21 @@ class Shape { } ngraph::PartialShape toPartialShape() const { - std::vector nGraphDims; + using ngraph::Dimension; + std::vector nGraphDims; nGraphDims.reserve(minDims.size()); for (int i = 0; i < minDims.size(); i++) { - nGraphDims.emplace_back(minDims[i], maxDims[i]); + Dimension::value_type minDim = Shape::UNDEFINED_DIM == minDims[i] ? -1 : minDims[i]; + Dimension::value_type maxDim = Shape::UNDEFINED_DIM == maxDims[i] ? 
-1 : maxDims[i]; + nGraphDims.emplace_back(minDim, maxDim); } return ngraph::PartialShape(nGraphDims); } + bool isCompatible(const VectorDims& vecDims) const; + + std::string toString() const; + bool operator == (const Shape& rhs) const { return minDims == rhs.minDims && maxDims == rhs.maxDims; } @@ -134,7 +148,11 @@ class Shape { return !(*this == rhs); } - enum : size_t { + bool hasDefinedUpperBounds() const { + return std::all_of(maxDims.begin(), maxDims.end(), [](Dim dim){ return dim != UNDEFINED_DIM; }); + } + + enum : Dim { UNDEFINED_DIM = 0xffffffffffffffff }; @@ -151,9 +169,8 @@ class Shape { Dynamic } type {ShapeType::Static}; - std::vector minDims; - std::vector maxDims; - std::vector dims; + VectorDims minDims; + VectorDims maxDims; + VectorDims dims; }; - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_types.h b/inference-engine/src/mkldnn_plugin/cpu_types.h index 7c820c4db50ccf..130fc142e980f5 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_types.h +++ b/inference-engine/src/mkldnn_plugin/cpu_types.h @@ -4,8 +4,13 @@ #pragma once +#include + namespace MKLDNNPlugin { +using Dim = std::size_t; +using VectorDims = std::vector; + enum Type { Unknown, Generic, diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.cpp new file mode 100644 index 00000000000000..a7b231c37af35e --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.cpp @@ -0,0 +1,34 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "blocked_memory_desc.h" +#include "utils/general_utils.h" + +using namespace MKLDNNPlugin; + +bool BlockedMemoryDesc::isCompatible(const BlockedMemoryDesc &rhs) const { + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) + return false; + + if (!dimsEqualWeak(this->getBlockDims(), rhs.getBlockDims())) { + return false; + } + + if (!dimsEqualWeak(this->getOffsetPaddingToData(), rhs.getOffsetPaddingToData())) { + return false; + } + + // this check needed to avoid inserting unnecessary reorders if the memory is used in place and the batch size is equal to 1 + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 
0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(this->getStrides(), rhs.getStrides(), skipAxis)) { + return false; + } + + if (!dimsEqualWeak(this->getOrder(), rhs.getOrder())) { + return false; + } + + return dimsEqualWeak(this->getOffsetPadding(), rhs.getOffsetPadding()); +} diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.h new file mode 100644 index 00000000000000..ac7a90185b999f --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/blocked_memory_desc.h @@ -0,0 +1,83 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_memory_desc.h" + +namespace MKLDNNPlugin { + +class BlockedMemoryDesc : public virtual MemoryDesc { +public: + BlockedMemoryDesc() {} + + /** + * @brief Returns the blocked dimensions + * + * @return blocked dimensions + */ + virtual const VectorDims& getBlockDims() const = 0; + + /** + * @brief Returns the vector of order + * + * @return order + */ + virtual const VectorDims& getOrder() const = 0; + + /** + * @brief Returns the per-dimension offset vector + * + * @return offsets + */ + virtual const VectorDims& getOffsetPaddingToData() const = 0; + + /** + * @brief Returns the offset to the current memory block + * + * @return offset + */ + virtual size_t getOffsetPadding() const = 0; + + /** + * @brief Returns strides for each dimension + * + * @return strides + */ + virtual const VectorDims& getStrides() const = 0; + + /** + * @brief Check that desc has padded dims + * + * @return true if exist padded dims, otherwise false + */ + virtual bool blocksExtended() const = 0; + + /** + * @brief Compute number of elements taking into account padded dims + * + * @return number of elements taking into account padded dims + */ + virtual size_t getPaddedElementsCount() const = 0; + +protected: + /** + * @brief Check descs on compatibility + * WARNING: Check only BlockedMemoryDesc specific attributes like: strides, order etc. 
+ * Doesn't perform type check for descs + * Doesn't perform descs specific attributes check + * @return true if compatible, otherwise false + */ + bool isCompatible(const BlockedMemoryDesc &rhs) const; + + mutable VectorDims blockedDims; + mutable VectorDims strides; + mutable VectorDims order; + mutable VectorDims offsetPaddingToData; +}; + +using BlockedMemoryDescPtr = std::shared_ptr; +using BlockedMemoryDescCPtr = std::shared_ptr; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.cpp similarity index 57% rename from inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp rename to inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.cpp index 6041e1f3f7b63e..38ebcbba5d47b1 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.cpp @@ -4,11 +4,12 @@ #include "cpu_blocked_memory_desc.h" #include "mkldnn_memory.h" -#include "utils/cpu_utils.hpp" +#include "dnnl_blocked_memory_desc.h" using namespace MKLDNNPlugin; -BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims) : MemoryDesc(dims, Blocked) , precision(prc) { +CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape) : MemoryDesc(shape, Blocked), precision(prc) { + auto& dims = shape.getDims(); order.resize(dims.size()); std::iota(order.begin(), order.end(), 0); blockedDims = dims; @@ -21,15 +22,15 @@ BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std:: } } -BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims, const std::vector& blockedDims, - const std::vector& order, size_t offsetPadding, const std::vector& offsetPaddingToData, - const std::vector& strides) : MemoryDesc(dims, Blocked), precision(prc) { +CpuBlockedMemoryDesc::CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& blockedDims, + const VectorDims& order, size_t offsetPadding, const VectorDims& offsetPaddingToData, + const VectorDims& strides) : MemoryDesc(shape, Blocked), precision(prc) { if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { - IE_THROW() << "BlockedMemoryDesc do not support undefined order."; + IE_THROW() << "CpuBlockedMemoryDesc do not support undefined order."; } - if (std::any_of(blockedDims.begin() + dims.size(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { - IE_THROW() << "BlockedMemoryDesc doesn't support undefined blockedDims."; + if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "CpuBlockedMemoryDesc doesn't support undefined blockedDims."; } this->order = order; @@ -65,7 +66,7 @@ BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const std:: } } -bool BlockedMemoryDesc::isDefined() const { +bool CpuBlockedMemoryDesc::isDefinedImp() const { bool defined = true; defined = defined && std::none_of(blockedDims.cbegin(), blockedDims.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); defined = defined && std::none_of(strides.cbegin(), strides.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); @@ -76,48 +77,26 @@ bool BlockedMemoryDesc::isDefined() const { return defined; } -bool 
BlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { +bool CpuBlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { const MemoryDesc* pRhs = &rhs; - if (auto blockingDesc = dynamic_cast(pRhs)) { - return isCompatible(*blockingDesc); - } else if (auto mkldnnDesc = dynamic_cast(pRhs)) { - return mkldnnDesc->isCompatible(*this); + if (auto cpuBlkDesc = dynamic_cast(pRhs)) { + return isCompatible(*cpuBlkDesc); + } else if (auto dnnlBlkDesc = dynamic_cast(pRhs)) { + return isCompatible(*dnnlBlkDesc); } else { return false; } } -bool BlockedMemoryDesc::isCompatible(const BlockedMemoryDesc& rhs) const { - if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) - return false; - - if (!dimsEqualWeak(this->getBlockDims(), rhs.getBlockDims())) { - return false; - } - - if (!dimsEqualWeak(this->getOffsetPaddingToData(), rhs.getOffsetPaddingToData())) { - return false; - } - - // this check needed to avoid inserting unnecessary reorders if the memory is used in place and the batch size is equal to 1 - size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : - Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 - if (!dimsEqualWeak(this->getStrides(), rhs.getStrides(), skipAxis)) { - return false; - } - - if (!dimsEqualWeak(this->getOrder(), rhs.getOrder())) { - return false; - } - - return dimsEqualWeak(this->getOffsetPadding(), rhs.getOffsetPadding()); +bool CpuBlockedMemoryDesc::isCompatible(const CpuBlockedMemoryDesc &rhs) const { + return BlockedMemoryDesc::isCompatible(rhs); } -bool BlockedMemoryDesc::isCompatible(const MKLDNNMemoryDesc& rhs) const { +bool CpuBlockedMemoryDesc::isCompatible(const DnnlBlockedMemoryDesc &rhs) const { return rhs.isCompatible(*this); } -size_t BlockedMemoryDesc::getMemSizeImp() const { +size_t CpuBlockedMemoryDesc::getCurrentMemSizeImp() const { int64_t e_size = getOffsetPadding() + 1; // size in bytes (from begin of data to last element) for (int j = 0; j < getBlockDims().size(); j++) e_size += (getBlockDims()[j] - 1) * getStrides()[j]; @@ -128,7 +107,21 @@ size_t BlockedMemoryDesc::getMemSizeImp() const { return e_size; } -size_t BlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const { +size_t CpuBlockedMemoryDesc::getMaxMemSize() const { + if (shape.isStatic()) { + return getCurrentMemSize(); + } + + auto& maxDims = shape.getMaxDims(); + if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { + return UNDEFINED_SIZE; + } + + auto maxDimsDesc = cloneWithNewDims(maxDims); + return maxDimsDesc->getCurrentMemSize(); +} + +size_t CpuBlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const { InferenceEngine::SizeVector off_v = v; size_t n_blocked_dims = order.size(); @@ -148,7 +141,7 @@ size_t BlockedMemoryDesc::getOffset(const InferenceEngine::SizeVector& v) const return offset; } -size_t BlockedMemoryDesc::getElementOffset(size_t elemNumber) const { +size_t CpuBlockedMemoryDesc::getElementOffset(size_t elemNumber) const { // TODO [DS]: rewrite to support dynamic shapes auto& dims = shape.getStaticDims(); size_t n_dims = dims.size(); @@ -162,7 +155,7 @@ size_t BlockedMemoryDesc::getElementOffset(size_t elemNumber) const { return getOffset(pos); } -bool BlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { +bool CpuBlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { switch (layoutType) { case LayoutType::ncsp: return isPlainFormat(); @@ -177,7 +170,7 @@ bool 
BlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { } } -bool BlockedMemoryDesc::isPlainFormat() const { +bool CpuBlockedMemoryDesc::isPlainFormat() const { if (shape.getRank() != order.size()) { return false; } @@ -189,7 +182,7 @@ bool BlockedMemoryDesc::isPlainFormat() const { return true; } -bool BlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { +bool CpuBlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { if ((order.size() - shape.getRank()) != 1) { return false; } @@ -207,7 +200,7 @@ bool BlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { return true; } -bool BlockedMemoryDesc::isTailCFormat() const { +bool CpuBlockedMemoryDesc::isTailCFormat() const { if (shape.getRank() < 3) { return false; } @@ -223,7 +216,7 @@ bool BlockedMemoryDesc::isTailCFormat() const { return true; } -std::string BlockedMemoryDesc::serializeFormat() const { +std::string CpuBlockedMemoryDesc::serializeFormat() const { std::stringstream result; char startLetter = 'a'; std::unordered_map mapAxisBlockSize; @@ -245,3 +238,64 @@ std::string BlockedMemoryDesc::serializeFormat() const { return result.str(); } + +MemoryDescPtr CpuBlockedMemoryDesc::cloneWithNewDimsImp(const VectorDims &dims) const { + if (std::any_of(dims.begin(), dims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { + IE_THROW() << "Can't clone desc if new dims are undefined"; + } + + // TODO [DS]: add stride recalculation for strided blobs + for (int i = strides.size() - 2; i >= 0 ; i--) { + if (strides[i] == Shape::UNDEFINED_DIM) + break; + + if (strides[i] != strides[i + 1] * blockedDims[i + 1]) + IE_THROW(NotImplemented) << "Can't clone desc with new dims for not dense tensor"; + } + + VectorDims newBlockedDims(order.size()); + + for (size_t i = 0; i < dims.size(); ++i) { + newBlockedDims[order[i]] = dims[i]; + } + + for (size_t i = dims.size(); i < order.size(); ++i) { + if (newBlockedDims[order[i]] != Shape::UNDEFINED_DIM) { + newBlockedDims[order[i]] = div_up(newBlockedDims[order[i]], blockedDims[i]); + newBlockedDims[i] = blockedDims[i]; + } + } + + VectorDims newOffsetPaddingToData; + if (std::none_of(offsetPaddingToData.begin(), offsetPaddingToData.end(), [](size_t x){ return x == Shape::UNDEFINED_DIM;})) { + newOffsetPaddingToData = offsetPaddingToData; + } + + return std::make_shared(precision, Shape(dims), newBlockedDims, order, offsetPadding, newOffsetPaddingToData); +} + +bool CpuBlockedMemoryDesc::blocksExtended() const { + const size_t rank = shape.getRank(); + for (size_t i = rank; i < order.size(); i++) { + size_t idx = order[i]; + Dim paddedDim = 1; + for (size_t j = rank; j < order.size(); j++) { + if (order[j] == idx) + paddedDim *= blockedDims[j]; + } + if (blockedDims[idx] == Shape::UNDEFINED_DIM) { + paddedDim = Shape::UNDEFINED_DIM; + } else { + paddedDim *= blockedDims[idx]; + } + if (paddedDim != shape.getDims()[idx]) + return true; + } + return false; +} + +size_t CpuBlockedMemoryDesc::getPaddedElementsCount() const { + if (std::any_of(blockedDims.begin(), blockedDims.end(), [](Dim dim) { return dim == Shape::UNDEFINED_DIM; })) + IE_THROW() << "Can't compute padded elements count for non undefined blocked dims"; + return std::accumulate(blockedDims.begin(), blockedDims.end(), size_t{1}, std::multiplies()); +} diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.h similarity index 51% rename from inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h rename to 
inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.h index 2c5b8a7d53cbdb..40a465108f587a 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_blocked_memory_desc.h @@ -4,41 +4,32 @@ #pragma once -#include "cpu_memory_desc.h" +#include "blocked_memory_desc.h" +#include "utils/general_utils.h" namespace MKLDNNPlugin { -class MKLDNNMemoryDesc; - -class BlockedMemoryDesc : public MemoryDesc { +class CpuBlockedMemoryDesc : public BlockedMemoryDesc { public: - BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims); + CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape); - BlockedMemoryDesc(InferenceEngine::Precision prc, const std::vector& dims, const std::vector& blockedDims, - const std::vector& order, size_t offsetPadding = 0, const std::vector& offsetPaddingToData = {}, - const std::vector& strides = {}); + CpuBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& blockedDims, + const VectorDims& order, size_t offsetPadding = 0, const VectorDims& offsetPaddingToData = {}, + const VectorDims& strides = {}); MemoryDescPtr clone() const override { - return MKLDNNPlugin::make_unique(*this); + return std::make_shared(*this); } - bool isDefined() const override; - bool isCompatible(const MemoryDesc& rhs) const override; - - bool isCompatible(const BlockedMemoryDesc& rhs) const; - - bool isCompatible(const MKLDNNMemoryDesc& rhs) const; + bool isCompatible(const CpuBlockedMemoryDesc &rhs) const; + bool isCompatible(const DnnlBlockedMemoryDesc &rhs) const; InferenceEngine::Precision getPrecision() const override { return precision; } - void setPrecision(InferenceEngine::Precision prc) override { - precision = std::move(prc); - } - - const std::vector& getBlockDims() const { + const VectorDims& getBlockDims() const override { return blockedDims; } @@ -47,7 +38,7 @@ class BlockedMemoryDesc : public MemoryDesc { * * @return order */ - const std::vector& getOrder() const { + const VectorDims& getOrder() const override { return order; } @@ -56,7 +47,7 @@ class BlockedMemoryDesc : public MemoryDesc { * * @return offsets */ - const std::vector& getOffsetPaddingToData() const { + const VectorDims& getOffsetPaddingToData() const override { return offsetPaddingToData; } /** @@ -64,7 +55,7 @@ class BlockedMemoryDesc : public MemoryDesc { * * @return offset */ - size_t getOffsetPadding() const { + size_t getOffsetPadding() const override { return offsetPadding; } @@ -73,28 +64,41 @@ class BlockedMemoryDesc : public MemoryDesc { * * @return strides */ - const std::vector& getStrides() const { + const VectorDims& getStrides() const override { return strides; } + bool blocksExtended() const override; + bool hasLayoutType(LayoutType layoutType) const override; std::string serializeFormat() const override; + size_t getMaxMemSize() const override; + + size_t getPaddedElementsCount() const override; + private: size_t getElementOffset(size_t elemNumber) const override; - size_t getMemSizeImp() const override; + size_t getCurrentMemSizeImp() const override; size_t getOffset(const InferenceEngine::SizeVector& v) const; bool isPlainFormat() const; bool isBlockedCFormat(size_t blk_size) const; bool isTailCFormat() const; + bool isDefinedImp() const override; + MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const override; + + void setPrecision(InferenceEngine::Precision prc) override { + precision = std::move(prc); + } private: 
InferenceEngine::Precision precision; - std::vector blockedDims; - std::vector strides; - std::vector order; - std::vector offsetPaddingToData; size_t offsetPadding; + mutable VectorDims paddedDims; }; + +using CpuBlockedMemoryDescPtr = std::shared_ptr; +using CpuBlockedMemoryDescCPtr = std::shared_ptr; + } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc.h similarity index 52% rename from inference-engine/src/mkldnn_plugin/cpu_memory_desc.h rename to inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc.h index 31d2b4b2091f00..d791e717b2fae0 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc.h @@ -7,13 +7,32 @@ #include #include #include "cpu_shape.h" -#include "utils/general_utils.h" +#include "cpu_types.h" +#include "memory_desc/cpu_memory_desc_utils.h" + +/** + * @brief + * + * MemoryDesc - the descriptor of tensor representation in memory. Describes all required information + * for proper allocation and handling tensor in some buffer. The real memory is not present, just description. + * This object answers on question how and where data with logical index [x1, x2, .. xN] placed in real buffer. + * In the simplest case it describe a mapping between "logical offset" and "real offset". + * + */ namespace MKLDNNPlugin { +class MemoryDesc; + +using MemoryDescPtr = std::shared_ptr; +using MemoryDescCPtr = std::shared_ptr; + enum MemoryDescType { - Blocked, - Mkldnn + Undef = 0, + Blocked = 1, + Mkldnn = 1 << 1, + + DnnlBlocked = Blocked | Mkldnn }; enum class LayoutType : unsigned { @@ -37,27 +56,43 @@ class MemoryDesc { virtual InferenceEngine::Precision getPrecision() const = 0; - virtual void setPrecision(InferenceEngine::Precision prc) = 0; + virtual MemoryDescPtr clone() const = 0; - virtual std::unique_ptr clone() const = 0; + // clone descriptor with new dims. Throws an exception if some of the new dims conflicts with the internal shape (i.e. its defined dims ,rank, upper bounds) + MemoryDescPtr cloneWithNewDims(const VectorDims& dims) const { + if (!getShape().isCompatible(dims)) { + IE_THROW(ParameterMismatch) << "Can not clone with new dims. Descriptor's shape: " << getShape().toString() << + " is incompatible with provided dimensions: " << MemoryDescUtils::dims2str(dims) << "."; + } + + return cloneWithNewDimsImp(dims); + } virtual bool isCompatible(const MemoryDesc& rhs) const = 0; // Checks that all dimensions, offsets, strides, etc are defined (!= UNDEFINED_DIM) - virtual bool isDefined() const = 0; + bool isDefined() const { + if (descStatus::Unknown == status) { + status = isDefinedImp() ? descStatus::Defined : descStatus::Undefined; + } + return descStatus::Defined == status; + } virtual bool hasLayoutType(LayoutType layoutType) const = 0; virtual std::string serializeFormat() const = 0; + // Get memory upper bound if possible. Can be undefined + virtual size_t getMaxMemSize() const = 0; + /** * @brief Get minimal required memory size in bytes. 
* @return return minimal required memory size in bytes or UNDEFINED_SIZE in case undefined descriptor */ - size_t getCurrentSize() const { + size_t getCurrentMemSize() const { size_t retVal = UNDEFINED_SIZE; if (isDefined()) { - retVal = getMemSizeImp(); + retVal = getCurrentMemSizeImp(); } return retVal; } @@ -85,26 +120,37 @@ class MemoryDesc { static constexpr size_t UNDEFINED_SIZE = std::numeric_limits::max(); protected: - MemoryDesc(const Shape& shape, MemoryDescType type) - : shape(shape), type(type) {} + MemoryDesc() : type(MemoryDescType::Undef) {} + MemoryDesc(Shape shape, MemoryDescType type) + : shape(std::move(shape)), type(type) {} - MemoryDesc(const std::vector& dims, MemoryDescType type) + MemoryDesc(const VectorDims& dims, MemoryDescType type) : shape(dims), type(type) {} - virtual size_t getMemSizeImp() const = 0; + virtual void setPrecision(InferenceEngine::Precision prc) = 0; + + virtual size_t getCurrentMemSizeImp() const = 0; // Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc. virtual size_t getElementOffset(size_t elemNumber) const = 0; + virtual bool isDefinedImp() const = 0; + + virtual MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const = 0; + MemoryDescType type; Shape shape; + mutable enum class descStatus : uint8_t { + Unknown, + Defined, + Undefined, + } status = descStatus::Unknown; + friend class BlobDumper; // WA: optimizedNspc2Ncsp used getElementOffset inside implementation friend class MKLDNNSplitNode; + friend MemoryDescPtr MemoryDescUtils::cloneWithNewPrecision(const MemoryDesc& desc, const InferenceEngine::Precision prec); }; -using MemoryDescPtr = std::unique_ptr; -using MemoryDescConstPtr = std::unique_ptr; - } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.cpp new file mode 100644 index 00000000000000..03bce8179878f8 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.cpp @@ -0,0 +1,153 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc_utils.h" +#include "mkldnn_memory.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "utils/general_utils.h" +#include "utils/cpu_utils.hpp" +#include +#include +#include +#include +#include + +using namespace mkldnn; +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +namespace MKLDNNPlugin { + +DnnlMemoryDescPtr MemoryDescUtils::convertToDnnlMemoryDesc(const MemoryDescPtr &desc) { + if (MemoryDescType::Blocked == desc->getType()) { + const auto cpuDesc = desc->as(); + return std::shared_ptr(new DnnlBlockedMemoryDesc(cpuDesc->getPrecision(), cpuDesc->getShape(), cpuDesc->getBlockDims(), + cpuDesc->getOrder(), cpuDesc->getOffsetPadding(), + cpuDesc->getOffsetPaddingToData(), cpuDesc->getStrides())); + } else if (MemoryDescType::Mkldnn & desc->getType()) { + return std::dynamic_pointer_cast(desc); + } else { + IE_THROW() << "Cannot convert MemoryDesc to DnnlMemoryDesc"; + } +} + +DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const MemoryDesc& desc) { + if (MemoryDescType::DnnlBlocked == desc.getType()) { + return DnnlBlockedMemoryDesc(*desc.as()); + } else if (MemoryDescType::Blocked == desc.getType()) { + const auto cpuDesc = desc.as(); + return DnnlBlockedMemoryDesc(cpuDesc->getPrecision(), 
cpuDesc->getShape(), cpuDesc->getBlockDims(), cpuDesc->getOrder(), cpuDesc->getOffsetPadding(), + cpuDesc->getOffsetPaddingToData(), cpuDesc->getStrides()); + } else { + IE_THROW() << "Cannot convert MemoryDesc to DnnlMemoryDesc"; + } +} + +CpuBlockedMemoryDesc MemoryDescUtils::convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) { + if (desc.getLayout() == InferenceEngine::Layout::ANY) + IE_THROW() << "Cannot convert InferenceEngine::TensorDesc with ANY layout to CpuBlockedMemoryDesc"; + const auto &blkDesc = desc.getBlockingDesc(); + return CpuBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(), + blkDesc.getOffsetPaddingToData(), blkDesc.getStrides()); +} + +DnnlBlockedMemoryDesc MemoryDescUtils::convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc) { + const auto &blkDesc = desc.getBlockingDesc(); + if (desc.getLayout() == InferenceEngine::Layout::ANY) + IE_THROW() << "Cannot convert InferenceEngine::TensorDesc with ANY layout to DnnlBlockedMemoryDesc"; + return DnnlBlockedMemoryDesc(desc.getPrecision(), Shape(desc.getDims()), blkDesc.getBlockDims(), blkDesc.getOrder(), blkDesc.getOffsetPadding(), + blkDesc.getOffsetPaddingToData(), blkDesc.getStrides()); +} + +BlockedMemoryDescPtr MemoryDescUtils::convertToBlockedMemoryDesc(const MemoryDescPtr &desc) { + if (desc->getType() & MemoryDescType::Blocked) { + return std::dynamic_pointer_cast(desc); + } else { + IE_THROW() << "Can not convert unsupported memory descriptor"; + } +} + +MemoryDescPtr MemoryDescUtils::cloneWithUndefStridesAndOffset(const MemoryDesc& desc) { + if (desc.getType() == MemoryDescType::Mkldnn) { + IE_THROW() << "Can't apply undefined offset for mkldnn memory desc"; + } + + const auto blkMemDesc = desc.as(); + + VectorDims strides; + VectorDims offsetPaddingToData; + strides.resize(blkMemDesc->getBlockDims().size(), Shape::UNDEFINED_DIM); + offsetPaddingToData.resize(blkMemDesc->getBlockDims().size(), 0); + size_t offsetPadding = Shape::UNDEFINED_DIM; + + if (blkMemDesc->getType() == MemoryDescType::Blocked) { + return std::make_shared(blkMemDesc->getPrecision(), blkMemDesc->getShape(), blkMemDesc->getBlockDims(), + blkMemDesc->getOrder(), offsetPadding, offsetPaddingToData, strides); + } else if (blkMemDesc->getType() == MemoryDescType::DnnlBlocked) { + return DnnlBlockedMemoryDescPtr(new DnnlBlockedMemoryDesc(blkMemDesc->getPrecision(), blkMemDesc->getShape(), + blkMemDesc->getBlockDims(), blkMemDesc->getOrder(), + offsetPadding, offsetPaddingToData, strides)); + } else { + IE_THROW() << "Cannot apply undefined offset. 
Unsupported memory desc type"; + } +} + +MemoryDescPtr MemoryDescUtils::cloneWithDefaultStridesAndOffset(const MemoryDesc& desc) { + const auto blkMemDesc = desc.as(); + + if (MemoryDescType::Blocked == desc.getType()) { + return std::make_shared(blkMemDesc->getPrecision(), blkMemDesc->getShape(), + blkMemDesc->getBlockDims(), blkMemDesc->getOrder()); + } else if (MemoryDescType::DnnlBlocked == desc.getType()) { + return DnnlBlockedMemoryDescPtr(new DnnlBlockedMemoryDesc(blkMemDesc->getPrecision(), blkMemDesc->getShape(), + blkMemDesc->getBlockDims(), blkMemDesc->getOrder())); + } else { + IE_THROW() << "cloneWithDefaultStridesAndOffset supports Blocked descriptors only"; + } +} + +MemoryDescPtr MemoryDescUtils::cloneWithNewPrecision(const MemoryDesc& desc, const InferenceEngine::Precision prec) { + MemoryDescPtr newDesc = desc.clone(); + newDesc->setPrecision(prec); + return newDesc; +} + +InferenceEngine::Blob::Ptr MemoryDescUtils::interpretAsBlob(const MKLDNNMemory &mem) { + // TODO [DS]: Rewrite when IE is moved to the new TensorDescriptor + auto& memDesc = mem.getDesc(); + InferenceEngine::TensorDesc desc = convertToTensorDesc(memDesc); + + desc = InferenceEngine::TensorDesc(desc.getPrecision(), memDesc.getShape().getStaticDims(), desc.getBlockingDesc()); + return make_blob_with_precision(desc, mem.GetData()); +} + +InferenceEngine::TensorDesc MemoryDescUtils::convertToTensorDesc(const MemoryDesc& desc) { + if (auto blockingDesc = dynamic_cast(&desc)) { + return InferenceEngine::TensorDesc(blockingDesc->getPrecision(), blockingDesc->getShape().getStaticDims(), + {blockingDesc->getBlockDims(), blockingDesc->getOrder(), blockingDesc->getOffsetPadding(), + blockingDesc->getOffsetPaddingToData(), blockingDesc->getStrides()}); + } else { + IE_THROW() << "Cannot convert MemoryDesc to InferenceEngine::TensorDesc"; + } +} + +std::string MemoryDescUtils::dim2str(Dim dim) { + return dim == Shape::UNDEFINED_DIM ? "?" 
: std::to_string(dim); +} + +std::string MemoryDescUtils::dims2str(const VectorDims& dims) { + std::stringstream output; + output << "{"; + + auto itr = dims.begin(); + do { + output << dim2str(*itr); + } while (++itr != dims.end() && output << ", "); + + output << "}"; + return output.str(); +} + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.h b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.h new file mode 100644 index 00000000000000..04878af80451c3 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/cpu_memory_desc_utils.h @@ -0,0 +1,107 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "mkldnn/ie_mkldnn.h" + +namespace MKLDNNPlugin { + +class MemoryDesc; +class DnnlMemoryDesc; +class BlockedMemoryDesc; +class DnnlBlockedMemoryDesc; +class CpuBlockedMemoryDesc; +class MKLDNNMemory; + +class MemoryDescUtils { +public: + /** + * @brief Converts MemoryDesc to DnnlMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted DnnlMemoryDesc + */ + static std::shared_ptr convertToDnnlMemoryDesc(const std::shared_ptr &desc); + + /** + * @brief Converts MemoryDesc to DnnlBlockedMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted DnnlBockedMemoryDesc + */ + static DnnlBlockedMemoryDesc convertToDnnlBlockedMemoryDesc(const MemoryDesc& desc); + + /** + * @brief Converts InferenceEngine::TensorDesc to CpuBlockedMemoryDesc + * @param desc InferenceEngine::TensorDesc to be converted + * @return converted CpuBlockedMemoryDesc + */ + static CpuBlockedMemoryDesc convertToCpuBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc); + + /** + * @brief Converts InferenceEngine::TensorDesc to DnnlBlockedMemoryDesc + * @param desc InferenceEngine::TensorDesc to be converted + * @return converted DnnlBlockedMemoryDesc + */ + static DnnlBlockedMemoryDesc convertToDnnlBlockedMemoryDesc(const InferenceEngine::TensorDesc& desc); + + /** + * @brief Converts MemoryDesc to BlockedMemoryDesc + * @param desc MemoryDesc to be converted + * @return converted BlockedMemoryDesc + */ + static std::shared_ptr convertToBlockedMemoryDesc(const std::shared_ptr &desc); + + /** + * @brief Creates BlockedMemoryDesc with offsetPadding and strides of UNDEFINED_DIM size + * @param desc is the MemoryDesc to be cloned + * @return pointer to the new MemoryDesc + */ + static std::shared_ptr cloneWithUndefStridesAndOffset(const MemoryDesc& desc); + + /** + * @brief Creates MemoryDesc with offsetPadding of 0 size and default strides + * @param desc is the MemoryDesc to be cloned + * @return pointer to the new MemoryDesc + */ + static std::shared_ptr cloneWithDefaultStridesAndOffset(const MemoryDesc& desc); + + /** + * @brief Creates MemoryDesc with specified precision + * @param desc is the MemoryDesc to be cloned + * @return pointer to the new MemoryDesc + */ + static std::shared_ptr cloneWithNewPrecision(const MemoryDesc& desc, const InferenceEngine::Precision prec); + + /** + * @brief Creates InferenceEngine::Blob from MKLDNNMemory with the memory reuse + * @param desc MKLDNNMemory from which will be created InferenceEngine::Blob + * @return pointer to InferenceEngine::Blob + */ + static InferenceEngine::Blob::Ptr interpretAsBlob(const MKLDNNMemory& mem); + + /** + * @brief Converts MemoryDesc to InferenceEngine::TensorDesc + * @param desc MemoryDesc to be converted + * @return converted 
InferenceEngine::TensorDesc + */ + static InferenceEngine::TensorDesc convertToTensorDesc(const MemoryDesc& desc); + + /** + * @brief Converts dim to string, undefined dim represented as ? + * @param dim Dim to be converted + * @return dim as string + */ + static std::string dim2str(Dim dim); + + /** + * @brief Converts dims to string, undefined dim represented as ? + * @param dim Dims to be converted + * @return dims as string + */ + static std::string dims2str(const VectorDims& dims); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp new file mode 100644 index 00000000000000..e2834c1defa858 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.cpp @@ -0,0 +1,796 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include +#include + +using namespace MKLDNNPlugin; +using namespace InferenceEngine; + +DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape) : MemoryDesc(shape, DnnlBlocked) { + const auto ndims = shape.getRank(); + const auto &dims = shape.getDims(); + mkldnn::memory::dims plain_strides; + if (std::any_of(dims.begin(), dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL); + } else { + plain_strides.resize(ndims, 1); + for (size_t i = 1; i < ndims; i++) { + plain_strides[ndims - i -1] = plain_strides[ndims - i] * dims[ndims - i]; + } + } + + desc = {MKLDNNExtensionUtils::convertToDnnlDims(dims), MKLDNNExtensionUtils::IEPrecisionToDataType(prc), plain_strides}; + + order.resize(ndims); + std::iota(order.begin(), order.end(), 0); + + initBlockedParams(); +} + +/** + * Construct from blocked parameters + * + * IE IOhw_4i16o4i dims(N) = {32, 64, 128, 128} + * blockedDims {4, 2, 128, 128, 4, 16, 4} // total dims(inner, outermost, auto blocked/padded). Generally sorted by strides. + * strides {8388608, 4194304, 32768, 256, 64, 4, 1} // strides for blockedDims, growing sequence + * order {1, 0, 2, 3, 1, 0, 1} // matching to original dims + * + * All vectors blockedDims/strides/order have same size equals total num of internal blocked dims(inner_dims + outer_dims) + * + * Tensor descriptor filing is not deterministic. It allows any permutation of index which keeps order of + * real dims spliting. + * for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4] + * but not [0]<=>[4] because it break splitting original dims into internal blocked dims + * Normalization of representation: Make strides growing but keep layout same as original. Not all + * layout allow us to meet normalize form of tensor desc. + * + * Limitation of conversion first N elements of order should be permutation of [0,1,2 ... 
N] + */ +DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& blockedDims, + const VectorDims& order, size_t offsetPadding, const VectorDims& offsetPaddingToData, + const VectorDims& strides) : MemoryDesc(shape, DnnlBlocked) { + using namespace mkldnn; + // scalar case + if (shape.getRank() == 0) { + desc.data.format_kind = dnnl_blocked; + desc.data.data_type = memory::convert_to_c(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); + desc.data.ndims = 1; + desc.data.dims[0] = 1; + desc.data.padded_dims[0] = 1; + desc.data.format_desc.blocking.strides[0] = 1; + desc.data.padded_offsets[0] = 0; + desc.data.offset0 = MKLDNNExtensionUtils::convertToDnnlDim(offsetPadding); + return; + } + + if (order.size() != blockedDims.size()) { + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, order and blocked dims must have equals size"; + } + + if (!offsetPaddingToData.empty() && offsetPaddingToData.size() != order.size()) { + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, offsetPaddingToData must have equal size with order and blocked dims"; + } + + if (!strides.empty() && strides.size() != order.size()) { + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, strides must have equal size with order and blocked dims"; + } + + if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined order."; + } + + if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + IE_THROW() << "DnnlBlockedMemoryDesc doesn't support undefined blockedDims."; + } + + auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims()); + + size_t outer_ndims = dims.size(); + size_t inner_ndims = order.size() - dims.size(); + + if (!strides.empty()) { + bool is_descending_strides = true; + for (int i = 1; i < strides.size(); i++) { + is_descending_strides &= (strides[i - 1] >= strides[i]); + } + + // TODO: That's strong constrains and can be mitigated. IE::TensorDesc allow to transpose blocked dims + // and may be we can achieve correct "descending strides" form which allow conversion. 
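+    // A minimal usage sketch of this constructor, reusing the IOhw_4i16o4i example from the
+    // comment above (argument values are illustrative only, not taken from real calling code):
+    //   Shape shape(InferenceEngine::SizeVector{32, 64, 128, 128});
+    //   VectorDims blkDims{4, 2, 128, 128, 4, 16, 4};
+    //   VectorDims order  {1, 0, 2, 3, 1, 0, 1};
+    //   VectorDims strides{8388608, 4194304, 32768, 256, 64, 4, 1};
+    //   DnnlBlockedMemoryDesc desc(InferenceEngine::Precision::FP32, shape, blkDims, order, 0, {}, strides);
+    // These strides form a non-increasing sequence, so the check below accepts them; a permuted
+    // variant (e.g. with the first two values swapped) would be rejected here.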
+ if (!is_descending_strides) + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc from strides: " << vec2str(strides); + } + + VectorDims outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension + for (size_t i = 0; i < outer_ndims; i++) { + outer_order[order[i]] = i; + } + bool outer_is_correct_permutation_of_n = + std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); + + if (!outer_is_correct_permutation_of_n) + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc because of incorrect order: " << vec2str(order); + + if (!strides.empty() && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) { + bool inner_block_are_dense = one_of(strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted + for (int i = outer_ndims; i < strides.size() - 1; i++) { + inner_block_are_dense &= (strides[i] == strides[i + 1] * blockedDims[i + 1]); + } + + if (!inner_block_are_dense) + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc from strides: " << vec2str(strides) << " inner blocks are not dense."; + } + + // Fill general memory desc fields + desc.data.format_kind = dnnl_blocked; + desc.data.extra.flags = 0; + desc.data.data_type = memory::convert_to_c(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); + desc.data.ndims = dims.size(); + desc.data.offset0 = MKLDNNExtensionUtils::convertToDnnlDim(offsetPadding); + std::copy(dims.begin(), dims.end(), desc.data.dims); + + if (!offsetPaddingToData.empty()) { + bool inner_pad_offsets_is_zero = std::all_of(offsetPaddingToData.begin() + outer_ndims, offsetPaddingToData.end(), + [](size_t pad) { return pad == 0; }); + + if (!inner_pad_offsets_is_zero) + IE_THROW() << "Can not construct DnnlBlockedMemoryDesc, inner pad offsets is not zero: " << vec2str(offsetPaddingToData); + auto dnnlPaddedOffsets = MKLDNNExtensionUtils::convertToDnnlDims(offsetPaddingToData); + std::copy(dnnlPaddedOffsets.begin(), dnnlPaddedOffsets.begin() + outer_ndims, desc.data.padded_offsets); + } else { + std::fill(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + outer_ndims, 0); + } + + std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1); + auto dnnlBlkDims = MKLDNNExtensionUtils::convertToDnnlDims(blockedDims); + + for (size_t i = 0; i < order.size(); i++) { + auto idx = order[i]; + if (desc.data.padded_dims[idx] != DNNL_RUNTIME_DIM_VAL && dnnlBlkDims[i] != DNNL_RUNTIME_DIM_VAL) { + desc.data.padded_dims[idx] *= dnnlBlkDims[i]; + } else { + desc.data.padded_dims[idx] = DNNL_RUNTIME_DIM_VAL; + } + } + + // Fill blocking desc + auto &dnn_blk_desc = desc.data.format_desc.blocking; + dnn_blk_desc.inner_nblks = inner_ndims; + std::copy(dnnlBlkDims.end() - inner_ndims, dnnlBlkDims.end(), dnn_blk_desc.inner_blks); + std::copy(order.end() - inner_ndims, order.end(), dnn_blk_desc.inner_idxs); + + if (strides.empty()) { + if (std::any_of(dnnlBlkDims.begin(), dnnlBlkDims.end(), [](memory::dim val) { return val == DNNL_RUNTIME_DIM_VAL; })) { + std::fill(std::begin(dnn_blk_desc.strides), std::begin(dnn_blk_desc.strides) + outer_ndims, DNNL_RUNTIME_DIM_VAL); + } else { + //TODO [DS]: phase 2: refactor + std::vector tmpStrides(order.size()); + tmpStrides[order.size() - 1] = 1; + for (size_t i = 2; i <= order.size(); i++) { + tmpStrides[order.size() - i] = tmpStrides[order.size() - (i - 1)] * dnnlBlkDims[blockedDims.size() - (i - 1)]; + } + for (size_t i = 0; i < outer_ndims; i++) { + dnn_blk_desc.strides[i] = 
tmpStrides[outer_order[i]]; + } + } + } else { + for (size_t i = 0; i < outer_ndims; i++) { + auto dnnlStrides = MKLDNNExtensionUtils::convertToDnnlDims(strides); + dnn_blk_desc.strides[i] = dnnlStrides[outer_order[i]]; + } + } + + this->order = order; + + initBlockedParams(); +} + +DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) : + MemoryDesc(shape, DnnlBlocked) { + using namespace mkldnn; + if (format == memory::format_tag::any || format == memory::format_tag::undef) + IE_THROW(Unexpected) << "Can't create mkldnn::desc with any or undef format"; + + const auto dims = shape.getDims(); + if (format == memory::format_tag::x && shape.getRank() == 0) { + desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); + } else { + desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(dims), dataType, format); + } + + VectorDims perm; + VectorDims inner_blks; + VectorDims inner_idxs; + + mkldnn::impl::memory_desc_wrapper::compute_blocking(mkldnn::memory::convert_to_c(format), perm, inner_blks, inner_idxs); + + order.swap(perm); + order.insert(order.end(), inner_idxs.begin(), inner_idxs.end()); + + initBlockedParams(); +} + +bool DnnlBlockedMemoryDesc::isCompatible(const MemoryDesc& rhs) const { + if (auto desc = dynamic_cast(&rhs)) { + return isCompatible(*desc); + } else if (auto desc = dynamic_cast(&rhs)) { + return isCompatible(*desc); + } else { + return false; + } +} + +bool DnnlBlockedMemoryDesc::isCompatible(const CpuBlockedMemoryDesc& rhs) const { + return this->desc.data.extra.flags == dnnl_memory_extra_flag_none && BlockedMemoryDesc::isCompatible(rhs); +} + +bool DnnlBlockedMemoryDesc::isCompatible(const DnnlBlockedMemoryDesc& rhs) const { + using namespace dnnl; + using namespace impl; + using namespace impl::utils; + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { + return false; + } + + if (this->desc == rhs.desc) { + return true; + } + memory_desc_wrapper wrappedThis(this->desc.data); + memory_desc_wrapper wrappedRhs(rhs.desc.data); + if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) + return false; + + int stride_start = wrappedThis.ndims() > 0 && wrappedThis.dims()[0] == 1 ? 
1 : 0; // ignore batch axis stride if batch size == 1 + + const auto thisExtra = this->desc.data.extra; + const auto rhsExtra = rhs.desc.data.extra; + return this->getOrder() == rhs.getOrder() && (thisExtra.flags == rhsExtra.flags && thisExtra.compensation_mask == rhsExtra.compensation_mask && + thisExtra.scale_adjust == rhsExtra.scale_adjust) && wrappedThis.similar_to(wrappedRhs, true, true, 0, stride_start, true, true); +} + +DnnlBlockedMemoryDesc::DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc) : + MemoryDesc(MKLDNNExtensionUtils::convertToVectorDims(mdesc.dims()), DnnlBlocked) { + desc = mdesc; + if (desc.data.format_kind == dnnl::impl::format_kind::any) + IE_THROW(Unexpected) << "Memory format any is prohibited!"; + + mkldnn::impl::memory_desc_wrapper descWrapped(desc.data); + if (!descWrapped.is_blocking_desc()) + IE_THROW(Unexpected) << "Can't create DnnlBlockedMemoryDesc from not blocking desc"; + + if (descWrapped.has_runtime_dims_or_strides()) { + IE_THROW(Unexpected) << "Cannot calculate order from undefined dims or strides"; + } + + const auto dims = desc.dims(); + + const auto &blk_desc = descWrapped.blocking_desc(); + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} + VectorDims inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + } + + // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} + VectorDims total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + VectorDims outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + // order of outer dims. 
In case of IOhw_ will be {1, 0, 2, 3} + VectorDims outer_order(outer_ndims); + std::iota(outer_order.begin(), outer_order.end(), 0); + std::sort(outer_order.begin(), outer_order.end(), + [&blk_desc, &outer_block_dims](size_t ind_l, size_t ind_r) { + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + + + // blocked order + // [new_outer_order] U [inner_idxs] + SizeVector blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); + order.swap(blk_order); + + initBlockedParams(); +} + +bool DnnlBlockedMemoryDesc::hasLayoutType(LayoutType layoutType) const { + switch (layoutType) { + case LayoutType::ncsp: + return isPlainFormat(); + case LayoutType::nspc: + return isTailCFormat(); + case LayoutType::nCsp8c: + return isBlockedCFormat(8); + case LayoutType::nCsp16c: + return isBlockedCFormat(16); + default: + return false; + } +} + +bool DnnlBlockedMemoryDesc::isPlainFormat() const { + if (shape.getRank() != order.size()) { + return false; + } + for (size_t i = 0; i < order.size(); ++i) { + if (order[i] != i) { + return false; + } + } + return true; +} + +bool DnnlBlockedMemoryDesc::isBlockedCFormat(size_t blk_size) const { + const auto &blocking = desc.data.format_desc.blocking; + + if (desc.data.format_kind !=dnnl_blocked || + blocking.inner_nblks != 1 || + blocking.inner_idxs[0] != 1) + return false; + + if ((order.size() - shape.getRank()) != 1) { + return false; + } + for (size_t i = 0; i < order.size() - 1; ++i) { + if (order[i] != i) { + return false; + } + } + if (blk_size != UNREACHABLE_DIM && blk_size != blocking.inner_blks[0]) { + return false; + } + + return true; +} + +bool DnnlBlockedMemoryDesc::isTailCFormat() const { + if (shape.getRank() < 3) { + return false; + } + if (shape.getRank() != order.size()) { + return false; + } + if (!std::is_sorted(order.begin(), --order.end())) { + return false; + } + if (order.back() != 1) { + return false; + } + return true; +} + +MemoryDescPtr DnnlBlockedMemoryDesc::cloneWithNewDimsImp(const VectorDims &dims) const { + if (std::any_of(dims.begin(), dims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { + IE_THROW() << "Can't clone desc if new dims are undefined"; + } + + // TODO [DS]: add stride recalculation for strided blobs + getStrides(); + getBlockDims(); + for (int i = strides.size() - 2; i >= 0 ; i--) { + if (strides[i] == Shape::UNDEFINED_DIM) + break; + + if (strides[i] != strides[i + 1] * blockedDims[i + 1]) + IE_THROW(NotImplemented) << "Can't clone desc with new dims for not dense tensor"; + } + + using namespace dnnl::impl::utils; + auto mklDims = MKLDNNExtensionUtils::convertToDnnlDims(dims); + mkldnn::memory::desc newMklDesc = desc; + array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size()); + std::vector perm(order.begin(), order.begin() + mklDims.size()); + auto& blockingDesc = newMklDesc.data.format_desc.blocking; + auto numInnerBlks = blockingDesc.inner_nblks; + std::vector innerBlks(std::begin(blockingDesc.inner_blks), std::begin(blockingDesc.inner_blks) + numInnerBlks); + std::vector innerIdxs(std::begin(blockingDesc.inner_idxs), std::begin(blockingDesc.inner_idxs) + numInnerBlks); + auto retCode = dnnl::impl::fill_blocked(newMklDesc.data, perm, innerBlks, innerIdxs); + if (retCode != dnnl::impl::status::success) { + IE_THROW() << 
"Can not clone DnnlBlockedMemoryDesc with dims: " << MemoryDescUtils::dims2str(dims); + } + return DnnlBlockedMemoryDescPtr(new DnnlBlockedMemoryDesc(newMklDesc)); +} + +static const std::map> form_tags_by_ndims { + {0, { + mkldnn::memory::format_tag::a // TODO :: really 1d layout for scalar?? + }}, {1, { + mkldnn::memory::format_tag::a + }}, {2, { + mkldnn::memory::format_tag::ab, + mkldnn::memory::format_tag::ba + }}, {3, { + mkldnn::memory::format_tag::abc, + mkldnn::memory::format_tag::acb, + mkldnn::memory::format_tag::bac, + mkldnn::memory::format_tag::bca, + mkldnn::memory::format_tag::cba, + + mkldnn::memory::format_tag::Abc16a, + mkldnn::memory::format_tag::ABc16a16b, + mkldnn::memory::format_tag::ABc4a4b, + mkldnn::memory::format_tag::aBc16b, + mkldnn::memory::format_tag::aBc32b, + mkldnn::memory::format_tag::ABc16b16a, + mkldnn::memory::format_tag::Abc4a, + mkldnn::memory::format_tag::aBc4b, + mkldnn::memory::format_tag::ABc4b16a4b, + mkldnn::memory::format_tag::ABc2b8a4b, + mkldnn::memory::format_tag::ABc16b16a4b, + mkldnn::memory::format_tag::ABc16b16a2b, + mkldnn::memory::format_tag::ABc4b4a, + mkldnn::memory::format_tag::ABc8a16b2a, + mkldnn::memory::format_tag::ABc8a8b, + mkldnn::memory::format_tag::ABc8a4b, + mkldnn::memory::format_tag::aBc8b, + mkldnn::memory::format_tag::ABc8b16a2b, + mkldnn::memory::format_tag::ABc8b8a, + mkldnn::memory::format_tag::Acb16a, + mkldnn::memory::format_tag::Acb4a, + mkldnn::memory::format_tag::Acb8a, + mkldnn::memory::format_tag::BAc16a16b, + mkldnn::memory::format_tag::BAc16b16a, + }}, {4, { // Popular + mkldnn::memory::format_tag::abcd, // plain + mkldnn::memory::format_tag::acdb, // tail_c + mkldnn::memory::format_tag::aBcd8b, // blocked 8c + mkldnn::memory::format_tag::aBcd16b, // blocked 16c + + mkldnn::memory::format_tag::abdc, + + mkldnn::memory::format_tag::bacd, + mkldnn::memory::format_tag::bcda, + mkldnn::memory::format_tag::cdba, + mkldnn::memory::format_tag::dcab, + + mkldnn::memory::format_tag::Abcd8a, + mkldnn::memory::format_tag::Abcd16a, + mkldnn::memory::format_tag::Abcd32a, + mkldnn::memory::format_tag::ABcd16a16b, + mkldnn::memory::format_tag::aBcd32b, + mkldnn::memory::format_tag::ABcd16b16a, + mkldnn::memory::format_tag::aBCd16b16c, + mkldnn::memory::format_tag::aBCd16c16b, + mkldnn::memory::format_tag::Abcd4a, + mkldnn::memory::format_tag::aBcd4b, + mkldnn::memory::format_tag::ABcd4b16a4b, + mkldnn::memory::format_tag::ABcd2b8a4b, + mkldnn::memory::format_tag::ABcd4b4a, + mkldnn::memory::format_tag::ABcd4a4b, + mkldnn::memory::format_tag::aBCd4c16b4c, + mkldnn::memory::format_tag::aBCd2c8b4c, + mkldnn::memory::format_tag::ABcd16b16a4b, + mkldnn::memory::format_tag::ABcd16b16a2b, + mkldnn::memory::format_tag::aBCd16c16b4c, + mkldnn::memory::format_tag::aBCd16c16b2c, + mkldnn::memory::format_tag::aBCd4c4b, + mkldnn::memory::format_tag::aBCd4b4c, + mkldnn::memory::format_tag::ABcd8a16b2a, + mkldnn::memory::format_tag::ABcd8a8b, + mkldnn::memory::format_tag::ABcd8a32b, + mkldnn::memory::format_tag::ABcd32a32b, + mkldnn::memory::format_tag::ABcd8a4b, + + mkldnn::memory::format_tag::ABcd8b16a2b, + mkldnn::memory::format_tag::aBCd8b16c2b, + mkldnn::memory::format_tag::ABcd8b8a, + mkldnn::memory::format_tag::aBCd8b8c, + mkldnn::memory::format_tag::aBCd8b4c, + mkldnn::memory::format_tag::aBCd8c16b2c, + mkldnn::memory::format_tag::aBCd8c8b, + + mkldnn::memory::format_tag::ABcd4a8b8a4b, + mkldnn::memory::format_tag::ABcd2a8b8a2b, + + mkldnn::memory::format_tag::aBdc16b, + mkldnn::memory::format_tag::aBdc4b, + 
mkldnn::memory::format_tag::aBdc8b, + mkldnn::memory::format_tag::aCBd16b16c, + mkldnn::memory::format_tag::aCBd16c16b, + mkldnn::memory::format_tag::Acdb16a, + mkldnn::memory::format_tag::Acdb4a, + mkldnn::memory::format_tag::Acdb8a, + mkldnn::memory::format_tag::BAcd16a16b, + mkldnn::memory::format_tag::BAcd16b16a, + mkldnn::memory::format_tag::ABcd32a32b, + mkldnn::memory::format_tag::Acdb32a, + mkldnn::memory::format_tag::aBCd2b4c2b, + mkldnn::memory::format_tag::aBCd2c4b2c, + mkldnn::memory::format_tag::aBCd4b8c2b, + mkldnn::memory::format_tag::aBCd4c8b2c, + }}, {5, { // Popular + mkldnn::memory::format_tag::abcde, // plain + mkldnn::memory::format_tag::acdeb, // tail_c + mkldnn::memory::format_tag::aBcde8b, // blocked 8c + mkldnn::memory::format_tag::aBcde16b, // blocked 16c + + mkldnn::memory::format_tag::abdec, + mkldnn::memory::format_tag::acbde, + mkldnn::memory::format_tag::bacde, + mkldnn::memory::format_tag::bcdea, + mkldnn::memory::format_tag::cdeba, + mkldnn::memory::format_tag::decab, + + mkldnn::memory::format_tag::Abcde16a, + mkldnn::memory::format_tag::Abcde32a, + mkldnn::memory::format_tag::ABcde16a16b, + mkldnn::memory::format_tag::aBcde32b, + mkldnn::memory::format_tag::ABcde16b16a, + mkldnn::memory::format_tag::aBCde16b16c, + mkldnn::memory::format_tag::aBCde16c16b, + mkldnn::memory::format_tag::aBCde2c8b4c, + mkldnn::memory::format_tag::Abcde4a, + mkldnn::memory::format_tag::aBcde4b, + mkldnn::memory::format_tag::ABcde4b4a, + mkldnn::memory::format_tag::ABcde4a4b, + mkldnn::memory::format_tag::aBCde4b4c, + mkldnn::memory::format_tag::aBCde4c16b4c, + mkldnn::memory::format_tag::aBCde16c16b4c, + mkldnn::memory::format_tag::aBCde16c16b2c, + mkldnn::memory::format_tag::aBCde4c4b, + mkldnn::memory::format_tag::Abcde8a, + mkldnn::memory::format_tag::ABcde8a8b, + mkldnn::memory::format_tag::ABcde8a4b, + mkldnn::memory::format_tag::ABcde8b16a2b, + mkldnn::memory::format_tag::ABcde4b16a4b, + mkldnn::memory::format_tag::ABcde2b8a4b, + mkldnn::memory::format_tag::aBCde8b16c2b, + mkldnn::memory::format_tag::ABcde8b8a, + mkldnn::memory::format_tag::aBCde8b8c, + mkldnn::memory::format_tag::aBCde8b4c, + mkldnn::memory::format_tag::aBCde4b8c8b4c, + mkldnn::memory::format_tag::aBCde2b8c8b2c, + mkldnn::memory::format_tag::aBCde8c16b2c, + mkldnn::memory::format_tag::aBCde8c8b, + mkldnn::memory::format_tag::aBdec16b, + mkldnn::memory::format_tag::aBdec4b, + mkldnn::memory::format_tag::aBdec8b, + mkldnn::memory::format_tag::aCBde16b16c, + mkldnn::memory::format_tag::aCBde16c16b, + mkldnn::memory::format_tag::Acdeb16a, + mkldnn::memory::format_tag::Acdeb4a, + mkldnn::memory::format_tag::Acdeb8a, + mkldnn::memory::format_tag::BAcde16b16a, + mkldnn::memory::format_tag::BAcde16a16b, + mkldnn::memory::format_tag::aBdec32b, + mkldnn::memory::format_tag::aBCde2b4c2b, + mkldnn::memory::format_tag::aBCde2c4b2c, + mkldnn::memory::format_tag::aBCde4b8c2b, + mkldnn::memory::format_tag::aBCde4c8b2c, + }}, {6, { // Popular + mkldnn::memory::format_tag::abcdef, // plain + mkldnn::memory::format_tag::acbdef, // permute + mkldnn::memory::format_tag::defcab, // permute + mkldnn::memory::format_tag::aBcdef16b, // blocked 16c + + mkldnn::memory::format_tag::aBCdef16b16c, + mkldnn::memory::format_tag::aBCdef16c16b, + mkldnn::memory::format_tag::aBcdef4b, + mkldnn::memory::format_tag::aBCdef2c8b4c, + mkldnn::memory::format_tag::aBCdef4c4b, + mkldnn::memory::format_tag::aBCdef4b4c, + mkldnn::memory::format_tag::aBCdef8b8c, + mkldnn::memory::format_tag::aBCdef8b4c, + mkldnn::memory::format_tag::aBCdef8c16b2c, 
+ mkldnn::memory::format_tag::aBCdef4c16b4c, + mkldnn::memory::format_tag::aBCdef8c8b, + + mkldnn::memory::format_tag::aBdefc16b, + mkldnn::memory::format_tag::aCBdef16c16b, + mkldnn::memory::format_tag::aCBdef16b16c, + mkldnn::memory::format_tag::aBdefc4b, + mkldnn::memory::format_tag::aBdefc8b, + + mkldnn::memory::format_tag::Abcdef4a, + mkldnn::memory::format_tag::Abcdef8a, + mkldnn::memory::format_tag::Abcdef16a, + mkldnn::memory::format_tag::Abcdef32a, + mkldnn::memory::format_tag::aBCdef2b4c2b, + mkldnn::memory::format_tag::aBCdef2c4b2c, + mkldnn::memory::format_tag::aBCdef4b8c2b, + mkldnn::memory::format_tag::aBCdef4c8b2c, + }} +}; + +bool DnnlBlockedMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const { + mkldnn::memory::desc refDesc(desc.dims(), desc.data_type(), fmt); + + if (desc.data.ndims != refDesc.data.ndims) + return false; + + if (desc.data.format_kind != dnnl_blocked || refDesc.data.format_kind != dnnl_blocked) + IE_THROW() << "DnnlMemoryDesc::isSame is not implemented for non blocked memory format"; + + auto actualBlkDesc = desc.data.format_desc.blocking; + auto refBlkDesc = refDesc.data.format_desc.blocking; + if (actualBlkDesc.inner_nblks != refBlkDesc.inner_nblks) + return false; + + for (size_t i = 0; i < actualBlkDesc.inner_nblks; ++i) + if (actualBlkDesc.inner_blks[i] != refBlkDesc.inner_blks[i]) + return false; + + for (size_t i = 0; i < actualBlkDesc.inner_nblks; ++i) + if (actualBlkDesc.inner_idxs[i] != refBlkDesc.inner_idxs[i]) + return false; + + auto actualStrides = desc.data.format_desc.blocking.strides; + auto refStrides = refDesc.data.format_desc.blocking.strides; + + VectorDims actualOrder(desc.data.ndims); + { + const auto dims = desc.dims(); + VectorDims total_block_per_dim(dims.size(), 1); + const auto &blk_desc = desc.data.format_desc.blocking; + for (int i = 0; i < blk_desc.inner_nblks; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + VectorDims outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + std::iota(actualOrder.begin(), actualOrder.end(), 0); + std::sort(actualOrder.begin(), actualOrder.end(), + [&actualStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (actualStrides[ind_l] > actualStrides[ind_r]) || + (actualStrides[ind_l] == actualStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + } + + VectorDims refOrder(refDesc.data.ndims); + { + const auto dims = refDesc.dims(); + VectorDims total_block_per_dim(dims.size(), 1); + const auto &blk_desc = refDesc.data.format_desc.blocking; + for (int i = 0; i < blk_desc.inner_nblks; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + VectorDims outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + std::iota(refOrder.begin(), refOrder.end(), 0); + std::sort(refOrder.begin(), refOrder.end(), + [&refStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { + return (refStrides[ind_l] > refStrides[ind_r]) || + (refStrides[ind_l] == refStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + } + + if (actualOrder != refOrder) { + return false; + } + + return true; +} + +mkldnn::memory::format_tag DnnlBlockedMemoryDesc::getFormat() const { + // TODO [OneDNN]: 
Previously it was a field of tdesc, but now the brute + // force search here. Please avoid of using this method. + const auto ndims = desc.dims().size(); + + // There are no suitable format_tag for this + if (ndims == 0 || ndims > 6) + return mkldnn::memory::format_tag::undef; + + for (const auto fmt : form_tags_by_ndims.at(ndims)) { + if (this->isSame(fmt)) + return fmt; + } + + return mkldnn::memory::format_tag::undef; +} + +std::string DnnlBlockedMemoryDesc::serializeFormat() const { + auto fmt = getFormat(); + return mkldnn::utils::fmt2str(fmt); +} + +size_t DnnlBlockedMemoryDesc::getMaxMemSize() const { + if (shape.isStatic()) { + return getCurrentMemSize(); + } + + auto& maxDims = shape.getMaxDims(); + if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { + return UNDEFINED_SIZE; + } + + auto maxDimsDesc = cloneWithNewDims(maxDims); + return maxDimsDesc->getCurrentMemSize(); +} + +size_t DnnlBlockedMemoryDesc::getPaddedElementsCount() const { + return std::accumulate(std::begin(desc.data.padded_dims), std::begin(desc.data.padded_dims) + desc.data.ndims, size_t{1}, + std::multiplies()); +} + +bool DnnlBlockedMemoryDesc::blocksExtended() const { + for (int i = 0; i < desc.data.ndims; i++) { + if (desc.data.dims[i] != desc.data.padded_dims[i]) + return true; + } + return false; +} + +void DnnlBlockedMemoryDesc::initBlockDims() { + const auto dims = desc.dims(); + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} + VectorDims total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + // blocked dims + // [dims via new_outer_order with auto pad] U [inner_blk_dims] + VectorDims outer_block_dims = MKLDNNExtensionUtils::convertToVectorDims(dims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + if (outer_block_dims[i] != Shape::UNDEFINED_DIM) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + VectorDims outer_order(outer_ndims); + std::copy(order.begin(), order.begin() + outer_ndims, outer_order.begin()); + + blockedDims.resize(total_ndims, 0); + std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, + blockedDims.end() - blk_desc.inner_nblks); + std::transform(outer_order.begin(), outer_order.end(), blockedDims.begin(), + [&] (size_t i) { return outer_block_dims[i]; }); +} + +void DnnlBlockedMemoryDesc::initStrides() { + const auto dims = desc.dims(); + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} + VectorDims inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + } + + // order of outer dims. 
In case of IOhw_ will be {1, 0, 2, 3} + VectorDims outer_order(outer_ndims); + std::copy(order.begin(), order.begin() + outer_ndims, outer_order.begin()); + + // blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + strides.resize(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), strides.begin(), + [&](size_t i) { return blk_desc.strides[i] == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : blk_desc.strides[i]; }); +} + +void DnnlBlockedMemoryDesc::initOffsetPadding() { + offsetPaddingToData = VectorDims(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + getOrder().size()); +} diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h new file mode 100644 index 00000000000000..297eb5badeccc4 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_blocked_memory_desc.h @@ -0,0 +1,99 @@ +// Copyright (C) 2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "blocked_memory_desc.h" +#include "mkldnn_memory.h" +#include "mkldnn_extension_utils.h" + +namespace MKLDNNPlugin { + +class DnnlBlockedMemoryDesc : public BlockedMemoryDesc, public DnnlMemoryDesc { +public: + // Creates planar DnnlBlockedMemoryDesc + DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape); + + DnnlBlockedMemoryDesc(const Shape& shape, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); + + MemoryDescPtr clone() const override { + return std::make_shared(*this); + } + + bool isCompatible(const MemoryDesc& rhs) const override; + bool isCompatible(const DnnlBlockedMemoryDesc& rhs) const; + bool isCompatible(const CpuBlockedMemoryDesc& rhs) const; + + const VectorDims& getBlockDims() const override { + return blockedDims; + } + + const VectorDims& getOrder() const override { + return order; + } + + const VectorDims& getOffsetPaddingToData() const override { + return offsetPaddingToData; + } + + size_t getOffsetPadding() const override { + return MKLDNNExtensionUtils::convertToDim(desc.data.offset0); + } + + const VectorDims& getStrides() const override { + return strides; + } + + bool hasLayoutType(LayoutType layoutType) const override; + + bool isSame(mkldnn::memory::format_tag fmt) const override; + + std::string serializeFormat() const override; + + size_t getMaxMemSize() const override; + + bool blocksExtended() const override; + + size_t getPaddedElementsCount() const override; + +private: + DnnlBlockedMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const VectorDims& blockedDims, + const VectorDims& order, size_t offsetPadding = 0, const VectorDims& offsetPaddingToData = {}, + const VectorDims& strides = {}); + + DnnlBlockedMemoryDesc(const mkldnn::memory::desc& mdesc); + + MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const override; + + bool isPlainFormat() const; + bool isBlockedCFormat(size_t blk_size = UNREACHABLE_DIM) const; + bool isTailCFormat() const; + + // WA: we need to initialize blocked params into ctor to avoid bugs when we calculate these params in throughput mode + // TODO [DS]: should be reimplemented to avoid useless calculation + void initBlockedParams() { + initBlockDims(); + initStrides(); + initOffsetPadding(); + } + + void initBlockDims(); + void initStrides(); + void initOffsetPadding(); + + /** + * Try to define original 
format tag use on creation + * + * @return format tag if was able to define it + */ + mkldnn::memory::format_tag getFormat() const; + + friend DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc); + friend class MemoryDescUtils; +}; + +using DnnlBlockedMemoryDescPtr = std::shared_ptr; +using DnnlBlockedMemoryDescCPtr = std::shared_ptr; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.cpp new file mode 100644 index 00000000000000..c61667f8df437e --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.cpp @@ -0,0 +1,75 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "dnnl_memory_desc.h" +#include "mkldnn_extension_utils.h" +#include +#include "mkldnn/ie_mkldnn.h" + +namespace MKLDNNPlugin { + +DnnlMemoryDesc::DnnlMemoryDesc(const mkldnn::memory::desc& desc) : + MemoryDesc(Shape(MKLDNNExtensionUtils::convertToVectorDims(desc.dims())), Mkldnn), desc(desc) { + if (desc.data.format_kind == dnnl::impl::format_kind::any) + IE_THROW(Unexpected) << "Memory format any is prohibited!"; +} + +size_t DnnlMemoryDesc::getCurrentMemSizeImp() const { + return MKLDNNExtensionUtils::getMemSizeForOneDnnDesc(desc); +} + +size_t DnnlMemoryDesc::getElementOffset(size_t elemNumber) const { + mkldnn::impl::memory_desc_wrapper wrapped(desc.data); + return wrapped.off_l(elemNumber); +} + +bool DnnlMemoryDesc::isCompatible(const MemoryDesc &rhs) const { + if (MemoryDescType::Mkldnn == rhs.getType()) { + return this->desc == rhs.as()->desc; + } else { + return false; + } +} + +// TODO: add serialization for packed format +std::string DnnlMemoryDesc::serializeFormat() const { + if (desc.data.format_kind == dnnl_format_kind_wino) { + switch (desc.data.format_desc.wino_desc.wino_format) { + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOIoi: return "wino_aaOIoi"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOio: return "wino_aaOio"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOBiOo: return "wino_aaOBiOo"; + case dnnl_wino_memory_format_t::dnnl_wino_wei_OBaaIBOIio: return "wino_OBaaIBOIio"; + default: return "wino_undef"; + } + } + return "undef"; +} + +bool DnnlMemoryDesc::isDefinedImp() const { + mkldnn::impl::memory_desc_wrapper wrappedThis(desc.data); + + if (wrappedThis.has_runtime_dims_or_strides()) { + return false; + } + + return wrappedThis.offset0() != DNNL_RUNTIME_DIM_VAL; +} + +InferenceEngine::Precision DnnlMemoryDesc::getPrecision() const { + return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type()); +} + +MemoryDescPtr DnnlMemoryDesc::cloneWithNewDimsImp(const VectorDims &dims) const { + IE_THROW(Unexpected) << "Cannot clone non blocked oneDNN desc with new dims"; +} + +size_t DnnlMemoryDesc::getMaxMemSize() const { + if (shape.isDynamic()) { + IE_THROW() << "Can't compute max mem size for DnnlMemoryDesc with dynaimc shape"; + } + + return getCurrentMemSize(); +} + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.h b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.h new file mode 100644 index 00000000000000..fd79994643bcd2 --- /dev/null +++ b/inference-engine/src/mkldnn_plugin/memory_desc/dnnl_memory_desc.h @@ -0,0 +1,71 @@ +// Copyright (C) 2018-2021 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "cpu_blocked_memory_desc.h" 
+#include "mkldnn_extension_utils.h" + +namespace MKLDNNPlugin { + +class DnnlMemoryDesc; + +using DnnlMemoryDescPtr = std::shared_ptr; +using DnnlMemoryDescCPtr = std::shared_ptr; + +class DnnlMemoryDesc : public virtual MemoryDesc { +public: + mkldnn::memory::data_type getDataType() const { + return static_cast(desc.data.data_type); + } + + dnnl_format_kind_t getFormatKind() const { + return desc.data.format_kind; + } + + MemoryDescPtr clone() const override { + return std::make_shared(*this); + } + + std::string serializeFormat() const override; + + InferenceEngine::Precision getPrecision() const override; + + bool isCompatible(const MemoryDesc& rhs) const override; + + size_t getMaxMemSize() const override; + + const mkldnn::memory::desc& getDnnlDesc() const { + return desc; + } + + bool hasLayoutType(LayoutType layoutType) const override { return false; } + + virtual bool isSame(mkldnn::memory::format_tag fmt) const { return false; } + + bool hasEmptyExtraData() const { return desc.data.extra.flags == dnnl_memory_extra_flag_none; } + +protected: + DnnlMemoryDesc() {} + static constexpr size_t UNREACHABLE_DIM = std::numeric_limits::max(); + + mkldnn::memory::desc desc; + + void setPrecision(InferenceEngine::Precision prc) override { + desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); + } + +private: + explicit DnnlMemoryDesc(const mkldnn::memory::desc& desc); + + size_t getElementOffset(size_t elemNumber) const override; + + size_t getCurrentMemSizeImp() const override; + bool isDefinedImp() const override; + MemoryDescPtr cloneWithNewDimsImp(const VectorDims& dims) const override; + + friend DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc); +}; + +} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_dims.h b/inference-engine/src/mkldnn_plugin/mkldnn_dims.h deleted file mode 100644 index 4960660935fe1d..00000000000000 --- a/inference-engine/src/mkldnn_plugin/mkldnn_dims.h +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (C) 2018-2021 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "perf_count.h" -#include -#include -#include -#include -#include - -namespace MKLDNNPlugin { - -class MKLDNNDims { -public: - MKLDNNDims() = default; - - explicit MKLDNNDims(const InferenceEngine::SizeVector& size) { - dims = std::vector(size.begin(), size.end()); - } - - explicit MKLDNNDims(const std::vector& dim) { - dims = dim; - } - - MKLDNNDims(const mkldnn_dims_t dnn_dims, int dnn_ndims) { - dims = std::vector(dnn_dims, dnn_dims + dnn_ndims); - } - - explicit MKLDNNDims(std::initializer_list ilist) : dims(ilist) {} - explicit MKLDNNDims(std::initializer_list ilist) : dims(ilist.begin(), ilist.end()) {} - - InferenceEngine::SizeVector ToSizeVector() const { - InferenceEngine::SizeVector size; - for (auto i : dims) { - size.push_back(i); - } - - return size; - } - - int ndims() const { - return dims.size(); - } - - ptrdiff_t size() const { - return size(0); - } - - ptrdiff_t size(int start) const { - ptrdiff_t size = 1; - - for (int i = start; i < dims.size(); i++) { - size *= dims[i]; - } - - return size; - } - - void push_back(int val) { - dims.push_back(val); - } - - operator mkldnn::memory::dims() const { - // TODO: it will convert each time.. 
not good - return mkldnn::memory::dims(dims.begin(), dims.end()); - } - - bool operator == (const MKLDNNDims& rhs) const { - if (dims.size() != rhs.dims.size()) { - return false; - } - - return std::equal(rhs.dims.begin(), rhs.dims.end(), dims.begin()); - } - - bool operator != (const MKLDNNDims& rhs) const { - return !(*this == rhs); - } - - ptrdiff_t& operator[](int idx) { - return dims[idx]; - } - - ptrdiff_t operator[](int idx) const { - return dims[idx]; - } - -private: - std::vector dims; -}; - -} // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp index acce99cfbd38d9..ad45a5d39adedb 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.cpp @@ -166,8 +166,6 @@ void MKLDNNEdge::allocate(const void* mem_ptr) { auto& inputDesc = getInputDesc(); auto& outputDesc = getOutputDesc(); - if (!inputDesc.isDefined() || !outputDesc.isDefined()) - IE_THROW() << "Cannot allocate memory for undefined descriptors."; if (!inputDesc.isCompatible(outputDesc)) IE_THROW() << "Cannot allocate memory for incompatible descriptors."; @@ -222,57 +220,6 @@ void MKLDNNEdge::changeStatus(MKLDNNEdge::Status state) { status = state; } -// TODO [DS]: remove while DynamicShapes migration -// TODO [DS]: How should we validate shape compatibility? -// TODO [DS]: Why do we allow uninitialized shape? -const Shape& MKLDNNEdge::getShape() { - if (!shape.getRank()) { - Shape inShape; - Shape outShape; - auto childPtr = getChild(); - auto parentPtr = getParent(); - - int inNum = getOutputNum(); - if (inNum < 0) { - IE_THROW() << "Error cannot find input data for " << child.lock()->getName() - << " from " << parent.lock()->getName(); - } - if (inNum < childPtr->inputShapes.size()) { - outShape = childPtr->inputShapes[inNum]; - } - - int outNum = getInputNum(); - if (outNum < 0) { - IE_THROW() << "Error cannot find output data for " << parent.lock()->getName() - << " to " << child.lock()->getName(); - } - if (outNum >= parentPtr->outputShapes.size()) - outNum = 0; - if (outNum < parentPtr->outputShapes.size()) { - inShape = parentPtr->outputShapes[outNum]; - } - - if (inShape.getRank() && outShape.getRank() && inShape.getRank() != outShape.getRank() && inShape.getElementsCount() != outShape.getElementsCount()) - IE_THROW() << "Nodes " << getParent()->getName() << " and " << getChild()->getName() - << " have incompatible dimensions!"; - - if (outShape.getRank() != 0) { - shape = outShape; - } else if (inShape.getRank() != 0) { - shape = inShape; - } else { - shape = Shape(InferenceEngine::SizeVector({1})); - } - - - if (!(outShape.getRank() == 0 && inShape.getRank() == 0) && !shape.getRank()) - IE_THROW() << "Cannot detect right dims for nodes " << getParent()->getName() - << " and " << getChild()->getName(); - } - - return shape; -} - const MemoryDesc& MKLDNNEdge::getInputDesc() const { auto parentPtr = getParent(); if (parentPtr->getSelectedPrimitiveDescriptor() == nullptr) @@ -321,20 +268,14 @@ const MemoryDesc& MKLDNNEdge::getDesc() const { } const MKLDNNMemory &MKLDNNEdge::getMemory() { - if (status == Status::NotAllocated) { - memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); - memoryPtr->Create(getDesc(), getSharedEdge()->getMemoryPtr()->GetData()); - memoryFromEdge.reset(); - changeStatus(Status::Allocated); - } - - return *memoryPtr; + return *getMemoryPtr(); } MKLDNNMemoryPtr &MKLDNNEdge::getMemoryPtr() { if (status == Status::NotAllocated) { 
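+    // Note on the updated Create() call below: with dynamic shapes the edge descriptor may still be
+    // undefined at this point, so the shared edge's data pointer is reused only when the descriptor
+    // is defined and nullptr is passed otherwise; presumably the actual storage is attached later,
+    // once the real shape is known.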
memoryPtr.reset(new MKLDNNMemory(getParent()->getEngine())); - memoryPtr->Create(getDesc(), getSharedEdge()->getMemoryPtr()->GetData()); + const auto &desc = getDesc(); + memoryPtr->Create(desc, desc.isDefined() ? getSharedEdge()->getMemoryPtr()->GetData() : nullptr); memoryFromEdge.reset(); changeStatus(Status::Allocated); } @@ -353,7 +294,6 @@ void MKLDNNEdge::validate() { getMemory(); getParent(); getChild(); - getShape(); if (status != Status::Allocated) { IE_THROW() << "Error memory is not allocated!"; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h index 5e6f4d23542f9f..b247cd7a76ea30 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_edge.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_edge.h @@ -6,7 +6,7 @@ #include #include "cpu_shape.h" -#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc.h" #include "mkldnn_weights_cache.hpp" #include @@ -51,7 +51,6 @@ class MKLDNNEdge { const std::shared_ptr getParent() const; const std::shared_ptr getChild() const; - const Shape &getShape(); const MKLDNNMemory& getMemory(); MKLDNNMemoryPtr& getMemoryPtr(); @@ -68,6 +67,10 @@ class MKLDNNEdge { MKLDNNEdgePtr getSharedEdge() const; MKLDNNEdgePtr getSharedEdge(std::nothrow_t) const; + bool hasDefinedMaxSize() { + return getDesc().getMaxMemSize() != MemoryDesc::UNDEFINED_SIZE; + } + private: std::string name() const; @@ -78,7 +81,6 @@ class MKLDNNEdge { bool useExternalMemory = false; MKLDNNEdgeWeakPtr memoryFromEdge; - Shape shape; MKLDNNMemoryPtr memoryPtr; Status status = Status::Uninitialized; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp index d1c851645b1d78..dd8cb0f003939d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp @@ -4,9 +4,8 @@ #include "mkldnn_extension_utils.h" #include "utils/general_utils.h" -#include #include -#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -77,10 +76,63 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d } } -InferenceEngine::SizeVector MKLDNNExtensionUtils::convertToSizeVector(const mkldnn::memory::dims& dims) { - return InferenceEngine::SizeVector(dims.begin(), dims.end()); +Dim MKLDNNExtensionUtils::convertToDim(const dnnl::memory::dim &dim) { + return dim == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast(dim); +} +dnnl::memory::dim MKLDNNExtensionUtils::convertToDnnlDim(const Dim &dim) { + return dim == Shape::UNDEFINED_DIM ? 
DNNL_RUNTIME_DIM_VAL : static_cast(dim); +} + +VectorDims MKLDNNExtensionUtils::convertToVectorDims(const memory::dims& dims) { + std::vector vecResult; + vecResult.reserve(dims.size()); + std::back_insert_iterator> itr(vecResult); + std::transform(dims.begin(), dims.end(), itr, convertToDim); + return vecResult; +} + +memory::dims MKLDNNExtensionUtils::convertToDnnlDims(const VectorDims& dims) { + memory::dims vecResult; + vecResult.reserve(dims.size()); + std::back_insert_iterator itr(vecResult); + std::transform(dims.begin(), dims.end(), itr, convertToDnnlDim); + return vecResult; +} + +memory::format_tag MKLDNNExtensionUtils::GetPlainFormatByRank(size_t rank) { + switch (rank) { + case 0: + case 1: + return memory::format_tag::a; + case 2: + return memory::format_tag::ab; + case 3: + return memory::format_tag::abc; + case 4: + return memory::format_tag::abcd; + case 5: + return memory::format_tag::abcde; + case 6: + return memory::format_tag::abcdef; + default: + return memory::format_tag::undef; + } +} + +DnnlMemoryDescPtr MKLDNNExtensionUtils::makeDescriptor(const mkldnn::memory::desc &desc) { + if (desc.data.format_kind == dnnl_blocked) { + return std::shared_ptr(new DnnlBlockedMemoryDesc(desc)); + } else { + return std::shared_ptr(new DnnlMemoryDesc(desc)); + } } -std::vector MKLDNNExtensionUtils::convertToDnnlDims(const InferenceEngine::SizeVector& dims) { - return std::vector(dims.begin(), dims.end());; +size_t MKLDNNExtensionUtils::getMemSizeForOneDnnDesc(mkldnn::memory::desc desc) { + const auto offset0 = desc.data.offset0; + desc.data.offset0 = 0; + size_t size = desc.get_size(); + if (size == DNNL_RUNTIME_SIZE_VAL) + return MemoryDesc::UNDEFINED_SIZE; + size += offset0 * sizeOfDataType(desc.data_type()); + return size; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h index 8e7f9a1b3742e7..524341a136a7ca 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.h @@ -11,17 +11,30 @@ #include #include "mkldnn.hpp" -#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc.h" namespace MKLDNNPlugin { +class DnnlMemoryDesc; + class MKLDNNExtensionUtils { public: static uint8_t sizeOfDataType(mkldnn::memory::data_type dataType); static mkldnn::memory::data_type IEPrecisionToDataType(const InferenceEngine::Precision& prec); static InferenceEngine::Precision DataTypeToIEPrecision(mkldnn::memory::data_type dataType); - static InferenceEngine::SizeVector convertToSizeVector(const mkldnn::memory::dims& dims); - static std::vector convertToDnnlDims(const InferenceEngine::SizeVector& dims); + static Dim convertToDim(const dnnl::memory::dim &dim); + static dnnl::memory::dim convertToDnnlDim(const Dim &dim); + static VectorDims convertToVectorDims(const mkldnn::memory::dims& dims); + static std::vector convertToDnnlDims(const VectorDims& dims); + static mkldnn::memory::format_tag GetPlainFormatByRank(size_t rank); + + /** + * @brief Creates DnnlBlockedMemoryDesc if desc is blocked, otherwise DnnlMemoryDesc + * @param desc mkldnn::memory::desc from which one of the descriptors will be created + * @return pointer to DnnlBlockedMemoryDesc or DnnlMemoryDesc + */ + static std::shared_ptr makeDescriptor(const mkldnn::memory::desc &desc); + static size_t getMemSizeForOneDnnDesc(mkldnn::memory::desc desc); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp 
b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index eb6f5fc523b520..8f361edce5b0e1 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -39,7 +39,7 @@ #include "utils/node_dumper.h" #include "utils/ngraph_utils.hpp" #include "utils/cpu_utils.hpp" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include #include @@ -47,6 +47,7 @@ #include #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -214,8 +215,11 @@ void MKLDNNGraph::Replicate(const CNNNetwork &network, const MKLDNNExtensionMana graphNodes.push_back(node); if (op->get_type_info() == ngraph::op::v0::Parameter::type_info) { - if (inputsInfo.count(node->getName()) != 0) { + const auto inInfo = inputsInfo.find(node->getName()); + if (inInfo != inputsInfo.end()) { inputNodesMap[node->getName()] = node; + if (inInfo->second->getInputData()->isDynamic()) + isDynamicGraph = true; } } @@ -439,8 +443,8 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() { } static bool isReorderAvailable(const MemoryDesc& parentDesc, const MemoryDesc& childDesc, const mkldnn::engine& eng) { - memory::desc dstMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(childDesc); - memory::desc srcMemDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(parentDesc);; + memory::desc dstMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(childDesc.clone())->getDnnlDesc(); + memory::desc srcMemDesc = MemoryDescUtils::convertToDnnlMemoryDesc(parentDesc.clone())->getDnnlDesc(); mkldnn::primitive_attr attr; dnnl_primitive_desc_t result = nullptr; @@ -522,6 +526,9 @@ static edge_clusters_t findEdgeClusters(const std::vector & graph edge_cluster_idx_map_t edge_cluster_indices; for (auto &edge : graphEdges) { + if (!edge->hasDefinedMaxSize()) + continue; + auto edge_it = edge_cluster_indices.find(edge); if (edge_it != edge_cluster_indices.end()) @@ -602,7 +609,7 @@ void MKLDNNGraph::AllocateWithReuse() { int e_start = edge->getParent()->execIndex; int e_finish = edge->getChild()->execIndex; - int64_t e_size = edge->getDesc().getCurrentSize(); // size in bytes (from the beginning of data to the last element) + int64_t e_size = edge->getDesc().getMaxMemSize(); // size in bytes (from the beginning of data to the last element) if (e_size == MemoryDesc::UNDEFINED_SIZE) { IE_THROW() << "Can not allocate memory since the size is undefined."; } @@ -639,7 +646,7 @@ void MKLDNNGraph::AllocateWithReuse() { size_t total_size = static_cast(memSolver.solve()) * alignment; memWorkspace = std::make_shared(eng); - memWorkspace->Create(MKLDNNMemoryDesc({total_size}, mkldnn::memory::data_type::s8)); + memWorkspace->Create(DnnlBlockedMemoryDesc(InferenceEngine::Precision::I8, Shape(InferenceEngine::SizeVector{total_size}))); if (edge_clusters.empty()) return; @@ -658,7 +665,7 @@ void MKLDNNGraph::AllocateWithReuse() { // TODO: WA for some test (like strided_slice_test) which use tensors with // shapes {0}. And it is implisitly converted into {1} tensor. // Zeroing of input data allow pass tests. 
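+    // The condition below is extended so the zero fill is skipped when the descriptor reports an
+    // undefined max memory size (dynamic shapes), presumably because no storage of known size has
+    // been allocated for such edges at this stage.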
- if (edge->getParent()->type == Input) + if (edge->getParent()->type == Input && edge->getMemoryPtr()->getDesc().getMaxMemSize() != MemoryDesc::UNDEFINED_SIZE) edge->getMemoryPtr()->FillZero(); count++; @@ -679,8 +686,11 @@ void MKLDNNGraph::Allocate() { // Allocate memory space for all edges marked with NeedAllocation AllocateWithReuse(); - // Resolve all other edges with status NotAllocated or in-place - for (auto& node : graphNodes) node->resolveNotAllocatedEdges(); + // Resolve all other edges with status NotAllocated and in-place + for (auto& node : graphNodes) node->resolveInPlaceEdges(); + + // Create dummy memory with undefined desc for edges that are not allocated on the previous stages (memory solver and inPlace resolving) + for (auto& edge : graphEdges) edge->allocate(); // Check all getters. Should work. for (auto& edge : graphEdges) edge->validate(); @@ -703,7 +713,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: void *inter_data_ptr = input->second->getChildEdgeAt(0)->getMemory().GetData(); if (ext_data_ptr != inter_data_ptr) { - auto ext_tdesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(in->getTensorDesc()); + auto ext_tdesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(in->getTensorDesc()); auto ext_mem = MKLDNNMemory(eng); ext_mem.Create(ext_tdesc, ext_data_ptr, false); @@ -714,7 +724,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: // todo: make sure 'name' exists in this map... if (_normalizePreprocMap.find(name) != _normalizePreprocMap.end()) { if (in->getTensorDesc().getPrecision() == InferenceEngine::Precision::FP32) { - _normalizePreprocMap[name].NormalizeImage(input->second->getChildEdgeAt(0)->getShape(), + _normalizePreprocMap[name].NormalizeImage(input->second->getOutputShapeAtPort(0), reinterpret_cast(inter_data_ptr), in->getTensorDesc().getLayout()); } else { @@ -726,7 +736,7 @@ void MKLDNNGraph::PushInputData(const std::string& name, const InferenceEngine:: } } -void MKLDNNGraph::PullOutputData(const BlobMap &out) { +void MKLDNNGraph::PullOutputData(BlobMap &out) { if (!IsReady()) IE_THROW() << "Wrong state. 
Topology not ready."; @@ -735,50 +745,60 @@ void MKLDNNGraph::PullOutputData(const BlobMap &out) { auto node = outputMap.second; const MKLDNNMemory& intr_blob = node->getParentEdgeAt(0)->getMemory(); - if (!out.count(name)) { + const auto ext_blob = out.find(name); + if (ext_blob == out.end()) { IE_THROW(Unexpected) << "The network outputs do not contain mkldnn graph output node name: \"" << name << "\""; } - const Blob::Ptr &ext_blob = out.at(name); + const auto actualDesc = MemoryDescUtils::convertToTensorDesc(intr_blob.getDesc()); + const auto &expectedDesc = ext_blob->second->getTensorDesc(); + + // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it + // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar + bool isScalarOutput = false; + if (actualDesc.getLayout() == SCALAR) { + isScalarOutput = expectedDesc.getLayout() == SCALAR || + (!expectedDesc.getDims().empty() && + std::accumulate(expectedDesc.getDims().begin(), expectedDesc.getDims().end(), (size_t)1, std::multiplies()) == 1); + } else if (expectedDesc.getLayout() == SCALAR) { + isScalarOutput = actualDesc.getLayout() == SCALAR || + (!actualDesc.getDims().empty() && + std::accumulate(actualDesc.getDims().begin(), actualDesc.getDims().end(), (size_t)1, std::multiplies()) == 1); + } + + if (out[name]->getTensorDesc().getDims() != intr_blob.getStaticDims() && !isScalarOutput) { + if (!node->isDynamicNode()) + IE_THROW() << "Output blob and node dims mismatch for node with name: \"" << name << "\""; + out[name]->setShape(intr_blob.getStaticDims()); + } + + auto srcPrec = actualDesc.getPrecision(); + auto dstPrec = expectedDesc.getPrecision(); - auto srcPrec = MKLDNNExtensionUtils::DataTypeToIEPrecision(intr_blob.GetDataType()); - auto dstPrec = ext_blob->getTensorDesc().getPrecision(); - if (srcPrec == dstPrec && ext_blob->byteSize() != intr_blob.GetSize()) + if (srcPrec == dstPrec && ext_blob->second->byteSize() != intr_blob.GetSize()) IE_THROW() << "Output blob byte size is not equal network output byte size (" - << ext_blob->byteSize() << "!=" << intr_blob.GetSize() << ")."; - if (ext_blob->size() != intr_blob.GetElementsCount()) - IE_THROW() << "Output blob number of elements is not equal network output number of elements (" - << ext_blob->size() << "!=" << intr_blob.GetElementsCount() << ")."; + << ext_blob->second->byteSize() << "!=" << intr_blob.GetSize() << ")."; - void *ext_blob_ptr = ext_blob->buffer(); + void *ext_blob_ptr = ext_blob->second->buffer(); void *intr_blob_ptr = intr_blob.GetData(); // That is the same memory. No need to copy if (ext_blob_ptr == intr_blob_ptr) continue; - int MB = intr_blob.GetDims()[0]; - int MB_to_process = node->batchToProcess(); + const auto &outDims = intr_blob.getStaticDims(); + size_t size_to_copy = intr_blob.GetDescWithType()->getPaddedElementsCount(); // TODO: Should we support InferenceEngine::PluginConfigParams::KEY_DYN_BATCH_LIMIT??? 
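+    // Summary of the dynamic-batch handling that replaces the removed lines below: when
+    // config.batchLimit is set, size_to_copy is recomputed as the product of the non-batch output
+    // dims multiplied by node->batchToProcess(); nodes with dynamic shapes currently raise
+    // NotImplemented instead.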
- if (config.batchLimit) - MB_to_process = std::min(config.batchLimit, MB_to_process); - size_t size_to_copy = intr_blob.GetElementsCount() * MB_to_process / MB; - - const auto actualDesc = MemoryDescUtils::convertToTensorDesc(node->getParentEdgeAt(0)->getDesc()); - const auto expectedDesc = ext_blob->getTensorDesc(); - - // TODO [NM]: need to create universal reorder which will be detect cases when we really need to use it - // WA: for cases when output shape after transformation will be 1x1x1x1 but model output is scalar - bool isScalarOutput = false; - if (actualDesc.getLayout() == SCALAR) { - isScalarOutput = expectedDesc.getLayout() == SCALAR || - std::accumulate(expectedDesc.getDims().begin(), expectedDesc.getDims().end(), (size_t)1, std::multiplies()) == 1; - } else if (expectedDesc.getLayout() == SCALAR) { - isScalarOutput = actualDesc.getLayout() == SCALAR || - std::accumulate(actualDesc.getDims().begin(), actualDesc.getDims().end(), (size_t)1, std::multiplies()) == 1; + // TODO [DS]: phase 2: should we support this behaviour? Looks obsolete in the dynamic shapes paradigm + if (config.batchLimit) { + if (node->isDynamicNode()) { + IE_THROW(NotImplemented) << "[DS] not implemented dynamic batch for node with dynamic shape"; + } + int MB_to_process = node->batchToProcess(); + size_to_copy = std::accumulate(outDims.begin() + 1, outDims.end(), (size_t)1, std::multiplies()) * MB_to_process; } if (actualDesc.getBlockingDesc() != expectedDesc.getBlockingDesc() && !isScalarOutput) { - auto outBlobDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(expectedDesc); + auto outBlobDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(expectedDesc); auto outBloMem = MKLDNNMemory(eng); outBloMem.Create(outBlobDesc, ext_blob_ptr, false); @@ -816,7 +836,11 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) { ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node)); OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute); - node->execute(stream); + if (node->isDynamicNode()) { + node->executeDynamic(stream); + } else { + node->execute(stream); + } ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node)); } @@ -1070,6 +1094,7 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { if (!parent) continue; MKLDNNEdgePtr &remEdge = p_edge; + const auto portCandidate = remEdge->getOutputNum(); int inNum = 0; if (remEdge) { inNum = remEdge->getInputNum(); @@ -1081,8 +1106,9 @@ void MKLDNNGraph::DropDWConvNode(const MKLDNNNodePtr &node) { MKLDNNEdgePtr newEdge(new MKLDNNEdge(parent, parentConv, inNum, outNum)); graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parentConv->inputShapes.push_back(Shape(newEdge->getShape())); + parentConv->inputShapes.push_back(node->getInputShapeAtPort(portCandidate)); } + parentConv->outputShapes[0] = node->getOutputShapeAtPort(0); } void MKLDNNGraph::RemoveDroppedNodes() { diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h index a946d47bc6ada7..a3101de17b24b8 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h @@ -57,7 +57,7 @@ class MKLDNNGraph { } void PushInputData(const std::string& name, const InferenceEngine::Blob::Ptr &in); - void PullOutputData(const InferenceEngine::BlobMap &out); + void PullOutputData(InferenceEngine::BlobMap &out); void Infer(MKLDNNInferRequest* request = nullptr, int batch = -1); @@ -85,6 +85,20 @@ class MKLDNNGraph { return outputNodesMap; } + MKLDNNNodePtr getInputNodeByName(const 
std::string &name) { + auto input = inputNodesMap.find(name); + if (input == inputNodesMap.end()) + IE_THROW() << "CPU execution graph doesn't contain input node with name: " << name; + return input->second; + } + + MKLDNNNodePtr getOutputNodeByName(const std::string &name) { + auto output = outputNodesMap.find(name); + if (output == outputNodesMap.end()) + IE_THROW() << "CPU execution graph doesn't contain output node with name: " << name; + return output->second; + } + bool hasInputWithName(const std::string& name) const { return inputNodesMap.count(name); } @@ -172,6 +186,10 @@ class MKLDNNGraph { return isQuantizedFlag; } + bool isDynamic() const { + return isDynamicGraph; + } + protected: void VisitNode(MKLDNNNodePtr node, std::vector& sortedNodes); @@ -196,8 +214,6 @@ class MKLDNNGraph { MKLDNNMemoryPtr memWorkspace; - std::map inputNodesMap; - std::map outputNodesMap; std::vector graphNodes; std::vector graphEdges; @@ -205,6 +221,7 @@ class MKLDNNGraph { std::string _name; bool isQuantizedFlag = false; + bool isDynamicGraph = false; static mkldnn::engine eng; @@ -226,6 +243,9 @@ class MKLDNNGraph { friend std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph); private: + // TODO: change std::map to std::unordered_map + std::map inputNodesMap; + std::map outputNodesMap; // these node pointers (from graphNodes) are to avoid regular checking for // constant node in ExecuteConstantNodesOnly and Infer methods std::vector constantGraphNodes; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp index 523d5dce81b424..4c790cebf83696 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_dumper.cpp @@ -45,11 +45,11 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no std::string outputPrecisionsStr; if (!node->getChildEdges().empty()) { - outputPrecisionsStr = node->getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); + outputPrecisionsStr = node->getChildEdgeAt(0)->getMemory().getDesc().getPrecision().name(); bool isAllEqual = true; for (size_t i = 1; i < node->getChildEdges().size(); i++) { - if (node->getChildEdgeAt(i - 1)->getMemory().GetDesc().getPrecision() != node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision()) { + if (node->getChildEdgeAt(i - 1)->getMemory().getDesc().getPrecision() != node->getChildEdgeAt(i)->getMemory().getDesc().getPrecision()) { isAllEqual = false; break; } @@ -58,12 +58,12 @@ std::map extract_node_metadata(const MKLDNNNodePtr &no // If all output precisions are the same, we store the name only once if (!isAllEqual) { for (size_t i = 1; i < node->getChildEdges().size(); i++) - outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getMemory().GetDesc().getPrecision().name()); + outputPrecisionsStr += "," + std::string(node->getChildEdgeAt(i)->getMemory().getDesc().getPrecision().name()); } } else { // Branch to correctly handle output nodes if (!node->getParentEdges().empty()) { - outputPrecisionsStr = node->getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name(); + outputPrecisionsStr = node->getParentEdgeAt(0)->getMemory().getDesc().getPrecision().name(); } } serialization_info[ExecGraphInfoSerialization::OUTPUT_PRECISIONS] = outputPrecisionsStr; @@ -160,7 +160,7 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph auto meta_data = extract_node_metadata(node); std::shared_ptr return_node; if (is_input) { - auto& desc = 
node->getChildEdgeAt(0)->getMemory().GetDesc(); + auto& desc = node->getChildEdgeAt(0)->getMemory().getDesc(); auto param = std::make_shared(details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); return_node = param; params.push_back(param); @@ -172,7 +172,7 @@ std::shared_ptr dump_graph_as_ie_ngraph_net(const MKLDNNGraph get_inputs(node), node->getSelectedPrimitiveDescriptor()->getConfig().outConfs.size()); for (size_t port = 0; port < return_node->get_output_size(); ++port) { - auto& desc = node->getChildEdgeAt(port)->getMemory().GetDesc(); + auto& desc = node->getChildEdgeAt(port)->getMemory().getDesc(); return_node->set_output_type(port, details::convertPrecision(desc.getPrecision()), desc.getShape().toPartialShape()); } } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp index c450fc63ece68e..eb99ef078a719d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp @@ -44,7 +44,7 @@ #include #include "mkldnn_itt.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -156,14 +156,14 @@ void MKLDNNGraphOptimizer::ApplyImplSpecificGraphOptimizations(MKLDNNGraph &grap void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](MKLDNNNodePtr node) { return node->getType() == Convolution && node->getChildEdges().size() == 1 && node->getParentEdges().size() == 2 && node->getFusedWith().empty(); }; - auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) return false; @@ -171,8 +171,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { if (biasNode->getType() != Input || !biasNode->isConstant() || biasNode->getChildEdges().size() != 1) return false; - auto convOutDims = parentNode->getChildEdgesAtPort(0)[0]->getShape().getDims(); - auto biasDims = getNormalizedDimsBySize(biasNode->getChildEdgesAtPort(0)[0]->getShape().getDims(), + auto convOutDims = parentNode->getOutputShapeAtPort(0).getDims(); + auto biasDims = getNormalizedDimsBySize(biasNode->getOutputShapeAtPort(0).getDims(), convOutDims.size()); // TODO [NM]: Legacy ConvBias fusion transformation supports both per-tensor (via explicit broadcasing) and per-channel cases. // Most of the real models contain per-channel bias, so we need to reavaluate the need to support per-tensor variant. 
@@ -193,13 +193,13 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { parent++; continue; } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(parentNode, childNode)) { + if (!isSuitableChildNode(parentNode, childNode)) { parent++; continue; } @@ -255,7 +255,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndBias(MKLDNNGraph &graph) { graphEdges.push_back(newEdge); parent->addEdge(newEdge); - parent->outputShapes[inNum] = Shape(SizeVector{parentEltwise->outputShapes[0].getStaticDims()[1]}); + parent->outputShapes[inNum] = Shape(VectorDims{parentEltwise->outputShapes[0].getStaticDims()[1]}); parentEltwise->inputShapes.push_back(parent->outputShapes[0]); } } @@ -305,7 +305,7 @@ void MKLDNNGraphOptimizer::FuseDeconvolutionAndSimpleOperation(MKLDNNGraph &grap void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableSecondInput = [](MKLDNNNodePtr node, SizeVector dataDims) { + auto isSuitableSecondInput = [](MKLDNNNodePtr node, VectorDims dataDims) { if (node->getType() != Input || !node->isConstant()) return false; auto secondInputDims = node->outputShapes[0].getDims(); @@ -323,32 +323,32 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { return true; }; - auto isSutableParentNode = [&](MKLDNNNodePtr node) { + auto isSuitableParentNode = [&](MKLDNNNodePtr node) { if (node->getAlgorithm() != EltwiseMultiply || !node->getFusedWith().empty() || node->getParentEdges().size() != 2 || node->getChildEdges().size() != 1) return false; - return isSutableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getParentEdgesAtPort(0)[0]->getShape().getDims()); + return isSuitableSecondInput(node->getParentEdgesAtPort(1)[0]->getParent(), node->getInputShapeAtPort(0).getDims()); }; - auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { if (childNode->getAlgorithm() != EltwiseAdd || !childNode->getFusedWith().empty() || childNode->getParentEdges().size() != 2) return false; - return isSutableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getParentEdgesAtPort(0)[0]->getShape().getDims()) && - parentNode->canFuse(childNode); + return isSuitableSecondInput(childNode->getParentEdgesAtPort(1)[0]->getParent(), childNode->getInputShapeAtPort(0).getStaticDims()) && + parentNode->canFuse(childNode); }; auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { parent++; continue; } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(parentNode, childNode)) { + if (!isSuitableChildNode(parentNode, childNode)) { parent++; continue; } @@ -419,11 +419,11 @@ void MKLDNNGraphOptimizer::FuseMultiplyAndAdd(MKLDNNGraph &graph) { void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableConvNode = [](MKLDNNNodePtr node) { + auto isSuitableConvNode = [](MKLDNNNodePtr node) { bool retVal = false; if (node->getType() == Convolution) { if (auto convNode = std::dynamic_pointer_cast(node)) { - auto rank = 
convNode->getParentEdgeAt(0)->getShape().getRank(); + auto rank = convNode->getInputShapeAtPort(0).getRank(); // int8 depthwise convolution does not support fusing zero points in 3D case if (implication(convNode->isDepthWise(), rank == 4)) { retVal = true; @@ -438,8 +438,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (convNode == nullptr) IE_THROW() << "Cannot get convolution node " << node->getName(); - int IC = node->getParentEdgesAtPort(0)[0]->getShape().getDims()[1]; - int OC = node->getChildEdgesAtPort(0)[0]->getShape().getDims()[1]; + int IC = node->getInputShapeAtPort(0).getDims()[1]; + int OC = node->getOutputShapeAtPort(0).getDims()[1]; if (Shape::UNDEFINED_DIM == IC || Shape::UNDEFINED_DIM == OC) { return false; @@ -467,11 +467,11 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (arg0->getOriginalOutputPrecisionAtPort(0) != Precision::U8) return false; - if (parent0->getParentEdgesAtPort(1)[0]->getShape().getRank() < 2) { + if (parent0->getInputShapeAtPort(1).getRank() < 2) { return false; } - auto zpDims = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims(); + auto zpDims = parent0->getInputShapeAtPort(1).getDims(); if (zpDims[0] != 1 || !dimsEqualStrong(zpDims[1], IC)) return false; @@ -496,7 +496,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { if (zeroPointsData == nullptr) IE_THROW() << "zeroPointsBlob has not allocated buffer"; - auto zeroPointDataSize = parent0->getParentEdgesAtPort(1)[0]->getShape().getDims()[1]; + auto zeroPointDataSize = parent0->getInputShapeAtPort(1).getDims()[1]; if (Shape::UNDEFINED_DIM == zeroPointDataSize) { return false; } @@ -580,7 +580,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndZeroPoints(MKLDNNGraph &graph) { for (int i = 0; i < graphNodes.size(); i++) { auto conv = graphNodes[i]; - if (!isSutableConvNode(conv)) continue; + if (!isSuitableConvNode(conv)) continue; auto dataEltwise = conv->getParentEdgesAtPort(0)[0]->getParent(); auto weightsEltwise = conv->getParentEdgesAtPort(1)[0]->getParent(); @@ -605,14 +605,14 @@ static bool BF16QuantizeNodeFusing(MKLDNNNodePtr parentNode, MKLDNNNodePtr child void MKLDNNGraphOptimizer::FuseFullyConnectedAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { - return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getParentEdgeAt(0)->getShape().getRank() != 3; + auto isSuitableParentNode = [](MKLDNNNodePtr node) { + return node->getType() == FullyConnected && node->getChildEdges().size() == 1 && node->getInputShapeAtPort(0).getRank() != 3; }; auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { parent++; continue; } @@ -658,7 +658,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { return conv->getWeightDims()[weightRank - 1] == 1 && conv->getWeightDims()[weightRank - 2] == 1; }; - auto isSutableParentConvolution = [&](MKLDNNNodePtr node) { + auto isSuitableParentConvolution = [&](MKLDNNNodePtr node) { if (node->isDropped()) return false; @@ -671,8 +671,8 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { const auto &strides = conv->getStride(); const auto &paddings = conv->getPaddingL(); - const auto &inDims = node->getParentEdgeAt(0)->getShape().getDims(); - const auto &outDims = 
node->getChildEdgeAt(0)->getShape().getDims(); + const auto &inDims = node->getInputShapeAtPort(0).getDims(); + const auto &outDims = node->getOutputShapeAtPort(0).getDims(); bool isSupportedParams = conv->getGroupNum() == 1 && inDims.size() == 4 && dimsEqualStrong(inDims[inDims.size() - 1], outDims[outDims.size() - 1]) && @@ -686,7 +686,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { return node->getChildEdges().size() == 1 && isConvolutionNode(node->getChildEdgeAt(0)->getChild()); }; - auto isSutableChildConvolution = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { + auto isSuitableChildConvolution = [&](const MKLDNNNodePtr &parentNode, const MKLDNNNodePtr &childNode) { if (parentNode->isDropped() || childNode->isDropped()) return false; @@ -729,7 +729,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { convChild->getStride()[stridesSize - 1] == convChild->getStride()[stridesSize - 2] && withBias && one_of(convChild->getStride()[stridesSize - 1], 1, 2) && - childNode->getChildEdgeAt(0)->getShape().getRank() == 4; + childNode->getOutputShapeAtPort(0).getRank() == 4; return isSupportedParams; }; @@ -761,10 +761,10 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { if (!isConvolutionNode(graphNodes[i])) continue; auto parentConvNode = graphNodes[i]; - if (!isSutableParentConvolution(parentConvNode)) continue; + if (!isSuitableParentConvolution(parentConvNode)) continue; auto childConvNode = parentConvNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildConvolution(parentConvNode, childConvNode)) continue; + if (!isSuitableChildConvolution(parentConvNode, childConvNode)) continue; if (!isFusingWorthwhile(parentConvNode, childConvNode)) continue; @@ -783,7 +783,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndDWConvolution(MKLDNNGraph &graph) { void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](MKLDNNNodePtr node) { return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1 && node->getOriginalOutputPrecisionAtPort(0) == Precision::FP32; }; @@ -791,7 +791,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDN auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { parent++; continue; } @@ -831,14 +831,14 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperationThroughMaxPool(MKLDN void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](MKLDNNNodePtr node) { return (node->getType() == Convolution || node->getType() == BinaryConvolution) && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { parent++; continue; } @@ -876,7 +876,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionAndSimpleOperation(MKLDNNGraph &graph) void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = 
[](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](MKLDNNNodePtr node) { if (node->getType() == Pooling) { if (!one_of(node->getOriginalInputPrecisionAtPort(0), Precision::U8, Precision::I8)) return false; @@ -885,16 +885,16 @@ void MKLDNNGraphOptimizer::FusePoolingAndFakeQuantize(MKLDNNGraph &graph) { return false; }; - auto isSutableChildNode = [](MKLDNNNodePtr node) { + auto isSuitableChildNode = [](MKLDNNNodePtr node) { return node->getType() == FakeQuantize && node->getAlgorithm() != Algorithm::FQBinarization; }; for (int i = 0; i < graphNodes.size(); i++) { auto parent = graphNodes[i]; - if (!isSutableParentNode(parent)) continue; + if (!isSuitableParentNode(parent)) continue; auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(child)) continue; + if (!isSuitableChildNode(child)) continue; child->fuseInto(parent); @@ -994,15 +994,15 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG if (std::dynamic_pointer_cast(graphNode)->isWithBroadcast()) continue; // TODO: Enlarge to several inputs - bool isSutableNode = graphNode->getParentEdges().size() == 2; - if (!isSutableNode) + bool isSuitableNode = graphNode->getParentEdges().size() == 2; + if (!isSuitableNode) continue; - auto parent1 = graphNode->getParentEdgeAt(0)->getParent(); - auto parent2 = graphNode->getParentEdgeAt(1)->getParent(); + auto parent1 = graphNode->getParentEdgesAtPort(0)[0]->getParent(); + auto parent2 = graphNode->getParentEdgesAtPort(1)[0]->getParent(); - bool isSutableParent1 = parent1->getType() == Convolution || parent1->getType() == BinaryConvolution; - bool isSutableParent2 = parent2->getType() == Convolution || parent2->getType() == BinaryConvolution; + bool isSuitableParent1 = parent1->getType() == Convolution || parent1->getType() == BinaryConvolution; + bool isSuitableParent2 = parent2->getType() == Convolution || parent2->getType() == BinaryConvolution; auto canFuseSum = [](MKLDNNBinaryConvolutionNode *binConv, MKLDNNNodePtr fuseCandidate) { if (binConv->getImplType() == impl_desc_type::ref) @@ -1025,34 +1025,34 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG auto* binConvNode1 = dynamic_cast(parent1.get()); if (binConvNode1) { - isSutableParent1 = isSutableParent1 && canFuseSum(binConvNode1, graphNode); + isSuitableParent1 = isSuitableParent1 && canFuseSum(binConvNode1, graphNode); } auto* binConvNode2 = dynamic_cast(parent2.get()); if (binConvNode2) { - isSutableParent2 = isSutableParent2 && canFuseSum(binConvNode2, graphNode); + isSuitableParent2 = isSuitableParent2 && canFuseSum(binConvNode2, graphNode); } auto* convNode1 = dynamic_cast(parent1.get()); if (convNode1) { if (!convNode1->canBeExecutedInInt8()) { - isSutableParent1 = isSutableParent1 && convNode1->getFusedWith().empty(); + isSuitableParent1 = isSuitableParent1 && convNode1->getFusedWith().empty(); } } auto* convNode2 = dynamic_cast(parent2.get()); if (convNode2) { if (!convNode2->canBeExecutedInInt8()) { - isSutableParent2 = isSutableParent2 && convNode2->getFusedWith().empty(); + isSuitableParent2 = isSuitableParent2 && convNode2->getFusedWith().empty(); } } - if (!isSutableParent1 && !isSutableParent2) + if (!isSuitableParent1 && !isSuitableParent2) continue; - auto mergedConv = isSutableParent1 ? parent1 : parent2; - auto peerNode = isSutableParent1 ? parent2 : parent1; - if (isSutableParent1 && isSutableParent2) { + auto mergedConv = isSuitableParent1 ? parent1 : parent2; + auto peerNode = isSuitableParent1 ? 
parent2 : parent1; + if (isSuitableParent1 && isSuitableParent2) { if ((peerNode->getType() == Convolution || peerNode->getType() == BinaryConvolution) && mergedConv->getChildEdges().size() != 1) { mergedConv = parent2; @@ -1070,7 +1070,7 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG bool fuse_allowed = mergedConv->getChildEdges().size() == 1; for (size_t j = 0; fuse_allowed && j < mergedConv->getParentEdges().size(); j++) - if (mergedConv->getParentEdgeAt(j)->getParent() == peerNode) + if (mergedConv->getParentEdgesAtPort(j)[0]->getParent() == peerNode) fuse_allowed = false; // Fused Conv+Sum prim will be used inplace. That's mean that input blob will @@ -1154,14 +1154,14 @@ void MKLDNNGraphOptimizer::FuseConvolutionSumAndConvolutionSumActivation(MKLDNNG void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](MKLDNNNodePtr node) { return (node->getType() == MVN) && (node->getChildEdges().size() == 1); }; auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { parent++; continue; } @@ -1196,7 +1196,7 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) return node->getType() == Interpolate && node->getChildEdges().size() == 1; }; - auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { // Avoid cycle dependencies for (auto &childParentEdge : childNode->getParentEdges()) { for (auto &parentParentEdge : parentNode->getParentEdges()) { @@ -1219,7 +1219,7 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(parentNode, childNode)) { + if (!isSuitableChildNode(parentNode, childNode)) { parent++; continue; } @@ -1244,14 +1244,14 @@ void MKLDNNGraphOptimizer::FuseInterpolateAndSimpleOperation(MKLDNNGraph &graph) void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](MKLDNNNodePtr node) { return node->getType() == NormalizeL2 && node->getChildEdges().size() == 1; }; auto parent = graphNodes.begin(); while (parent != graphNodes.end()) { auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { parent++; continue; } @@ -1282,11 +1282,11 @@ void MKLDNNGraphOptimizer::FuseNormalizeL2AndSimpleOperation(MKLDNNGraph &graph) void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](MKLDNNNodePtr node) { return node->getType() == Eltwise && node->getChildEdges().size() == 1; }; - auto isSutableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { + auto isSuitableChildNode = [&](MKLDNNNodePtr parentNode, MKLDNNNodePtr childNode) { if (parentNode->isConstant() && !childNode->isConstant()) return false; for (auto &childParentEdge : childNode->getParentEdges()) { @@ -1312,13 +1312,13 @@ void MKLDNNGraphOptimizer::FuseEltwiseAndSimple(MKLDNNGraph &graph) { auto parent = graphNodes.begin(); while 
(parent != graphNodes.end()) { auto parentNode = *parent; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { parent++; continue; } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(parentNode, childNode)) { + if (!isSuitableChildNode(parentNode, childNode)) { parent++; continue; } @@ -1422,10 +1422,10 @@ void MKLDNNGraphOptimizer::DropDoubleReorders(MKLDNNGraph &graph) { if (nn == nullptr) IE_THROW() << "Cannot get reorder layer " << nextNode->getName(); - MKLDNNNodePtr p = n->getParentEdgeAt(0)->getParent(); - MKLDNNNodePtr c = nn->getChildEdgeAt(0)->getChild(); + MKLDNNNodePtr p = n->getParentEdgesAtPort(0)[0]->getParent(); + MKLDNNNodePtr c = nn->getChildEdgesAtPort(0)[0]->getChild(); - auto oldEdgeNum = n->getParentEdgeAt(0)->getInputNum(); + auto oldEdgeNum = n->getParentEdgesAtPort(0)[0]->getInputNum(); graph.DropNode(node); graph.DropNode(nextNode); @@ -1461,11 +1461,11 @@ void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { MKLDNNNodePtr& broadcastNode = graphNode; MKLDNNNodePtr eltwiseNode = broadcastNode->getChildEdgeAt(0)->getChild(); eltwiseNode->inputShapes[broadcastNode->getChildEdgeAt(0)->getOutputNum()] - = broadcastNode->getParentEdgeAt(0)->getShape(); + = broadcastNode->getInputShapeAtPort(0); auto& edges = graph.GetEdges(); for (size_t i = 1lu; i < broadcastNode->getParentEdges().size(); i++) { - auto constParent = broadcastNode->getParentEdgeAt(i)->getParent(); + auto constParent = broadcastNode->getParentEdgesAtPort(i)[0]->getParent(); for (auto it = edges.begin(); it != edges.end(); it++) { if ((*it) == constParent->getChildEdgeAt(0)) { edges.erase(it); @@ -1481,11 +1481,11 @@ void MKLDNNGraphOptimizer::FuseBroadcastAndEltwise(MKLDNNGraph &graph) { void MKLDNNGraphOptimizer::FuseClampAndFakeQuantize(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableClampNode = [](MKLDNNNodePtr node) { + auto isSuitableClampNode = [](MKLDNNNodePtr node) { return node->getType() == Eltwise && node->getChildEdges().size() == 1 && node->getAlgorithm() == EltwiseClamp; }; - auto isSutableFakeQuantizeNode = [](MKLDNNNodePtr node) { + auto isSuitableFakeQuantizeNode = [](MKLDNNNodePtr node) { return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; }; @@ -1516,10 +1516,10 @@ void MKLDNNGraphOptimizer::FuseClampAndFakeQuantize(MKLDNNGraph &graph) { for (int i = 0; i < graphNodes.size(); i++) { auto parent = graphNodes[i]; - if (!isSutableClampNode(parent)) continue; + if (!isSuitableClampNode(parent)) continue; auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableFakeQuantizeNode(child)) continue; + if (!isSuitableFakeQuantizeNode(child)) continue; if (fuseClampAndFakeQuantizeNodes(parent, child)) { graph.DropNode(parent); @@ -1531,16 +1531,16 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph auto& graphNodes = graph.GetNodes(); auto getConstPort = [](const MKLDNNNodePtr node) -> int { - if (node->getParentEdgeAt(0)->getParent()->getType() == Input && node->getParentEdgeAt(0)->getParent()->isConstant()) { + if (node->getParentEdgesAtPort(0)[0]->getParent()->getType() == Input && node->getParentEdgesAtPort(0)[0]->getParent()->isConstant()) { return 0; - } else if (node->getParentEdgeAt(1)->getParent()->getType() == Input && node->getParentEdgeAt(1)->getParent()->isConstant()) { + } else if (node->getParentEdgesAtPort(1)[0]->getParent()->getType() == Input && 
node->getParentEdgesAtPort(1)[0]->getParent()->isConstant()) { return 1; } else { return -1; } }; - auto isSutableScaleShiftNode = [getConstPort](MKLDNNNodePtr node) { + auto isSuitableScaleShiftNode = [getConstPort](MKLDNNNodePtr node) { if (one_of(node->getAlgorithm(), EltwiseAdd, EltwiseSubtract, EltwiseMultiply, EltwiseDivide, EltwiseMulAdd)) { MKLDNNNode *parent = nullptr; if (node->getAlgorithm() != EltwiseMulAdd) { @@ -1548,14 +1548,14 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph if (constPort == -1) { return false; } - parent = node->getParentEdgeAt(1 - constPort)->getParent().get(); + parent = node->getParentEdgesAtPort(1 - constPort)[0]->getParent().get(); } return node->getType() == Eltwise && node->getChildEdges().size() == 1 && node->canBePerformedAsScaleShift(parent); } return false; }; - auto isSutableFakeQuantizeNode = [](MKLDNNNodePtr node) { + auto isSuitableFakeQuantizeNode = [](MKLDNNNodePtr node) { return node->getType() == FakeQuantize && node->getAlgorithm() != FQBinarization; }; @@ -1566,7 +1566,7 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph std::vector scalesBuffer; std::vector shiftsBuffer; - parent->fillScalesAndShifts(parent->getParentEdgeAt(1 - getConstPort(parent))->getParent().get(), scalesBuffer, shiftsBuffer, 1); + parent->fillScalesAndShifts(parent->getParentEdgesAtPort(1 - getConstPort(parent))[0]->getParent().get(), scalesBuffer, shiftsBuffer, 1); for (int i = 0; i < scalesBuffer.size(); i++) if (scalesBuffer[i] == 0.f) @@ -1644,10 +1644,10 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph for (int i = 0; i < graphNodes.size(); i++) { auto parent = graphNodes[i]; - if (!isSutableScaleShiftNode(parent)) continue; + if (!isSuitableScaleShiftNode(parent)) continue; auto child = parent->getChildEdgeAt(0)->getChild(); - if (!isSutableFakeQuantizeNode(child)) continue; + if (!isSuitableFakeQuantizeNode(child)) continue; if (fuseScaleShiftAndFakeQuantizeNodes(parent, child)) { auto parentEdges = parent->parentEdges; @@ -1667,11 +1667,11 @@ void MKLDNNGraphOptimizer::FusePerformedAsScaleShiftAndFakeQuantize(MKLDNNGraph void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { auto& graphNodes = graph.GetNodes(); - auto isSutableParentNode = [](MKLDNNNodePtr node) { + auto isSuitableParentNode = [](MKLDNNNodePtr node) { return node->getType() == Transpose && node->getChildEdges().size() == 1; }; - auto isSutableChildNode = [](MKLDNNNodePtr node) { + auto isSuitableChildNode = [](MKLDNNNodePtr node) { return node->getType() == Reorder && node->getChildEdges().size() == 1; }; @@ -1685,33 +1685,32 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { } auto& transposeOrder = transposeNode->getOrder(); - auto layoutOrder = MemoryDescUtils::convertToBlockedDescriptor( - *transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc).getOrder(); + auto layoutOrder = transposeNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc->as()->getOrder(); - auto inBlockedDesc = MemoryDescUtils::convertToBlockedDescriptor(*reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc); - auto outBlockedDesc = MemoryDescUtils::convertToBlockedDescriptor(*reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc); + auto inBlockedDesc = reorderNode->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->as(); + auto outBlockedDesc = 
reorderNode->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc->as(); - auto& inOrder = inBlockedDesc.getOrder(); - auto& outOrder = outBlockedDesc.getOrder(); + auto& inOrder = inBlockedDesc->getOrder(); + auto& outOrder = outBlockedDesc->getOrder(); if (transposeOrder.size() != layoutOrder.size() || layoutOrder.size() != inOrder.size() || inOrder.size() != outOrder.size()) { return false; } // revLayoutOrder - reverse permutation for layoutOrder - auto revLayoutOrder = SizeVector(layoutOrder.size()); + auto revLayoutOrder = VectorDims(layoutOrder.size()); for (int i = 0; i < revLayoutOrder.size(); i++) { revLayoutOrder[layoutOrder[i]] = i; } // newTransposeOrder - Transpose layout-aware permutation - auto newTransposeOrder = SizeVector(transposeOrder.size()); + auto newTransposeOrder = VectorDims(transposeOrder.size()); for (int i = 0; i < newTransposeOrder.size(); i++) { newTransposeOrder[i] = layoutOrder[transposeOrder[revLayoutOrder[i]]]; } // reorderOrder - Reorder layout-aware permutation - auto reorderOrder = SizeVector(outOrder.size()); + auto reorderOrder = VectorDims(outOrder.size()); for (int i = 0; i < reorderOrder.size(); i++) { for (int j = 0; j < reorderOrder.size(); j++) { if (outOrder[i] == inOrder[j]) { @@ -1722,7 +1721,7 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { } // summaryOrder - resulting Transpose+Reorder permutation - auto summaryOrder = SizeVector(transposeOrder.size()); + auto summaryOrder = VectorDims(transposeOrder.size()); for (int i = 0; i < summaryOrder.size(); i++) { summaryOrder[i] = reorderOrder[newTransposeOrder[i]]; } @@ -1774,9 +1773,8 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { auto inPrec = inDesc->getPrecision(); auto outPrec = outDesc->getPrecision(); - auto reorderInDesc = inDesc->clone(); - auto reorderOutDesc = outDesc->clone(); - reorderOutDesc->setPrecision(inPrec); + auto reorderInDesc = inDesc; + auto reorderOutDesc = MemoryDescUtils::cloneWithNewPrecision(*outDesc, inPrec); std::string reorderlayerName = parentParentNode->getName() + "_" + MKLDNNReorderNode::getReorderArgs(*reorderInDesc, *reorderOutDesc) + "_" + "fake"; @@ -1796,8 +1794,8 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { // case 2 if (inPrec != outPrec) { - auto reorderInDesc2 = reorderOutDesc->clone(); - auto reorderOutDesc2 = outDesc->clone(); + auto reorderInDesc2 = reorderOutDesc; + auto reorderOutDesc2 = outDesc; std::string reorderLayerName2 = reorderNode->getName() + "_" + MKLDNNReorderNode::getReorderArgs(*reorderInDesc2, *reorderOutDesc2) + "_" + childChildNode->getName(); @@ -1808,11 +1806,11 @@ void MKLDNNGraphOptimizer::MergeTransposeAndReorder(MKLDNNGraph &graph) { for (int i = 0; i < graphNodes.size(); i++) { auto parentNode = graphNodes[i]; - if (!isSutableParentNode(parentNode)) { + if (!isSuitableParentNode(parentNode)) { continue; } auto childNode = parentNode->getChildEdgeAt(0)->getChild(); - if (!isSutableChildNode(childNode)) { + if (!isSuitableChildNode(childNode)) { continue; } @@ -1860,4 +1858,4 @@ void MKLDNNGraphOptimizer::reshapeRnnSeq(MKLDNNGraph &graph) { graph.RemoveEdge(edge); } } -} +} \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp index 1ce31f3ecb882c..4a76e33ae43bd4 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.cpp @@ -22,6 +22,7 @@ 
#include #include "utils/general_utils.h" #include "utils/cpu_utils.hpp" +#include "memory_desc/dnnl_blocked_memory_desc.h" MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs, @@ -35,11 +36,11 @@ MKLDNNPlugin::MKLDNNInferRequest::MKLDNNInferRequest(InferenceEngine::InputsData IE_THROW() << "No graph was found"; graph = &(execNetwork->GetGraph()._graph); - // Allocate all input blobs + // Allocate all input blobs if shape is static, delay allocation otherwise for (const auto& it : _networkInputs) { MKLDNNInferRequest::GetBlob(it.first); } - // Allocate all output blobs + // Allocate all output blobs if shape is static, delay allocation otherwise for (const auto& it : _networkOutputs) { MKLDNNInferRequest::GetBlob(it.first); } @@ -164,6 +165,18 @@ void MKLDNNPlugin::MKLDNNInferRequest::PullStates() { } } +void MKLDNNPlugin::MKLDNNInferRequest::redefineMemoryForInputNodes() { + const auto cpuInputNodes = graph->GetInputNodesMap(); + + for (const auto &blob : _inputs) { + const auto inputNode = cpuInputNodes.find(blob.first); + if (inputNode == cpuInputNodes.end()) { + IE_THROW() << "CPU execution graph doesn't contain input node with name: " << blob.first; + } + if (inputNode->second->isDynamicNode()) + inputNode->second->redefineOutputMemory({blob.second->getTensorDesc().getDims()}); + } +} void MKLDNNPlugin::MKLDNNInferRequest::InferImpl() { using namespace openvino::itt; @@ -173,6 +186,9 @@ void MKLDNNPlugin::MKLDNNInferRequest::InferImpl() { ThrowIfCanceled(); + if (graph->isDynamic()) + redefineMemoryForInputNodes(); + execDataPreprocessing(_inputs); changeDefaultPtr(); @@ -221,30 +237,25 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } if (_inputs.find(name) == _inputs.end()) { - auto pBlob = graph->getInputBlob(name); - if (!pBlob) { - IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; - } - - InferenceEngine::TensorDesc desc = pBlob->getTensorDesc(); - if (_networkInputs.find(name) != _networkInputs.end()) { - InferenceEngine::Layout l = _networkInputs[name]->getLayout(); - InferenceEngine::Precision p = _networkInputs[name]->getPrecision(); - InferenceEngine::SizeVector dims = _networkInputs[name]->getTensorDesc().getDims(); + InferenceEngine::TensorDesc desc = _networkInputs[name]->getTensorDesc(); + bool isDynamic = _networkInputs[name]->getInputData()->isDynamic(); - desc = InferenceEngine::TensorDesc(p, dims, l); - } + _inputs[name] = make_blob_with_precision(desc); + _inputs[name]->allocate(); - _inputs[name] = make_blob_with_precision(desc); - _inputs[name]->allocate(); - if (pBlob->getTensorDesc() == desc && - graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { - externalPtr[name] = _inputs[name]->buffer(); + if (!isDynamic && + desc == MemoryDescUtils::convertToTensorDesc(graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc()) && + graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { + externalPtr[name] = _inputs[name]->buffer(); + } + } else { + IE_THROW() << "Blob with name: " << name << " exists in MKLDNN graph, but absents in network inputs"; } } data = _inputs[name]; checkBlob(data, name, true); + // check if preprocess required, but still wasn't set auto preProcessedInput = std::find_if(std::begin(_networkInputs), std::end(_networkInputs), [&](const std::pair& pair) 
{ @@ -263,52 +274,60 @@ InferenceEngine::Blob::Ptr MKLDNNPlugin::MKLDNNInferRequest::GetBlob(const std:: } if (graph->hasOutputWithName(name)) { + const auto outNode = graph->getOutputNodeByName(name); if (_outputs.find(name) == _outputs.end()) { - auto pBlob = graph->getOutputBlob(name); - if (!pBlob) { - IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; - } - - if (!data) { - InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc(); - desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision())); - - // WA: need to avoid exception thrown when we compare blocking desc in SetBlob - // in situation if we push output blobs as inputs for next network (in Hetero plugin) - // it may be that output tensor desc will be different from real input tensor desc for next network - // because the optimal descriptor was chosen (e.g. inPlace case for Split node) - auto currBlockDesc = InferenceEngine::BlockingDesc(desc.getBlockingDesc().getBlockDims(), desc.getBlockingDesc().getOrder()); - desc = InferenceEngine::TensorDesc(desc.getPrecision(), desc.getDims(), currBlockDesc); - - data = make_blob_with_precision(desc); - data->allocate(); - } else { - const auto& expectedTensorDesc = pBlob->getTensorDesc(); + if (_networkOutputs.find(name) != _networkOutputs.end()) { + bool isDynamic = outNode->isDynamicNode(); + const auto &desc = outNode->getParentEdgesAtPort(0)[0]->getMemory().getDesc(); + + if (!data) { + InferenceEngine::TensorDesc desc = _networkOutputs[name]->getTensorDesc(); + desc.setPrecision(normalizeToSupportedPrecision(desc.getPrecision())); + + data = make_blob_with_precision(desc); + data->allocate(); + } else { + const auto& expectedTensorDesc = isDynamic ? InferenceEngine::TensorDesc(desc.getPrecision(), + InferenceEngine::TensorDesc::getLayoutByRank(desc.getShape().getRank())) + : MemoryDescUtils::convertToTensorDesc(desc); + const auto &tensorDesc = data->getTensorDesc(); + if (expectedTensorDesc.getPrecision() != tensorDesc.getPrecision()) { + IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different precision: " + << tensorDesc.getPrecision() << " for input and " << expectedTensorDesc.getPrecision() + << " for output."; + } - if (expectedTensorDesc.getPrecision() != data->getTensorDesc().getPrecision()) { - IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different precision: " - << data->getTensorDesc().getPrecision() << " for input and " << expectedTensorDesc.getPrecision() - << " for output."; - } + if (expectedTensorDesc.getDims() != tensorDesc.getDims()) { + IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different shapes."; + } - if (expectedTensorDesc.getDims() != data->getTensorDesc().getDims()) { - IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " but expect blobs with different shapes."; + if (tensorDesc.getLayout() != InferenceEngine::Layout::ANY && expectedTensorDesc.getLayout() != InferenceEngine::Layout::ANY) { + if (tensorDesc.getLayout() != expectedTensorDesc.getLayout() && !(tensorDesc.getLayout() == InferenceEngine::Layout::BLOCKED && + InferenceEngine::TensorDesc(tensorDesc.getPrecision(), tensorDesc.getDims(), tensorDesc.getBlockingDesc()).getLayout() == + expectedTensorDesc.getLayout())) { + IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name << " 
but expect blobs" << + " with different layouts."; + } + + if (expectedTensorDesc.getBlockingDesc() != tensorDesc.getBlockingDesc()) + IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name + << " but expect blobs with different blocking descriptors."; + } } - if (data->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && expectedTensorDesc.getLayout() != InferenceEngine::Layout::ANY && - expectedTensorDesc.getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) { - IE_THROW(ParameterMismatch) << "Network input and output use the same name: " << name - << " but expect blobs with different blocking descriptors."; + _outputs[name] = data; + if (!isDynamic && !externalPtr.count(name) && data->getTensorDesc() == MemoryDescUtils::convertToTensorDesc(desc) && + !graph->getProperty().batchLimit) { + externalPtr[name] = data->buffer(); } - } - - _outputs[name] = data; - if (!externalPtr.count(name) && data->getTensorDesc() == pBlob->getTensorDesc() && !graph->getProperty().batchLimit) { - externalPtr[name] = data->buffer(); + } else { + IE_THROW() << "Blob with name: " << name << " exists in MKLDNN graph, but absents in network outputs"; } } + data = _outputs[name]; - checkBlob(data, name, false); + if (!outNode->isDynamicNode()) + checkBlob(data, name, false); } if (!data) { IE_THROW() << "Cannot find blob with name: " << name; @@ -335,11 +354,12 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In InferenceEngine::DataPtr foundOutput; size_t dataSize = data->size(); findInputAndOutputBlobByName(name, foundInput, foundOutput); + const auto &blobDesc = data->getTensorDesc(); if (foundInput) { - if (foundInput->getPrecision() != data->getTensorDesc().getPrecision()) { + if (foundInput->getPrecision() != blobDesc.getPrecision()) { IE_THROW(ParameterMismatch) << "Failed to set input blob with precision: " - << data->getTensorDesc().getPrecision() << ", if CNNNetwork input blob precision is: " << foundInput->getPrecision(); + << blobDesc.getPrecision() << ", if CNNNetwork input blob precision is: " << foundInput->getPrecision(); } const bool preProcRequired = preProcessingRequired(foundInput, data); @@ -360,26 +380,29 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In size_t inputSize = foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR ? InferenceEngine::details::product(foundInput->getTensorDesc().getDims()) : 1; - if (dataSize != inputSize) { + + const bool isDynamic = foundInput->getInputData()->isDynamic(); + if (!isDynamic && dataSize != inputSize) { IE_THROW() << "Input blob size is not equal network input size (" << dataSize << "!=" << inputSize << ")."; } - if (foundInput->getTensorDesc().getDims() != data->getTensorDesc().getDims()) { + if (!isDynamic && foundInput->getTensorDesc().getDims() != blobDesc.getDims()) { IE_THROW(ParameterMismatch) << "Failed to set input blob. Dimensions mismatch."; } - if (data->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && - foundInput->getTensorDesc().getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) { - IE_THROW(ParameterMismatch) << "Failed to set input blob. 
Blocking descriptor mismatch."; - } + if (blobDesc.getLayout() != InferenceEngine::Layout::ANY && foundInput->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY) { + if (isDynamic && InferenceEngine::TensorDesc(foundInput->getPrecision(), blobDesc.getDims(), foundInput->getLayout()).getBlockingDesc() != + blobDesc.getBlockingDesc()) + IE_THROW(ParameterMismatch) << "Failed to set input blob. Layouts mismatch."; - auto pBlob = graph->getInputBlob(name); - if (!pBlob) { - IE_THROW() << "MKLDNN graph doesn't contain input node with name: " << name; + if (!isDynamic && foundInput->getTensorDesc().getBlockingDesc() != blobDesc.getBlockingDesc()) + IE_THROW(ParameterMismatch) << "Failed to set input blob. Blocking descriptor mismatch."; } - if (data->getTensorDesc() == pBlob->getTensorDesc() && + const auto &actualDesc = graph->getInputNodeByName(name)->getChildEdgesAtPort(0)[0]->getMemory().getDesc(); + if (blobDesc.getLayout() != InferenceEngine::Layout::ANY && + actualDesc.isCompatible(MemoryDescUtils::convertToCpuBlockedMemoryDesc(blobDesc)) && graph->_normalizePreprocMap.find(name) == graph->_normalizePreprocMap.end() && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { @@ -393,31 +416,34 @@ void MKLDNNPlugin::MKLDNNInferRequest::SetBlob(const std::string& name, const In IE_THROW(NotImplemented) << "cannot set compound blob: supported only for input pre-processing"; } - if (foundOutput->getPrecision() != data->getTensorDesc().getPrecision()) { + if (foundOutput->getPrecision() != blobDesc.getPrecision()) { IE_THROW(ParameterMismatch) << "Failed to set output blob with precision: " - << data->getTensorDesc().getPrecision() << ", if CNNNetwork output blob precision is: " << foundOutput->getPrecision(); + << blobDesc.getPrecision() << ", if CNNNetwork output blob precision is: " << foundOutput->getPrecision(); } size_t outputSize = foundOutput->getTensorDesc().getLayout() != InferenceEngine::Layout::SCALAR ? InferenceEngine::details::product(foundOutput->getDims()) : 1; - if (dataSize != outputSize) { + + const bool isDynamic = foundOutput->isDynamic(); + if (!isDynamic && dataSize != outputSize) { IE_THROW() << "Output blob size is not equal network output size (" << dataSize << "!=" << outputSize << ")."; } - if (foundOutput->getTensorDesc().getDims() != data->getTensorDesc().getDims()) { + if (!isDynamic && foundOutput->getTensorDesc().getDims() != blobDesc.getDims()) { IE_THROW(ParameterMismatch) << "Failed to set output Blob. Dimensions mismatch."; } - if (data->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && foundOutput->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY && - foundOutput->getTensorDesc().getBlockingDesc() != data->getTensorDesc().getBlockingDesc()) { + + if (blobDesc.getLayout() != InferenceEngine::Layout::ANY && foundOutput->getTensorDesc().getLayout() != InferenceEngine::Layout::ANY) { + if (isDynamic && InferenceEngine::TensorDesc(foundOutput->getPrecision(), blobDesc.getDims(), foundOutput->getLayout()).getBlockingDesc() != + blobDesc.getBlockingDesc()) + IE_THROW(ParameterMismatch) << "Failed to set input blob. Layouts mismatch."; + + if (!isDynamic && foundOutput->getTensorDesc().getBlockingDesc() != blobDesc.getBlockingDesc()) IE_THROW(ParameterMismatch) << "Failed to set output blob. 
Blocking descriptor mismatch."; } - auto pBlob = graph->getOutputBlob(name); - if (!pBlob) - IE_THROW() << "MKLDNN graph doesn't contain output node with name: " << name; - - if (data->getTensorDesc() == pBlob->getTensorDesc() && - !graph->getProperty().batchLimit) { + const auto &desc = graph->getOutputNodeByName(name)->getParentEdgesAtPort(0)[0]->getMemory().getDesc(); + if (!isDynamic && blobDesc == MemoryDescUtils::convertToTensorDesc(desc) && !graph->getProperty().batchLimit) { externalPtr[name] = data->buffer(); } else if (externalPtr.find(name) != externalPtr.end()) { externalPtr.erase(name); @@ -432,8 +458,8 @@ static inline void changeEdgePtr(const MKLDNNPlugin::MKLDNNEdgePtr &edge, void * void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { for (auto& it : externalPtr) { - auto input = graph->inputNodesMap.find(it.first); - if (input != graph->inputNodesMap.end()) { + auto input = graph->GetInputNodesMap().find(it.first); + if (input != graph->GetInputNodesMap().end()) { if (input->second->getChildEdgeAt(0)->getMemory().GetPrimitive().get_data_handle() == it.second) continue; // Input cannot be in-place with other primitives @@ -467,7 +493,7 @@ void MKLDNNPlugin::MKLDNNInferRequest::changeDefaultPtr() { } MKLDNNNodePtr output; - for (auto& out : graph->outputNodesMap) { + for (auto& out : graph->GetOutputNodesMap()) { if (out.first == it.first) { output = out.second; break; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h index f99c42cc7e8139..3141ed18ddc542 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_infer_request.h @@ -51,6 +51,7 @@ class MKLDNNInferRequest : public InferenceEngine::IInferRequestInternal { void PushInputData(); void PushStates(); void PullStates(); + void redefineMemoryForInputNodes(); void pushInput(const std::string& inputName, InferenceEngine::Blob::Ptr& inputBlob, InferenceEngine::Precision dataType); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index a6a64120f00172..5f957f1965ff9c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -18,8 +18,10 @@ #include "nodes/common/cpu_convert.h" #include "mkldnn/ie_mkldnn.h" #include "cpu_shape.h" -#include "cpu_memory_desc_utils.h" -#include "mkldnn_extension_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "utils/cpu_utils.hpp" +#include "nodes/mkldnn_reorder_node.h" +#include "memory_desc/cpu_memory_desc.h" using namespace InferenceEngine; using namespace mkldnn; @@ -39,15 +41,11 @@ namespace { MKLDNNMemory::MKLDNNMemory(const mkldnn::engine& eng) : eng(eng) {} size_t MKLDNNMemory::GetSize() const { - uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(GetDataType())); - return GetElementsCount() * itemSize; -} - -size_t MKLDNNMemory::GetElementsCount() const { - auto desc = GetDescriptor(); - std::vector dims(desc.data.padded_dims, - desc.data.padded_dims + desc.data.ndims); - return std::accumulate(std::begin(dims), std::end(dims), (size_t) 1, std::multiplies()); + auto size = getDesc().getCurrentMemSize(); + if (size == MemoryDesc::UNDEFINED_SIZE) { + IE_THROW() << "Can't get memory size for undefined shape"; + } + return size; } void MKLDNNMemory::Create(const memory::dims& dims, memory::data_type data_type, memory::format_tag format, const void* 
data) { @@ -55,7 +53,7 @@ void MKLDNNMemory::Create(const memory::dims& dims, memory::data_type data_type, format = memory::format_tag::any; } - memory::desc desc = MKLDNNMemoryDesc(MKLDNNExtensionUtils::convertToSizeVector(dims), data_type, format); + memory::desc desc = mkldnn::memory::desc(dims, data_type, format); Create(desc, data); } @@ -91,94 +89,34 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo } void MKLDNNMemory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { - pMemDesc = desc.clone(); - Create(mkldnn::memory::desc(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc)), data, pads_zeroing); + Create(desc.clone(), data, pads_zeroing); } - -void MKLDNNMemory::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) { - if (size != 0) - IE_ASSERT(size <= output.GetDescriptor().get_size()); - if (input.GetDescriptor() == output.GetDescriptor()) { - auto srcPtr = static_cast(input.GetPtr()); - auto dstPtr = static_cast(output.GetPtr()); - - auto copySize = size == 0 ? output.GetSize() : size; - cpu_memcpy(dstPtr, srcPtr, copySize); +void MKLDNNMemory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroing) { + pMemDesc = std::move(desc); + if (nullptr != data) { + useExternalStorage = true; } else { - std::unique_ptr pReorder; - std::shared_ptr srcMemoryPtr; - std::vector tmpBuff; - - try { - pReorder = std::unique_ptr(new mkldnn::reorder(input.GetPrimitive(), output.GetPrimitive())); - srcMemoryPtr = input.prim; - } - catch (const mkldnn::error& err) { - if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType()) { - //we probably could not make the reorder because there is no one supporting this precision conversion - //lets try to convert data first using cpu_convert - auto data = static_cast(input.GetPtr()); - tmpBuff.resize(input.GetSize()); - - cpu_convert(data, tmpBuff.data(), MKLDNNExtensionUtils::DataTypeToIEPrecision(input.GetDataType()), - MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()), input.GetElementsCount()); - - MKLDNNMemory tmpMem(output.eng); - tmpMem.Create(input.GetDims(), output.GetDataType(), input.GetMKLDNNDesc().getFormat(), tmpBuff.data()); - - pReorder = std::unique_ptr(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive())); - srcMemoryPtr = tmpMem.prim; - } else { - throw; - } - } - if (pReorder) { - mkldnn::stream loc_stream(output.eng, stream::flags::default_order); - pReorder->execute(loc_stream, *srcMemoryPtr, *output.prim); - } else { - IE_THROW() << "Could not make mkldnn reorder."; - } + useExternalStorage = false; } -} - -// TODO: It should be done via wrap into Memory; -void MKLDNNMemory::SetData(memory::data_type dataType, memory::format_tag format, const void* data, size_t size, bool ftz) const { - IE_ASSERT(!one_of(format, memory::format_tag::undef, memory::format_tag::any)); - - auto dst_desc = GetDescriptor(); - memory::desc src_desc{dst_desc.dims(), dataType, format}; - IE_ASSERT(size <= dst_desc.get_size()); - - if (dst_desc == src_desc) { - uint8_t itemSize = MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(dataType)); - uint8_t* dataPtr = static_cast(GetData()); - // We cannot support strides for i/o blobs because it affects performance. 
- dataPtr += itemSize * prim->get_desc().data.offset0; - cpu_memcpy(dataPtr, data, size); + if (pMemDesc->isDefined()) { + Create(MemoryDescUtils::convertToDnnlMemoryDesc(pMemDesc)->getDnnlDesc(), data, pads_zeroing); } else { - auto memData = this->GetDescriptor().data; - memory::dims dims(memData.dims, memData.dims + memData.ndims); - - MKLDNNMemory src(this->eng); - src.Create(dims, dataType, format, data); - - reorderData(src, *this); + //delayed dynamic allocation + size_t maxMemSize = pMemDesc->getMaxMemSize(); + VectorDims dummySize{MemoryDesc::UNDEFINED_SIZE == maxMemSize ? 0 : maxMemSize}; + DnnlBlockedMemoryDesc dummyDesc(InferenceEngine::Precision::U8, Shape(dummySize)); + Create(dummyDesc.getDnnlDesc(), data, false); // no pads zeroing } - if (ftz - && dataType == memory::data_type::f32 - && prim->get_desc().data.format_kind != dnnl_format_kind_wino - && GetDataType() != memory::data_type::bf16) { - // Internal blobs haven't strides yet. - auto *memData = static_cast(GetData()); - memData += prim->get_desc().data.offset0; - setSubnormalsToZero(memData, GetSize() / sizeof(float)); + size_t newUpperBound = MKLDNNExtensionUtils::getMemSizeForOneDnnDesc(prim->get_desc()); + if (newUpperBound > memUpperBound) { + memUpperBound = newUpperBound; } } void MKLDNNMemory::SetData(const MKLDNNMemory& src, size_t size, bool ftz) const { - reorderData(src, *this, size); + MKLDNNReorderNode::reorderData(src, *this, size); if (ftz && src.GetDataType() == memory::data_type::f32 @@ -193,726 +131,48 @@ void MKLDNNMemory::SetData(const MKLDNNMemory& src, size_t size, bool ftz) const void MKLDNNMemory::FillZero() { void* dataPtr = GetData(); - memset(dataPtr, 0, GetSize()); -} - -memory::format_tag MKLDNNMemory::GetPlainFormatByRank(size_t rank) { - switch (rank) { - case 0: - case 1: - return memory::format_tag::a; - case 2: - return memory::format_tag::ab; - case 3: - return memory::format_tag::abc; - case 4: - return memory::format_tag::abcd; - case 5: - return memory::format_tag::abcde; - case 6: - return memory::format_tag::abcdef; - default: - return memory::format_tag::undef; - } -} - -InferenceEngine::Layout MKLDNNMemory::GetPlainLayout(const memory::dims& dims) { - switch (dims.size()) { - case 0: return Layout::SCALAR; - case 1: return Layout::C; - case 2: return Layout::NC; - case 3: return Layout::CHW; - case 4: return Layout::NCHW; - case 5: return Layout::NCDHW; - default: - return Layout::BLOCKED; - } -} - -Precision MKLDNNMemory::convertToIePrec(memory::data_type dataType) { - return MKLDNNExtensionUtils::DataTypeToIEPrecision(dataType); -} - -memory::data_type MKLDNNMemory::convertToDataType(const InferenceEngine::Precision &precision) { - return MKLDNNExtensionUtils::IEPrecisionToDataType(precision); -} - -memory::format_tag MKLDNNMemory::Convert(const InferenceEngine::Layout layout) { - switch (layout) { - case NCHW: - return memory::format_tag::nchw; - case NHWC: - return memory::format_tag::nhwc; - case NCDHW: - return memory::format_tag::ncdhw; - case NDHWC: - return memory::format_tag::ndhwc; - case CHW: - return memory::format_tag::tnc; - case NC: - return memory::format_tag::nc; - case C: - return memory::format_tag::x; - case SCALAR: - return memory::format_tag::x; - default: - return memory::format_tag::undef; - } -} - -std::string MKLDNNMemory::formatToString(memory::format_tag fmt) { - return mkldnn::utils::fmt2str(fmt); + if (dataPtr != nullptr) + memset(dataPtr, 0, getDesc().getMaxMemSize()); } void *MKLDNNMemory::GetPtr() const { auto ptr = static_cast(GetData()); - 
auto md = GetDescriptor().data; + auto md = prim->get_desc().data; mkldnn::impl::memory_desc_wrapper wrapper(md); ptr += wrapper.offset0() * wrapper.data_type_size(); return ptr; } -template<> -MKLDNNMemoryDesc MKLDNNMemory::GetDescWithType() const { - if (auto descPtr = dynamic_cast(pMemDesc.get())) { - return *descPtr; - } else { - switch (pMemDesc->getType()) { - case (MemoryDescType::Blocked): - return MemoryDescUtils::convertToMKLDNNMemoryDesc(*(pMemDesc->as())); - default: - IE_THROW() << "Can not convert unsupported memory descriptor"; - } - } +void MKLDNNMemory::redefineDesc(const MemoryDesc& desc, void *data) { + redefineDesc(desc.clone(), data); } -template<> -BlockedMemoryDesc MKLDNNMemory::GetDescWithType() const { - if (auto descPtr = dynamic_cast(pMemDesc.get())) { - return *descPtr; - } else { - switch (pMemDesc->getType()) { - case (MemoryDescType::Mkldnn): - return MemoryDescUtils::convertToBlockedDescriptor(*(pMemDesc->as())); - default: - IE_THROW() << "Can not convert unsupported memory descriptor"; +void MKLDNNMemory::redefineDesc(MemoryDescPtr desc, void *data) { + if (data != nullptr) { + this->Create(std::move(desc), data, false); + } else if (useExternalStorage) { + size_t descMaxSize = desc->getMaxMemSize(); + if (MemoryDesc::UNDEFINED_SIZE == descMaxSize) { + IE_THROW() << "Can not reset descriptor, memory upper bound is unknown."; } - } -} - -bool MKLDNNMemoryDesc::operator==(const MKLDNNMemoryDesc &rhs) const { - return this->desc == rhs.desc; -} - -bool MKLDNNMemoryDesc::operator!=(const MKLDNNMemoryDesc &rhs) const { - return !(*this == rhs); -} - -MKLDNNMemoryDesc::operator mkldnn::memory::desc() const { - return desc; -} - -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::desc& desc) : - MemoryDesc(Shape(MKLDNNExtensionUtils::convertToSizeVector(desc.dims())), Mkldnn), desc(desc) { - if (desc.data.format_kind == dnnl::impl::format_kind::any) - IE_THROW(Unexpected) << "Memory format any is prohibited!"; -} - -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) - : MemoryDesc(Shape(_dims), Mkldnn) { - if (format == memory::format_tag::any) - IE_THROW(Unexpected) << "Memory format any is prohibited!"; - if (format != memory::format_tag::undef) { - if (format == memory::format_tag::x && _dims.size() == 0) { - desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); + if (descMaxSize <= memUpperBound) { + this->Create(std::move(desc), prim->get_data_handle(), false); } else { - desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, format); + this->Create(std::move(desc), nullptr, false); } } else { - // Trying to create plain descriptor - // This WA is needed since memory::format_tag doesn't contain plain tag for tensors with rank > 6D - mkldnn::memory::dims strides(_dims.size(), 1); - for (int d = _dims.size() - 2; d >= 0; d--) { - strides[d] = strides[d + 1] * _dims[d + 1]; - } - - desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, strides); - } -} - -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType) - : MemoryDesc(Shape(_dims), Mkldnn), desc() { - const auto ndims = _dims.size(); - mkldnn::memory::dims plain_strides(ndims, 1); - for (size_t i = 1; i < ndims; i++) { - plain_strides[ndims - i -1] = plain_strides[ndims - i] * _dims[ndims - i]; - } - desc = {MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, plain_strides}; -} - -static 
const std::map> form_tags_by_ndims { - {0, { - mkldnn::memory::format_tag::a // TODO :: really 1d layout for scalar?? - }}, {1, { - mkldnn::memory::format_tag::a - }}, {2, { - mkldnn::memory::format_tag::ab, - mkldnn::memory::format_tag::ba - }}, {3, { - mkldnn::memory::format_tag::abc, - mkldnn::memory::format_tag::acb, - mkldnn::memory::format_tag::bac, - mkldnn::memory::format_tag::bca, - mkldnn::memory::format_tag::cba, - - mkldnn::memory::format_tag::Abc16a, - mkldnn::memory::format_tag::ABc16a16b, - mkldnn::memory::format_tag::ABc4a4b, - mkldnn::memory::format_tag::aBc16b, - mkldnn::memory::format_tag::aBc32b, - mkldnn::memory::format_tag::ABc16b16a, - mkldnn::memory::format_tag::Abc4a, - mkldnn::memory::format_tag::aBc4b, - mkldnn::memory::format_tag::ABc4b16a4b, - mkldnn::memory::format_tag::ABc2b8a4b, - mkldnn::memory::format_tag::ABc16b16a4b, - mkldnn::memory::format_tag::ABc16b16a2b, - mkldnn::memory::format_tag::ABc4b4a, - mkldnn::memory::format_tag::ABc8a16b2a, - mkldnn::memory::format_tag::ABc8a8b, - mkldnn::memory::format_tag::ABc8a4b, - mkldnn::memory::format_tag::aBc8b, - mkldnn::memory::format_tag::ABc8b16a2b, - mkldnn::memory::format_tag::ABc8b8a, - mkldnn::memory::format_tag::Acb16a, - mkldnn::memory::format_tag::Acb4a, - mkldnn::memory::format_tag::Acb8a, - mkldnn::memory::format_tag::BAc16a16b, - mkldnn::memory::format_tag::BAc16b16a, - }}, {4, { // Popular - mkldnn::memory::format_tag::abcd, // plain - mkldnn::memory::format_tag::acdb, // tail_c - mkldnn::memory::format_tag::aBcd8b, // blocked 8c - mkldnn::memory::format_tag::aBcd16b, // blocked 16c - - mkldnn::memory::format_tag::abdc, - - mkldnn::memory::format_tag::bacd, - mkldnn::memory::format_tag::bcda, - mkldnn::memory::format_tag::cdba, - mkldnn::memory::format_tag::dcab, - - mkldnn::memory::format_tag::Abcd8a, - mkldnn::memory::format_tag::Abcd16a, - mkldnn::memory::format_tag::Abcd32a, - mkldnn::memory::format_tag::ABcd16a16b, - mkldnn::memory::format_tag::aBcd32b, - mkldnn::memory::format_tag::ABcd16b16a, - mkldnn::memory::format_tag::aBCd16b16c, - mkldnn::memory::format_tag::aBCd16c16b, - mkldnn::memory::format_tag::Abcd4a, - mkldnn::memory::format_tag::aBcd4b, - mkldnn::memory::format_tag::ABcd4b16a4b, - mkldnn::memory::format_tag::ABcd2b8a4b, - mkldnn::memory::format_tag::ABcd4b4a, - mkldnn::memory::format_tag::ABcd4a4b, - mkldnn::memory::format_tag::aBCd4c16b4c, - mkldnn::memory::format_tag::aBCd2c8b4c, - mkldnn::memory::format_tag::ABcd16b16a4b, - mkldnn::memory::format_tag::ABcd16b16a2b, - mkldnn::memory::format_tag::aBCd16c16b4c, - mkldnn::memory::format_tag::aBCd16c16b2c, - mkldnn::memory::format_tag::aBCd4c4b, - mkldnn::memory::format_tag::aBCd4b4c, - mkldnn::memory::format_tag::ABcd8a16b2a, - mkldnn::memory::format_tag::ABcd8a8b, - mkldnn::memory::format_tag::ABcd8a32b, - mkldnn::memory::format_tag::ABcd32a32b, - mkldnn::memory::format_tag::ABcd8a4b, - - mkldnn::memory::format_tag::ABcd8b16a2b, - mkldnn::memory::format_tag::aBCd8b16c2b, - mkldnn::memory::format_tag::ABcd8b8a, - mkldnn::memory::format_tag::aBCd8b8c, - mkldnn::memory::format_tag::aBCd8b4c, - mkldnn::memory::format_tag::aBCd8c16b2c, - mkldnn::memory::format_tag::aBCd8c8b, - - mkldnn::memory::format_tag::ABcd4a8b8a4b, - mkldnn::memory::format_tag::ABcd2a8b8a2b, - - mkldnn::memory::format_tag::aBdc16b, - mkldnn::memory::format_tag::aBdc4b, - mkldnn::memory::format_tag::aBdc8b, - mkldnn::memory::format_tag::aCBd16b16c, - mkldnn::memory::format_tag::aCBd16c16b, - mkldnn::memory::format_tag::Acdb16a, - mkldnn::memory::format_tag::Acdb4a, 
- mkldnn::memory::format_tag::Acdb8a, - mkldnn::memory::format_tag::BAcd16a16b, - mkldnn::memory::format_tag::BAcd16b16a, - mkldnn::memory::format_tag::ABcd32a32b, - mkldnn::memory::format_tag::Acdb32a, - mkldnn::memory::format_tag::aBCd2b4c2b, - mkldnn::memory::format_tag::aBCd2c4b2c, - mkldnn::memory::format_tag::aBCd4b8c2b, - mkldnn::memory::format_tag::aBCd4c8b2c, - }}, {5, { // Popular - mkldnn::memory::format_tag::abcde, // plain - mkldnn::memory::format_tag::acdeb, // tail_c - mkldnn::memory::format_tag::aBcde8b, // blocked 8c - mkldnn::memory::format_tag::aBcde16b, // blocked 16c - - mkldnn::memory::format_tag::abdec, - mkldnn::memory::format_tag::acbde, - mkldnn::memory::format_tag::bacde, - mkldnn::memory::format_tag::bcdea, - mkldnn::memory::format_tag::cdeba, - mkldnn::memory::format_tag::decab, - - mkldnn::memory::format_tag::Abcde16a, - mkldnn::memory::format_tag::Abcde32a, - mkldnn::memory::format_tag::ABcde16a16b, - mkldnn::memory::format_tag::aBcde32b, - mkldnn::memory::format_tag::ABcde16b16a, - mkldnn::memory::format_tag::aBCde16b16c, - mkldnn::memory::format_tag::aBCde16c16b, - mkldnn::memory::format_tag::aBCde2c8b4c, - mkldnn::memory::format_tag::Abcde4a, - mkldnn::memory::format_tag::aBcde4b, - mkldnn::memory::format_tag::ABcde4b4a, - mkldnn::memory::format_tag::ABcde4a4b, - mkldnn::memory::format_tag::aBCde4b4c, - mkldnn::memory::format_tag::aBCde4c16b4c, - mkldnn::memory::format_tag::aBCde16c16b4c, - mkldnn::memory::format_tag::aBCde16c16b2c, - mkldnn::memory::format_tag::aBCde4c4b, - mkldnn::memory::format_tag::Abcde8a, - mkldnn::memory::format_tag::ABcde8a8b, - mkldnn::memory::format_tag::ABcde8a4b, - mkldnn::memory::format_tag::ABcde8b16a2b, - mkldnn::memory::format_tag::ABcde4b16a4b, - mkldnn::memory::format_tag::ABcde2b8a4b, - mkldnn::memory::format_tag::aBCde8b16c2b, - mkldnn::memory::format_tag::ABcde8b8a, - mkldnn::memory::format_tag::aBCde8b8c, - mkldnn::memory::format_tag::aBCde8b4c, - mkldnn::memory::format_tag::aBCde4b8c8b4c, - mkldnn::memory::format_tag::aBCde2b8c8b2c, - mkldnn::memory::format_tag::aBCde8c16b2c, - mkldnn::memory::format_tag::aBCde8c8b, - mkldnn::memory::format_tag::aBdec16b, - mkldnn::memory::format_tag::aBdec4b, - mkldnn::memory::format_tag::aBdec8b, - mkldnn::memory::format_tag::aCBde16b16c, - mkldnn::memory::format_tag::aCBde16c16b, - mkldnn::memory::format_tag::Acdeb16a, - mkldnn::memory::format_tag::Acdeb4a, - mkldnn::memory::format_tag::Acdeb8a, - mkldnn::memory::format_tag::BAcde16b16a, - mkldnn::memory::format_tag::BAcde16a16b, - mkldnn::memory::format_tag::aBdec32b, - mkldnn::memory::format_tag::aBCde2b4c2b, - mkldnn::memory::format_tag::aBCde2c4b2c, - mkldnn::memory::format_tag::aBCde4b8c2b, - mkldnn::memory::format_tag::aBCde4c8b2c, - }}, {6, { // Popular - mkldnn::memory::format_tag::abcdef, // plain - mkldnn::memory::format_tag::acbdef, // permute - mkldnn::memory::format_tag::defcab, // permute - mkldnn::memory::format_tag::aBcdef16b, // blocked 16c - - mkldnn::memory::format_tag::aBCdef16b16c, - mkldnn::memory::format_tag::aBCdef16c16b, - mkldnn::memory::format_tag::aBcdef4b, - mkldnn::memory::format_tag::aBCdef2c8b4c, - mkldnn::memory::format_tag::aBCdef4c4b, - mkldnn::memory::format_tag::aBCdef4b4c, - mkldnn::memory::format_tag::aBCdef8b8c, - mkldnn::memory::format_tag::aBCdef8b4c, - mkldnn::memory::format_tag::aBCdef8c16b2c, - mkldnn::memory::format_tag::aBCdef4c16b4c, - mkldnn::memory::format_tag::aBCdef8c8b, - - mkldnn::memory::format_tag::aBdefc16b, - mkldnn::memory::format_tag::aCBdef16c16b, - 
mkldnn::memory::format_tag::aCBdef16b16c, - mkldnn::memory::format_tag::aBdefc4b, - mkldnn::memory::format_tag::aBdefc8b, - - mkldnn::memory::format_tag::Abcdef4a, - mkldnn::memory::format_tag::Abcdef8a, - mkldnn::memory::format_tag::Abcdef16a, - mkldnn::memory::format_tag::Abcdef32a, - mkldnn::memory::format_tag::aBCdef2b4c2b, - mkldnn::memory::format_tag::aBCdef2c4b2c, - mkldnn::memory::format_tag::aBCdef4b8c2b, - mkldnn::memory::format_tag::aBCdef4c8b2c, - }} -}; - -mkldnn::memory::format_tag MKLDNNMemoryDesc::getFormat() const { - // TODO [OneDNN]: Previously it was a field of tdesc, but now the brute - // force search here. Please avoid of using this method. - const auto ndims = desc.dims().size(); - - // There are no suitable format_tag for this - if (ndims == 0 || ndims > 6) - return mkldnn::memory::format_tag::undef; - - for (const auto fmt : form_tags_by_ndims.at(ndims)) { - if (this->isSame(fmt)) - return fmt; + this->Create(std::move(desc), nullptr, false); } - - return mkldnn::memory::format_tag::undef; } -bool MKLDNNMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const { - memory::desc refDesc(desc.dims(), desc.data_type(), fmt); - - if (desc.data.ndims != refDesc.data.ndims) - return false; - - if (desc.data.format_kind != dnnl_blocked || refDesc.data.format_kind != dnnl_blocked) - IE_THROW() << "MKLDNNMemoryDesc::isSame is not implemented for non blocked memory format"; - - auto actualBlkDesc = desc.data.format_desc.blocking; - auto refBlkDesc = refDesc.data.format_desc.blocking; - if (actualBlkDesc.inner_nblks != refBlkDesc.inner_nblks) - return false; - - for (size_t i = 0; i < actualBlkDesc.inner_nblks; ++i) - if (actualBlkDesc.inner_blks[i] != refBlkDesc.inner_blks[i]) - return false; - - for (size_t i = 0; i < actualBlkDesc.inner_nblks; ++i) - if (actualBlkDesc.inner_idxs[i] != refBlkDesc.inner_idxs[i]) - return false; - - auto actualStrides = desc.data.format_desc.blocking.strides; - auto refStrides = refDesc.data.format_desc.blocking.strides; - - std::vector actualOrder(desc.data.ndims); - { - const auto dims = desc.dims(); - std::vector total_block_per_dim(dims.size(), 1); - const auto &blk_desc = desc.data.format_desc.blocking; - for (int i = 0; i < blk_desc.inner_nblks; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - - std::iota(actualOrder.begin(), actualOrder.end(), 0); - std::sort(actualOrder.begin(), actualOrder.end(), - [&actualStrides, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (actualStrides[ind_l] > actualStrides[ind_r]) || - (actualStrides[ind_l] == actualStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - } - - std::vector refOrder(refDesc.data.ndims); - { - const auto dims = refDesc.dims(); - std::vector total_block_per_dim(dims.size(), 1); - const auto &blk_desc = refDesc.data.format_desc.blocking; - for (int i = 0; i < blk_desc.inner_nblks; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + dims.size()); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - - std::iota(refOrder.begin(), refOrder.end(), 0); - std::sort(refOrder.begin(), refOrder.end(), - [&refStrides, 
&outer_block_dims] (size_t ind_l, size_t ind_r) { - return (refStrides[ind_l] > refStrides[ind_r]) || - (refStrides[ind_l] == refStrides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - } - - if (actualOrder != refOrder) { - return false; - } - - return true; -} - -bool MKLDNNMemoryDesc::isPlainFormat() const { - if (desc.data.format_kind != dnnl_blocked || - desc.data.format_desc.blocking.inner_nblks != 0) - return false; - - const auto ndims = desc.data.ndims; - const auto dims = desc.data.dims; - const auto &strides = desc.data.format_desc.blocking.strides; - bool is_plain_strides = (strides[ndims-1] == 1); - for (int i = 0; i < ndims - 1; i++) { - is_plain_strides &= (strides[i] == strides[i+1] * dims[i+1]); - } - - return is_plain_strides; -} - -bool MKLDNNMemoryDesc::isBlockedCFormat(size_t blk_size) const { - const auto &blocking = desc.data.format_desc.blocking; - - if (desc.data.format_kind != dnnl_blocked || - blocking.inner_nblks != 1 || - blocking.inner_idxs[0] != 1) - return false; - - const auto &ndims = desc.data.ndims; - const auto &strides = desc.data.format_desc.blocking.strides; - const auto &dims = desc.data.padded_dims; - - if (blk_size == UNREACHABLE_DIM) { - blk_size = blocking.inner_blks[0]; - } else { - if (blk_size != blocking.inner_blks[0]) - return false; - } - - bool is_direct_order = (strides[ndims-1] == blocking.inner_blks[0]); - for (int i = 0; i < ndims - 1; i++) { - auto dim = (i == 0) ? div_up(dims[i+1], blk_size) : dims[i+1]; - is_direct_order &= (strides[i] >= strides[i+1] * dim); - } - - return is_direct_order; -} - -bool MKLDNNMemoryDesc::isTailCFormat() const { - const auto &blocking = desc.data.format_desc.blocking; - - if (desc.data.format_kind != dnnl_blocked || - blocking.inner_nblks != 0) - return false; - - const auto &ndims = desc.data.ndims; - const auto &strides = desc.data.format_desc.blocking.strides; - const auto &dims = desc.data.padded_dims; - - // dense permutation of acd..b - bool is_tailc_strides = (strides[1] == 1 && strides[ndims-1] == dims[1] && strides[0] == dims[2] * strides[2]); - for (int i = 2; i < ndims - 1; i++) { - is_tailc_strides &= (strides[i] == strides[i+1] * dims[i+1]); - } - - return is_tailc_strides; -} - -bool MKLDNNMemoryDesc::blocksExtended() const { - for (int i = 0; i < desc.data.ndims; i++) { - if (desc.data.dims[i] != desc.data.padded_dims[i]) - return true; - } - return false; -} - -size_t MKLDNNMemoryDesc::getMemSizeImp() const { - return desc.get_size(); -} - -size_t MKLDNNMemoryDesc::getElementOffset(size_t elemNumber) const { - mkldnn::impl::memory_desc_wrapper wrapped(desc.data); - return wrapped.off_l(elemNumber); -} - -bool MKLDNNMemoryDesc::isCompatible(const MemoryDesc &rhs) const { - if (MemoryDescType::Blocked == rhs.getType()) { - return isCompatible(*(rhs.as())); - } else if (MemoryDescType::Mkldnn == rhs.getType()) { - return isCompatible(*(rhs.as())); - } else { - return false; - } -} - -bool MKLDNNMemoryDesc::isCompatible(const MKLDNNMemoryDesc &rhs) const { - using namespace dnnl; - using namespace impl; - using namespace dnnl::impl::utils; - if (this->desc == rhs.desc) { - return true; - } - mkldnn::impl::memory_desc_wrapper wrappedThis(this->desc.data); - mkldnn::impl::memory_desc_wrapper wrappedRhs(rhs.desc.data); - if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) - return false; - if (wrappedThis.is_wino_desc() || wrappedThis.is_rnn_packed_desc()) return false; - - const auto &blk = wrappedThis.blocking_desc(); - const auto &r_blk = 
wrappedRhs.blocking_desc(); - - int stride_start = wrappedThis.ndims() >0 && wrappedThis.dims()[0] == 1 ? 1 : 0; //ignore batch axis stride if batch size == 1 - - // Here is a slightly modified version of mkldnn::impl::memory_desc_wrapper::similar_to() call able to skip specific strides check. - return wrappedThis.ndims() == wrappedRhs.ndims() - && wrappedThis.format_kind() == wrappedRhs.format_kind() - && wrappedThis.data_type() == wrappedRhs.data_type() - && array_cmp(wrappedThis.dims(), wrappedRhs.dims(), wrappedThis.ndims()) - && array_cmp(blk.strides + stride_start, r_blk.strides + stride_start, wrappedThis.ndims() - stride_start) - && blk.inner_nblks == r_blk.inner_nblks - && array_cmp(blk.inner_blks, r_blk.inner_blks, blk.inner_nblks) - && array_cmp(blk.inner_idxs, r_blk.inner_idxs, blk.inner_nblks) - && array_cmp(wrappedThis.padded_dims(), wrappedRhs.padded_dims(), wrappedRhs.ndims()) - && array_cmp(wrappedThis.padded_offsets(), wrappedRhs.padded_offsets(), wrappedThis.ndims()) - && dimsEqualWeak(wrappedThis.offset0(), wrappedRhs.offset0()); -} - - -/** - * Check compatibility with BlockedMemoryDesc - * - * mkl: IOhw_4i16o4i dims {32, 64, 128, 128} - * strides // the order of outer dims is encoded here - * inner_blks 4 16 4 - * inner_idxs 1 0 1 - * - * BlockedMemoryDesc desc has more expressive ability. - * How to check compatibility with BlockedMemoryDesc representation: - * 0. Detect a new_outer_order of outer_dims via descending strides. - * 1. BlockedMemoryDesc strides : concatenate strides in new_outer_order and inner strides. - * 2. BlockedMemoryDesc dims : concatenate outer dims in new_outer_order with auto padding and inner blocks - * 3. BlockedMemoryDesc order : concatenate new_outer_order and inner_idxs - */ - -bool MKLDNNMemoryDesc::isCompatible(const BlockedMemoryDesc &rhs) const { - if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { - return false; - } - - const auto dims = desc.dims(); - - if (desc.data.format_kind != dnnl_blocked) { - return false; - } - - const auto &blk_desc = desc.data.format_desc.blocking; - - const size_t outer_ndims = dims.size(); - const size_t inner_ndims = blk_desc.inner_nblks; - const size_t total_ndims = outer_ndims + inner_ndims; - - // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} - std::vector inner_strides(inner_ndims, 1); - for (size_t i = 1; i < blk_desc.inner_nblks; i++) { - inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; - } - - // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} - std::vector total_block_per_dim(outer_ndims, 1); - for (int i = 0; i < inner_ndims; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - - // order of outer dims. 
In case of IOhw_ will be {1, 0, 2, 3} - std::vector outer_order(outer_ndims); - std::iota(outer_order.begin(), outer_order.end(), 0); - std::sort(outer_order.begin(), outer_order.end(), - [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || - (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - - // blocked order - // [new_outer_order] U [inner_idxs] - SizeVector blk_order(total_ndims, 0); - std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); - std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); - - if (!dimsEqualWeak(blk_order, rhs.getOrder())) { - return false; - } - - //TODO [DS]: undefined offset is also used now as an indicator of undefined strides - if (desc.data.offset0 != Shape::UNDEFINED_DIM) { - // blocked strides - // [outer_strides via new_outer_order] U [inner_strides] - SizeVector blk_strides(total_ndims, 0); - std::copy(inner_strides.rbegin(), inner_strides.rend(), blk_strides.rbegin()); - std::transform(outer_order.begin(), outer_order.end(), blk_strides.begin(), - [&](size_t i) { return blk_desc.strides[i]; }); - - size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : - Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 - if (!dimsEqualWeak(blk_strides, rhs.getStrides(), skipAxis)) { - return false; - } - } - - // blocked dims - // [dims via new_outer_order with auto pad] U [inner_blk_dims] - SizeVector blk_dims(total_ndims, 0); - std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, - blk_dims.end() - blk_desc.inner_nblks); - std::transform(outer_order.begin(), outer_order.end(), blk_dims.begin(), - [&] (size_t i) { return outer_block_dims[i]; }); - - if (!dimsEqualWeak(blk_dims, rhs.getBlockDims())) { - return false; - } - - // offset padded to data. Same as for oneDNN - SizeVector blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; - // TODO: The BlockedMemoryDesc implementation allow to specify offset_to_data for inner blocked dims. - // Which is not obvious behavior. It required offset_to_data.size == total_ndims, so will - // fill it with zero. 
- blk_offset_to_data.insert(blk_offset_to_data.end(), inner_ndims, 0); - if (!dimsEqualWeak(blk_offset_to_data, rhs.getOffsetPaddingToData())) { - return false; - } - - return dimsEqualWeak(desc.data.offset0, rhs.getOffsetPadding()); -} - -bool MKLDNNMemoryDesc::hasLayoutType(LayoutType layoutType) const { - switch (layoutType) { - case LayoutType::ncsp: - return isPlainFormat(); - case LayoutType::nspc: - return isTailCFormat(); - case LayoutType::nCsp8c: - return isBlockedCFormat(8); - case LayoutType::nCsp16c: - return isBlockedCFormat(16); - default: - return false; - } -} - -std::string MKLDNNMemoryDesc::serializeFormat() const { - if (desc.data.format_kind == dnnl_format_kind_wino) { - switch (desc.data.format_desc.wino_desc.wino_format) { - case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOIoi: return "wino_aaOIoi"; - case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOio: return "wino_aaOio"; - case dnnl_wino_memory_format_t::dnnl_wino_wei_aaOBiOo: return "wino_aaOBiOo"; - case dnnl_wino_memory_format_t::dnnl_wino_wei_OBaaIBOIio: return "wino_OBaaIBOIio"; - default: return "wino_undef"; - } - } - auto fmt = getFormat(); - return mkldnn::utils::fmt2str(fmt); -} - -bool MKLDNNMemoryDesc::isDefined() const { - return desc.data.offset0 != Shape::UNDEFINED_DIM; +template<> +DnnlMemoryDescPtr MKLDNNMemory::GetDescWithType() const { + return MemoryDescUtils::convertToDnnlMemoryDesc(pMemDesc); } -InferenceEngine::Precision MKLDNNMemoryDesc::getPrecision() const { - return MKLDNNExtensionUtils::DataTypeToIEPrecision(desc.data_type()); +template<> +BlockedMemoryDescPtr MKLDNNMemory::GetDescWithType() const { + return MemoryDescUtils::convertToBlockedMemoryDesc(pMemDesc); } -void MKLDNNMemoryDesc::setPrecision(InferenceEngine::Precision prc) { - desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); -} } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h index 42284edca8368f..dea62afb97da89 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h @@ -5,13 +5,14 @@ #pragma once #include "ie_layouts.h" -#include "mkldnn_dims.h" -#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc.h" #include "mkldnn_extension_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include #include #include -#include + +#include "memory_desc/dnnl_memory_desc.h" #include #include @@ -22,11 +23,6 @@ /** * @file contains a concept classes to work with memory/tensor/blob abstractions on plugin level. * - * MKLDNNMemoryDesc - the descriptor of tensor representation in memory. Describes all required information - * for proper allocation and handling tensor in some buffer. The real memory is not present, just description. - * This object answers on question how and where data with logical index [x1, x2, .. xN] placed in real buffer. - * In the simplest case it describe a mapping between "logical offset" and "real offset". - * * MKLDNNMemory is an abstraction of some real tensor which contains some data. As in short it's a pair of * memory descriptor and raw buffer handler to contains data. In case of system memory raw buffer it's simple * "void*" on some system memory buffer. 
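The hunks above rework MKLDNNMemory around a MemoryDesc/raw-buffer pair with delayed allocation and a new redefineDesc() call. Below is a minimal usage sketch of that pair, assuming the headers touched by this patch (mkldnn_memory.h, memory_desc/dnnl_blocked_memory_desc.h) and an already constructed mkldnn::engine; the function name exampleUsage, the FP32 precision and the concrete dims are illustrative only and are not taken from the patch:

    #include "mkldnn_memory.h"
    #include "memory_desc/dnnl_blocked_memory_desc.h"

    using namespace MKLDNNPlugin;

    void exampleUsage(const mkldnn::engine& eng) {
        MKLDNNMemory mem(eng);

        // Initial allocation from a fully defined blocked descriptor
        // (hypothetical dims); Create() allocates the buffer and records
        // the resulting size as the memory upper bound.
        DnnlBlockedMemoryDesc firstDesc(InferenceEngine::Precision::FP32,
                                        Shape(VectorDims{1, 3, 224, 224}));
        mem.Create(firstDesc);

        // When the actual shape becomes known at inference time, only the
        // descriptor is replaced; redefineDesc() decides internally whether
        // the existing handle can be kept or a new buffer is required.
        DnnlBlockedMemoryDesc newDesc(InferenceEngine::Precision::FP32,
                                      Shape(VectorDims{2, 3, 224, 224}));
        mem.redefineDesc(newDesc);
    }

Splitting allocation (Create(), deferred while the shape is still undefined) from descriptor replacement (redefineDesc(), which tracks memUpperBound) is what the dynamic-shape path relies on; per the redefineDesc() comments in this patch, the intent is to avoid reallocation as long as the new tensor size stays within the recorded upper bound.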
@@ -35,82 +31,6 @@ namespace MKLDNNPlugin { -/** - * Represent internal plugin abstraction of tensor description - * - */ -class MKLDNNMemoryDesc : public MemoryDesc { -public: - /** Construct a tensor desc with plain layout format (like ND C array) */ - MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType); - - /** Construct a tensor desc with specified layout format tag. Any and Undef is not supported */ - MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format); - - explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc); - - /** - * Try to define original format tag use on creation - * - * @return format tag if was able to define it - */ - mkldnn::memory::format_tag getFormat() const; - - mkldnn::memory::data_type getDataType() const { - return static_cast(desc.data.data_type); - } - - MKLDNNDims getDims() const { - return MKLDNNDims(desc.data.dims, desc.data.ndims); - } - - bool blocksExtended() const; - operator bool() const { - return getFormat() != mkldnn::memory::format_tag::any && getFormat() != mkldnn::memory::format_tag::undef; - } - - bool operator == (const MKLDNNMemoryDesc& rhs) const; - bool operator != (const MKLDNNMemoryDesc& rhs) const; - - operator mkldnn::memory::desc() const; - - bool isSame(mkldnn::memory::format_tag fmt) const; - - dnnl_format_kind_t getFormatKind() const { - return desc.data.format_kind; - } - - std::unique_ptr clone() const override { - return MKLDNNPlugin::make_unique(*this); - } - - bool hasLayoutType(LayoutType layoutType) const override; - - std::string serializeFormat() const override; - - bool isDefined() const override; - - InferenceEngine::Precision getPrecision() const override; - - void setPrecision(InferenceEngine::Precision prc) override; - - bool isCompatible(const MemoryDesc& rhs) const override; - bool isCompatible(const BlockedMemoryDesc& rhs) const; - bool isCompatible(const MKLDNNMemoryDesc& rhs) const; - -private: - size_t getElementOffset(size_t elemNumber) const override; - size_t getMemSizeImp() const override; - bool isPlainFormat() const; - bool isBlockedCFormat(size_t blk_size = UNREACHABLE_DIM) const; - bool isTailCFormat() const; - -private: - static constexpr size_t UNREACHABLE_DIM = std::numeric_limits::max(); - mkldnn::memory::desc desc; -}; - - class MKLDNNMemory { public: explicit MKLDNNMemory(const mkldnn::engine& eng); @@ -129,18 +49,14 @@ class MKLDNNMemory { return prim; } - mkldnn::memory::desc GetDescriptor() const { - return prim->get_desc(); - } - - const MemoryDesc& GetDesc() const { + const MemoryDesc& getDesc() const { return *pMemDesc; } template ::value && !std::is_reference::value, int>::type = 0, typename std::enable_if::value, int>::type = 0> - T GetDescWithType() const; + std::shared_ptr GetDescWithType() const; /** * Return handler of buffer. 
Real data may starts from some other offset @@ -161,33 +77,42 @@ class MKLDNNMemory { void* GetPtr() const; mkldnn::memory::data_type GetDataType() const { - return static_cast(GetDescriptor().data.data_type); + return MKLDNNExtensionUtils::IEPrecisionToDataType(getDesc().getPrecision()); } size_t GetSize() const; - size_t GetElementsCount() const; - mkldnn::memory::dims GetDims() const { - auto data = GetDescriptor().data; - return {std::begin(data.dims), std::begin(data.dims) + data.ndims}; + const Shape& GetShape() const { + return getDesc().getShape(); } void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); + void Create(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); + + // Redefines descriptor. The memory descriptor will be replaced with the new one. + // Memory will not be reallocated if the new tensor size is less or equal the upper bound. + // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. + void redefineDesc(const MemoryDesc& desc, void *data = nullptr); + void redefineDesc(MemoryDescPtr desc, void *data = nullptr); - // Like a plain format - void SetData(mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format, const void* data, size_t size, bool ftz = true) const; void SetData(const MKLDNNMemory& memory, size_t size = 0, bool ftz = true) const; void FillZero(); - static mkldnn::memory::format_tag GetPlainFormatByRank(size_t rank); - static InferenceEngine::Layout GetPlainLayout(const mkldnn::memory::dims& dims); - static mkldnn::memory::format_tag Convert(const InferenceEngine::Layout layout); - static InferenceEngine::Precision convertToIePrec(mkldnn::memory::data_type dataType); - static mkldnn::memory::data_type convertToDataType(const InferenceEngine::Precision &precision); + bool hasExternalStorage() const { + return useExternalStorage; + } - static std::string formatToString(mkldnn::memory::format_tag fmt); + const VectorDims& getStaticDims() const { + return getDesc().getShape().getStaticDims(); + } - static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0); + mkldnn::engine getEngine() const { + return eng; + } + + bool isUsedExternalStorage() const { + return useExternalStorage; + } private: void Create(const mkldnn::memory::dims& dims, mkldnn::memory::data_type data_type, mkldnn::memory::format_tag format, @@ -195,14 +120,12 @@ class MKLDNNMemory { void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); - const MKLDNNMemoryDesc GetMKLDNNDesc() const { - return MKLDNNMemoryDesc(prim->get_desc()); - } - private: MemoryDescPtr pMemDesc; std::shared_ptr prim; mkldnn::engine eng; + bool useExternalStorage = false; + size_t memUpperBound = 0ul; }; using MKLDNNMemoryPtr = std::shared_ptr; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h index 3cbe768370cd01..92f8cf4f2de42d 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory_state.h @@ -8,7 +8,7 @@ #include "blob_factory.hpp" #include "mkldnn_memory.h" #include "nodes/common/cpu_memcpy.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" #include @@ -18,7 +18,7 @@ class MKLDNNVariableState : public InferenceEngine::IVariableStateInternal { public: MKLDNNVariableState(std::string name, MKLDNNMemoryPtr storage) : 
InferenceEngine::IVariableStateInternal{name} { - state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->GetDesc())); + state = make_blob_with_precision(MemoryDescUtils::convertToTensorDesc(storage->getDesc())); state->allocate(); cpu_memcpy(state->buffer(), storage->GetData(), storage->GetSize()); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp index 7e29589caf9a21..73189f8b6e563e 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.cpp @@ -55,7 +55,8 @@ #include "utils/general_utils.h" #include "utils/cpu_utils.hpp" #include "nodes/common/cpu_convert.h" -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -226,6 +227,7 @@ static const InferenceEngine::details::caseless_unordered_map { "ExperimentalDetectronPriorGridGenerator", ExperimentalDetectronPriorGridGenerator}, { "ExperimentalDetectronGenerateProposalsSingleImage", ExperimentalDetectronGenerateProposalsSingleImage}, { "ExtractImagePatches", ExtractImagePatches}, + { "NonMaxSuppression", NonMaxSuppression}, { "NonMaxSuppressionIEInternal", NonMaxSuppression}, { "MatrixNms", MatrixNms}, { "MulticlassNms", MulticlassNms} @@ -240,6 +242,26 @@ Type TypeFromName(const std::string type) { } } +template<> +DnnlMemoryDescPtr MKLDNNNode::getInputMemDescAtPort(size_t portNum) const { + return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtInputPort(portNum)); +} + +template<> +BlockedMemoryDescPtr MKLDNNNode::getInputMemDescAtPort(size_t portNum) const { + return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtInputPort(portNum)); +} + +template<> +DnnlMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort(size_t portNum) const { + return MemoryDescUtils::convertToDnnlMemoryDesc(getBaseMemDescAtOutputPort(portNum)); +} + +template<> +BlockedMemoryDescPtr MKLDNNNode::getOutputMemDescAtPort(size_t portNum) const { + return MemoryDescUtils::convertToBlockedMemoryDesc(getBaseMemDescAtOutputPort(portNum)); +} + } // namespace MKLDNNPlugin MKLDNNNode::NodesFactory & MKLDNNNode::factory() { @@ -257,11 +279,11 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en for (size_t i = 0; i < op->get_input_size(); i++) { const auto &shape = op->get_input_partial_shape(i); - - bool isScalar = false; - if (shape.rank().is_static()) { - isScalar = shape.rank().get_length() == 0; + if (shape.rank().is_dynamic()) { + IE_THROW(Unexpected) << "CPU plug-in doesn't support operation with dynamic rank"; } + + bool isScalar = shape.rank().get_length() == 0; inputShapes.emplace_back(isScalar ? ngraph::PartialShape{1} : shape); originalInputPrecisions.emplace_back(details::convertPrecision(op->get_input_element_type(i))); } @@ -272,16 +294,19 @@ MKLDNNNode::MKLDNNNode(const std::shared_ptr& op, const mkldnn::en } for (size_t i = 0; i < op->get_output_size(); i++) { const auto &shape = op->get_output_partial_shape(i); - - bool isScalar = false; - if (shape.rank().is_static()) { - isScalar = shape.rank().get_length() == 0; + if (shape.rank().is_dynamic()) { + IE_THROW(Unexpected) << "CPU plug-in doesn't support operation with dynamic rank"; } + + bool isScalar = shape.rank().get_length() == 0; outputShapes.emplace_back(isScalar ? 
ngraph::PartialShape{1} : shape); originalOutputPrecisions.emplace_back(details::convertPrecision(op->get_output_element_type(i))); } } + isDynamic = std::any_of(inputShapes.begin(), inputShapes.end(), [](const Shape& shape){ return shape.isDynamic(); }) || + std::any_of(outputShapes.begin(), outputShapes.end(), [](const Shape& shape){ return shape.isDynamic(); }); + const auto& rtInfo = op->get_rt_info(); if (rtInfo.count("originalLayersNames")) { originalLayers = getRTInfoValue(rtInfo, "originalLayersNames"); @@ -462,16 +487,18 @@ bool MKLDNNNode::canBeInPlace() const { return false; } - auto inShape = getParentEdgeAt(0)->getShape(); - for (size_t cIdx = 0; cIdx < getChildEdges().size(); cIdx++) { - if (getChildEdgeAt(cIdx)->getShape() != inShape) { + auto inShape = getInputShapeAtPort(0); + for (size_t cIdx = 0; cIdx < outputShapes.size(); cIdx++) { + if (getOutputShapeAtPort(cIdx) != inShape) { return false; } } return true; } -void MKLDNNNode::resolveNotAllocatedEdges() { +void MKLDNNNode::resolveInPlaceEdges() { + // TODO [DS]: first version dynamic shapes do not support inPlace logic + // after enabling inPlace logic for dynamic shapes we need to update this method for nodes with several edges at single port const NodeDesc *selected_pd = getSelectedPrimitiveDescriptor(); if (!selected_pd) IE_THROW() << "Cannot find selected primitive descriptor for node: " << getName(); @@ -501,6 +528,28 @@ void MKLDNNNode::resolveNotAllocatedEdges() { } } +MemoryDescPtr MKLDNNNode::getBaseMemDescAtInputPort(size_t portNum) const { + if (auto primDesc = getSelectedPrimitiveDescriptor()) { + const auto& inConfs = primDesc->getConfig().inConfs; + if (inConfs.size() < portNum) { + IE_THROW() << "Can't get input memory desc at port: " << portNum << ", incorrect port number"; + } + return inConfs[portNum].desc; + } + IE_THROW() << "Can't get input memory desc, primitive descriptor is not selected"; +} + +MemoryDescPtr MKLDNNNode::getBaseMemDescAtOutputPort(size_t portNum) const { + if (auto primDesc = getSelectedPrimitiveDescriptor()) { + const auto& outConfs = primDesc->getConfig().outConfs; + if (outConfs.size() < portNum) { + IE_THROW() << "Can't get output memory desc at port: " << portNum << ", incorrect port number"; + } + return outConfs[portNum].desc; + } + IE_THROW() << "Can't get output memory desc, primitive descriptor is not selected"; +} + std::string MKLDNNNode::getPrimitiveDescriptorType() { auto selectedPrimitiveDesc = getSelectedPrimitiveDescriptor(); @@ -636,6 +685,37 @@ void MKLDNNNode::execute(mkldnn::stream strm) { } } +void MKLDNNNode::executeDynamic(mkldnn::stream strm) { + const auto newShapes = shapeInfer(); + if (!newShapes.empty()) + redefineOutputMemory(newShapes); + executeDynamicImpl(strm); +} + +void MKLDNNNode::redefineOutputMemory(const std::vector &newShapes) { + if (newShapes.size() != outputShapes.size()) { + IE_THROW() << "Number shapes mismatch with real outputs number for node with name: " << getName(); + } + for (size_t i = 0; i < outputShapes.size(); i++) { + const auto edges = getChildEdgesAtPort(i); + const auto memDesc = getBaseMemDescAtOutputPort(i)->cloneWithNewDims(newShapes[i]); + size_t sharedEdgeNum = 0; + for (size_t j = 0; j < edges.size(); j++) { + if (!edges[j]->getMemory().isUsedExternalStorage()) { + sharedEdgeNum = j; + break; + } + } + edges[sharedEdgeNum]->getMemoryPtr()->redefineDesc(*memDesc); + void *data = edges[sharedEdgeNum]->getMemoryPtr()->GetData(); + for (size_t j = 0; j < edges.size(); j++) { + if (j == sharedEdgeNum) + continue; + 
edges[j]->getMemoryPtr()->redefineDesc(*memDesc, data); + } + } +} + void MKLDNNNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -650,7 +730,12 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { PortConfig portConfig; portConfig.inPlace = -1; portConfig.constant = false; - portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); + auto desc = getSrcMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked) { + portConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + portConfig.desc = std::move(desc); + } config.inConfs.push_back(portConfig); } @@ -658,7 +743,12 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { PortConfig portConfig; portConfig.inPlace = canBeInPlace() ? 0 : -1; portConfig.constant = false; - portConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); + auto desc = getDstMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked) { + portConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + portConfig.desc = std::move(desc); + } config.outConfs.push_back(portConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -673,10 +763,9 @@ void MKLDNNNode::initSupportedPrimitiveDescriptors() { void MKLDNNNode::filterSupportedPrimitiveDescriptors() { // Compare by partial layout descriptor (without particular strides values) auto areCompatible = [](const MemoryDesc& desc, mkldnn::memory::format_tag fmt) -> bool { - MKLDNNMemoryDesc fmt_tdesc = MKLDNNMemoryDesc{desc.getShape().getStaticDims(), - MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), - fmt}; - + auto fmt_tdesc = DnnlBlockedMemoryDesc(desc.getShape(), + MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), + fmt); return desc.isCompatible(fmt_tdesc); }; @@ -710,12 +799,12 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) { if (!selectedPD) { return; } - std::vector inDescs; + std::vector inDescs; for (const auto& inConf : config.inConfs) - inDescs.push_back(inConf.desc.get()); - std::vector outDescs; + inDescs.emplace_back(inConf.desc); + std::vector outDescs; for (const auto& outConf : config.outConfs) - outDescs.push_back(outConf.desc.get()); + outDescs.emplace_back(outConf.desc); createDescriptor(inDescs, outDescs); std::shared_ptr attr = initPrimitiveAttr(); @@ -773,12 +862,12 @@ void MKLDNNNode::initDescriptor(const NodeConfig& config) { for (size_t i = 0; i < selectedConfig.inConfs.size(); i++) { if (!selectedConfig.inConfs[i].desc->isCompatible(*config.inConfs[i].desc)) - IE_THROW() << "Incorrect descriptor for node: " << getName(); + IE_THROW() << "Incorrect descriptor for node: " << getName() << " on " << i << " intput port"; } for (size_t i = 0; i < selectedConfig.outConfs.size(); i++) { if (!selectedConfig.outConfs[i].desc->isCompatible(*config.outConfs[i].desc)) - IE_THROW() << "Incorrect descriptor for node: " << getName(); + IE_THROW() << "Incorrect descriptor for node: " << getName() << " on " << i << " output port"; } rightConfig = config; } @@ -799,7 +888,7 @@ void MKLDNNNode::prepareMemory(const NodeDesc *selected_pd, mkldnn::primitive_de IE_THROW() << "Destination memory didn't allocate for node " << getName() << " from node " << getParentEdgeAt(i)->getParent()->getName() << "."; } - std::vector intDescs; + std::vector intDescs; for (auto &it : internalBlobDesc) intDescs.push_back(it(itpd, 0)); @@ -809,13 +898,13 @@ void MKLDNNNode::prepareMemory(const NodeDesc 
*selected_pd, mkldnn::primitive_de auto create = [&] () { // TODO [DS]: internal blobs should be removed or rewritten using Memory object - auto newDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(internalBlob->getTensorDesc()); + auto newDesc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(internalBlob->getTensorDesc()); MKLDNNMemory memory{ engine }; memory.Create(newDesc, internalBlob->buffer()); MKLDNNMemoryPtr _ptr = MKLDNNMemoryPtr(new MKLDNNMemory(engine)); - _ptr->Create(intDescs[i]); + _ptr->Create(*intDescs[i]); _ptr->SetData(memory); return _ptr; @@ -950,14 +1039,14 @@ const std::vector& MKLDNNNode::getPrimitivesPriority() { return implPriorities; } -std::unique_ptr MKLDNNNode::getDefinedInputDesc(const NodeConfig &config, size_t idx) const { +MemoryDescPtr MKLDNNNode::getDefinedInputDesc(const NodeConfig &config, size_t idx) const { int num = getParentEdgeAt(idx)->getInputNum(); auto *selectedPD = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "Cannot get selected primitive descriptor for node: " << getParentEdgeAt(idx)->getParent()->getName(); if (config.inConfs[idx].desc->isDefined()) { - return config.inConfs[idx].desc->clone(); + return config.inConfs[idx].desc; } if (config.inConfs[idx].inPlace >= 0) { @@ -966,26 +1055,26 @@ std::unique_ptr MKLDNNNode::getDefinedInputDesc(const NodeConfig &co if (num >= 0) { auto parentConf = selectedPD->getConfig().outConfs[num]; - parentConf.desc->setPrecision(config.inConfs[idx].desc->getPrecision()); + parentConf.desc = MemoryDescUtils::cloneWithNewPrecision(*parentConf.desc, config.inConfs[idx].desc->getPrecision()); if (!parentConf.desc->isDefined() && parentConf.inPlace >= 0) getParentEdgeAt(idx)->getParent()->initOptimalPrimitiveDescriptor(); parentConf = getParentEdgeAt(idx)->getParent()->getSelectedPrimitiveDescriptor()->getConfig().outConfs[num]; if (parentConf.desc->isDefined() && parentConf.desc->isCompatible(*config.inConfs[idx].desc)) { - return parentConf.desc->clone(); + return parentConf.desc; } } - return MemoryDescUtils::resetOffset(config.inConfs[idx].desc.get()); + return MemoryDescUtils::cloneWithDefaultStridesAndOffset(*config.inConfs[idx].desc); } -std::unique_ptr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t idx) const { +MemoryDescPtr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &config, size_t idx) const { int num = getChildEdgeAt(idx)->getOutputNum(); auto *selectedPD = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor(); if (!selectedPD) IE_THROW() << "Cannot get selected primitive descriptor for node: " << getChildEdgeAt(idx)->getChild()->getName(); if (config.outConfs[idx].desc->isDefined()) { - return config.outConfs[idx].desc->clone(); + return config.outConfs[idx].desc; } if (config.outConfs[idx].inPlace >= 0) { @@ -994,16 +1083,16 @@ std::unique_ptr MKLDNNNode::getDefinedOutputDesc(const NodeConfig &c if (num >= 0) { auto childConf = selectedPD->getConfig().inConfs[num]; - childConf.desc->setPrecision(config.outConfs[idx].desc->getPrecision()); + childConf.desc = MemoryDescUtils::cloneWithNewPrecision(*childConf.desc, config.outConfs[idx].desc->getPrecision()); if (!childConf.desc->isDefined() && childConf.inPlace >= 0) getChildEdgeAt(idx)->getChild()->initOptimalPrimitiveDescriptor(); childConf = getChildEdgeAt(idx)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; if (childConf.desc->isDefined() && childConf.desc->isCompatible(*config.outConfs[idx].desc)) { - return 
childConf.desc->clone(); + return childConf.desc; } } - return MemoryDescUtils::resetOffset(config.outConfs[idx].desc.get()); + return MemoryDescUtils::cloneWithDefaultStridesAndOffset(*config.outConfs[idx].desc); } void MKLDNNNode::initOptimalPrimitiveDescriptor() { @@ -1036,19 +1125,20 @@ bool MKLDNNNode::isConfigDefined(const NodeConfig &config) const { return true; } -std::unique_ptr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - return MKLDNNPlugin::make_unique(primitive_desc_it.src_desc(idx)); +MemoryDescPtr MKLDNNNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.src_desc(idx)); } -std::unique_ptr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - return MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)); +MemoryDescPtr MKLDNNNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(idx)); } int MKLDNNNode::batchToProcess() { return dynBatchLim == 0 ? getMaxBatch() : std::min(getMaxBatch(), dynBatchLim); } -int MKLDNNNode::getMaxBatch() { +// TODO [DS]: how we should process this for dynamic shape? +size_t MKLDNNNode::getMaxBatch() { // FIXME: batch != 0 dims number if (!inputShapes.empty()) { if (inputShapes[0].getRank()) @@ -1175,7 +1265,9 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr if (ex.getStatus() != NOT_IMPLEMENTED) { throw; } else { - errorMessage += getExceptionDescWithoutStatus(ex); + const auto currErrorMess = getExceptionDescWithoutStatus(ex); + if (!currErrorMess.empty()) + errorMessage += "\n" + currErrorMess; } IE_SUPPRESS_DEPRECATED_END } @@ -1190,7 +1282,9 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr if (ex.getStatus() != NOT_IMPLEMENTED) { throw; } else { - errorMessage += getExceptionDescWithoutStatus(ex); + const auto currErrorMess = getExceptionDescWithoutStatus(ex); + if (!currErrorMess.empty()) + errorMessage += "\n" + currErrorMess; } IE_SUPPRESS_DEPRECATED_END } @@ -1206,7 +1300,9 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr if (ex.getStatus() != NOT_IMPLEMENTED) { throw; } else { - errorMessage += getExceptionDescWithoutStatus(ex); + const auto currErrorMess = getExceptionDescWithoutStatus(ex); + if (!currErrorMess.empty()) + errorMessage += "\n" + currErrorMess; } IE_SUPPRESS_DEPRECATED_END } @@ -1222,7 +1318,7 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr if (!newNode) { std::string errorDetails; if (!errorMessage.empty()) { - errorDetails = "\nDetails: \n" + errorMessage; + errorDetails = "\nDetails: " + errorMessage; } IE_THROW() << "Unsupported operation of type: " << op->get_type_name() << " name: " << op->get_friendly_name() << errorDetails; } @@ -1233,7 +1329,7 @@ MKLDNNNode* MKLDNNNode::NodesFactory::create(const std::shared_ptr bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const { size_t fusingPort = 0; for (size_t i = (parentNode == nullptr ? 
1 : 0); i < getParentEdges().size(); i++) { - MKLDNNNode *node = getParentEdgeAt(i)->getParent().get(); + MKLDNNNode *node = getParentEdgesAtPort(i)[0]->getParent().get(); if (node == nullptr) { IE_THROW() << "Cannot get parent node for " << getName() << " on " << i << " port"; } @@ -1247,11 +1343,11 @@ bool MKLDNNNode::canBePerformedAsScaleShift(const MKLDNNNode *parentNode) const } const auto isBroadcastableToDataInput = [&]() { - const auto dataShape = getParentEdgeAt(fusingPort)->getShape().getStaticDims(); + auto& dataShape = getInputShapeAtPort(fusingPort).getDims(); for (size_t i = 0; i < getParentEdges().size(); i++) { if (i == fusingPort) continue; - auto weightShape = getParentEdgeAt(i)->getShape().getStaticDims(); + auto& weightShape = getInputShapeAtPort(i).getDims(); if (getParentEdgesAtPort(i)[0]->getParent()->getChildEdges().size() != 1 || !isPerTensorOrPerChannelBroadcastable(dataShape, weightShape)) return false; } @@ -1295,7 +1391,7 @@ void MKLDNNNode::fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector& buffer) { auto *constInputNode = dynamic_cast(constInput.get()); auto constBlob = constInputNode->getMemoryPtr(); - auto const elementsCount = constBlob->GetElementsCount(); + const auto elementsCount = constBlob->GetDescWithType()->getPaddedElementsCount(); buffer.resize(elementsCount); cpu_convert(constBlob->GetPtr(), &buffer[0], diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index 77dab59e904d09..2f0962bff2ceb4 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -11,7 +11,6 @@ #include #include #include -#include "mkldnn_dims.h" #include "mkldnn_memory.h" #include "mkldnn_edge.h" #include "mkldnn_descriptor.h" @@ -29,11 +28,12 @@ #include #include "cpu_types.h" #include "cpu_shape.h" -#include "cpu_memory_desc.h" +#include "memory_desc/cpu_memory_desc.h" namespace MKLDNNPlugin { using MKLDNNNodePtr = std::shared_ptr; +using MKLDNNNodeConstPtr = std::shared_ptr; using MKLDNNNodeWeakPtr = std::weak_ptr; Type TypeFromName(const std::string type); @@ -238,7 +238,7 @@ struct PortConfig { this->constant = rhs.constant; this->inPlace = rhs.inPlace; if (rhs.desc) { - this->desc = rhs.desc->clone(); + this->desc = rhs.desc; } } @@ -246,7 +246,7 @@ struct PortConfig { this->constant = rhs.constant; this->inPlace = rhs.inPlace; if (rhs.desc) { - this->desc = rhs.desc->clone(); + this->desc = rhs.desc; } return *this; } @@ -257,7 +257,7 @@ struct PortConfig { // TODO [DS]: better to make private and const bool constant = false; int inPlace = -1; - std::unique_ptr desc; + MemoryDescPtr desc; }; struct NodeConfig { @@ -454,6 +454,44 @@ class MKLDNNNode { return &supportedPrimitiveDescriptors[selectedPrimitiveDescriptorIndex]; } + /** + * @brief Returns input selected primitive descriptor on the specified port + * must be used after selectOptimalPrimitiveDescriptor stage + * @param portNum port number + * @return pointer to selected primitive descriptor with type MemoryDesc + */ + MemoryDescPtr getBaseMemDescAtInputPort(size_t portNum) const; + + /** + * @brief Returns output selected primitive descriptor on the specified port + * must be used after selectOptimalPrimitiveDescriptor stage + * @param portNum port number + * @return pointer to selected primitive descriptor with type MemoryDesc + */ + MemoryDescPtr getBaseMemDescAtOutputPort(size_t portNum) const; + + /** + * @brief Returns input selected primitive descriptor on the specified 
port + * must be used after selectOptimalPrimitiveDescriptor stage + * @param portNum port number + * @return pointer to selected primitive descriptor with type T + */ + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + std::shared_ptr getInputMemDescAtPort(size_t portNum) const; + + /** + * @brief Returns output selected primitive descriptor on the specified port + * must be used after selectOptimalPrimitiveDescriptor stage + * @param portNum port number + * @return pointer to selected primitive descriptor with type T + */ + template ::value && !std::is_reference::value, int>::type = 0, + typename std::enable_if::value, int>::type = 0> + std::shared_ptr getOutputMemDescAtPort(size_t portNum) const; + void selectPrimitiveDescriptorByIndex(int index) { if (index < 0 || index >= supportedPrimitiveDescriptors.size()) selectedPrimitiveDescriptorIndex = -1; @@ -467,8 +505,12 @@ class MKLDNNNode { virtual void setDynamicBatchLim(int lim); - void resolveNotAllocatedEdges(); + void resolveInPlaceEdges(); + virtual void execute(mkldnn::stream strm); + void executeDynamic(mkldnn::stream strm); + void redefineOutputMemory(const std::vector &newShapes); + virtual void initSupportedPrimitiveDescriptors(); /** @@ -484,8 +526,8 @@ class MKLDNNNode { virtual void getSupportedDescriptors() = 0; // TODO [DS]: Should be moved into Node derivative class - virtual void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) {} + virtual void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) {} virtual void initDescriptor(const NodeConfig& config); virtual bool created() const = 0; virtual bool created(const MKLDNNExtensionManager::Ptr& extMgr) { @@ -621,10 +663,23 @@ class MKLDNNNode { originalOutputPrecisions.push_back(precision); } + // TODO: align behaviour for original(Input/Output)Precisions and (input/output)Shapes + /** + * @brief Returns the number of inputs that correspond to ngraph nodes. + * The inputs number is computed as the size of the originalInputPrecisions vector. + * IMPORTANT!!! + * FuseConvolutionAndBias and FuseMultiplyAndAdd change the originalInputPrecisions vector. + * @return original inputs number + */ size_t getOriginalInputsNumber() const { return originalInputPrecisions.size(); } + /** + * @brief Returns the number of outputs that correspond to ngraph nodes.
+ * The outputs number is computed as the size of the originalOutputPrecisions vector. + * @return original outputs number + */ size_t getOriginalOutputsNumber() const { return originalOutputPrecisions.size(); } @@ -647,7 +702,33 @@ class MKLDNNNode { bool canBePerformedAsScaleShift(const MKLDNNNode *parentNode = nullptr) const; + bool isDynamicNode() const { + return isDynamic; + } + + const Shape& getInputShapeAtPort(size_t port) const { + if (inputShapes.size() <= port) { + IE_THROW() << "Incorrect input port number for node " << getName(); + } + return inputShapes[port]; + } + + const Shape& getOutputShapeAtPort(size_t port) const { + if (outputShapes.size() <= port) { + IE_THROW() << "Incorrect output port number for node " << getName(); + } + return outputShapes[port]; + } + protected: + // TODO [DS]: make pure virtual after all nodes support dynamic shapes + virtual std::vector shapeInfer() const { + IE_THROW(NotImplemented) << "[DS] MKLDNNNode::shapeInfer is not defined for node with type: " << getTypeStr(); + } + virtual void executeDynamicImpl(mkldnn::stream strm) { + IE_THROW(NotImplemented) << "[DS] executeDynamicImpl not implemented for node with type: " << getTypeStr(); + } + bool canFuseSimpleOperation(const MKLDNNNodePtr& node) const; // TODO [mandrono]: place outside of the node API void fillScalesAndShifts(const MKLDNNNode *parentNode, std::vector &scales, std::vector &shifts, const int align = -1); @@ -656,13 +737,13 @@ class MKLDNNNode { this->type = type; } - virtual int getMaxBatch(); + virtual size_t getMaxBatch(); - virtual std::unique_ptr getDefinedInputDesc(const NodeConfig &config, size_t idx) const; - virtual std::unique_ptr getDefinedOutputDesc(const NodeConfig &config, size_t idx) const; - virtual std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); - virtual std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual MemoryDescPtr getDefinedInputDesc(const NodeConfig &config, size_t idx) const; + virtual MemoryDescPtr getDefinedOutputDesc(const NodeConfig &config, size_t idx) const; + virtual MemoryDescPtr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); + virtual MemoryDescPtr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx); /** * @brief Appends new item into ops list with the information on how the node should be executed as post operation. @@ -672,7 +753,7 @@ class MKLDNNNode { virtual void appendPostOps(mkldnn::post_ops& ops); virtual std::shared_ptr initPrimitiveAttr() const { return nullptr; } - typedef std::function + typedef std::function GetPrimitiveMemoryFormatFunc; std::vector internalBlobDesc; @@ -755,7 +836,7 @@ class MKLDNNNode { PortConfig portConfig; portConfig.inPlace = portConfigurator.inPlace; portConfig.constant = portConfigurator.constant; - portConfig.desc = portConfigurator.blockedDescCreator->createUniqueDesc(prc, shape.getStaticDims()); + portConfig.desc = portConfigurator.blockedDescCreator->createSharedDesc(prc, shape); port.push_back(std::move(portConfig)); @@ -764,14 +845,14 @@ class MKLDNNNode { NodeConfig config; for (size_t i = 0; i < inPortConfigs.size(); i++) { - auto shape = inPortConfigs[i].shape.getRank() == 0 ? getParentEdgesAtPort(i)[0]->getShape() : inPortConfigs[i].shape; + auto shape = inPortConfigs[i].shape.getRank() == 0 ? getInputShapeAtPort(i) : inPortConfigs[i].shape; auto prc = inPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ?
getOriginalInputPrecisionAtPort(i) : inPortConfigs[i].prc; if (!fill_port(inPortConfigs[i], shape, prc, config.inConfs)) return; } for (size_t i = 0; i < outPortConfigs.size(); i++) { - auto dims = outPortConfigs[i].shape.getRank() == 0 ? getChildEdgesAtPort(i)[0]->getShape() : outPortConfigs[i].shape; + auto dims = outPortConfigs[i].shape.getRank() == 0 ? getOutputShapeAtPort(i) : outPortConfigs[i].shape; auto prc = outPortConfigs[i].prc == InferenceEngine::Precision::UNSPECIFIED ? getOriginalOutputPrecisionAtPort(i) : outPortConfigs[i].prc; if (!fill_port(outPortConfigs[i], dims, prc, config.outConfs)) return; @@ -782,6 +863,8 @@ class MKLDNNNode { } private: + bool isDynamic = false; + std::vector parentEdges; std::vector childEdges; diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index ed42651a738965..6ae8336ded2eac 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -289,6 +289,19 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { return node->input_value(0).get_partial_shape().rank().get_length() > 5; }); + pass_config->set_callback( + [](const_node_ptr &node) -> bool { + for (size_t i = 0; i < node->get_output_size(); i++) { + const auto outputs = node->get_output_target_inputs(i); + for (const auto &out : outputs) { + if (out.get_node()->get_type_info() != ngraph::op::v0::Result::type_info) { + return false; + } + } + } + return true; + }); + // List of enabled/disabled transformations pass_config->disable(); pass_config->disable(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp index 85566b3833ac6b..258207bf30e696 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.cpp @@ -13,21 +13,21 @@ constexpr size_t channelsPos = 1lu; class PlainFormatCreator : public BlockedDescCreator { public: - BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { - SizeVector order(srcDims.size()); + CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const override { + SizeVector order(srcShape.getRank()); std::iota(order.begin(), order.end(), 0); - return BlockedMemoryDesc(precision, srcDims, srcDims, order); + return CpuBlockedMemoryDesc(precision, srcShape, srcShape.getDims(), order); } size_t getMinimalRank() const override { return 0lu; } }; class PerChannelCreator : public BlockedDescCreator { public: - BlockedMemoryDesc createDesc(const InferenceEngine::Precision &precision, const InferenceEngine::SizeVector &srcDims) const override { - SizeVector order(srcDims.size()); + CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision &precision, const Shape& srcShape) const override { + SizeVector order(srcShape.getRank()); std::iota(order.begin(), order.end(), 0); - SizeVector blkDims = srcDims; - if (srcDims.size() > 2) { + SizeVector blkDims = srcShape.getDims(); + if (srcShape.getRank() > 2) { auto moveElementBack = [](SizeVector& vector, size_t indx) { auto itr = vector.begin() + indx; std::rotate(itr, itr + 1, vector.end()); @@ -37,7 +37,7 @@ class PerChannelCreator : public BlockedDescCreator { moveElementBack(blkDims, channelsPos); } - return 
BlockedMemoryDesc(precision, srcDims, blkDims, order); + return CpuBlockedMemoryDesc(precision, srcShape, blkDims, order); } size_t getMinimalRank() const override { return 3lu; } }; @@ -45,22 +45,22 @@ class PerChannelCreator : public BlockedDescCreator { class ChannelBlockedCreator : public BlockedDescCreator { public: ChannelBlockedCreator(size_t blockSize) : _blockSize(blockSize) {} - BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const override { - if (srcDims.size() < 2) { + CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const override { + if (srcShape.getRank() < 2) { IE_THROW() << "Can't create blocked tensor descriptor!"; } - SizeVector order(srcDims.size()); + SizeVector order(srcShape.getRank()); std::iota(order.begin(), order.end(), 0); order.push_back(channelsPos); - SizeVector blkDims = srcDims; + SizeVector blkDims = srcShape.getDims(); if (Shape::UNDEFINED_DIM != blkDims[channelsPos]) { blkDims[channelsPos] = blkDims[channelsPos] / _blockSize + (blkDims[channelsPos] % _blockSize ? 1 : 0); } blkDims.push_back(_blockSize); - return BlockedMemoryDesc(precision, srcDims, blkDims, order); + return CpuBlockedMemoryDesc(precision, srcShape, blkDims, order); } size_t getMinimalRank() const override { return 3lu; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h index f53524288e4e7c..95da8560cb3786 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h +++ b/inference-engine/src/mkldnn_plugin/nodes/common/blocked_desc_creator.h @@ -6,7 +6,7 @@ #include #include "cpu_shape.h" -#include "cpu_blocked_memory_desc.h" +#include "memory_desc/cpu_blocked_memory_desc.h" namespace MKLDNNPlugin { @@ -27,10 +27,12 @@ class BlockedDescCreator { makeFilteredRange(const CreatorsMap& map, unsigned rank, const std::vector& supportedTypes); static std::pair makeFilteredRange(const CreatorsMap& map, Predicate predicate); - virtual BlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const = 0; - std::unique_ptr createUniqueDesc(const InferenceEngine::Precision& precision, const InferenceEngine::SizeVector& srcDims) const { - return MKLDNNPlugin::make_unique(createDesc(precision, srcDims)); + virtual CpuBlockedMemoryDesc createDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const = 0; + + std::shared_ptr createSharedDesc(const InferenceEngine::Precision& precision, const Shape& srcShape) const { + return std::make_shared(createDesc(precision, srcShape)); } + virtual size_t getMinimalRank() const = 0; virtual ~BlockedDescCreator() = default; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp index 4bf60d6eb21f4a..fb185be8103d91 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.cpp @@ -21,16 +21,20 @@ using namespace InferenceEngine; using namespace mkldnn; using namespace mkldnn::impl::cpu::x64; -bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNAdaptivePoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if 
(isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveAvgPool::type_info)) { - auto adaPool = std::dynamic_pointer_cast(op); + auto adaPool = std::dynamic_pointer_cast(op); if (!adaPool) { errorMessage = "Only opset8 AdaptiveAvgPooling operation is supported"; return false; } } else if (one_of(op->get_type_info(), ngraph::op::v8::AdaptiveMaxPool::type_info)) { - auto adaPool = std::dynamic_pointer_cast(op); + auto adaPool = std::dynamic_pointer_cast(op); if (!adaPool) { errorMessage = "Only opset8 AdaptiveMaxPooling operation is supported"; return false; @@ -69,19 +73,19 @@ void MKLDNNAdaptivePoolingNode::getSupportedDescriptors() { if (getChildEdges().size() != (algorithm == AdaptivePoolingMax ? 2 : 1)) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getParentEdges().size(); - auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto childDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto parentDims = getInputShapeAtPort(0).getStaticDims(); + auto childDims = getOutputShapeAtPort(0).getStaticDims(); spatialDimsCount = parentDims.size() - 2; if (!one_of(spatialDimsCount, 1, 2, 3)) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getInputShapeAtPort(0).getRank(); } - if (getParentEdgeAt(1)->getShape().getRank() != 1) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); + if (getInputShapeAtPort(1).getRank() != 1) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getInputShapeAtPort(1).getRank(); } - if (getChildEdgeAt(0)->getShape().getRank() != getParentEdgeAt(0)->getShape().getRank()) { + if (getOutputShapeAtPort(0).getRank() != getInputShapeAtPort(0).getRank()) { IE_THROW() << errorPrefix << "must keep data rank"; } } @@ -99,7 +103,7 @@ void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() { config.outConfs.resize((algorithm == Algorithm::AdaptivePoolingAvg ? 
1 : 2)); std::vector dataFormats{ LayoutType::ncsp }; - if (getParentEdgeAt(0)->getShape().getStaticDims()[1] != 1) { + if (getInputShapeAtPort(0).getStaticDims()[1] != 1) { dataFormats.push_back(LayoutType::nspc); dataFormats.push_back(LayoutType::nCsp16c); dataFormats.push_back(LayoutType::nCsp8c); @@ -118,8 +122,8 @@ void MKLDNNAdaptivePoolingNode::initSupportedPrimitiveDescriptors() { } void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { - auto inputPrec = getParentEdgeAt(0)->getMemory().GetDescriptor().data.data_type; - auto outputPrec = getChildEdgeAt(0)->getMemory().GetDescriptor().data.data_type; + auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType(); + auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType(); if (!(inputPrec == mkldnn_f32 && outputPrec == mkldnn_f32)) IE_THROW() << errorPrefix << "doesn't support demanded precisions"; @@ -131,21 +135,22 @@ void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { indexDst = reinterpret_cast(getChildEdgeAt(1)->getMemoryPtr()->GetPtr()); } - auto srcBlockDesc = srcMemory0.GetDescriptor().data.format_desc.blocking; + auto isPlainFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::ncsp); + auto isTailCFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::nspc); + auto isBlkFmt = srcMemory0.getDesc().hasLayoutType(LayoutType::nCsp16c) || srcMemory0.getDesc().hasLayoutType(LayoutType::nCsp8c); - int blockSize = srcBlockDesc.inner_nblks > 0 ? srcBlockDesc.inner_blks[0] : 1; - auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); - auto isTailCFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); + auto srcBlockDesc = srcMemory0.GetDescWithType(); + int blockSize = isBlkFmt ? srcBlockDesc->getBlockDims().back() : 1; const auto *src = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *srcPooledSpatialShapes = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - if (srcMemory1.GetElementsCount() != spatialDimsCount) - IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetElementsCount() + if (srcMemory1.GetShape().getElementsCount() != spatialDimsCount) + IE_THROW() << errorPrefix << "has input spatial dimension (" << srcMemory1.GetShape().getElementsCount() << ") inconsistent with pooling vector size (" << spatialDimsCount << ")"; - auto inputDimVector = srcMemory0.GetDims(); + auto inputDimVector = srcMemory0.getStaticDims(); const int N = static_cast(inputDimVector[0]); const int C = static_cast(inputDimVector[1]); const int ID = static_cast(spatialDimsCount == 3 ? inputDimVector[2] : 1); @@ -159,14 +164,14 @@ void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { const int iHW = IH * IW; const int oDHW = OD * OH * OW, oHW = OH * OW; - const int chPadding = srcMemory0.GetDescriptor().data.padded_dims[1]; + const int chPadding = blockSize * (isBlkFmt ? srcBlockDesc->getBlockDims()[1] : srcMemory0.GetShape().getStaticDims()[1]); const int blockCount = (isTailCFmt ? 
1 : chPadding / blockSize); auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); if (!selectedPrimitiveDescriptor) IE_THROW() << errorPrefix << "doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto srcStrides = getParentEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); - auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + auto srcStrides = srcBlockDesc->getStrides(); + auto dstStrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType()->getStrides(); // unified strides array const size_t tailDimsOffset = (isTailCFmt ? -1 : 0); @@ -233,20 +238,20 @@ void MKLDNNAdaptivePoolingNode::execute(mkldnn::stream strm) { [&](int n, int blkIdx, int od, int oh, int ow) { auto srcData = src + n * inStrides[0] + blkIdx * inStrides[1]; auto dstData = dst + n * outStrides[0] + blkIdx * outStrides[1] + - od * outStrides[2] + oh * outStrides[3] + ow * outStrides[4]; + od * outStrides[2] + oh * outStrides[3] + ow * outStrides[4]; int cStart = 0, cEnd = C, inResidual = 0, outResidual = 0; if (!isTailCFmt) { - cStart = blkIdx * blockSize; - cEnd = (blkIdx == blockCount - 1 ? C : cStart + blockSize); + cStart = blkIdx * blockSize; + cEnd = (blkIdx == blockCount - 1 ? C : cStart + blockSize); } for (int c = cStart; c < cEnd; c++) { - if (isTailCFmt) { - inResidual = c * inStrides[1]; - outResidual = c * outStrides[1]; - } else if (!isPlainFmt) { - inResidual = outResidual = c % blockSize; - } - pool(srcData + inResidual, dstData + outResidual, od, oh, ow, n * C + c); + if (isTailCFmt) { + inResidual = c * inStrides[1]; + outResidual = c * outStrides[1]; + } else if (!isPlainFmt) { + inResidual = outResidual = c % blockSize; + } + pool(srcData + inResidual, dstData + outResidual, od, oh, ow, n * C + c); }}); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h index 386e57f4dcf01f..77a7e14d87a1ce 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_adaptive_pooling.h @@ -22,7 +22,7 @@ class MKLDNNAdaptivePoolingNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: int spatialDimsCount; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp index 8700a70c5b6450..43b8fa708ad589 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.cpp @@ -16,8 +16,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNBatchToSpaceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNBatchToSpaceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto batchToSpace = std::dynamic_pointer_cast(op); if (!batchToSpace) { errorMessage = "Only opset2 BatchToSpace operation is supported"; @@ -47,8 +51,8 @@ 
MKLDNNBatchToSpaceNode::MKLDNNBatchToSpaceNode(const std::shared_ptrget_input_size() != 4 || op->get_output_size() != 1) IE_THROW() << errorPrefix << " has incorrect number of input or output edges!"; - inDims = op->get_input_shape(0); - outDims = op->get_output_shape(0); + inDims = getInputShapeAtPort(0).getStaticDims(); + outDims = getOutputShapeAtPort(0).getStaticDims(); if (inDims.size() < 4 || inDims.size() > 5) IE_THROW() << errorPrefix << " has unsupported 'data' input rank: " << inDims.size(); if (inDims.size() != outDims.size()) @@ -114,14 +118,14 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - const bool blocked = srcDesc.hasLayoutType(LayoutType::nCsp8c) || srcDesc.hasLayoutType(LayoutType::nCsp16c); + const bool blocked = srcDesc->hasLayoutType(LayoutType::nCsp8c) || srcDesc->hasLayoutType(LayoutType::nCsp16c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(inDims); auto outShape5D = getShape5D(outDims); auto blockShape = getShape5D(blockShapeIn); - if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { + if (srcDesc->hasLayoutType(LayoutType::nspc) && one_of(srcDesc->getShape().getRank(), 4, 5)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -132,9 +136,9 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - const size_t blockSize = blocked ? dstDesc.getBlockDims().back() : 1lu; - const size_t blockCountInput = srcDesc.getBlockDims()[1]; - const size_t blockCountOutput = dstDesc.getBlockDims()[1]; + const size_t blockSize = blocked ? dstDesc->getBlockDims().back() : 1lu; + const size_t blockCountInput = srcDesc->getBlockDims()[1]; + const size_t blockCountOutput = dstDesc->getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -169,7 +173,7 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - cropsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? 
bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - cropsBeginIn[1]; - if (srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5)) { + if (srcDesc->hasLayoutType(LayoutType::nspc) && one_of(srcDesc->getShape().getRank(), 4, 5)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -224,13 +228,13 @@ void MKLDNNBatchToSpaceNode::batchToSpaceKernel() { } void MKLDNNBatchToSpaceNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) { case 1: batchToSpaceKernel::value_type>(); break; case 2: batchToSpaceKernel::value_type>(); break; case 4: batchToSpaceKernel::value_type>(); break; default: IE_THROW() << "BatchToSpace layer does not support precision '" << - std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) << "'"; + std::string(getParentEdgeAt(0)->getMemory().getDesc().getPrecision().name()) << "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h index 353ea634511dc3..e262108ded04d7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_batch_to_space_node.h @@ -22,7 +22,7 @@ class MKLDNNBatchToSpaceNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: template diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp index 183bc158ff2399..efe30a1c668bf4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.cpp @@ -872,8 +872,13 @@ struct jit_uni_bin_conv_kernel_f32 : public jit_uni_bin_conv_kernel, public jit_ } }; -bool MKLDNNBinaryConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNBinaryConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto binConv = std::dynamic_pointer_cast(op); if (!binConv) { errorMessage = "Only opset1 BinaryConvolution operation is supported"; @@ -942,16 +947,16 @@ void MKLDNNBinaryConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getParentEdgeAt(0)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); + if (getInputShapeAtPort(0).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getInputShapeAtPort(0).getRank(); } - if (getParentEdgeAt(1)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); + if (getInputShapeAtPort(1).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getInputShapeAtPort(1).getRank(); } - if 
(getChildEdgeAt(0)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); + if (getOutputShapeAtPort(0).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getOutputShapeAtPort(0).getRank(); } } @@ -979,20 +984,20 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { //activation auto nspcCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); - config.inConfs[0].desc = nspcCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(0)->getShape().getStaticDims()); + config.inConfs[0].desc = nspcCreator->createSharedDesc(Precision::BIN, getInputShapeAtPort(0)); //weights size_t weiFirstDimBlockSize = implType == impl_desc_type::jit_avx512 ? 16 : 8; //memory::format_tag::OIhw16o32i : memory::format_tag::OIhw8o32i; - auto weiDims = getParentEdgeAt(1)->getShape().getStaticDims(); + auto weiDims = getInputShapeAtPort(1).getStaticDims(); std::vector weiBlockDims = {div_up(weiDims[0], weiFirstDimBlockSize), div_up(weiDims[1], 32), weiDims[2], weiDims[3], weiFirstDimBlockSize, 32}; std::vector weiOrder = {0, 1, 2, 3, 0, 1}; - config.inConfs[1].desc = MKLDNNPlugin::make_unique(Precision::BIN, weiDims, weiBlockDims, weiOrder); + config.inConfs[1].desc = std::make_shared(Precision::BIN, Shape(weiDims), weiBlockDims, weiOrder); //result auto outputPrecision = withBinarization ? Precision::BIN : Precision::FP32; - config.outConfs[0].desc = nspcCreator->createUniqueDesc(outputPrecision, getChildEdgeAt(0)->getShape().getStaticDims()); + config.outConfs[0].desc = nspcCreator->createSharedDesc(outputPrecision, getOutputShapeAtPort(0)); if (withSum) { config.inConfs.push_back(config.outConfs[0]); config.outConfs[0].inPlace = 2; @@ -1003,9 +1008,9 @@ void MKLDNNBinaryConvolutionNode::initSupportedPrimitiveDescriptors() { auto weiCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); auto nspcCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc); - config.inConfs[0].desc = nspcCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(0)->getShape().getStaticDims()); - config.inConfs[1].desc = weiCreator->createUniqueDesc(Precision::BIN, getParentEdgeAt(1)->getShape().getStaticDims()); - config.outConfs[0].desc = nspcCreator->createUniqueDesc(Precision::FP32, getChildEdgeAt(0)->getShape().getStaticDims()); + config.inConfs[0].desc = nspcCreator->createSharedDesc(Precision::BIN, getInputShapeAtPort(0)); + config.inConfs[1].desc = weiCreator->createSharedDesc(Precision::BIN, getInputShapeAtPort(1)); + config.outConfs[0].desc = nspcCreator->createSharedDesc(Precision::FP32, getOutputShapeAtPort(0)); supportedPrimitiveDescriptors.push_back({config, implType}); } } @@ -1015,9 +1020,9 @@ void MKLDNNBinaryConvolutionNode::createPrimitive() { if (!selectedPrimitiveDescriptor) IE_THROW() << "CPU binary convolution with name '" << getName() << "' doesn't have primitive descriptors."; - auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto weiDims = getParentEdgeAt(1)->getShape().getStaticDims(); - auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto srcDims = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims(); + auto weiDims = getParentEdgesAtPort(1)[0]->getMemory().getStaticDims(); + auto dstDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); auto implType = selectedPrimitiveDescriptor->getImplementationType(); @@ -1071,8 +1076,8 @@ void 
MKLDNNBinaryConvolutionNode::createPrimitive() { jcp.nb_oc_blocking = nstl::min(implType == impl_desc_type::jit_sse42 ? 2 : implType == impl_desc_type::jit_avx2 ? 4 : 6, jcp.nb_oc); - auto srcPrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); - auto dstPrecision = getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(); + auto srcPrecision = getParentEdgeAt(0)->getMemory().getDesc().getPrecision(); + auto dstPrecision = getChildEdgeAt(0)->getMemory().getDesc().getPrecision(); jcp.dst_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(dstPrecision); jcp.typesize_in = srcPrecision == Precision::BIN ? 1 : srcPrecision.size(); @@ -1295,21 +1300,21 @@ void MKLDNNBinaryConvolutionNode::execute(mkldnn::stream strm) { auto dst = reinterpret_cast(dstMemory->GetPtr()); auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - std::vector srcStride(srcDesc.getStrides().size()); + std::vector srcStride(srcDesc->getStrides().size()); for (int i = 0; i < srcStride.size(); i++) { - srcStride[srcDesc.getOrder()[i]] = srcDesc.getStrides()[i]; + srcStride[srcDesc->getOrder()[i]] = srcDesc->getStrides()[i]; } auto weiDesc = getParentEdgeAt(1)->getMemory().GetDescWithType(); - std::vector weightsStride(weiDesc.getShape().getRank()); + std::vector weightsStride(weiDesc->getShape().getRank()); for (int i = 0; i < weightsStride.size(); i++) { - weightsStride[weiDesc.getOrder()[i]] = weiDesc.getStrides()[i]; + weightsStride[weiDesc->getOrder()[i]] = weiDesc->getStrides()[i]; } auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - std::vector dstStride(dstDesc.getStrides().size()); + std::vector dstStride(dstDesc->getStrides().size()); for (int i = 0; i < dstStride.size(); i++) { - dstStride[dstDesc.getOrder()[i]] = dstDesc.getStrides()[i]; + dstStride[dstDesc->getOrder()[i]] = dstDesc->getStrides()[i]; } auto selectedPrimitiveDescriptor = getSelectedPrimitiveDescriptor(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h index f67070f8440dc1..f868375aaf9cce 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bin_conv_node.h @@ -87,7 +87,7 @@ class MKLDNNBinaryConvolutionNode : public MKLDNNNode { void setPostOps(mkldnn::primitive_attr &attr); bool canFuse(const MKLDNNNodePtr& node) const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; impl_desc_type getImplType() { return implType; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp index ef9c14ad0d4eef..6bef76d3c7ec44 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.cpp @@ -17,8 +17,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNBroadcastNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto broadcast = std::dynamic_pointer_cast(op); if (!broadcast) { errorMessage = "Only opset1 Broadcast 
operation is supported"; @@ -67,13 +71,13 @@ void MKLDNNBroadcastNode::initSupportedPrimitiveDescriptors() { } void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { - size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getMemory().GetDesc().getShape().getStaticDims())[0]; - SizeVector dst_dims = getChildEdgeAt(0)->getMemory().GetDesc().getShape().getStaticDims(); - SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDesc().getShape().getStaticDims(); + size_t shape_size = (getParentEdgeAt(BROADCAST_SHAPE)->getMemory().getStaticDims())[0]; + SizeVector dst_dims = getChildEdgeAt(0)->getMemory().getStaticDims(); + SizeVector src_dims = getParentEdgeAt(BROADCAST_INPUT)->getMemory().getStaticDims(); auto srcDesc = getParentEdgeAt(BROADCAST_INPUT)->getMemory().GetDescWithType(); - SizeVector srcStrides = srcDesc.getStrides(); - size_t data_size = srcDesc.getPrecision().size(); + SizeVector srcStrides = srcDesc->getStrides(); + size_t data_size = srcDesc->getPrecision().size(); if (!src_dims.size()) src_dims = SizeVector(1, 1); @@ -89,7 +93,7 @@ void MKLDNNBroadcastNode::execute(mkldnn::stream strm) { } auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - InferenceEngine::SizeVector dstStrides = dstDesc.getStrides(); + InferenceEngine::SizeVector dstStrides = dstDesc->getStrides(); InferenceEngine::SizeVector src_aligned(dst_dims.size()); InferenceEngine::SizeVector srcStrides_aligned(dst_dims.size()); size_t prefix_size = dst_dims.size() - src_dims.size(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.h index 0d86a2a99efce7..932bded6c7c000 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_broadcast_node.h @@ -22,7 +22,7 @@ class MKLDNNBroadcastNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: static const size_t BROADCAST_INPUT = 0; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp index 602f4954c3ba91..4567d11548976e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.cpp @@ -13,8 +13,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNBucketizeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNBucketizeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto bucketsize = std::dynamic_pointer_cast(op); if (!bucketsize) { errorMessage = "Only opset3 Bucketize operation is supported"; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.h index 472e6aee3cfb03..cafb8b11f1d4f4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_bucketize_node.h @@ -19,7 +19,7 @@ class MKLDNNBucketizeNode : public MKLDNNNode { void 
execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: template diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp index 12567b74c0ff92..3779395bd15e16 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_concat_node.cpp @@ -11,7 +11,6 @@ #include "mkldnn.hpp" #include "mkldnn/iml_type_mapper.h" -#include "mkldnn_dims.h" #include "mkldnn_edge.h" #include "mkldnn_memory.h" #include "ie_parallel.hpp" @@ -22,7 +21,7 @@ #include #include "common/cpu_memcpy.h" #include "common/blocked_desc_creator.h" -#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -34,6 +33,11 @@ namespace { bool MKLDNNConcatNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto concatOp = ngraph::as_type_ptr(op); if (!concatOp) { errorMessage = "Node is not an instance of the Concat operation."; @@ -62,9 +66,9 @@ MKLDNNConcatNode::MKLDNNConcatNode(const std::shared_ptr& op, cons } void MKLDNNConcatNode::getSupportedDescriptors() { - auto& firstParentDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto& firstParentDims = getInputShapeAtPort(0).getStaticDims(); for (size_t i = 1; i < getParentEdges().size(); i++) { - auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); + auto& dims = getInputShapeAtPort(i).getStaticDims(); bool incorrectDims = false; for (size_t j = 0; j < firstParentDims.size(); j++) { if (j == axis) @@ -87,7 +91,7 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { auto& originInputPrecisions = getOriginalInputPrecisions(); inputPrecision = originInputPrecisions[0]; bool isMixedPrecision = false; - for (int i = 1; i < getOriginalInputsNumber(); i++) { + for (int i = 1; i < inputShapes.size(); i++) { if (originInputPrecisions[0] != originInputPrecisions[i]) { isMixedPrecision = true; break; @@ -101,19 +105,19 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { // Concat supports only equal precisions for inputs and output outputPrecision = inputPrecision; - auto& dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + const auto& dstShape = getOutputShapeAtPort(0); std::vector tdCreatorTypes = {LayoutType::ncsp, LayoutType::nspc}; // check if blocked layouts are available the channels size should be evenly divided by the block size to avoid slow oneDNN ref implementation - if (dstDims.size() > channelAxis) { + if (dstShape.getRank() > channelAxis) { for (auto item : { std::make_pair(8lu, LayoutType::nCsp8c), std::make_pair(16lu, LayoutType::nCsp16c)}) { - SizeVector blkDims = dstDims; + const VectorDims &blkDims = dstShape.getStaticDims(); if (blkDims[channelAxis] % item.first) continue; bool blocked = true; for (size_t i = 0; i < getParentEdges().size(); i++) { - auto& srcDims = getParentEdgeAt(i)->getShape().getStaticDims(); + auto& srcDims = getInputShapeAtPort(i).getStaticDims(); if (srcDims[channelAxis] % item.first) { blocked = false; break; @@ -128,7 +132,8 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { std::vector pdIndexesToReuse; auto& creatorsMap = 
BlockedDescCreator::getCommonCreators(); - auto itrRange = BlockedDescCreator::makeFilteredRange(creatorsMap, static_cast(dstDims.size()), tdCreatorTypes); + + auto itrRange = BlockedDescCreator::makeFilteredRange(creatorsMap, static_cast(dstShape.getRank()), tdCreatorTypes); for (auto itr = itrRange.first; itr != itrRange.second; ++itr) { NodeConfig config; @@ -136,15 +141,14 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { config.outConfs.resize(1); config.outConfs[0].inPlace = -1; config.outConfs[0].constant = false; - config.outConfs[0].desc = itr->second->createUniqueDesc(outputPrecision, dstDims); + config.outConfs[0].desc = itr->second->createSharedDesc(outputPrecision, dstShape); config.inConfs.resize(getParentEdges().size()); for (size_t i = 0; i < getParentEdges().size(); ++i) { config.inConfs[i].inPlace = -1; config.inConfs[i].constant = false; - config.inConfs[i].desc = MemoryDescUtils::applyUndefinedOffset( - itr->second->createDesc(inputPrecision, getParentEdgeAt(i)->getShape().getStaticDims())); + config.inConfs[i].desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(itr->second->createDesc(inputPrecision, getInputShapeAtPort(i))); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); if (itr->first != LayoutType::nspc) { @@ -167,8 +171,8 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - const auto &order = refConfig.outConfs[0].desc->as()->getOrder(); - const auto &blkDims = refConfig.outConfs[0].desc->as()->getBlockDims(); + const auto &order = refConfig.outConfs[0].desc->as()->getOrder(); + const auto &blkDims = refConfig.outConfs[0].desc->as()->getBlockDims(); auto numOfDim = blkDims.size(); SizeVector offsets(numOfDim, 0lu); @@ -184,14 +188,14 @@ void MKLDNNConcatNode::initSupportedPrimitiveDescriptors() { } } - config.outConfs[0].desc = MKLDNNPlugin::make_unique(outputPrecision, dstDims, blkDims, order, offset, offsets, strides); + config.outConfs[0].desc = std::make_shared(outputPrecision, dstShape, blkDims, order, offset, offsets, strides); for (size_t i = 0; i < getParentEdges().size(); i++) { - const auto& srcBlkDims = refConfig.inConfs[i].desc->as()->getBlockDims(); - const auto& dims = refConfig.inConfs[i].desc->getShape().getStaticDims(); + const auto& srcBlkDims = refConfig.inConfs[i].desc->as()->getBlockDims(); + const auto& shape = refConfig.inConfs[i].desc->getShape(); config.inConfs[i].inPlace = 0; - config.inConfs[i].desc = MKLDNNPlugin::make_unique(inputPrecision, dims, srcBlkDims, order, offset, offsets, strides); + config.inConfs[i].desc = std::make_shared(inputPrecision, shape, srcBlkDims, order, offset, offsets, strides); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -257,7 +261,7 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { } size_t maxCount = 0; - auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto outDims = getOutputShapeAtPort(0).getStaticDims(); LayoutType convertTo = LayoutType::ncsp; for (auto &it : formatFrequency) { if (it.second > maxCount) { @@ -279,7 +283,7 @@ void MKLDNNConcatNode::selectOptimalPrimitiveDescriptor() { break; } for (size_t i = 0; i < getParentEdges().size(); i++) { - auto& inpDims = getParentEdgeAt(i)->getShape().getStaticDims(); + auto& inpDims = getInputShapeAtPort(i).getStaticDims(); if (inpDims[1] % item.first != 0) { convertTo = LayoutType::ncsp; break; @@ -339,7 +343,7 @@ void 
MKLDNNConcatNode::createPrimitive() { IE_THROW() << "Preferable primitive descriptor is not set."; //check if selected Tensor descriptor has nspc layout and concat axis is C - if (axis == channelAxis && getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + if (axis == channelAxis && getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { canOptimizeNspc = true; return; } @@ -353,9 +357,9 @@ void MKLDNNConcatNode::createPrimitive() { IE_THROW() << "Source memory from " << parent->getName() << " didn't allocate for node " << getName() << "."; } - - auto desc = srcMemPtr->GetDescriptor(); - auto& dims = getParentEdgeAt(i)->getShape().getStaticDims(); +// DnnlBlockedMemoryDesc + auto desc = srcMemPtr->GetDescWithType()->getDnnlDesc(); + auto& dims = getInputShapeAtPort(i).getStaticDims(); for (size_t j = 0; j < dims.size(); j++) { desc.data.dims[j] = dims[j]; } @@ -363,8 +367,8 @@ void MKLDNNConcatNode::createPrimitive() { srcs_d.emplace_back(desc); } - auto desc = getChildEdgeAt(0)->getMemory().GetDescriptor(); - auto& dims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto desc = getChildEdgeAt(0)->getMemory().GetDescWithType()->getDnnlDesc(); + auto& dims = getOutputShapeAtPort(0).getStaticDims(); for (size_t i = 0; i < dims.size(); i++) { desc.data.dims[i] = dims[i]; desc.data.padded_dims[i] = dims[i]; @@ -393,14 +397,12 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() { auto config = selected_pd->getConfig(); if (!isConfigDefined(config)) { for (size_t i = 0; i < config.inConfs.size(); i++) { - config.inConfs[i].desc = getDefinedInputDesc(config, i); // Concat doesn't support different precision on inputs - config.inConfs[i].desc->setPrecision(inputPrecision); + config.inConfs[i].desc = MemoryDescUtils::cloneWithNewPrecision(*getDefinedInputDesc(config, i), inputPrecision); } for (size_t i = 0; i < config.outConfs.size(); i++) { - config.outConfs[i].desc = getDefinedOutputDesc(config, i); - config.outConfs[i].desc->setPrecision(outputPrecision); + config.outConfs[i].desc = MemoryDescUtils::cloneWithNewPrecision(*getDefinedOutputDesc(config, i), outputPrecision); } initDescriptor(config); @@ -418,47 +420,49 @@ void MKLDNNConcatNode::initOptimalPrimitiveDescriptor() { int num = getChildEdgeAt(i)->getOutputNum(); if (num >= 0) { auto childConf = getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()->getConfig().inConfs[num]; - childConf.desc->setPrecision(config.outConfs[i].desc->getPrecision()); + childConf.desc = MemoryDescUtils::cloneWithNewPrecision(*childConf.desc, config.outConfs[i].desc->getPrecision()); if (getChildEdgeAt(i)->getChild()->getSelectedPrimitiveDescriptor()) { if (!childConf.desc->isDefined() && childConf.inPlace >= 0) getChildEdgeAt(i)->getChild()->initOptimalPrimitiveDescriptor(); if (childConf.desc->isDefined() && childConf.desc->isCompatible(*config.outConfs[i].desc)) { - config.outConfs[i].desc = childConf.desc->clone(); + config.outConfs[i].desc = childConf.desc; continue; } } } // reset undefined offsets - config.outConfs[i].desc = MemoryDescUtils::resetOffset(config.outConfs[i].desc.get()); + config.outConfs[i].desc = MemoryDescUtils::cloneWithDefaultStridesAndOffset(*config.outConfs[i].desc); } - auto firstOutBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[0].desc); + auto firstOutBlockingDesc = config.outConfs[0].desc->as(); size_t offset = 0; for (size_t i = 0; i < config.inConfs.size(); i++) { - auto inpBlockingDesc = 
MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[i].desc); - config.inConfs[i].desc = MKLDNNPlugin::make_unique(inpBlockingDesc.getPrecision(), - inpBlockingDesc.getShape().getStaticDims(), - inpBlockingDesc.getBlockDims(), - inpBlockingDesc.getOrder(), - firstOutBlockingDesc.getOffsetPadding() + offset, - firstOutBlockingDesc.getOffsetPaddingToData(), - firstOutBlockingDesc.getStrides()); + auto oldDesc = config.inConfs[i].desc; + auto inpBlockingDesc = oldDesc->as(); + + config.inConfs[i].desc = std::make_shared(inpBlockingDesc->getPrecision(), + inpBlockingDesc->getShape(), + inpBlockingDesc->getBlockDims(), + inpBlockingDesc->getOrder(), + firstOutBlockingDesc->getOffsetPadding() + offset, + firstOutBlockingDesc->getOffsetPaddingToData(), + firstOutBlockingDesc->getStrides()); size_t axisSize = 1; - auto firstInpBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); - if (firstInpBlockingDesc.hasLayoutType(LayoutType::nspc)) { + auto firstInpBlockingDesc = config.inConfs[0].desc->as(); + if (firstInpBlockingDesc->hasLayoutType(LayoutType::nspc)) { // This is more general and works for any "direct" Layout (such as nchw or nhwc), but it doesn't work for blocked - size_t realAxis = inverseOrder(firstInpBlockingDesc.getOrder(), axis); - for (size_t j = realAxis; j < inpBlockingDesc.getBlockDims().size(); j++) { - size_t jj = firstInpBlockingDesc.getOrder()[j]; - axisSize *= inpBlockingDesc.getBlockDims()[jj]; + size_t realAxis = inverseOrder(firstInpBlockingDesc->getOrder(), axis); + for (size_t j = realAxis; j < inpBlockingDesc->getBlockDims().size(); j++) { + size_t jj = firstInpBlockingDesc->getOrder()[j]; + axisSize *= inpBlockingDesc->getBlockDims()[jj]; } } else { // This works for nchw and nchw8c/nchw16c - for (size_t j = axis; j < inpBlockingDesc.getBlockDims().size(); j++) { - axisSize *= inpBlockingDesc.getBlockDims()[j]; + for (size_t j = axis; j < inpBlockingDesc->getBlockDims().size(); j++) { + axisSize *= inpBlockingDesc->getBlockDims()[j]; } } offset += axisSize; @@ -502,7 +506,7 @@ void MKLDNNConcatNode::execNspcSpecCase() { for (size_t i = 0; i < num_src; i++) { const MKLDNNMemory& src_mem = getParentEdgeAt(i)->getMemory(); - const size_t num_channels = src_mem.GetDims()[channelAxis]; + const size_t num_channels = src_mem.getStaticDims()[channelAxis]; channelsDataSize.push_back(num_channels * dataSize); src_ptrs.push_back(reinterpret_cast(src_mem.GetData())); @@ -520,4 +524,4 @@ void MKLDNNConcatNode::execNspcSpecCase() { }); } -REG_MKLDNN_PRIM_FOR(MKLDNNConcatNode, Concatenation); +REG_MKLDNN_PRIM_FOR(MKLDNNConcatNode, Concatenation); \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp index 4bff8260c7900a..8c4d16d61d1234 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.cpp @@ -18,14 +18,20 @@ #include #include #include "common/cpu_convert.h" -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return 
false; + } + if (!ngraph::is_type(op) && !ngraph::is_type(op)) { errorMessage = "Only opset1 Convolution and GroupConvolution operations are supported"; return false; @@ -194,8 +200,7 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - int ndims = getParentEdgesAtPort(0)[0]->getShape().getRank(); - MKLDNNDims weightsDims = MKLDNNDims(weightDims); + int ndims = getInputShapeAtPort(0).getRank(); withDWConv = isFusedWith(Convolution); @@ -226,9 +231,9 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { for (int j = 0; j < paddingR.size(); j++) { int with_group = isGrouped ? 1 : 0; - int krn = weightsDims[with_group + 2 + j]; - int src = getParentEdgeAt(0)->getShape().getStaticDims()[2 + j]; - int dst = getChildEdgeAt(0)->getShape().getStaticDims()[2 + j]; + int krn = weightDims[with_group + 2 + j]; + int src = getInputShapeAtPort(0).getStaticDims()[2 + j]; + int dst = getOutputShapeAtPort(0).getStaticDims()[2 + j]; krn = (krn - 1)*(dilation[j] + 1) + 1; int calc_dst = (src - krn + paddingL[j]) / stride[j] + 1; @@ -244,11 +249,11 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { outputDataType = memory::data_type::f32; if (eltwisePrecision == Precision::BF16) eltwisePrecision = Precision::FP32; - in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + in_candidate = std::make_shared(getInputShapeAtPort(0), inputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); - out_candidate = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + out_candidate = std::make_shared(getOutputShapeAtPort(0), outputDataType, ndims == 5 ? memory::format_tag::ndhwc : memory::format_tag::nhwc); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + createDescriptor({ in_candidate }, { out_candidate }); } else { inputDataType = (getOriginalInputPrecisionAtPort(0) == Precision::BF16 && !(isDepthWise() && ndims == 5)) ? memory::data_type::bf16 : memory::data_type::f32; @@ -285,36 +290,36 @@ void MKLDNNConvolutionNode::getSupportedDescriptors() { memory::format_tag nCsp16c = ndims == 4 ? memory::format_tag::nChw16c : memory::format_tag::nCdhw16c; memory::format_tag nCsp8c = ndims == 4 ? 
memory::format_tag::nChw8c : memory::format_tag::nCdhw8c; - auto inputDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto outputDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto inputShape = getInputShapeAtPort(0); + auto outputShape = getOutputShapeAtPort(0); if (IC == 1 && groupOC == 1) { - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = std::make_shared(inputShape, inputDataType, ncsp); + out_candidate = std::make_shared(outputShape, outputDataType, ncsp); + createDescriptor({ in_candidate }, { out_candidate }); } else if (IC < 4) { - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = std::make_shared(inputShape, inputDataType, ncsp); + out_candidate = std::make_shared(outputShape, outputDataType, nCsp16c); + createDescriptor({ in_candidate }, { out_candidate }); + out_candidate = std::make_shared(outputShape, outputDataType, nCsp8c); + createDescriptor({ in_candidate }, { out_candidate }); } else { - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp16c); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp16c); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nCsp8c); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nCsp8c); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = std::make_shared(inputShape, inputDataType, nCsp16c); + out_candidate = std::make_shared(outputShape, outputDataType, nCsp16c); + createDescriptor({ in_candidate }, { out_candidate }); + in_candidate = std::make_shared(inputShape, inputDataType, nCsp8c); + out_candidate = std::make_shared(outputShape, outputDataType, nCsp8c); + createDescriptor({ in_candidate }, { out_candidate }); } - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, ncsp); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, ncsp); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = std::make_shared(inputShape, inputDataType, ncsp); + out_candidate = std::make_shared(outputShape, outputDataType, ncsp); + createDescriptor({ in_candidate }, { out_candidate }); if (inputDataType != memory::data_type::bf16 && isNspcAvailable()) { - in_candidate = MKLDNNPlugin::make_unique(inputDims, inputDataType, nspc); - out_candidate = MKLDNNPlugin::make_unique(outputDims, outputDataType, nspc); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + in_candidate = std::make_shared(inputShape, inputDataType, nspc); + out_candidate = std::make_shared(outputShape, outputDataType, nspc); + createDescriptor({ in_candidate }, { out_candidate }); } } } @@ -395,11 +400,12 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - auto srcDesc = getSrcMemDesc(itpd, i); - if (isGrouped || srcDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) - dataConfig.desc
= std::move(srcDesc); - else - dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*srcDesc); + auto desc = getSrcMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked && !isGrouped) { + dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + dataConfig.desc = std::move(desc); + } config.inConfs.push_back(dataConfig); } @@ -414,10 +420,10 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = std::make_shared(Shape(dwWeightsDims), weightsPrc, memory::format_tag::Goihw8g); config.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = std::make_shared(Shape(dwBiasesDims), biasPrc, memory::format_tag::x); config.inConfs.push_back(dataConfig); } @@ -428,18 +434,18 @@ void MKLDNNConvolutionNode::initSupportedPrimitiveDescriptors() { } dataConfig.constant = false; - - auto dstDesc = getDstMemDesc(itpd, i); - if (isGrouped || dstDesc->getFormatKind() != dnnl_format_kind_t::dnnl_blocked) - dataConfig.desc = std::move(dstDesc); - else - dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*dstDesc); + auto desc = getDstMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked && !isGrouped) { + dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + dataConfig.desc = std::move(desc); + } config.outConfs.push_back(dataConfig); if (withSum) { dataConfig.inPlace = -1; - dataConfig.desc->setPrecision(eltwisePrecision); + dataConfig.desc = MemoryDescUtils::cloneWithNewPrecision(*dataConfig.desc, dataConfig.desc->getPrecision()); config.inConfs.push_back(dataConfig); } } @@ -480,21 +486,19 @@ bool MKLDNNConvolutionNode::created() const { return getType() == Convolution; } -void MKLDNNConvolutionNode::createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) { - auto inDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); - auto outDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); +void MKLDNNConvolutionNode::createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) { + const auto inDesc = MemoryDescUtils::convertToDnnlMemoryDesc(inputDesc[0])->getDnnlDesc(); + const auto outDesc = MemoryDescUtils::convertToDnnlMemoryDesc(outputDesc[0])->getDnnlDesc(); - memory::data_type wdt = MKLDNNExtensionUtils::IEPrecisionToDataType(inDesc.getPrecision()); + memory::data_type wdt = static_cast(inDesc.data.data_type); memory::data_type bdt = memory::data_type::f32; - if (inDesc.getPrecision() == Precision::U8 || inDesc.getPrecision() == Precision::I8) { + if (inDesc.data.data_type == mkldnn_s8 || inDesc.data.data_type == mkldnn_u8) { wdt = memory::data_type::s8; } - MKLDNNDims blocked_weightDims(weightDims); - MKLDNNDims blocked_biasesDims(biasesDims); - mkldnn::memory::desc wgh_candidate(blocked_weightDims, wdt, memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), wdt, memory::format_tag::any); std::vector algorithms; @@ -506,7 +510,7 @@ void MKLDNNConvolutionNode::createDescriptor(const std::vector conv_desc; if (withBiases) { - mkldnn::memory::desc bias_candidate(blocked_biasesDims, bdt, memory::format_tag::any); + mkldnn::memory::desc 
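
[Note] createDescriptor above now derives the weight data type directly from the oneDNN input descriptor (s8 weights for quantized u8/s8 activations, f32 biases) and leaves the weight format as format_tag::any so the library chooses it. A minimal sketch of just the data-type selection, with DataType and pickConvTypes as hypothetical stand-ins rather than plugin code:

    #include <cstdint>

    // Hypothetical stand-in for mkldnn::memory::data_type.
    enum class DataType : std::uint8_t { f32, bf16, s8, u8 };

    struct ConvTypes { DataType weights; DataType bias; };

    // Quantized (u8/s8) activations use s8 weights; everything else reuses the
    // activation type. Biases stay in f32, as in the hunk above.
    ConvTypes pickConvTypes(DataType input) {
        ConvTypes t{input, DataType::f32};
        if (input == DataType::u8 || input == DataType::s8)
            t.weights = DataType::s8;
        return t;
    }
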
bias_candidate(MKLDNNExtensionUtils::convertToDnnlDims(biasesDims), bdt, memory::format_tag::any); conv_desc.reset(new convolution_forward::desc(prop_kind::forward_scoring, alg, inDesc, wgh_candidate, bias_candidate, outDesc, @@ -556,13 +560,9 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { if (canBeExecutedInInt8()) { isStridedBlobsSupported = false; } - // TODO [NM]: fix strided blobs feature support for dynamic weights - // if (getOriginalInputsNumber() != 1) { - // isStridedBlobsSupported = false; - // } if (isStridedBlobsSupported) { - createDescriptor({config.inConfs[0].desc.get()}, {config.outConfs[0].desc.get()}); + createDescriptor({config.inConfs[0].desc}, {config.outConfs[0].desc}); } mkldnn::primitive_attr attr; @@ -600,10 +600,10 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNPlugin::make_unique(dwWeightsDims, weightsPrc, memory::format_tag::Goihw8g); + dataConfig.desc = std::make_shared(Shape(dwWeightsDims), weightsPrc, memory::format_tag::Goihw8g); cfg.inConfs.push_back(dataConfig); - dataConfig.desc = MKLDNNPlugin::make_unique(dwBiasesDims, biasPrc, memory::format_tag::x); + dataConfig.desc = std::make_shared(Shape(dwBiasesDims), biasPrc, memory::format_tag::x); cfg.inConfs.push_back(dataConfig); } @@ -614,7 +614,7 @@ void MKLDNNConvolutionNode::initDescriptor(const NodeConfig& config) { dataConfig.desc = getDstMemDesc(itpd, j); if (withSum) { auto eltwiseConfig = dataConfig; - eltwiseConfig.desc->setPrecision(eltwisePrecision); + eltwiseConfig.desc = MemoryDescUtils::cloneWithNewPrecision(*eltwiseConfig.desc, eltwisePrecision); cfg.inConfs.push_back(eltwiseConfig); dataConfig.inPlace = getParentEdges().size() - 1; } @@ -659,12 +659,12 @@ void MKLDNNConvolutionNode::filterSupportedDescriptors() { while (itd != descs.end()) { bool isSuitableDesc = true; if (!inputMemoryFormatsFilter.empty()) { - MKLDNNMemoryDesc src_tdesc(std::shared_ptr(*itd)->data.src_desc); - isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.src_desc); + isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]); } if (!outputMemoryFormatsFilter.empty()) { - MKLDNNMemoryDesc dst_tdesc(std::shared_ptr(*itd)->data.dst_desc); - isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.dst_desc); + isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]); } if (!isSuitableDesc) { itd = descs.erase(itd); @@ -698,21 +698,21 @@ bool MKLDNNConvolutionNode::isPossibleToSkipInitConfig(MKLDNNDescriptor &desc) c isPossibleJitPlanar = false; std::shared_ptr convDesc(desc); - auto srcMemDesc = MKLDNNMemoryDesc {convDesc->data.src_desc}; - auto dstMemDesc = MKLDNNMemoryDesc {convDesc->data.dst_desc}; + auto srcMemDesc = MKLDNNExtensionUtils::makeDescriptor(convDesc->data.src_desc); + auto dstMemDesc = MKLDNNExtensionUtils::makeDescriptor(convDesc->data.dst_desc); auto srcDataType = convDesc->data.src_desc.data_type; auto dstDataType = convDesc->data.dst_desc.data_type; - bool isPlanarFloatConv = srcMemDesc.hasLayoutType(LayoutType::ncsp) - && dstMemDesc.hasLayoutType(LayoutType::ncsp) + bool isPlanarFloatConv = srcMemDesc->hasLayoutType(LayoutType::ncsp) + && dstMemDesc->hasLayoutType(LayoutType::ncsp) && srcDataType == memory::data_type::f32 && dstDataType == 
memory::data_type::f32; return !isPossibleJitPlanar && isPlanarFloatConv; } -std::unique_ptr MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); - return MKLDNNPlugin::make_unique(std::move(desc)); +std::shared_ptr MKLDNNConvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : primitive_desc_it.src_desc(idx); + return MKLDNNExtensionUtils::makeDescriptor(desc); } bool MKLDNNConvolutionNode::canFuse(const MKLDNNNodePtr& node) const { @@ -755,8 +755,8 @@ bool MKLDNNConvolutionNode::isNspcAvailable() const { } // A bunch of heuristics are designed to cut off not optimal nspc convolution applications - auto inpDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto inpDims = getInputShapeAtPort(0).getStaticDims(); + auto outDims = getOutputShapeAtPort(0).getStaticDims(); auto ndims = inpDims.size(); if (isDepthWise()) { @@ -826,7 +826,7 @@ InferenceEngine::Blob::Ptr MKLDNNConvolutionNode::createInternalBlob(InferenceEn if (blb == nullptr) IE_THROW() << "Cannot get const blob for node " << getName() << "."; - auto const elementsCount = blb->GetElementsCount(); + auto const elementsCount = blb->GetDescWithType()->getPaddedElementsCount(); InferenceEngine::TensorDesc desc(InferenceEngine::Precision::FP32, dims, getWeightsLayoutByDims(dims, isGrouped)); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h index 7fa5ed80bb8040..b787f71068691e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_conv_node.h @@ -18,10 +18,10 @@ class MKLDNNConvolutionNode : public MKLDNNNode { public: MKLDNNConvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void initDescriptor(const NodeConfig& config) override; void createPrimitive() override; void selectOptimalPrimitiveDescriptor() override; @@ -32,7 +32,7 @@ class MKLDNNConvolutionNode : public MKLDNNNode { return false; } InferenceEngine::Precision getRuntimePrecision() const override; - std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::shared_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; const mkldnn::memory& getWeights() const; const mkldnn::memory& getBias() const; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp index 00a403c8bb6782..d91c291d86236c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.cpp @@ -7,13 +7,19 @@ #include "common/cpu_convert.h" #include 
"common/blocked_desc_creator.h" #include +#include "utils/ngraph_utils.hpp" using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNConvertNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto convert = std::dynamic_pointer_cast(op); if (!convert) { errorMessage = "Only opset1 Convert operation is supported"; @@ -59,6 +65,13 @@ void MKLDNNConvertNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << " has incorrect number of output edges"; } +bool MKLDNNConvertNode::isSupportedDesc(const MemoryDesc &desc) { + bool isSupported = desc.getType() & MemoryDescType::Blocked; + if (desc.getType() == MemoryDescType::DnnlBlocked) + isSupported &= desc.as()->hasEmptyExtraData(); + return isSupported; +} + void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; @@ -69,21 +82,28 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { config.dynBatchSupport = false; - // if input and output pointers are not null, then the inp/output tensor descriptors were set using setDescs method, so - // they should be used as the actual descriptors. + bool canInitExternalDesc = false; if (input && output) { - dataIn.desc = input->clone(); + canInitExternalDesc = true; + canInitExternalDesc &= isSupportedDesc(*input); + canInitExternalDesc &= isSupportedDesc(*output); + } + + // if input and output pointers are not null and not contain extra data, then the inp/output tensor descriptors were set using setDescs method, so + // they should be used as the actual descriptors. 
+ if (canInitExternalDesc) { + dataIn.desc = input; config.inConfs.push_back(dataIn); // inp/out layouts must be the same - dataConfigOut.desc = config.inConfs[0].desc->clone(); - dataConfigOut.desc->setPrecision(output->getPrecision()); + dataConfigOut.desc = config.inConfs[0].desc; + dataConfigOut.desc = MemoryDescUtils::cloneWithNewPrecision(*dataConfigOut.desc, output->getPrecision()); config.outConfs.push_back(dataConfigOut); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); - } else if (getOriginalInputsNumber() == 1 && getOriginalOutputsNumber() == 1) { - const Shape& insShape = getParentEdgeAt(0)->getShape(); + } else if (inputShapes.size() == 1 && outputShapes.size() == 1) { + const Shape& insShape = getInputShapeAtPort(0); auto insPrecision = getOriginalInputPrecisionAtPort(0); - const Shape& outputShape = getChildEdgeAt(0)->getShape(); + const Shape& outputShape = getOutputShapeAtPort(0); auto outPrecision = getOriginalOutputPrecisionAtPort(0); config.inConfs.push_back(dataIn); @@ -93,8 +113,8 @@ void MKLDNNConvertNode::initSupportedPrimitiveDescriptors() { auto range = BlockedDescCreator::makeFilteredRange(creators, insShape.getRank()); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(insPrecision, insShape.getDims())); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(outPrecision, outputShape.getDims())); + config.inConfs[0].desc = std::make_shared(itr->second->createDesc(insPrecision, insShape)); + config.outConfs[0].desc = std::make_shared(itr->second->createDesc(outPrecision, outputShape)); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -117,12 +137,16 @@ void MKLDNNConvertNode::createPrimitive() { void MKLDNNConvertNode::execute(mkldnn::stream strm) { auto& parentMem = getParentEdgeAt(0)->getMemory(); auto& childMem = getChildEdgeAt(0)->getMemory(); - if (parentMem.GetElementsCount() != childMem.GetElementsCount()) + + const auto parentPaddElemCount = parentMem.GetDescWithType()->getPaddedElementsCount(); + const auto childPaddElemCount = childMem.GetDescWithType()->getPaddedElementsCount(); + + if (parentPaddElemCount != childPaddElemCount) IE_THROW() << errorPrefix << " has different elements number in input and output buffers"; void* srcPtr = parentMem.GetPtr(); void* dstPtr = childMem.GetPtr(); - cpu_convert(srcPtr, dstPtr, parentMem.GetDesc().getPrecision(), childMem.GetDesc().getPrecision(), parentMem.GetElementsCount()); + cpu_convert(srcPtr, dstPtr, parentMem.getDesc().getPrecision(), childMem.getDesc().getPrecision(), parentPaddElemCount); } bool MKLDNNConvertNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h index 38707385f7a8ba..9ed32a4d17ee78 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_convert_node.h @@ -38,11 +38,13 @@ class MKLDNNConvertNode : public MKLDNNNode { const MemoryDesc& getInput() const { return *input; } const MemoryDesc& getOutput() const { return *output; } - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + + static bool isSupportedDesc(const MemoryDesc &desc); private: - std::unique_ptr input; - std::unique_ptr output; + 
MemoryDescPtr input; + MemoryDescPtr output; std::string errorPrefix; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp index 2bf514fffda72e..bc4e88b3f955ac 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.cpp @@ -12,8 +12,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNCTCGreedyDecoderNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNCTCGreedyDecoderNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto greedyDecOp = ngraph::as_type_ptr(op); if (!greedyDecOp) { errorMessage = "Node is not an instance of the CTCGreedyDecoder operation from operation set v0."; @@ -69,9 +73,9 @@ void MKLDNNCTCGreedyDecoderNode::execute(mkldnn::stream strm) { const float* sequenceMask = reinterpret_cast(getParentEdgeAt(SEQUENCE_LENGTH_INDEX)->getMemoryPtr()->GetPtr()); float* outputSequences = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const size_t T = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[0]; - const size_t B = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[1]; - const int C = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[2]; + const size_t T = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[0]; + const size_t B = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[1]; + const int C = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[2]; const size_t BC = B * C; const size_t CB1 = C * (B - 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.h index 26554ae7333dca..aa42dda6a97f5b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_node.h @@ -19,7 +19,7 @@ class MKLDNNCTCGreedyDecoderNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: const size_t DATA_INDEX = 0lu; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp index acd273a9ad9b82..ab6a5c79e0a69e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.cpp @@ -12,8 +12,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNCTCGreedyDecoderSeqLenNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNCTCGreedyDecoderSeqLenNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto greedyDecOp = ngraph::as_type_ptr(op); if (!greedyDecOp) 
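
[Note] The execute() changes above (and the analogous ones in the nodes that follow) switch from querying the edge's declared shape to querying the dimensions of the memory actually allocated for this inference, which remains meaningful once shapes are only known at runtime. A compact illustration of the idea using hypothetical stand-in types, not the plugin's real classes:

    #include <cstddef>
    #include <vector>

    using VectorDims = std::vector<std::size_t>;

    // The allocated memory always carries fully defined extents, even when the
    // node's declared shape is dynamic.
    struct Memory {
        VectorDims dims;
        const VectorDims& getStaticDims() const { return dims; }
    };

    struct Edge {
        Memory memory;
        const Memory& getMemory() const { return memory; }
    };

    // Dimensions used inside execute() come from the memory object, so the code
    // keeps working for dynamically shaped inputs.
    std::size_t batchOf(const Edge& edge) {
        return edge.getMemory().getStaticDims()[0];
    }
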
{ errorMessage = "Node is not an instance of the CTCGreedyDecoderSeqLen operation from operation set v6."; @@ -58,9 +62,9 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << "has unsupported 'sequence_length' input precision: " << seqLenPrecision; std::vector inDataConf; - inDataConf.reserve(getOriginalInputsNumber()); + inDataConf.reserve(inputShapes.size()); inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); - for (int i = 1; i < getOriginalInputsNumber(); ++i) + for (int i = 1; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, @@ -75,9 +79,9 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { int* decodedClasses = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemoryPtr()->GetPtr()); int* decodedClassesLength = reinterpret_cast(getChildEdgesAtPort(DECODED_CLASSES_LENGTH_INDEX)[0]->getMemoryPtr()->GetPtr()); - const size_t B = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[0];; - const size_t T = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[1];; - const int C = getParentEdgeAt(DATA_INDEX)->getShape().getStaticDims()[2];; + const size_t B = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[0];; + const size_t T = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[1];; + const int C = getParentEdgeAt(DATA_INDEX)->getMemory().getStaticDims()[2];; const size_t TC = T * C; int blankIndex = C - 1; @@ -90,7 +94,7 @@ void MKLDNNCTCGreedyDecoderSeqLenNode::execute(mkldnn::stream strm) { std::string errorMsg = errorPrefix + ". Sequence length " + std::to_string(sequenceLengths[b]) + " cannot be greater than according decoded classes dimension size " - + std::to_string(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getShape().getStaticDims()[1]); + + std::to_string(getChildEdgesAtPort(DECODED_CLASSES_INDEX)[0]->getMemory().getStaticDims()[1]); IE_THROW() << errorMsg; } workAmount += sequenceLengths[b]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.h index b1d5ab6d9ffef3..95ef66222c198f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_greedy_decoder_seq_len_node.h @@ -19,7 +19,7 @@ class MKLDNNCTCGreedyDecoderSeqLenNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: const size_t DATA_INDEX = 0lu; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp index 47da05014863bf..d3e0f149c7c943 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.cpp @@ -11,8 +11,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNCTCLossNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNCTCLossNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic 
shapes"; + return false; + } const auto ctcLossOp = ngraph::as_type_ptr(op); if (!ctcLossOp) { errorMessage = "Node is not an instance of the CTCLoss operation from operation set v4."; @@ -47,9 +51,9 @@ void MKLDNNCTCLossNode::initSupportedPrimitiveDescriptors() { return; std::vector inDataConf; - inDataConf.reserve(getOriginalInputsNumber()); + inDataConf.reserve(inputShapes.size()); inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); - for (int i = 1; i < getOriginalInputsNumber(); ++i) + for (int i = 1; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, @@ -66,9 +70,10 @@ void MKLDNNCTCLossNode::execute(mkldnn::stream strm) { const int* labelsLength = reinterpret_cast(getParentEdgeAt(3)->getMemoryPtr()->GetPtr()); float* dstData = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const size_t batchNum = getParentEdgeAt(0)->getShape().getStaticDims()[0]; - const size_t maxTime = getParentEdgeAt(0)->getShape().getStaticDims()[1]; - const size_t classesNum = getParentEdgeAt(0)->getShape().getStaticDims()[2]; + const auto &inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); + const size_t batchNum = inDims[0]; + const size_t maxTime = inDims[1]; + const size_t classesNum = inDims[2]; int blankIndex = classesNum - 1; if (inputShapes.size() > 4) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.h index b46ff413e829be..8b5a0253b36868 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_ctc_loss_node.h @@ -19,7 +19,7 @@ class MKLDNNCTCLossNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: bool ctcMergeRepeated; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp index 5124409cf8b9d8..542ca78f3ea26a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.cpp @@ -16,8 +16,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNCumSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNCumSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto cumsum = std::dynamic_pointer_cast(op); if (!cumsum) { errorMessage = "Only opset3 CumSum operation is supported"; @@ -71,16 +75,16 @@ void MKLDNNCumSumNode::initSupportedPrimitiveDescriptors() { dataPrecision != Precision::FP32 && dataPrecision != Precision::I64 && dataPrecision != Precision::U64 && dataPrecision != Precision::BF16) IE_THROW() << errorPrefix << " has unsupported 'data' input precision: " << dataPrecision.name(); - if (getOriginalInputsNumber() == numOfInputs) { + if (inputShapes.size() == numOfInputs) { const auto &axisTensorPrec = getOriginalInputPrecisionAtPort(AXIS); if (axisTensorPrec != Precision::I32 && axisTensorPrec != Precision::I64) IE_THROW() << errorPrefix << " has unsupported 'axis' 
input precision: " << axisTensorPrec.name(); } std::vector inDataConf; - inDataConf.reserve(getOriginalInputsNumber()); + inDataConf.reserve(inputShapes.size()); inDataConf.emplace_back(LayoutType::ncsp, dataPrecision); - for (int i = 1; i < getOriginalInputsNumber(); ++i) + for (int i = 1; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp, Precision::I32); addSupportedPrimDesc(inDataConf, @@ -133,7 +137,7 @@ template void MKLDNNCumSumNode::exec() { const auto *input = reinterpret_cast(getParentEdgeAt(CUM_SUM_DATA)->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - const std::vector strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType().getStrides(); + const VectorDims strides = getParentEdgeAt(CUM_SUM_DATA)->getMemory().GetDescWithType()->getStrides(); if (reverse) { if (exclusive) { @@ -151,7 +155,7 @@ void MKLDNNCumSumNode::exec() { } template -void MKLDNNCumSumNode::cumSum(const dataType *input, dataType *output, const std::vector &strides) { +void MKLDNNCumSumNode::cumSum(const dataType *input, dataType *output, const VectorDims &strides) { SizeVector iterationRange(numOfDims - 1); size_t j = 0; for (size_t i = 0; i < shape.size(); i++) { @@ -248,8 +252,8 @@ inline size_t MKLDNNCumSumNode::getStartOffset(const std::vector &forSta } size_t MKLDNNCumSumNode::getAxis(const MKLDNNMemory& _axis, const MKLDNNMemory& _data) const { - const auto& axisPrecision = _axis.GetDesc().getPrecision(); - const int64_t dataShapeSize = static_cast(_data.GetDesc().getShape().getRank()); + const auto& axisPrecision = _axis.getDesc().getPrecision(); + const int64_t dataShapeSize = static_cast(_data.GetShape().getRank()); int64_t axisValueFromBlob; switch (axisPrecision) { case Precision::I32 : { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h index bbe180f5544910..2836af15660519 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_cum_sum_node.h @@ -19,7 +19,7 @@ class MKLDNNCumSumNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: template diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index 62c173c72f5a29..fd4d8f6dd990fd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -16,14 +16,20 @@ #include #include #include -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (std::dynamic_pointer_cast(op) == nullptr && std::dynamic_pointer_cast(op) == nullptr) { errorMessage = "Only opset1 ConvolutionBackpropData and GroupConvolutionBackpropData 
operations are supported"; @@ -42,8 +48,8 @@ bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { - internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { - return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); + internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> DnnlMemoryDescPtr { + return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.weights_desc(0)); }); std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { @@ -152,7 +158,7 @@ bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() const { if (!withGroups && stride.back() > 3) return false; if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) { - auto inDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto inDims = getOutputShapeAtPort(0).getStaticDims(); // heuristicConst = 2^26 // heuristicParam = IC^2 * SP auto heuristicConst = 67108864; @@ -231,8 +237,8 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { for (int i = 0; i < paddingR.size(); i++) { int with_group = getAlgorithm() == DeconvolutionGrouped ? 1 : 0; int krn = weightDims[with_group + 2 + i]; - int src = getChildEdgeAt(0)->getShape().getStaticDims()[2 + i]; - int dst = getParentEdgeAt(0)->getShape().getStaticDims()[2 + i]; + int src = getOutputShapeAtPort(0).getStaticDims()[2 + i]; + int dst = getInputShapeAtPort(0).getStaticDims()[2 + i]; krn = (krn - 1)*(dilation[i] + 1) + 1; int calc_dst = (src - krn + paddingL[i]) / stride[i] + 1; @@ -243,15 +249,15 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { // WA: if int8 deconvolution is supported, we create internal weights blob in IO format std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]); internalBlobs.push_back(createWeiBlobAsIO(weightDims)); - auto format = getParentEdgeAt(0)->getShape().getRank() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); - createDescriptor({&in_candidate}, {&out_candidate}); + auto format = getInputShapeAtPort(0).getRank() == 5 ? 
dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; + MemoryDescPtr in_candidate = std::make_shared(getInputShapeAtPort(0), inputDataType, format); + MemoryDescPtr out_candidate = std::make_shared(getOutputShapeAtPort(0), outputDataType, format); + createDescriptor({in_candidate}, {out_candidate}); } else { - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { - MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); - MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); - createDescriptor({&in_candidate}, {&out_candidate}); + for (auto format : getAvailableFormatsForDims(getInputShapeAtPort(0))) { + MemoryDescPtr in_candidate = std::make_shared(getInputShapeAtPort(0), inputDataType, format); + MemoryDescPtr out_candidate = std::make_shared(getOutputShapeAtPort(0), outputDataType, format); + createDescriptor({in_candidate}, {out_candidate}); } } setPostOps(attr); @@ -292,20 +298,20 @@ void MKLDNNDeconvolutionNode::filterSupportedDescriptors() { bool isSuitableDesc = true; if (!inputMemoryFormatsFilter.empty()) { if (isInt8) { - auto src_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.src_desc); - isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.src_desc); + isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]); } else { - auto src_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.diff_src_desc); - isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + auto src_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.diff_src_desc); + isSuitableDesc &= src_tdesc->isSame(inputMemoryFormatsFilter[0]); } } if (!outputMemoryFormatsFilter.empty()) { if (isInt8) { - auto dst_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.dst_desc); - isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.dst_desc); + isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]); } else { - auto dst_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.diff_dst_desc); - isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + auto dst_tdesc = MKLDNNExtensionUtils::makeDescriptor(std::shared_ptr(*itd)->data.diff_dst_desc); + isSuitableDesc &= dst_tdesc->isSame(outputMemoryFormatsFilter[0]); } } if (!isSuitableDesc) { @@ -347,10 +353,10 @@ void MKLDNNDeconvolutionNode::createPrimitive() { } } -void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - const MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); - const MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); +void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + const auto in_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*inputDesc[0]); + const auto out_candidate = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(*outputDesc[0]); // grouping and autoblicking is not compatible if ((withGroups && !isDW) && (in_candidate.blocksExtended() || out_candidate.blocksExtended())) @@ -361,29 +367,27 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector deconv_desc; deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, 
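
[Note] getSupportedDescriptors above validates paddingR against the relationship between input and output spatial extents, using the dilated kernel size ((krn - 1) * (dilation + 1) + 1, since oneDNN stores dilation minus one). For reference, the generic transposed-convolution geometry those checks encode; this is a textbook formula written as a standalone helper, not code taken from the plugin:

    #include <cstddef>

    // Effective extent of a kernel of size k with conventional dilation
    // (dilation == 1 means dense taps).
    std::size_t effectiveKernel(std::size_t k, std::size_t dilation) {
        return (k - 1) * dilation + 1;
    }

    // Output extent of a transposed convolution along one spatial axis
    // (output_padding omitted).
    std::size_t deconvOutput(std::size_t in, std::size_t stride,
                             std::size_t k, std::size_t dilation,
                             std::size_t padL, std::size_t padR) {
        return stride * (in - 1) + effectiveKernel(k, dilation) - padL - padR;
    }
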
mkldnn::algorithm::deconvolution_direct, - in_candidate, wgh_candidate, out_candidate, + in_candidate.getDnnlDesc(), wgh_candidate, out_candidate.getDnnlDesc(), convertDims(stride), convertDims(dilation), convertDims(paddingL), convertDims(paddingR))); descs.emplace_back(deconv_desc); } else { - MKLDNNDims weightsDims = MKLDNNDims(weightDims); - mkldnn::memory::desc wgh_candidate(weightsDims, in_candidate.getDataType(), memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightDims), in_candidate.getDataType(), memory::format_tag::any); for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) { std::shared_ptr conv_desc; conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg, - out_candidate, wgh_candidate, in_candidate, + out_candidate.getDnnlDesc(), wgh_candidate, in_candidate.getDnnlDesc(), convertDims(stride), convertDims(dilation), convertDims(paddingL), convertDims(paddingR))); std::shared_ptr deconv_desc; - deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate, wgh_candidate, - in_candidate, + deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate.getDnnlDesc(), wgh_candidate, + in_candidate.getDnnlDesc(), convertDims(stride), convertDims(dilation), convertDims(paddingL), @@ -400,21 +404,18 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::shared_ptr MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx == 2) { - auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisionAtPort(2)); - return MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), dataType, - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(2)->getShape().getRank())); + return std::make_shared(getOriginalInputPrecisionAtPort(2), Shape(getInputShapeAtPort(2).getStaticDims())); } - MKLDNNMemoryDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : isInt8 ? MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)) : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_desc(idx)); - return MKLDNNPlugin::make_unique(std::move(desc)); + auto desc = idx > 0 ? primitive_desc_it.weights_desc(idx - 1) : isInt8 ? primitive_desc_it.src_desc(idx) : primitive_desc_it.diff_dst_desc(idx); + return MKLDNNExtensionUtils::makeDescriptor(desc); } -std::unique_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - return isInt8 ? MKLDNNPlugin::make_unique(primitive_desc_it.dst_desc(idx)) : - MKLDNNPlugin::make_unique(primitive_desc_it.diff_src_desc(idx)); +std::shared_ptr MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = isInt8 ? 
primitive_desc_it.dst_desc(idx) : primitive_desc_it.diff_src_desc(idx); + return MKLDNNExtensionUtils::makeDescriptor(desc); } InferenceEngine::Precision MKLDNNDeconvolutionNode::getRuntimePrecision() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index 15ee71d6af74ac..a01941b9ef7f64 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -17,8 +17,8 @@ class MKLDNNDeconvolutionNode : public MKLDNNNode { MKLDNNDeconvolutionNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void createPrimitive() override; void filterSupportedPrimitiveDescriptors() override; void filterSupportedDescriptors(); @@ -31,12 +31,12 @@ class MKLDNNDeconvolutionNode : public MKLDNNNode { return static_cast(getParentEdges().size()); } - std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::shared_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::shared_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; bool canFuse(const MKLDNNNodePtr& node) const override; private: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp index 370524be475d32..603d479ebdda99 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_def_conv_node.cpp @@ -14,6 +14,7 @@ #include #include #include "ie_parallel.hpp" +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -741,6 +742,10 @@ struct jit_uni_def_conv_kernel_f32 : public jit_uni_def_conv_kernel, public jit_ bool MKLDNNDeformableConvolutionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } if (!one_of(op->get_type_info(), ngraph::op::v1::DeformableConvolution::type_info, ngraph::op::v8::DeformableConvolution::type_info)) { @@ -793,20 +798,20 @@ void MKLDNNDeformableConvolutionNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges"; - if (getParentEdgeAt(0)->getShape().getRank() != 4) { + if (getInputShapeAtPort(0).getRank() != 4) { IE_THROW() << "Deformable convolution layer. Unsupported mode. 
Only 4D blobs are supported as input."; } - if (getParentEdgeAt(1)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); + if (getInputShapeAtPort(1).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getInputShapeAtPort(1).getRank(); } - if (getParentEdgeAt(2)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getShape().getRank(); + if (getInputShapeAtPort(2).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getInputShapeAtPort(2).getRank(); } - if (getChildEdgeAt(0)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); + if (getOutputShapeAtPort(0).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getOutputShapeAtPort(0).getRank(); } } @@ -853,45 +858,45 @@ void MKLDNNDeformableConvolutionNode::initSupportedPrimitiveDescriptors() { auto weiFormat = group > 1 ? mayiuse(avx512_common) ? memory::format_tag::gOIhw16i16o : memory::format_tag::gOIhw8i8o : mayiuse(avx512_common) ? memory::format_tag::OIhw16i16o : memory::format_tag::OIhw8i8o; - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), + config.inConfs[0].desc = std::make_shared(getInputShapeAtPort(0), memory::data_type::f32, dataFormat); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), + config.inConfs[1].desc = std::make_shared(getInputShapeAtPort(1), memory::data_type::f32, offFormat); - auto& wDims = getParentEdgeAt(2)->getShape().getStaticDims(); + auto& wDims = getInputShapeAtPort(2).getStaticDims(); if (group > 1 && wDims.size() != 5) { auto new_dims = InferenceEngine::SizeVector({group, div_up(wDims[0], group)}); for (int i = 1; i < wDims.size(); i++) { new_dims.push_back(wDims[i]); } - config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), + config.inConfs[2].desc = std::make_shared(getInputShapeAtPort(2), memory::data_type::f32, weiFormat); } else { - config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), + config.inConfs[2].desc = std::make_shared(getInputShapeAtPort(2), memory::data_type::f32, weiFormat); } if (inputsNumber > 3) { - config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(3)->getShape().getStaticDims(), + config.inConfs[3].desc = std::make_shared(getInputShapeAtPort(3), memory::data_type::f32, memory::format_tag::nchw); } - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), + config.outConfs[0].desc = std::make_shared(getOutputShapeAtPort(0), memory::data_type::f32, dataFormat); supportedPrimitiveDescriptors.push_back({config, impl_type}); } else { // reference implementation - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + config.inConfs[0].desc = std::make_shared(getInputShapeAtPort(0), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, + config.inConfs[1].desc = std::make_shared(getInputShapeAtPort(1), memory::data_type::f32, memory::format_tag::nchw); - config.inConfs[2].desc = 
MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::f32, + config.inConfs[2].desc = std::make_shared(getInputShapeAtPort(2), memory::data_type::f32, memory::format_tag::oihw); if (inputsNumber > 3) { - config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(3)->getShape().getStaticDims(), memory::data_type::f32, + config.inConfs[3].desc = std::make_shared(getInputShapeAtPort(3), memory::data_type::f32, memory::format_tag::nchw); } - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), memory::data_type::f32, + config.outConfs[0].desc = std::make_shared(getOutputShapeAtPort(0), memory::data_type::f32, memory::format_tag::nchw); supportedPrimitiveDescriptors.push_back({config, impl_type}); } @@ -903,9 +908,9 @@ void MKLDNNDeformableConvolutionNode::createPrimitive() { IE_THROW() << "CPU deformable convolution with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto weiDims = getParentEdgeAt(2)->getShape().getStaticDims(); - auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto srcDims = getParentEdgeAt(0)->getMemory().getStaticDims(); + auto weiDims = getParentEdgeAt(2)->getMemory().getStaticDims(); + auto dstDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); jcp.dg = deformable_group; @@ -1136,23 +1141,23 @@ void MKLDNNDeformableConvolutionNode::execute(mkldnn::stream strm) { auto config = selectedPrimitiveDescriptor->getConfig(); auto src_block_desc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - std::vector src_strides(src_block_desc.getStrides().size()); + std::vector src_strides(src_block_desc->getStrides().size()); for (int i = 0; i < src_strides.size(); i++) { - src_strides[src_block_desc.getOrder()[i]] = src_block_desc.getStrides()[i]; + src_strides[src_block_desc->getOrder()[i]] = src_block_desc->getStrides()[i]; } auto dst_block_desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - std::vector dst_strides(dst_block_desc.getStrides().size()); + std::vector dst_strides(dst_block_desc->getStrides().size()); for (int i = 0; i < dst_strides.size(); i++) { - dst_strides[dst_block_desc.getOrder()[i]] = dst_block_desc.getStrides()[i]; + dst_strides[dst_block_desc->getOrder()[i]] = dst_block_desc->getStrides()[i]; } - auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType().getStrides(); - auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType().getStrides(); + auto off_strides = getParentEdgeAt(1)->getMemory().GetDescWithType()->getStrides(); + auto wei_strides = getParentEdgeAt(2)->getMemory().GetDescWithType()->getStrides(); InferenceEngine::SizeVector modulation_strides; if (inputsNumber > 3) { - modulation_strides = getParentEdgeAt(3)->getMemory().GetDescWithType().getStrides(); + modulation_strides = getParentEdgeAt(3)->getMemory().GetDescWithType()->getStrides(); } @@ -1171,4 +1176,4 @@ InferenceEngine::Precision MKLDNNDeformableConvolutionNode::getRuntimePrecision( return getMaxPrecision(getInputPrecisions()); } -REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution); +REG_MKLDNN_PRIM_FOR(MKLDNNDeformableConvolutionNode, DeformableConvolution); \ No newline at end of file diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp index a117d3acbdcd4d..c341e98a78bcf4 
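
[Note] The DeformableConvolution execute() hunk above gathers strides from the blocked descriptor and scatters them back into logical dimension order through the descriptor's order array (src_strides[order[i]] = strides[i]). A standalone sketch of that remapping, assuming order and strides describe only the outer, non-blocked dimensions:

    #include <cstddef>
    #include <vector>

    // strides[i] is the stride of the i-th dimension in the descriptor's own
    // (possibly permuted) order; order[i] says which logical dimension that is.
    // The result holds strides indexed by logical dimension.
    std::vector<std::size_t> toLogicalOrder(const std::vector<std::size_t>& strides,
                                            const std::vector<std::size_t>& order) {
        std::vector<std::size_t> logical(strides.size(), 0);
        for (std::size_t i = 0; i < strides.size(); ++i)
            logical[order[i]] = strides[i];
        return logical;
    }
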
100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.cpp @@ -21,8 +21,12 @@ using namespace mkldnn; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; -bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNDepthToSpaceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto depthToSpace = std::dynamic_pointer_cast(op); if (!depthToSpace) { errorMessage = "Only opset1 DepthToSpace operation is supported"; @@ -99,7 +103,7 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto srcDims = getInputShapeAtPort(0).getStaticDims(); const size_t nDims = srcDims.size(); impl_desc_type impl_type; @@ -140,8 +144,8 @@ void MKLDNNDepthToSpaceNode::initSupportedPrimitiveDescriptors() { auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createUniqueDesc(precision, getParentEdgeAt(0)->getShape().getStaticDims()); - config.outConfs[0].desc = itr->second->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); + config.inConfs[0].desc = itr->second->createSharedDesc(precision, getInputShapeAtPort(0)); + config.outConfs[0].desc = itr->second->createSharedDesc(precision, getOutputShapeAtPort(0)); supportedPrimitiveDescriptors.emplace_back(config, impl_type); } } @@ -156,13 +160,12 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor"; - SizeVector srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); - SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + VectorDims srcDims = srcMemPtr->getStaticDims(); size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + const bool isBlocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; @@ -194,8 +197,7 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + VectorDims srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -224,11 +226,9 @@ void MKLDNNDepthToSpaceNode::createPrimitive() { } reshapeAndSetPermOrder(orderShiftForDims, orderShiftForBlocks, firstSpatialOrder, srcBlockedDims); - } else if 
(getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + } else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); - dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); - dstDims.erase(dstDims.begin() + 1); firstSpatialOrder = 1; size_t shift = static_cast(mode == DEPTH_FIRST) + nSpatialDims + 1; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.h index a7c0145fc8153f..0ab0b3bd940b06 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_depth_to_space_node.h @@ -21,7 +21,7 @@ class MKLDNNDepthToSpaceNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: enum Mode { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp index 1d44dd3f7475e5..b731092cf97742 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.cpp @@ -24,8 +24,12 @@ bool SortScorePairDescend>(const std::pair pair2.first) || (pair1.first == pair2.first && pair1.second.second < pair2.second.second); } -bool MKLDNNDetectionOutputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNDetectionOutputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto doOp = ngraph::as_type_ptr(op); if (!doOp) { errorMessage = "Node is not an instance of the DetectionOutput from the operations set v0."; @@ -116,8 +120,8 @@ void MKLDNNDetectionOutputNode::initSupportedPrimitiveDescriptors() { return; std::vector inDataConf; - inDataConf.reserve(getOriginalInputsNumber()); - for (int i = 0; i < getOriginalInputsNumber(); ++i) + inDataConf.reserve(inputShapes.size()); + for (int i = 0; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, @@ -136,7 +140,7 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { const float *arm_loc_data = inputShapes.size() > 4 ? 
reinterpret_cast(getParentEdgeAt(idx_arm_location)->getMemoryPtr()->GetPtr()) : nullptr; - const int N = getParentEdgeAt(idx_confidence)->getShape().getStaticDims()[0]; + const int N = getParentEdgeAt(idx_confidence)->getMemory().getStaticDims()[0]; float *decoded_bboxes_data = _decoded_bboxes.data(); float *reordered_conf_data = _reordered_conf.data(); @@ -285,8 +289,9 @@ void MKLDNNDetectionOutputNode::execute(mkldnn::stream strm) { } } - const int num_results = getChildEdgesAtPort(0)[0]->getShape().getStaticDims()[2]; - const int DETECTION_SIZE = getChildEdgesAtPort(0)[0]->getShape().getStaticDims()[3]; + const auto outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); + const int num_results = outDims[2]; + const int DETECTION_SIZE = outDims[3]; if (DETECTION_SIZE != 7) { IE_THROW() << NOT_IMPLEMENTED; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.h index dbf9bde760907c..7d3626242346f8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_detection_output_node.h @@ -19,7 +19,7 @@ class MKLDNNDetectionOutputNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: const int idx_location = 0; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp index 1796d49989e9eb..e937e3df98bed4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.cpp @@ -19,8 +19,12 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNDFTNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNDFTNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto interpDFT = std::dynamic_pointer_cast(op); const auto interpIDFT = std::dynamic_pointer_cast(op); @@ -86,7 +90,7 @@ void MKLDNNDFTNode::initSupportedPrimitiveDescriptors() { IE_THROW() << layerErrorPrefix << " has unsupported 'axes' input precision: " << axesPrecision.name(); } - if (getOriginalInputsNumber() > SIGNAL_SIZE_INDEX) { + if (inputShapes.size() > SIGNAL_SIZE_INDEX) { const auto& signalSizeTensorPrec = getOriginalInputPrecisionAtPort(SIGNAL_SIZE_INDEX); if (signalSizeTensorPrec != Precision::I32 && signalSizeTensorPrec != Precision::I64) { IE_THROW() << layerErrorPrefix << " has unsupported 'signal_size' input precision: " << signalSizeTensorPrec.name(); @@ -95,7 +99,7 @@ void MKLDNNDFTNode::initSupportedPrimitiveDescriptors() { std::vector inDataConfigurators({{LayoutType::ncsp, Precision::FP32}, {LayoutType::ncsp, Precision::I32}}); - if (getOriginalInputsNumber() > SIGNAL_SIZE_INDEX) + if (inputShapes.size() > SIGNAL_SIZE_INDEX) inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, Precision::FP32}}, impl_desc_type::ref_any); @@ -225,7 +229,7 @@ void copyDataToOutputWithSignalSize(const float* input, const 
std::vector(axesEdge->getMemoryPtr()->GetPtr()); - axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getShape().getStaticDims()[0]); + axes = std::vector(axesStartPtr, axesStartPtr + axesEdge->getMemory().getStaticDims()[0]); for (auto& axis : axes) { if (axis < 0) { axis += inputShape.size() - 1; @@ -233,7 +237,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } std::sort(axes.begin(), axes.end()); - outputShape = getChildEdgeAt(0)->getShape().getStaticDims(); + outputShape = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); for (size_t axis : axes) { size_t nComplex = outputShape[axis]; // FFT uses different twiddle factors @@ -247,8 +251,8 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { const auto *input = reinterpret_cast(inputDataEdge->getMemoryPtr()->GetPtr()); auto *output = reinterpret_cast(outputDataEdge->getMemoryPtr()->GetPtr()); - auto inputStrides = inputDataEdge->getMemory().GetDescWithType().getStrides(); - auto outputStrides = outputDataEdge->getMemory().GetDescWithType().getStrides(); + auto inputStrides = inputDataEdge->getMemory().GetDescWithType()->getStrides(); + auto outputStrides = outputDataEdge->getMemory().GetDescWithType()->getStrides(); if (inputShape != outputShape) { copyDataToOutputWithSignalSize(input, inputShape, inputStrides, output, outputShape, outputStrides); } else { @@ -257,7 +261,7 @@ void MKLDNNDFTNode::execute(mkldnn::stream strm) { } // 1d case - if (inputDataEdge->getShape().getRank() == 2) { + if (inputDataEdge->getMemory().GetShape().getRank() == 2) { size_t nComplex = outputShape[0]; if (IsPowerOfTwo(nComplex)) { fft(output, nComplex * 2, true); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.h index 12d23dd5bcebdb..81ddff6e6c528b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_dft_node.h @@ -21,7 +21,7 @@ class MKLDNNDFTNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: void dftNd(float* output, const std::vector& outputStrides) const; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp index 1bba69c7cfbb28..3cb13a860fd42b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.cpp @@ -37,6 +37,7 @@ #include #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; @@ -372,7 +373,7 @@ struct jit_uni_eltwise_generic : public MKLDNNPlugin::jit_uni_eltwise_kernel, pu Vmm vmm_d_bias = Vmm(13); Vmm vmm_zero = Vmm(15); - std::unique_ptr emu_vcvtneps2bf16; + std::shared_ptr emu_vcvtneps2bf16; std::shared_ptr eltwise_emitter = nullptr; std::vector> post_op_emitters = {}; @@ -958,14 +959,30 @@ std::map& op, std::string& errorMessage) noexcept { + try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + + if (initializers.find(op->get_type_info()) == initializers.end()) { + errorMessage = "Doesn't support Eltwise algorithm: " + std::string(op->get_type_name()); + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + MKLDNNEltwiseNode::MKLDNNEltwiseNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { - if (initializers.find(op->get_type_info()) != initializers.end()) { - initializers[op->get_type_info()](op, *this); - } else { - IE_THROW(NotImplemented) - << "CPU Eltwise node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; } + initializers[op->get_type_info()](op, *this); } size_t MKLDNNEltwiseNode::getOpInputsNum() const { @@ -1103,9 +1120,9 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { }; auto initDesc = [&] (LayoutType lt) -> NodeDesc { - auto createMemoryDesc = [lt](MKLDNNEdgePtr edge, Precision prc, size_t offset) -> std::unique_ptr { - if (lt == ChannelsFirst && edge->getShape().getRank() != 1) { - auto dims = edge->getShape().getStaticDims(); + auto createMemoryDesc = [lt](const Shape &shape, Precision prc, size_t offset) -> std::shared_ptr { + if (lt == ChannelsFirst && shape.getRank() != 1) { + auto dims = shape.getStaticDims(); auto ndims = dims.size(); std::vector order(ndims); std::iota(order.begin(), order.end(), 0); @@ -1119,11 +1136,11 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks[i] = dims[order[i]]; } - return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); - } else if (lt == Blocked && edge->getShape().getRank() != 1 && edge->getShape().getStaticDims()[1] != 1) { + return std::make_shared(prc, shape, blocks, order, offset); + } else if (lt == Blocked && shape.getRank() != 1 && shape.getStaticDims()[1] != 1) { size_t blockSize = mayiuse(x64::avx512_common) ? 16 : 8; - std::vector blocks = edge->getShape().getStaticDims(); + std::vector blocks = shape.getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); @@ -1131,27 +1148,27 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { blocks.push_back(blockSize); order.push_back(1); - return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); + return std::make_shared(prc, shape, blocks, order, offset); } else { - std::vector blocks = edge->getShape().getStaticDims(); + std::vector blocks = shape.getStaticDims(); std::vector order(blocks.size()); std::iota(order.begin(), order.end(), 0); - return MKLDNNPlugin::make_unique(prc, edge->getShape().getStaticDims(), blocks, order, offset); + return std::make_shared(prc, shape, blocks, order, offset); } }; size_t offset = std::numeric_limits::max(); NodeConfig config; - config.dynBatchSupport = getChildEdgeAt(0)->getShape().getRank() > 1 && getChildEdgeAt(0)->getShape() == - getParentEdgeAt(0)->getShape(); + config.dynBatchSupport = getOutputShapeAtPort(0).getRank() > 1 && getOutputShapeAtPort(0) == + getInputShapeAtPort(0); for (size_t i = 0; i < getParentEdges().size(); i++) { PortConfig portConfig; portConfig.inPlace = (!i && canBeInPlace() && inputPrecisions[i] == outputPrecision) ? 
0 : -1; portConfig.constant = false; - portConfig.desc = createMemoryDesc(getParentEdgeAt(i), inputPrecisions[i], offset); + portConfig.desc = createMemoryDesc(getInputShapeAtPort(i), inputPrecisions[i], offset); config.inConfs.push_back(portConfig); } @@ -1160,7 +1177,7 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { portConfig.inPlace = -1; portConfig.constant = false; - portConfig.desc = createMemoryDesc(getChildEdgeAt(0), outputPrecision, offset); + portConfig.desc = createMemoryDesc(getOutputShapeAtPort(0), outputPrecision, offset); config.outConfs.push_back(portConfig); @@ -1178,20 +1195,20 @@ void MKLDNNEltwiseNode::initSupportedPrimitiveDescriptors() { return {config, impl_type}; }; - bool isChannelsFirstApplicable = one_of(getChildEdgeAt(0)->getShape().getRank(), 1, 2, 4, 5); + bool isChannelsFirstApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 2, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getParentEdgeAt(i)->getShape().getRank(), 1, 2, 4, 5); - isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getParentEdgeAt(i)->getShape().getRank() != 1, - getChildEdgeAt(0)->getShape().getRank() == - getParentEdgeAt(i)->getShape().getRank()); + isChannelsFirstApplicable = isChannelsFirstApplicable && one_of(getInputShapeAtPort(i).getRank(), 1, 2, 4, 5); + isChannelsFirstApplicable = isChannelsFirstApplicable && implication(getInputShapeAtPort(i).getRank() != 1, + getOutputShapeAtPort(0).getRank() == + getInputShapeAtPort(i).getRank()); } - bool isBlockedApplicable = one_of(getChildEdgeAt(0)->getShape().getRank(), 1, 4, 5); + bool isBlockedApplicable = one_of(getOutputShapeAtPort(0).getRank(), 1, 4, 5); for (size_t i = 0; i < getParentEdges().size(); i++) { - isBlockedApplicable = isBlockedApplicable && one_of(getParentEdgeAt(i)->getShape().getRank(), 1, 4, 5); - isBlockedApplicable = isBlockedApplicable && implication(getParentEdgeAt(i)->getShape().getRank() != 1, - getChildEdgeAt(0)->getShape().getRank() == - getParentEdgeAt(i)->getShape().getRank()); + isBlockedApplicable = isBlockedApplicable && one_of(getInputShapeAtPort(i).getRank(), 1, 4, 5); + isBlockedApplicable = isBlockedApplicable && implication(getInputShapeAtPort(i).getRank() != 1, + getOutputShapeAtPort(0).getRank() == + getInputShapeAtPort(i).getRank()); } if (isChannelsFirstApplicable) @@ -1215,7 +1232,7 @@ void MKLDNNEltwiseNode::createPrimitive() { auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); std::vector order(maxInputSize); - auto outOrder = outBlockingDesc.getOrder(); + auto outOrder = outBlockingDesc->getOrder(); for (size_t i = 0; i < order.size(); i++) { if (i < order.size() - outOrder.size()) order[i] = i; @@ -1223,18 +1240,18 @@ void MKLDNNEltwiseNode::createPrimitive() { order[i] = outOrder[i - (order.size() - outOrder.size())] + (order.size() - outOrder.size()); } - size_t outRank = outBlockingDesc.getBlockDims().size(); + size_t outRank = outBlockingDesc->getBlockDims().size(); for (int i = 0; i < outRank; i++) { - dims_out[dims_out.size() - 1 - i] = outBlockingDesc.getBlockDims()[outRank - 1 - i]; + dims_out[dims_out.size() - 1 - i] = outBlockingDesc->getBlockDims()[outRank - 1 - i]; } for (int i = 0; i < inputNum; i++) { auto inBlockingDesc = getParentEdgeAt(i)->getMemory().GetDescWithType(); - size_t inRank = inBlockingDesc.getBlockDims().size(); + size_t inRank = inBlockingDesc->getBlockDims().size(); // WA to normalize blocked and planar layouts - auto 
inOrder = inBlockingDesc.getOrder(); - size_t startOff = outOrder.size() != outBlockingDesc.getShape().getRank() && + auto inOrder = inBlockingDesc->getOrder(); + size_t startOff = outOrder.size() != outBlockingDesc->getShape().getRank() && outOrder[outOrder.size() - 1] != inOrder[inOrder.size() - 1] ? 1 : 0; // WA to handle nspc layout with 1D tensors @@ -1243,7 +1260,7 @@ void MKLDNNEltwiseNode::createPrimitive() { } for (int j = 0; j < inRank; j++) { - dims_in[i][dims_in[i].size() - 1 - j - startOff] = inBlockingDesc.getBlockDims()[inRank - 1 - j]; + dims_in[i][dims_in[i].size() - 1 - j - startOff] = inBlockingDesc->getBlockDims()[inRank - 1 - j]; } } @@ -1259,7 +1276,7 @@ void MKLDNNEltwiseNode::createPrimitive() { offsets_out.resize(maxInputSize, 1); offset_out_calc(offsets_out, dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_out[j] *= getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size(); + offsets_out[j] *= getChildEdgeAt(0)->getMemory().getDesc().getPrecision().size(); } offsets_in.resize(inputNum); @@ -1267,17 +1284,17 @@ void MKLDNNEltwiseNode::createPrimitive() { offsets_in[i].resize(maxInputSize, 1); offset_in_calc(offsets_in[i], dims_in[i], dims_out); for (int j = 0; j < maxInputSize; j++) { - offsets_in[i][j] *= getParentEdgeAt(i)->getMemory().GetDesc().getPrecision().size(); + offsets_in[i][j] *= getParentEdgeAt(i)->getMemory().getDesc().getPrecision().size(); } } start_offset_in.resize(inputNum); for (size_t i = 0; i < inputNum; i++) { - start_offset_in[i] = getParentEdgeAt(i)->getMemory().GetDescriptor().data.offset0 * - MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(getParentEdgeAt(i)->getMemory().GetDescriptor().data.data_type)); + const auto desc = getParentEdgeAt(i)->getMemory().GetDescWithType(); + start_offset_in[i] = desc->getOffsetPadding() * desc->getPrecision().size(); } - start_offset_out = getChildEdgeAt(0)->getMemory().GetDescriptor().data.offset0 * - MKLDNNExtensionUtils::sizeOfDataType(mkldnn::memory::data_type(getChildEdgeAt(0)->getMemory().GetDescriptor().data.data_type)); + const auto desc = getChildEdgeAt(0)->getMemory().GetDescWithType(); + start_offset_out = desc->getOffsetPadding() * desc->getPrecision().size(); }; auto collapseLastDims = [](std::vector& dims, int dimsToCollapse) { @@ -1312,10 +1329,10 @@ void MKLDNNEltwiseNode::createPrimitive() { }; auto outBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - tensorRank = std::max(static_cast(optimalTensorRank), outBlockingDesc.getBlockDims().size()); + tensorRank = std::max(static_cast(optimalTensorRank), outBlockingDesc->getBlockDims().size()); initDims(tensorRank); - auto outOrder = outBlockingDesc.getOrder(); + auto outOrder = outBlockingDesc->getOrder(); size_t oc_size = 0; offsets_oc.resize(tensorRank, 0); if (isFusedWith(FakeQuantize)) { @@ -1345,7 +1362,7 @@ void MKLDNNEltwiseNode::createPrimitive() { bool hasDifferentDims = false; while (currentJitWorkAmount < minimalJitWorkAmount && currentJitWorkAmount < fullWorkAmount && // we shouldn't collapse batch dimension in case dynamic batch is enabled - (!isDynBatchEnabled || (outBlockingDesc.getBlockDims().size() - collapsedDims > 2))) { + (!isDynBatchEnabled || (outBlockingDesc->getBlockDims().size() - collapsedDims > 2))) { if (dims_out.size() - collapsedDims - 2 < 0) break; @@ -1397,7 +1414,7 @@ void MKLDNNEltwiseNode::createPrimitive() { } } - batchDimIdx = tensorRank - outBlockingDesc.getBlockDims().size() + collapsedDims; + batchDimIdx = tensorRank - 
outBlockingDesc->getBlockDims().size() + collapsedDims; schedulerWorkAmount = fullWorkAmount / dims_out[dims_out.size() - 1]; initOffsets(tensorRank); @@ -1414,10 +1431,10 @@ void MKLDNNEltwiseNode::createPrimitive() { for (int i = 0; i < inputNum; i++) { jep.src_size[i] = dims_in[i][dims_in[i].size() - 1]; - jep.src_prc[i] = getParentEdgesAtPort(i).front()->getMemory().GetDesc().getPrecision(); + jep.src_prc[i] = getParentEdgesAtPort(i).front()->getMemory().getDesc().getPrecision(); } jep.dst_size = dims_out[dims_out.size() - 1]; - jep.dst_prc = getChildEdgesAtPort(0).front()->getMemory().GetDesc().getPrecision(); + jep.dst_prc = getChildEdgesAtPort(0).front()->getMemory().getDesc().getPrecision(); jep.oc_size = oc_size; jep.work_amount = dims_out.back(); @@ -1638,13 +1655,13 @@ bool MKLDNNEltwiseNode::canBeInPlace() const { } } - return getParentEdgesAtPort(0)[0].get()->getShape() == getChildEdgesAtPort(0)[0].get()->getShape(); + return getInputShapeAtPort(0) == getOutputShapeAtPort(0); } void MKLDNNEltwiseNode::fuseInto(MKLDNNNodePtr& parentNode) { // Handling Convolution custom Add node fusing case which is processed via dnnl append_sum() API. specialConvolutionAddFusing = (parentNode->getType() == Convolution || parentNode->getType() == BinaryConvolution) && getAlgorithm() == EltwiseAdd && - getParentEdgesAtPort(0)[0]->getShape() == getParentEdgesAtPort(1)[0]->getShape(); + getInputShapeAtPort(0) == getInputShapeAtPort(1); if (!specialConvolutionAddFusing && canBePerformedAsScaleShift(parentNode.get())) { fillScalesAndShifts(parentNode.get(), scales, shifts, 16); } @@ -1748,7 +1765,7 @@ bool MKLDNNEltwiseNode::canFuse(const MKLDNNNodePtr& node) const { } // We can use optimized execution with fusions only in cases when dim rank is less or equal to the maximum possible - if (node->getParentEdgesAtPort(0).front()->getShape().getRank() > MAX_ELTWISE_DIM_RANK) + if (node->getInputShapeAtPort(0).getRank() > MAX_ELTWISE_DIM_RANK) return false; return true; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h index e1719be037fac5..6f4f74a90fc64e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_eltwise_node.h @@ -88,6 +88,8 @@ class MKLDNNEltwiseNode : public MKLDNNNode { bool isWithBroadcast(); bool isSpecialConvolutionAddFusing() const { return specialConvolutionAddFusing; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: mkldnn::algorithm mkldnnAlgorithm = mkldnn::algorithm::undef; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp index 4499e91dacb9bd..0f1c1da89de31a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.cpp @@ -11,8 +11,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNEmbeddingBagOffsetSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNEmbeddingBagOffsetSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto embBagOffsetSumOp = 
ngraph::as_type_ptr(op); if (!embBagOffsetSumOp) { errorMessage = "Node is not an instance of the EmbeddingBagOffsetsSum operation from opset v3."; @@ -65,9 +69,9 @@ void MKLDNNEmbeddingBagOffsetSumNode::initSupportedPrimitiveDescriptors() { std::vector inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, {LayoutType::ncsp, Precision::I32}, {LayoutType::ncsp, Precision::I32}}); - if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) + if (inputShapes.size() > DEFAULT_INDEX_IDX) inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); - if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) + if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); @@ -122,8 +126,9 @@ void MKLDNNEmbeddingBagOffsetSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), - getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); + const auto &inputMem = getParentEdgeAt(0)->getMemory(); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), + inputMem .getStaticDims(), getChildEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()); } bool MKLDNNEmbeddingBagOffsetSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.h index 26a38b05081c0c..8827ad545d95c3 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_offset_sum_node.h @@ -23,7 +23,7 @@ class MKLDNNEmbeddingBagOffsetSumNode : public MKLDNNNode, public MKLDNNEmbeddin void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: void initFromInputs() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp index f185d08588157d..d95fbb5ca7a916 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.cpp @@ -11,8 +11,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNEmbeddingBagPackedSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNEmbeddingBagPackedSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto embBagPackedSumOp = ngraph::as_type_ptr(op); if (!embBagPackedSumOp) { errorMessage = "Node is not an instance of the EmbeddingBagPackedSum operation from opset v3."; @@ -60,7 +64,7 @@ void MKLDNNEmbeddingBagPackedSumNode::initSupportedPrimitiveDescriptors() { std::vector 
inDataConfigurators({{LayoutType::ncsp, inDataPrecision}, {LayoutType::ncsp, Precision::I32}}); - if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) + if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, inDataPrecision}}, impl_desc_type::ref_any); @@ -89,8 +93,9 @@ void MKLDNNEmbeddingBagPackedSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), - getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); + const auto &inputMem = getParentEdgeAt(0)->getMemory(); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), + inputMem .getStaticDims(), getChildEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()); } bool MKLDNNEmbeddingBagPackedSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.h index 9d67116a499f77..b52c17ee85888b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_bag_packed_sum_node.h @@ -23,7 +23,7 @@ class MKLDNNEmbeddingBagPackedSumNode : public MKLDNNNode, public MKLDNNEmbeddin void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: void initFromInputs() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp index 1cea74dc5fb886..6196c6478ac2e2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.cpp @@ -11,8 +11,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNEmbeddingSegmentsSumNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto embBagSegSumOp = ngraph::as_type_ptr(op); if (!embBagSegSumOp) { errorMessage = "Node is not an instance of the EmbeddingSegmentsSum operation from opset v3."; @@ -66,9 +70,9 @@ void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() { {LayoutType::ncsp, Precision::I32}, {LayoutType::ncsp, Precision::I32}, {LayoutType::ncsp, Precision::I32}}); - if (getOriginalInputsNumber() > DEFAULT_INDEX_IDX) + if (inputShapes.size() > DEFAULT_INDEX_IDX) inDataConfigurators.push_back({LayoutType::ncsp, Precision::I32}); - if (getOriginalInputsNumber() > PER_SAMPLE_WEIGHTS_IDX) + if (inputShapes.size() > PER_SAMPLE_WEIGHTS_IDX) inDataConfigurators.push_back({LayoutType::ncsp, inDataPrecision}); addSupportedPrimDesc(inDataConfigurators, {{LayoutType::ncsp, 
inDataPrecision}}, impl_desc_type::ref_any); @@ -76,7 +80,7 @@ void MKLDNNEmbeddingSegmentsSumNode::initSupportedPrimitiveDescriptors() { void MKLDNNEmbeddingSegmentsSumNode::initFromInputs() { indices_ = reinterpret_cast(getParentEdgeAt(INDICES_IDX)->getMemoryPtr()->GetPtr()); - indicesSize_ = getParentEdgeAt(INDICES_IDX)->getShape().getElementsCount(); + indicesSize_ = getParentEdgeAt(INDICES_IDX)->getMemory().GetShape().getElementsCount(); segmentIds_ = reinterpret_cast(getParentEdgeAt(SEGMENT_ID_IDX)->getMemoryPtr()->GetPtr()); @@ -124,8 +128,9 @@ void MKLDNNEmbeddingSegmentsSumNode::execute(mkldnn::stream strm) { if (_withWeights) weightsData = reinterpret_cast(getParentEdgeAt(PER_SAMPLE_WEIGHTS_IDX)->getMemoryPtr()->GetPtr()); - MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), - getParentEdgeAt(0)->getShape().getStaticDims(), getChildEdgeAt(0)->getShape().getStaticDims()); + const auto &inputMem = getParentEdgeAt(0)->getMemory(); + MKLDNNEmbeddingBagSumNode::execute(srcData, weightsData, dstData, inputMem .getDesc().getPrecision(), + inputMem .getStaticDims(), getChildEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()); } bool MKLDNNEmbeddingSegmentsSumNode::created() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.h index b20e9d6765095b..7346e6c3b4b819 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_embedding_segments_sum_node.h @@ -23,7 +23,7 @@ class MKLDNNEmbeddingSegmentsSumNode : public MKLDNNNode, public MKLDNNEmbedding void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: void initFromInputs() override; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp index d04b80b0086280..708c6d91921cb8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.cpp @@ -215,8 +215,12 @@ static void nms_cf(const float* conf_data, detections = (post_nms_topn == -1 ? 
detections : (std::min)(post_nms_topn, detections)); } -bool MKLDNNExperimentalDetectronDetectionOutputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNExperimentalDetectronDetectionOutputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto doOp = ngraph::as_type_ptr(op); if (!doOp) { errorMessage = "Node is not an instance of the ExperimentalDetectronDetectionOutput from the operations set v6."; @@ -253,8 +257,8 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr return; std::vector inDataConf; - inDataConf.reserve(getOriginalInputsNumber()); - for (int i = 0; i < getOriginalInputsNumber(); ++i) + inDataConf.reserve(inputShapes.size()); + for (int i = 0; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, @@ -265,9 +269,9 @@ void MKLDNNExperimentalDetectronDetectionOutputNode::initSupportedPrimitiveDescr } void MKLDNNExperimentalDetectronDetectionOutputNode::execute(mkldnn::stream strm) { - const int rois_num = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; - assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[1])); - assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[1])); + const int rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0]; + assert(classes_num_ == static_cast(getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims()[1])); + assert(4 * classes_num_ == static_cast(getParentEdgeAt(INPUT_DELTAS)->getMemory().getStaticDims()[1])); const auto* boxes = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); const auto* deltas = reinterpret_cast(getParentEdgeAt(INPUT_DELTAS)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.h index 2df28ce5c4983b..aac589b058f4b6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_detection_output_node.h @@ -19,7 +19,7 @@ class MKLDNNExperimentalDetectronDetectionOutputNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: const int INPUT_ROIS {0}; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp index 8bd70dd2a6ebde..977493ed5be325 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.cpp @@ -273,8 +273,12 @@ void fill_output_blobs(const float* proposals, const int* roi_indices, } bool MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::isSupportedOperation - 
(const std::shared_ptr& op, std::string& errorMessage) noexcept { + (const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto proposalOp = ngraph::as_type_ptr(op); if (!proposalOp) { errorMessage = "Node is not an instance of the Proposal from the operations set v0."; @@ -327,20 +331,23 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn } size_t anchor_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_ANCHORS)->getShape().getRank(); i++) { - anchor_dims_size *= getParentEdgeAt(INPUT_ANCHORS)->getShape().getStaticDims()[i]; + const auto &anchorDims = getParentEdgeAt(INPUT_ANCHORS)->getMemory().getStaticDims(); + for (size_t i = 0; i < anchorDims.size(); i++) { + anchor_dims_size *= anchorDims[i]; } size_t deltas_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_DELTAS)->getShape().getRank(); i++) { - deltas_dims_size *= getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[i]; + const auto &deltaDims = getParentEdgeAt(INPUT_DELTAS)->getMemory().getStaticDims(); + for (size_t i = 0; i < deltaDims.size(); i++) { + deltas_dims_size *= deltaDims[i]; } if (anchor_dims_size != deltas_dims_size) IE_THROW() << "'Anchors' blob size for ONNXProposal is incompatible with 'deltas' blob size!"; size_t score_dims_size = 1; - for (size_t i = 0; i < getParentEdgeAt(INPUT_SCORES)->getShape().getRank(); i++) { - score_dims_size *= getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[i]; + const auto &scoreDims = getParentEdgeAt(INPUT_SCORES)->getMemory().getStaticDims(); + for (size_t i = 0; i < scoreDims.size(); i++) { + score_dims_size *= scoreDims[i]; } if (deltas_dims_size != (4 * score_dims_size)) IE_THROW() << "'Deltas' blob size for ONNXProposal is incompatible with 'scores' blob size!"; @@ -354,11 +361,11 @@ void MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode::execute(mkldnn float *p_roi_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); float *p_roi_score_item = reinterpret_cast(getChildEdgesAtPort(OUTPUT_SCORES)[0]->getMemoryPtr()->GetPtr()); - const int anchors_num = getParentEdgeAt(INPUT_SCORES)->getShape().getStaticDims()[0]; + const int anchors_num = scoreDims[0]; // bottom shape: (num_anchors) x H x W - const int bottom_H = getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[1]; - const int bottom_W = getParentEdgeAt(INPUT_DELTAS)->getShape().getStaticDims()[2]; + const int bottom_H = deltaDims[1]; + const int bottom_W = deltaDims[2]; // input image height & width const float img_H = p_img_info_cpu[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.h index b2f5f0bcd89fe1..3caf61e168be01 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_generate_proposals_single_image_node.h @@ -20,7 +20,7 @@ class MKLDNNExperimentalDetectronGenerateProposalsSingleImageNode : public MKLDN void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, 
std::string& errorMessage) noexcept; private: // Inputs: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp index 001257c443d419..10359d50949116 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.cpp @@ -11,8 +11,13 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNExperimentalDetectronPriorGridGeneratorNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNExperimentalDetectronPriorGridGeneratorNode::isSupportedOperation(const std::shared_ptr& op, + std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto priorGridGen = std::dynamic_pointer_cast(op); if (!priorGridGen) { errorMessage = "Only opset6 ExperimentalDetectronPriorGridGenerator operation is supported"; @@ -61,14 +66,14 @@ void MKLDNNExperimentalDetectronPriorGridGeneratorNode::initSupportedPrimitiveDe } void MKLDNNExperimentalDetectronPriorGridGeneratorNode::execute(mkldnn::stream strm) { - const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getShape().getStaticDims()[0]; - assert(getParentEdgeAt(INPUT_PRIORS)->getShape().getStaticDims()[1] == 4); + const int num_priors_ = getParentEdgeAt(INPUT_PRIORS)->getMemory().getStaticDims()[0]; + assert(getParentEdgeAt(INPUT_PRIORS)->getMemory().getStaticDims()[1] == 4); // Execute - const int layer_width = grid_w_ ? grid_w_ : getParentEdgeAt(INPUT_FEATUREMAP)->getShape().getStaticDims()[3]; - const int layer_height = grid_h_ ? grid_h_ : getParentEdgeAt(INPUT_FEATUREMAP)->getShape().getStaticDims()[2]; - const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getShape().getStaticDims()[3]) / layer_width; - const float step_h = stride_h_ ? stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getShape().getStaticDims()[2]) / layer_height; + const int layer_width = grid_w_ ? grid_w_ : getParentEdgeAt(INPUT_FEATUREMAP)->getMemory().getStaticDims()[3]; + const int layer_height = grid_h_ ? grid_h_ : getParentEdgeAt(INPUT_FEATUREMAP)->getMemory().getStaticDims()[2]; + const float step_w = stride_w_ ? stride_w_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getMemory().getStaticDims()[3]) / layer_width; + const float step_h = stride_h_ ? 
stride_h_ : static_cast(getParentEdgeAt(INPUT_IMAGE)->getMemory().getStaticDims()[2]) / layer_height; const auto *bottom_data_0 = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *top_data_0 = reinterpret_cast(getChildEdgesAtPort(OUTPUT_ROIS)[0]->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.h index 9ef117f44e65f7..2f7e224e63c73b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_priorgridgenerator_node.h @@ -19,7 +19,7 @@ class MKLDNNExperimentalDetectronPriorGridGeneratorNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: // Inputs: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp index 09313e30bd64aa..84992e52cadfdf 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.cpp @@ -304,8 +304,13 @@ void reorder_rois(const float *rois, const int* ids, int* mapping, const int roi } } -bool MKLDNNExperimentalDetectronROIFeatureExtractorNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNExperimentalDetectronROIFeatureExtractorNode::isSupportedOperation(const std::shared_ptr& op, + std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto roiFeatureExtractor = std::dynamic_pointer_cast(op); if (!roiFeatureExtractor) { errorMessage = "Only opset6 ExperimentalDetectronROIFeatureExtractor operation is supported"; @@ -340,8 +345,8 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::initSupportedPrimitiveD return; std::vector inDataConf; - inDataConf.reserve(getOriginalInputsNumber()); - for (int i = 0; i < getOriginalInputsNumber(); ++i) + inDataConf.reserve(inputShapes.size()); + for (int i = 0; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, @@ -352,8 +357,8 @@ void MKLDNNExperimentalDetectronROIFeatureExtractorNode::initSupportedPrimitiveD void MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream strm) { const int levels_num = inputShapes.size() - INPUT_FEATURES_START; - const int num_rois = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; - const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getShape().getStaticDims()[1]; + const int num_rois = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0]; + const int channels_num = getParentEdgeAt(INPUT_FEATURES_START)->getMemory().getStaticDims()[1]; const int feaxels_per_roi = pooled_height_ * pooled_width_ * channels_num; auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); @@ -379,8 +384,8 @@ void 
MKLDNNExperimentalDetectronROIFeatureExtractorNode::execute(mkldnn::stream const int level_rois_num = rois_per_level[i + 1] - level_rois_offset; if (level_rois_num > 0) { auto *featuremap = reinterpret_cast(getParentEdgeAt(INPUT_FEATURES_START + i)->getMemoryPtr()->GetPtr()); - const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getShape().getStaticDims()[2]; - const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getShape().getStaticDims()[3]; + const int featuremap_height = getParentEdgeAt(INPUT_FEATURES_START + i)->getMemory().getStaticDims()[2]; + const int featuremap_width = getParentEdgeAt(INPUT_FEATURES_START + i)->getMemory().getStaticDims()[3]; ROIAlignForward_cpu_kernel(feaxels_per_roi * level_rois_num, featuremap, 1.0f / pyramid_scales_[i], diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.h index bfcb9061f26fbe..cb17ee2cadd621 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_roifeatureextractor_node.h @@ -19,7 +19,7 @@ class MKLDNNExperimentalDetectronROIFeatureExtractorNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: const int INPUT_ROIS {0}; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp index f77c3fcb2b08b6..6cf84aac94f2fb 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.cpp @@ -14,8 +14,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNExperimentalDetectronTopKROIsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNExperimentalDetectronTopKROIsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto topKROI = std::dynamic_pointer_cast(op); if (!topKROI) { errorMessage = "Only opset6 ExperimentalDetectronTopKROIs operation is supported"; @@ -56,7 +60,7 @@ void MKLDNNExperimentalDetectronTopKROIsNode::initSupportedPrimitiveDescriptors( } void MKLDNNExperimentalDetectronTopKROIsNode::execute(mkldnn::stream strm) { - const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getShape().getStaticDims()[0]; + const int input_rois_num = getParentEdgeAt(INPUT_ROIS)->getMemory().getStaticDims()[0]; const int top_rois_num = (std::min)(max_rois_num_, input_rois_num); auto *input_rois = reinterpret_cast(getParentEdgeAt(INPUT_ROIS)->getMemoryPtr()->GetPtr()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.h index 76171de71e473c..40cea208abc768 100644 --- 
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_experimental_detectron_topkrois_node.h @@ -19,7 +19,7 @@ class MKLDNNExperimentalDetectronTopKROIsNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: // Inputs: diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp index 13ada3cf81dfa5..d130e753438402 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.cpp @@ -268,8 +268,12 @@ struct jit_extract_image_patches_kernel : public jit_uni_extract_image_patches_k } }; -bool MKLDNNExtractImagePatchesNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNExtractImagePatchesNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto extImgPatcher = std::dynamic_pointer_cast(op); if (!extImgPatcher) { errorMessage = "Only opset3 ExtractImagePatches operation is supported"; @@ -419,12 +423,12 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { char *dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); const size_t dtype_size = getOriginalInputPrecisionAtPort(0).size(); - const auto& inDims = getParentEdgeAt(0)->getShape().getStaticDims(); + const auto& inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); const size_t IC = inDims[1]; const size_t IH = inDims[2]; const size_t IW = inDims[3]; - const auto& outDims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); + const auto& outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); const size_t OB = outDims[0]; const size_t OH = outDims[2]; const size_t OW = outDims[3]; @@ -434,8 +438,8 @@ void MKLDNNExtractImagePatchesNode::execute(mkldnn::stream strm) { const size_t RH = _rates[0], RW = _rates[1]; const size_t PT = _pad_top, PL = _pad_left; - const std::vector istrides = getParentEdgeAt(0)->getMemory().GetDescWithType().getStrides(); - const std::vector ostrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType().getStrides(); + const std::vector istrides = getParentEdgeAt(0)->getMemory().GetDescWithType()->getStrides(); + const std::vector ostrides = getChildEdgesAtPort(0)[0]->getMemory().GetDescWithType()->getStrides(); const std::vector ostrides_partial = {ostrides[0], KW * IC * ostrides[1], IC * ostrides[1], ostrides[1]}; if (extract_image_patches_kernel) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.h index 2990b12d08f2e3..f78730185aa6d6 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_extract_image_patches_node.h @@ -50,7 +50,7 @@ class MKLDNNExtractImagePatchesNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool 
isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: enum class ExtImgPatcherPadType { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp index b08ebae30f4c41..a0acfc5c7b3be4 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fake_quantize_node.cpp @@ -19,7 +19,9 @@ #include "ie_parallel.hpp" #include -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "utils/ngraph_utils.hpp" // Quantization ranges validation is switched off by default in order to avoid regressions on user side // #define VALIDATE_QUANTIZATION_RANGES @@ -820,6 +822,11 @@ struct jit_uni_quantization_kernel : public jit_uni_quantize_kernel, public jit_ bool MKLDNNFakeQuantizeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto fq = std::dynamic_pointer_cast(op); if (!fq) { errorMessage = "Only opset1 FakeQuantize operation is supported"; @@ -1093,13 +1100,13 @@ MKLDNNFakeQuantizeNode::MKLDNNFakeQuantizeNode(const std::shared_ptr MKLDNNFakeQuantizeNode::getDataFormats() const { // Special case for first FQ in the network - if (getParentEdgesAtPort(0)[0]->getShape().getStaticDims()[getAxis()] == 3) { + if (getInputShapeAtPort(0).getStaticDims()[getAxis()] == 3) { return { LayoutType::ncsp }; } else { if (isBinarization()) { return { LayoutType::nspc }; } else { - if (one_of(getParentEdgesAtPort(0)[0]->getShape().getRank(), 4, 5)) { + if (one_of(getInputShapeAtPort(0).getRank(), 4, 5)) { if (getAxis() == 1) { auto blkFormat = mayiuse(cpu::x64::avx512_common) ? 
LayoutType::nCsp16c : LayoutType::nCsp8c; return { blkFormat, LayoutType::nspc, LayoutType::ncsp }; @@ -1140,12 +1147,12 @@ void MKLDNNFakeQuantizeNode::getSupportedDescriptors() { IE_THROW() << errorPrefix << "has unsupported number of parent edges at port " << i; } - if (getParentEdgesAtPort(0)[0]->getShape().getRank() != getChildEdgesAtPort(0)[0]->getShape().getRank()) { + if (getInputShapeAtPort(0).getRank() != getOutputShapeAtPort(0).getRank()) { IE_THROW() << errorPrefix << "has different ranks for input and output tensors"; } if (isBinarization()) { - if (getParentEdgesAtPort(0)[0]->getShape().getRank() != 4ul) { + if (getInputShapeAtPort(0).getRank() != 4ul) { IE_THROW() << errorPrefix << "doesn't support input/output rank != 4"; } } @@ -1192,10 +1199,10 @@ void MKLDNNFakeQuantizeNode::initSupportedPrimitiveDescriptors() { if (i == 0) { auto descCreator = BlockedDescCreator::getCommonCreators().at(fmt); - dataConfig.desc = descCreator->createUniqueDesc(getInputPrecision(), getParentEdgeAt(i)->getShape().getStaticDims()); + dataConfig.desc = descCreator->createSharedDesc(getInputPrecision(), getInputShapeAtPort(i)); } else { auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp); - dataConfig.desc = descCreator->createUniqueDesc(Precision::FP32, getParentEdgeAt(i)->getShape().getStaticDims()); + dataConfig.desc = descCreator->createSharedDesc(Precision::FP32, getInputShapeAtPort(i)); } config.inConfs.push_back(dataConfig); } @@ -1204,7 +1211,7 @@ dataConfig.inPlace = -1; dataConfig.constant = false; auto descCreator = BlockedDescCreator::getCommonCreators().at(fmt); - dataConfig.desc = descCreator->createUniqueDesc(getOutputPrecision(), getChildEdgeAt(0)->getShape().getStaticDims()); + dataConfig.desc = descCreator->createSharedDesc(getOutputPrecision(), getOutputShapeAtPort(0)); config.outConfs.push_back(dataConfig); supportedPrimitiveDescriptors.push_back({config, impl_type}); @@ -1222,12 +1229,12 @@ jqp.dst_prc = config.outConfs[0].desc->getPrecision(); auto srcDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - jqp.s_str = srcDesc.getStrides(); + jqp.s_str = srcDesc->getStrides(); auto dstDesc = getChildEdgeAt(0)->getMemory().GetDescWithType(); - jqp.d_str = dstDesc.getStrides(); + jqp.d_str = dstDesc->getStrides(); - jqp.is_planar = srcDesc.hasLayoutType(LayoutType::ncsp) && one_of(srcDesc.getShape().getRank(), 3, 4, 5); + jqp.is_planar = srcDesc->hasLayoutType(LayoutType::ncsp) && one_of(srcDesc->getShape().getRank(), 3, 4, 5); jqp.op_type = getAlgorithm(); @@ -1256,10 +1263,10 @@ void MKLDNNFakeQuantizeNode::createPrimitive() { if (quantize_kernel) quantize_kernel->create_ker(); - size_t axisSize = getParentEdgeAt(0)->getShape().getStaticDims()[getAxis()]; + size_t axisSize = getParentEdgesAtPort(0)[0]->getMemory().GetShape().getStaticDims()[getAxis()]; size_t axisPaddedSize = rnd_up(axisSize, 16); - MKLDNNMemoryDesc weightsDataDesc = {{(uint32_t)axisPaddedSize}, memory::data_type::f32, memory::format_tag::x}; + DnnlBlockedMemoryDesc weightsDataDesc(Shape(InferenceEngine::SizeVector{axisPaddedSize}), memory::data_type::f32, memory::format_tag::x); if (isBinarization()) { auto binarizationThresholdsDataMem = std::make_shared(getEngine()); @@ -1295,8 +1302,8 @@ void MKLDNNFakeQuantizeNode::executeReference() { auto src = reinterpret_cast(srcMemory->GetPtr()); - auto srcDims = srcMemory->GetDesc().getShape().getStaticDims(); - auto
dstDims = dstMemory->GetDesc().getShape().getStaticDims(); + auto srcDims = srcMemory->getStaticDims(); + auto dstDims = dstMemory->getStaticDims(); auto s_str = jqp.s_str; auto d_str = jqp.d_str; @@ -1416,7 +1423,7 @@ void MKLDNNFakeQuantizeNode::executeBinarization() { auto thresholds = reinterpret_cast(internalBlobMemory[0]->GetData()); auto output_mask = reinterpret_cast(internalBlobMemory[1]->GetData()); - auto src_dims = srcMemory->GetDesc().getShape().getStaticDims(); + auto src_dims = srcMemory->getStaticDims(); std::vector s_str = jqp.s_str; size_t tmp = s_str[s_str.size() - 1]; @@ -1459,7 +1466,7 @@ void MKLDNNFakeQuantizeNode::executeQuantization() { auto output_scale = reinterpret_cast(internalBlobMemory[4]->GetData()); auto output_shift = reinterpret_cast(internalBlobMemory[5]->GetData()); - auto& srcDesc = srcMemory->GetDesc(); + auto& srcDesc = srcMemory->getDesc(); auto srcDims = srcDesc.getShape().getStaticDims(); bool is_blk_format = !srcDesc.hasLayoutType(LayoutType::nspc) && one_of(srcDesc.getShape().getRank(), 4, 5); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp index ee8dc1b730b911..cd8b171cd050ba 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.cpp @@ -12,14 +12,20 @@ #include #include #include "utils/general_utils.h" -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNFullyConnectedNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNFullyConnectedNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto fc = std::dynamic_pointer_cast(op); if (!fc) { errorMessage = "Only legacy FullyConnected operation is supported"; @@ -101,8 +107,8 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { outputDataType = memory::data_type::bf16; } - const auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); - const auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); + const auto& inDims = getInputShapeAtPort(0).getStaticDims(); + const auto& outDims = getOutputShapeAtPort(0).getStaticDims(); if (inDims.size() == 3) { weightsDims = InferenceEngine::SizeVector({static_cast(outDims[2]), static_cast(inDims[2])}); @@ -113,7 +119,7 @@ void MKLDNNFullyConnectedNode::getSupportedDescriptors() { } biasesDims.push_back(weightsDims[0]); - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { + for (auto format : getAvailableFormatsForDims(getInputShapeAtPort(0))) { auto in_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(inDims), inputDataType, format); auto out_candidate = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(outDims), outputDataType, mkldnn::memory::format_tag::any); @@ -149,7 +155,7 @@ void MKLDNNFullyConnectedNode::execute(mkldnn::stream strm) { auto oldMem = param->second; auto dims = oldMem.get_desc().dims(); if (dims.size() == 3) { - MKLDNNDims normalizedDims({static_cast(dims[0] * dims[1]), static_cast(dims[2])}); + std::vector normalizedDims({dims[0] * dims[1], dims[2]}); mkldnn::memory::desc 
newMemDesc(oldMem.get_desc().reshape(normalizedDims)); mkldnn::memory newMem(newMemDesc, oldMem.get_engine(), oldMem.get_data_handle()); primArgs.at(argType) = newMem; @@ -237,7 +243,7 @@ std::shared_ptr MKLDNNFullyConnectedNode::initPrimitiveA return attr; } -// WA: creation MKLDNNMemoryDesc with format == any is prohibited +// WA: creation DnnlMemoryDesc with format == any is prohibited // so we create mkldnn::memory::desc directly // we need specific method and can't remove createDescriptor from base class because its used into initDescriptor void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::desc &inputDesc, @@ -261,12 +267,12 @@ void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::de auto normalizedInDims = {inDims[0] * inDims[1], inDims[2]}; auto normalizedOutDims = {outDims[0] * outDims[1], outDims[2]}; in_candidate = mkldnn::memory::desc(normalizedInDims, in_candidate.data_type(), - MKLDNNMemory::GetPlainFormatByRank(normalizedInDims.size())); + MKLDNNExtensionUtils::GetPlainFormatByRank(normalizedInDims.size())); out_candidate = mkldnn::memory::desc(normalizedOutDims, out_candidate.data_type(), - MKLDNNMemory::GetPlainFormatByRank(normalizedOutDims.size())); + MKLDNNExtensionUtils::GetPlainFormatByRank(normalizedOutDims.size())); } - mkldnn::memory::desc wgh_candidate(MKLDNNDims(weightsDims), wdt, mkldnn::memory::format_tag::any); + mkldnn::memory::desc wgh_candidate(MKLDNNExtensionUtils::convertToDnnlDims(weightsDims), wdt, mkldnn::memory::format_tag::any); if (withBiases) { mkldnn::memory::desc bias_candidate(MKLDNNExtensionUtils::convertToDnnlDims(inputShapes[BIAS_ID].getStaticDims()), bdt, @@ -283,28 +289,30 @@ void MKLDNNFullyConnectedNode::createDescriptorInternal(const mkldnn::memory::de } } -void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - createDescriptorInternal(MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0])); +void MKLDNNFullyConnectedNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + createDescriptorInternal(MemoryDescUtils::convertToDnnlMemoryDesc(inputDesc[0])->getDnnlDesc(), + MemoryDescUtils::convertToDnnlMemoryDesc(outputDesc[0])->getDnnlDesc()); } -std::unique_ptr MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - auto desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) : MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)); +std::shared_ptr MKLDNNFullyConnectedNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = idx > 0 ? 
primitive_desc_it.weights_desc(idx - 1) : primitive_desc_it.src_desc(idx); - if (getParentEdgeAt(idx)->getShape().getRank() == 3) { - desc = MKLDNNMemoryDesc(getParentEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); + if (getInputShapeAtPort(idx).getRank() == 3) { + return std::make_shared(MKLDNNExtensionUtils::DataTypeToIEPrecision( + static_cast(desc.data.data_type)), getInputShapeAtPort(idx)); } - return MKLDNNPlugin::make_unique(std::move(desc)); + return MKLDNNExtensionUtils::makeDescriptor(desc); } -std::unique_ptr MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - auto desc = MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)); - if (getChildEdgeAt(idx)->getShape().getRank() == 3) { - desc = MKLDNNMemoryDesc(getChildEdgeAt(idx)->getShape().getStaticDims(), MKLDNNExtensionUtils::IEPrecisionToDataType(desc.getPrecision()), - MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(idx)->getShape().getRank())); +std::shared_ptr MKLDNNFullyConnectedNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { + auto desc = primitive_desc_it.dst_desc(idx); + + if (getOutputShapeAtPort(idx).getRank() == 3) { + return std::make_shared(MKLDNNExtensionUtils::DataTypeToIEPrecision( + static_cast(desc.data.data_type)), getOutputShapeAtPort(idx)); } - return MKLDNNPlugin::make_unique(std::move(desc)); + return MKLDNNExtensionUtils::makeDescriptor(desc); } InferenceEngine::Precision MKLDNNFullyConnectedNode::getRuntimePrecision() const { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h index 01820fdfcc39ea..239bce774fd0ae 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_fullyconnected_node.h @@ -27,21 +27,21 @@ class MKLDNNFullyConnectedNode : public MKLDNNNode { } const std::vector& getPrimitivesPriority() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; - std::unique_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::shared_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::shared_ptr getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; InferenceEngine::Precision getRuntimePrecision() const override; bool canFuse(const MKLDNNNodePtr& node) const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; protected: std::shared_ptr initPrimitiveAttr(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp index e4da50abe8af9a..68799da481a923 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.cpp @@ -16,8 +16,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNGatherElementsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNGatherElementsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto gatherElementsOp = ngraph::as_type_ptr(op); if (!gatherElementsOp) { errorMessage = "Node is not an instance of the GatherElements operation from operation set v6."; @@ -98,7 +102,7 @@ void MKLDNNGatherElementsNode::directExecution() { const auto *indices = reinterpret_cast(getParentEdgeAt(indicesIndex_)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const int outSize = getChildEdgeAt(0)->getShape().getElementsCount(); + const int outSize = getChildEdgesAtPort(0)[0]->getMemory().GetShape().getElementsCount(); auto threadBody = [&](const int ithr, const int nthr) { int start(0lu), end(0lu); splitter(outSize, nthr, ithr, start, end); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h index bc19866768dfcf..a99edf4458e26a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_elements_node.h @@ -22,7 +22,7 @@ class MKLDNNGatherElementsNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: const size_t dataIndex_ = 0; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp index 75ee34dbda5be4..4ca5fe84ecfb3f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.cpp @@ -16,8 +16,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNGatherNDNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNGatherNDNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto gatherElementsOp = ngraph::as_type_ptr(op); if (!gatherElementsOp) { errorMessage = "Node is not an instance of the GatherND operation from operation set v5."; @@ -101,11 +105,11 @@ void MKLDNNGatherNDNode::gatherElementwise() { const auto *indices = reinterpret_cast(getParentEdgeAt(_indicesIndex)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto strides = getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides(); + auto strides = getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType()->getStrides(); const size_t* srcMultipliers = strides.data() + _batchDims; - const size_t cycles = getChildEdgeAt(0)->getShape().getElementsCount() * - getChildEdgeAt(0)->getMemory().GetDesc().getPrecision().size() / (sizeof(dataType) * 
_batchNum); + const size_t cycles = getChildEdgeAt(0)->getMemory().GetShape().getElementsCount() * + getChildEdgeAt(0)->getMemory().getDesc().getPrecision().size() / (sizeof(dataType) * _batchNum); const size_t CS = cycles * _sliceRank; const size_t CB = cycles * _blockSize; const size_t workAmount = _batchNum * cycles; @@ -150,7 +154,7 @@ void MKLDNNGatherNDNode::gatherBlocks() { std::vector srcMultipliers(_sliceRank); for (size_t i = 0; i < _sliceRank ; i++) - srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType().getStrides()[i + _batchDims]; + srcMultipliers[i] = _dataTypeSize * getParentEdgeAt(_dataIndex)->getMemory().GetDescWithType()->getStrides()[i + _batchDims]; const size_t batchStep = _batchStep * _dataTypeSize; const size_t dataStep = _blockSize * _dataTypeSize; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.h index 3845fabaf7005b..3d826621c9d0af 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_nd_node.h @@ -22,7 +22,7 @@ class MKLDNNGatherNDNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: size_t _dataRank; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp index f41a57730a57cc..feb9c2e00d0b58 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.cpp @@ -13,8 +13,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNGatherNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto gatherOp = ngraph::as_type_ptr(op); if (!gatherOp) { errorMessage = "Only opset7 Gather operation is supported"; @@ -92,10 +96,10 @@ void MKLDNNGatherNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix_ << " has unidentified preferable primitive descriptor."; - const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getShape().getStaticDims(); - const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getShape().getStaticDims(); - const SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); - dataSize = getParentEdgeAt(GATHER_DATA)->getMemory().GetDesc().getPrecision().size(); + const SizeVector srcDims = getParentEdgeAt(GATHER_DATA)->getMemory().getStaticDims(); + const SizeVector idxDims = getParentEdgeAt(GATHER_INDEXES)->getMemory().getStaticDims(); + const SizeVector dstDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); + dataSize = getParentEdgeAt(GATHER_DATA)->getMemory().getDesc().getPrecision().size(); indexRange = srcDims[axis]; batchSize = std::accumulate(srcDims.begin(), srcDims.begin() + batchDims, 1, std::multiplies()); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h index 
6c7663bd95fd67..025b944eedc60e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_node.h @@ -22,7 +22,7 @@ class MKLDNNGatherNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: int axis = 0; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp index 89fb6c08167f68..e3c38ac3c6bada 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.cpp @@ -14,8 +14,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNGatherTreeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNGatherTreeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto gatherElementsOp = ngraph::as_type_ptr(op); if (!gatherElementsOp) { errorMessage = "Node is not an instance of the GatherTree operation from operation set v1."; @@ -66,9 +70,9 @@ void MKLDNNGatherTreeNode::initSupportedPrimitiveDescriptors() { } addSupportedPrimDesc({{LayoutType::ncsp, precision}, - {LayoutType::ncsp, precision}, - {LayoutType::ncsp, precision}, - {LayoutType::ncsp, precision}}, + {LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}, + {LayoutType::ncsp, precision}}, {{LayoutType::ncsp, precision}}, impl_desc_type::ref_any); } @@ -84,16 +88,17 @@ template void MKLDNNGatherTreeNode::gatherTreeKernel() noexcept { const auto *step_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_STEP_IDX)->getMemoryPtr()->GetPtr()); const auto * const parent_idx = reinterpret_cast(getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemoryPtr()->GetPtr()); - const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getElementsCount() - - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetDescWithType().getOffsetPadding(); + const size_t parent_idx_size = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetShape().getElementsCount() + - getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().GetDescWithType()->getOffsetPadding(); const auto *max_seq_len = reinterpret_cast(getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getMemoryPtr()->GetPtr()); auto end_token = (reinterpret_cast(getParentEdgeAt(GATHER_TREE_END_TOKEN)->getMemoryPtr()->GetPtr()))[0]; auto * final_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - SizeVector step_idx_dims = getParentEdgeAt(GATHER_TREE_STEP_IDX)->getShape().getStaticDims(); - SizeVector parent_idx_dims = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getShape().getStaticDims(); - SizeVector max_seq_len_dims = getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getShape().getStaticDims(); - SizeVector final_idx_dims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims(); + SizeVector step_idx_dims = getParentEdgeAt(GATHER_TREE_STEP_IDX)->getMemory().getStaticDims(); + SizeVector parent_idx_dims = getParentEdgeAt(GATHER_TREE_PARENT_IDX)->getMemory().getStaticDims(); + SizeVector max_seq_len_dims = 
getParentEdgeAt(GATHER_TREE_MAX_SEQ_LEN)->getMemory().getStaticDims(); + SizeVector final_idx_dims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); + int32_t max_time = step_idx_dims[0]; const size_t batch_size = step_idx_dims[1]; const size_t beam_width = step_idx_dims[2]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.h index 63f34fe6d6e685..4faf0da53699ff 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_gather_tree_node.h @@ -19,7 +19,7 @@ class MKLDNNGatherTreeNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; template void gatherTreeKernel() noexcept; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp index ef87345daae9a1..6829f0f8c54012 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_generic_node.cpp @@ -8,7 +8,8 @@ #include #include #include -#include "cpu_memory_desc_utils.h" +#include "memory_desc/cpu_memory_desc_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -30,13 +31,13 @@ NodeConfig MKLDNNGenericNode::convertLayerToNodeConfig(const InferenceEngine::La for (size_t i = 0; i < layerConfig.inConfs.size(); i++) { config.inConfs[i].inPlace = layerConfig.inConfs[i].inPlace; config.inConfs[i].constant = layerConfig.inConfs[i].constant; - config.inConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.inConfs[i].desc).clone(); + config.inConfs[i].desc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(layerConfig.inConfs[i].desc).clone(); } config.outConfs.resize(layerConfig.outConfs.size()); for (size_t i = 0; i < layerConfig.outConfs.size(); i++) { config.outConfs[i].inPlace = layerConfig.outConfs[i].inPlace; config.outConfs[i].constant = layerConfig.outConfs[i].constant; - config.outConfs[i].desc = MemoryDescUtils::convertToMKLDNNMemoryDesc(layerConfig.outConfs[i].desc).clone(); + config.outConfs[i].desc = MemoryDescUtils::convertToDnnlBlockedMemoryDesc(layerConfig.outConfs[i].desc).clone(); } return config; } @@ -177,7 +178,7 @@ void MKLDNNGenericNode::execLayer() { for (size_t i = 0; i < outputShapes.size(); i++) { if (isDynBatch) { auto out_edge = getChildEdgesAtPort(i)[0]; - auto td = MemoryDescUtils::convertToTensorDesc(out_edge->getMemory().GetDesc()); + auto td = MemoryDescUtils::convertToTensorDesc(out_edge->getMemory().getDesc()); td.setDims(execOutputShapes[i]); outputs.push_back(make_blob_with_precision(td, out_edge->getMemory().GetData())); } else { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp index 605aa2d6af5283..d7da220de729ca 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.cpp @@ -11,8 +11,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNGRNNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool 
MKLDNNGRNNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto grn = std::dynamic_pointer_cast(op); if (!grn) { errorMessage = "Only opset1 GRN operation is supported"; @@ -53,7 +57,7 @@ void MKLDNNGRNNode::execute(mkldnn::stream strm) { const float* src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - SizeVector dims = getParentEdgeAt(0)->getShape().getStaticDims(); + const auto &dims = getParentEdgeAt(0)->getMemory().getStaticDims(); int N = static_cast((dims.size() > 0) ? dims[0] : 1); int C = static_cast((dims.size() > 1) ? dims[1] : 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.h index 8fe8d9d75b04e7..7fb3b6fb955231 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_grn_node.h @@ -19,7 +19,7 @@ class MKLDNNGRNNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: float bias = 1.0f; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp index 33b6fdab4f4984..b6f8a046c71c0e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.cpp @@ -20,6 +20,7 @@ #include "common/cpu_convert.h" #include "utils/cpu_utils.hpp" #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -242,14 +243,14 @@ MKLDNNInputNode::MKLDNNInputNode(const std::shared_ptr& op, const if (constOp) { constant = ConstantType::Const; cloneBlobIfRequired(); - } + } } void MKLDNNInputNode::cloneBlobIfRequired() { - std::vector dims(constOp->get_shape().empty() ? ngraph::Shape(1, 1) : constOp->get_shape()); + Shape shape(constOp->get_shape().empty() ? 
ngraph::Shape(1, 1) : constOp->get_shape()); const auto prec = convertPrecision(constOp->get_element_type()); - const size_t size = dims.size(); - MKLDNNMemoryDesc memDesc(dims, MKLDNNExtensionUtils::IEPrecisionToDataType(prec)); + const size_t size = shape.getRank(); + DnnlBlockedMemoryDesc memDesc(prec, shape); auto cloneBlob = [&, this] () { MKLDNNMemory memory{ getEngine() }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h index 8c57ac8873007e..9e32f9bd0e49aa 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_input_node.h @@ -25,6 +25,12 @@ class MKLDNNInputNode : public MKLDNNNode { void withMeanImage(); MKLDNNMemoryCPtr getMemoryPtr() const; + void executeDynamicImpl(mkldnn::stream strm) override {} + + std::vector shapeInfer() const override { + return std::vector(); + } + private: void cloneBlobIfRequired(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp index df6e4930b54c73..6b5dc95cf9f7aa 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.cpp @@ -1641,8 +1641,13 @@ using ngInterpCoordTransf = ngraph::opset4::Interpolate::CoordinateTransformMode using ngInterpNearMode = ngraph::opset4::Interpolate::NearestMode; using ngInterpShapeCalcMode = ngraph::opset4::Interpolate::ShapeCalcMode; -bool MKLDNNInterpolateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNInterpolateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto interp = std::dynamic_pointer_cast(op); if (!interp) { errorMessage = "Only opset4 Interpolate operation is supported"; @@ -1829,7 +1834,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - srcDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + srcDim = getInputShapeAtPort(DATA_ID).getStaticDims(); int dataRank = srcDim.size(); // get pad @@ -1868,7 +1873,7 @@ void MKLDNNInterpolateNode::getSupportedDescriptors() { } else { srcDimPad = srcDim; } - dstDim = getChildEdgeAt(0)->getShape().getStaticDims(); + dstDim = getOutputShapeAtPort(0).getStaticDims(); } void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { @@ -1894,10 +1899,8 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { inputPrecision = outputPrecision = Precision::FP32; } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); - srcDataSize = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); - dstDataSize = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); + srcDataSize = inputPrecision.size(); + dstDataSize = outputPrecision.size(); inputPrec = inputPrecision; outputPrec = outputPrecision; @@ -1911,63 +1914,48 @@ void MKLDNNInterpolateNode::initSupportedPrimitiveDescriptors() { } config.outConfs.resize(1); - auto targetShapeType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); - auto scalesType = 
MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::FP32); - auto axesType = MKLDNNExtensionUtils::IEPrecisionToDataType(Precision::I32); - - auto pushDesc = [&](memory::format_tag dataFormat, impl_desc_type implDetail) { - config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), - inputDataType, dataFormat); - config.inConfs[TARGET_SHAPE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(TARGET_SHAPE_ID)->getShape().getStaticDims(), - targetShapeType, memory::format_tag::x); - config.inConfs[SCALES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(SCALES_ID)->getShape().getStaticDims(), scalesType, - memory::format_tag::x); + auto targetShapeType = Precision::I32; + auto scalesType = Precision::FP32; + auto axesType = Precision::I32; + + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto pushDesc = [&](LayoutType dataFormat, impl_desc_type implDetail) { + config.inConfs[DATA_ID].desc = creatorsMap.at(dataFormat)->createSharedDesc(inputPrecision, getInputShapeAtPort(DATA_ID)); + config.inConfs[TARGET_SHAPE_ID].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(targetShapeType, getInputShapeAtPort(TARGET_SHAPE_ID)); + config.inConfs[SCALES_ID].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(scalesType, getInputShapeAtPort(SCALES_ID)); + if (isAxesSpecified) - config.inConfs[AXES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES_ID)->getShape().getStaticDims(), axesType, - memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, dataFormat); + config.inConfs[AXES_ID].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(axesType, getInputShapeAtPort(AXES_ID)); + + config.outConfs[0].desc = creatorsMap.at(dataFormat)->createSharedDesc(outputPrecision, getOutputShapeAtPort(0)); supportedPrimitiveDescriptors.push_back({config, implDetail}); }; - auto channels = getParentEdgeAt(DATA_ID)->getShape().getRank() > 1 ? getParentEdgeAt(DATA_ID)->getShape().getStaticDims()[1] : 1; + auto channels = getInputShapeAtPort(DATA_ID).getRank() > 1 ? 
getInputShapeAtPort(DATA_ID).getStaticDims()[1] : 1; if (!mayiuse(cpu::x64::sse41) || mode == InterpolateMode::linear) { - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), ref); + pushDesc(LayoutType::ncsp, ref); } else { // blk and by_channel JIT kernel on sse41 or above machine - if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 4) { - if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::nhwc, jit_avx512); - if (channels != 1) - pushDesc(memory::format_tag::nChw16c, jit_avx512); - } else if (mayiuse(cpu::x64::avx2)) { - pushDesc(memory::format_tag::nhwc, jit_avx2); - if (channels != 1) - pushDesc(memory::format_tag::nChw8c, jit_avx2); - } else { - pushDesc(memory::format_tag::nhwc, jit_sse42); - if (channels != 1) - pushDesc(memory::format_tag::nChw8c, jit_sse42); - } - } else if (getParentEdgeAt(DATA_ID)->getShape().getRank() == 5 && mode != InterpolateMode::cubic) { + if (getInputShapeAtPort(DATA_ID).getRank() == 4 || (getInputShapeAtPort(DATA_ID).getRank() == 5 && mode != InterpolateMode::cubic)) { if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::ndhwc, jit_avx512); + pushDesc(LayoutType::nspc, jit_avx512); if (channels != 1) - pushDesc(memory::format_tag::nCdhw16c, jit_avx512); + pushDesc(LayoutType::nCsp16c, jit_avx512); } else if (mayiuse(cpu::x64::avx2)) { - pushDesc(memory::format_tag::ndhwc, jit_avx2); + pushDesc(LayoutType::nspc, jit_avx2); if (channels != 1) - pushDesc(memory::format_tag::nCdhw8c, jit_avx2); + pushDesc(LayoutType::nCsp8c, jit_avx2); } else { - pushDesc(memory::format_tag::ndhwc, jit_sse42); + pushDesc(LayoutType::nspc, jit_sse42); if (channels != 1) - pushDesc(memory::format_tag::nCdhw8c, jit_sse42); + pushDesc(LayoutType::nCsp8c, jit_sse42); } } // planar for 1.ref on machine without sse41(if no sse41, canFuse() is false). 
2.JIT kernel for f32 && avx2(gather).(with fuse) if (mayiuse(cpu::x64::avx2) && inputPrec == Precision::FP32) { - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), jit_avx2); + pushDesc(LayoutType::ncsp, jit_avx2); } } } @@ -2011,10 +1999,10 @@ void MKLDNNInterpolateNode::createPrimitive() { jcp.ID = srcDimPad5d[2]; jcp.spatial_dim_size = spatialDimSize; - if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { + if (getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) { jcp.layout = InterpolateLayoutType::planar; - } else if (getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c)) { + } else if (getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c)) { jcp.layout = InterpolateLayoutType::block; } else { jcp.layout = InterpolateLayoutType::by_channel; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h index 2b2f1c38d90efe..1f7160e3dce801 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_interpolate_node.h @@ -102,7 +102,7 @@ class MKLDNNInterpolateNode : public MKLDNNNode { } bool canFuse(const MKLDNNNodePtr& node) const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: // nearest neighbor diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp index 702d5b42f0cff1..b85c15b7023a80 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.cpp @@ -11,8 +11,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNLogSoftmaxNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNLogSoftmaxNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto logSoftMax = std::dynamic_pointer_cast(op); if (!logSoftMax) { errorMessage = "Only opset5 LogSoftmax operation is supported"; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.h index 456d7321efcdc4..15479cb81ffff5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_log_softmax_node.h @@ -20,7 +20,7 @@ class MKLDNNLogSoftmaxNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: size_t reducedAxisSize; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp index b107fca78343b2..b4e1e28891bd20 100644 
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.cpp @@ -6,13 +6,19 @@ #include #include #include -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNLrnNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNLrnNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto lrn = std::dynamic_pointer_cast(op); if (!lrn) { errorMessage = "Only opset1 LRN operation is supported"; @@ -89,22 +95,19 @@ void MKLDNNLrnNode::getSupportedDescriptors() { precision = InferenceEngine::Precision::FP32; auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - const auto &parentShape = getParentEdgeAt(0)->getShape(); - const auto parentStaticDims = parentShape.getStaticDims(); + const auto &parentShape = getInputShapeAtPort(0); for (auto format : getAvailableFormatsForDims(parentShape)) { - auto in_candidate = MKLDNNPlugin::make_unique(parentStaticDims, inputDataType, format); - createDescriptor({in_candidate.get()}, {}); + auto in_candidate = std::make_shared(parentShape, inputDataType, format); + createDescriptor({in_candidate}, {}); } } -std::unique_ptr MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { +std::shared_ptr MKLDNNLrnNode::getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { if (idx > 0) { - return MKLDNNPlugin::make_unique(getParentEdgeAt(idx)->getShape().getStaticDims(), - MKLDNNExtensionUtils::IEPrecisionToDataType(getOriginalInputPrecisions()[idx]), - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(idx)->getShape().getRank())); + return std::make_shared(getOriginalInputPrecisionAtPort(idx), getInputShapeAtPort(idx)); } else { - return MKLDNNNode::getSrcMemDesc(primitive_desc_it, idx); + return MKLDNNExtensionUtils::makeDescriptor(primitive_desc_it.dst_desc(idx)); } } @@ -125,11 +128,11 @@ bool MKLDNNLrnNode::created() const { return getType() == Lrn; } -void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { +void MKLDNNLrnNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { mkldnn::algorithm alg = isAcrossMaps ? 
mkldnn::algorithm::lrn_across_channels : mkldnn::algorithm::lrn_within_channel; MKLDNNDescriptor desc(std::shared_ptr( - new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]), + new mkldnn::lrn_forward::desc(mkldnn::prop_kind::forward_scoring, alg, MemoryDescUtils::convertToDnnlMemoryDesc(inputDesc[0])->getDnnlDesc(), size, alpha, beta, k))); descs.push_back(desc); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h index 295d16b369c191..8ab180829ad211 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_lrn_node.h @@ -17,19 +17,19 @@ class MKLDNNLrnNode : public MKLDNNNode { MKLDNNLrnNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void getSupportedDescriptors() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; size_t descInputNumbers(MKLDNNDescriptor desc) override { return static_cast(getOriginalInputsNumber()); } - std::unique_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; + std::shared_ptr getSrcMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) override; void createPrimitive() override; bool created() const override; bool canBeInPlace() const override { return false; } - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: bool isAcrossMaps = false; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp index fed1158f97eaf2..7a1be4706bb091 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.cpp @@ -14,8 +14,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNMathNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNMathNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } if (initializers.find(op->get_type_info()) == initializers.end()) { errorMessage = "Unsupported Math layer type."; return false; @@ -50,8 +54,8 @@ void MKLDNNMathNode::initSupportedPrimitiveDescriptors() { return; std::vector inDataConf; - inDataConf.reserve(getOriginalInputsNumber()); - for (int i = 0; i < getOriginalInputsNumber(); ++i) + inDataConf.reserve(inputShapes.size()); + for (int i = 0; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, @@ -60,7 +64,7 @@ void MKLDNNMathNode::initSupportedPrimitiveDescriptors() { } void MKLDNNMathNode::execute(mkldnn::stream strm) { - size_t dataSize = getChildEdgeAt(0)->getShape().getElementsCount(); + size_t dataSize = getChildEdgesAtPort(0)[0]->getMemory().GetShape().getElementsCount(); const float *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); diff --git 
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.h index 28260dc476ec54..5c5de72a750a70 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_math_node.h @@ -19,7 +19,7 @@ class MKLDNNMathNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: static std::map&, MKLDNNMathNode& node)>> initializers; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp index a0a7f7eafa419f..5be7995e6adcda 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.cpp @@ -13,13 +13,19 @@ #include "ie_parallel.hpp" #include "common/cpu_memcpy.h" #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNMatMulNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto matMul = std::dynamic_pointer_cast(op); if (!matMul) { errorMessage = "Only opset1 MatMul operation is supported"; @@ -70,9 +76,9 @@ void MKLDNNMatMulNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges for layer " << getName(); - auto inDims0 = getParentEdgeAt(0)->getShape().getStaticDims(); - auto inDims1 = getParentEdgeAt(1)->getShape().getStaticDims(); - auto outDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto inDims0 = getInputShapeAtPort(0).getStaticDims(); + auto inDims1 = getInputShapeAtPort(1).getStaticDims(); + auto outDims = getOutputShapeAtPort(0).getStaticDims(); if (inDims0.size() != inDims1.size() || inDims0.size() != outDims.size()) IE_THROW() << errorPrefix << " has invalid dims count"; @@ -131,24 +137,22 @@ void MKLDNNMatMulNode::initSupportedPrimitiveDescriptors() { } } - auto inputDataType0 = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrec0); - auto inputDataType1 = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrec1); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(InferenceEngine::Precision::FP32); + auto outputPrec = InferenceEngine::Precision::FP32; NodeConfig config; config.dynBatchSupport = true; - auto createDataConfig = [](const std::vector& dims, memory::data_type dataType) -> PortConfig { + auto createDataConfig = [](const Shape& shape, InferenceEngine::Precision dataType) -> PortConfig { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MKLDNNPlugin::make_unique(dims, dataType, MKLDNNMemory::GetPlainFormatByRank(dims.size())); + dataConfig.desc = std::make_shared(dataType, shape); return dataConfig; }; - config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType0)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getShape().getStaticDims(), inputDataType1)); - 
config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType)); + config.inConfs.push_back(createDataConfig(getInputShapeAtPort(0), inPrec0)); + config.inConfs.push_back(createDataConfig(getInputShapeAtPort(1), inPrec1)); + config.outConfs.push_back(createDataConfig(getOutputShapeAtPort(0), outputPrec)); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::gemm_any); } @@ -181,8 +185,8 @@ void MKLDNNMatMulNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << errorPrefix << " did not set preferable primitive descriptor"; - auto inDims0 = src0MemPtr->GetDims(); - auto outDims = dstMemPtr->GetDims(); + auto inDims0 = src0MemPtr->getStaticDims(); + auto outDims = dstMemPtr->getStaticDims(); params.src0_mem_ptr = src0MemPtr; params.src1_mem_ptr = src1MemPtr; @@ -207,7 +211,7 @@ void MKLDNNMatMulNode::createPrimitive() { params.shift1 = params.M * params.N * params.MB2; params.shift2 = params.M * params.N; - runtimePrecision = getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(); + runtimePrecision = getParentEdgeAt(0)->getMemory().getDesc().getPrecision(); } inline void process_gemm(char transa, char transb, int M, int N, int K, float alpha, const float *A, int lda, @@ -301,7 +305,7 @@ bool MKLDNNMatMulNode::created() const { return getType() == MatMul; } -int MKLDNNMatMulNode::getMaxBatch() { +size_t MKLDNNMatMulNode::getMaxBatch() { if (!outputShapes.empty()) return outputShapes[0].getStaticDims()[0]; return 0; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h index 3f056cc99533d9..df13da6646b3b8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matmul_node.h @@ -21,11 +21,11 @@ class MKLDNNMatMulNode : public MKLDNNNode { void createPrimitive() override; void execute(mkldnn::stream strm) override; bool created() const override; - int getMaxBatch() override; + size_t getMaxBatch() override; InferenceEngine::Precision getRuntimePrecision() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: float alpha = 1.f; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp index ade776e8ce3b04..4317596fd325dc 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.cpp @@ -22,8 +22,12 @@ using MatrixNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE& op, std::string& errorMessage) noexcept { +bool MKLDNNMatrixNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto nms = std::dynamic_pointer_cast(op); if (!nms) { errorMessage = "Only internal MatrixNms operation is supported"; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h index 5d85a3669529d3..e0b77e3f08062f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_matrix_nms_node.h @@ -31,7 +31,7 
@@ class MKLDNNMatrixNmsNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: // input diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp index 3218bc54eb0300..425fb5cf550b5b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_memory_node.cpp @@ -8,6 +8,8 @@ #include "mkldnn_memory_node.hpp" #include "common/cpu_memcpy.h" #include "utils/general_utils.h" +#include "memory_desc/dnnl_blocked_memory_desc.h" +#include "utils/ngraph_utils.hpp" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -25,6 +27,11 @@ MKLDNNMemoryNode::MKLDNNMemoryNode(const std::shared_ptr& op) { bool MKLDNNMemoryOutputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (!MKLDNNPlugin::one_of(op->get_type_info(), ngraph::op::v3::Assign::type_info, ngraph::op::v6::Assign::type_info)) { @@ -59,14 +66,12 @@ void MKLDNNMemoryOutputNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(1); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); + config.inConfs[0].desc = std::make_shared(precision, getInputShapeAtPort(0)); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -80,6 +85,11 @@ void MKLDNNMemoryOutputNode::execute(mkldnn::stream strm) { bool MKLDNNMemoryInputNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (!MKLDNNPlugin::one_of(op->get_type_info(), ngraph::op::v3::ReadValue::type_info, ngraph::op::v6::ReadValue::type_info)) { @@ -106,10 +116,11 @@ MKLDNNMemoryInputNode::MKLDNNMemoryInputNode(const std::shared_ptr void MKLDNNMemoryInputNode::createPrimitive() { MKLDNNInputNode::createPrimitive(); - dataStore->Create(getChildEdgeAt(0)->getMemory().GetDesc()); + dataStore->Create(getChildEdgeAt(0)->getMemory().getDesc()); // default memory state is zero filled - dataStore->FillZero(); + if (dataStore->getDesc().getMaxMemSize() != MemoryDesc::UNDEFINED_SIZE) + dataStore->FillZero(); } /** diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp index 64dccbdaeab47e..64d08e0fab3255 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.cpp @@ -24,8 +24,12 @@ using namespace InferenceEngine; using ngNmsSortResultType = ngraph::op::util::NmsBase::SortResultType; using MulticlassNmsIEInternal = ngraph::op::internal::NmsStaticShapeIE; 
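Note on the recurring pattern in this patch: each node's isSupportedOperation() gains the same early-exit guard, rejecting operations with dynamic shapes before any node-specific checks (it is added just below for MultiClassNms as well). A minimal sketch of the pattern follows; isDynamicNgraphNode() is declared in utils/ngraph_utils.hpp, and the body shown here is only an assumption based on how the guard is used, not the plugin's actual implementation. The function name isSupportedOperationSketch is hypothetical.

#include <ngraph/node.hpp>
#include <memory>
#include <string>

// Assumed shape of the helper from utils/ngraph_utils.hpp (illustrative only):
// a node is treated as dynamic if the node itself or any of its outputs has a
// partial shape that is not fully static.
inline bool isDynamicNgraphNode(const std::shared_ptr<const ngraph::Node>& op) {
    bool ret = op->is_dynamic();
    for (size_t i = 0; i < op->get_output_size(); i++)
        ret = ret || op->get_output_partial_shape(i).is_dynamic();
    return ret;
}

// Guard placed at the top of every isSupportedOperation() touched by this change.
bool isSupportedOperationSketch(const std::shared_ptr<const ngraph::Node>& op,
                                std::string& errorMessage) noexcept {
    if (isDynamicNgraphNode(op)) {
        errorMessage = "Doesn't support op with dynamic shapes";
        return false;
    }
    // ... node-specific type and attribute checks follow ...
    return true;
}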
-bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNMultiClassNmsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto nms = std::dynamic_pointer_cast(op); if (!nms) { errorMessage = "Only internal MulitClassNonMaxSuppression operation is supported"; @@ -135,7 +139,7 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { const float* boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); const float* scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); - auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().GetDesc().getShape().getStaticDims(); + auto dims_boxes = getParentEdgeAt(NMS_BOXES)->getMemory().getStaticDims(); if (max_output_boxes_per_class == 0) return; @@ -146,8 +150,8 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { int* selected_num = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDNUM)[0]->getMemoryPtr()->GetPtr()); - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType()->getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType()->getStrides(); if ((nms_eta >= 0) && (nms_eta < 1)) { nmsWithEta(boxes, scores, boxesStrides, scoresStrides); @@ -232,7 +236,7 @@ void MKLDNNMultiClassNmsNode::execute(mkldnn::stream strm) { }); } - const size_t selectedBoxesNum = getChildEdgeAt(NMS_SELECTEDINDICES)->getMemory().GetDesc().getShape().getStaticDims()[0]; + const size_t selectedBoxesNum = getChildEdgeAt(NMS_SELECTEDINDICES)->getMemory().getStaticDims()[0]; const size_t validOutputs = std::min(startOffset, selectedBoxesNum); std::vector m_selected_num; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp index 0627f72cea0df8..ac4a350bc564cd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_multiclass_nms.hpp @@ -27,7 +27,7 @@ class MKLDNNMultiClassNmsNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: // input (port Num) diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp index f476aa8dec5231..16e1eb473f60ac 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp @@ -23,6 +23,7 @@ #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -603,6 +604,11 @@ struct jit_uni_mvn_kernel_f32 : public jit_uni_mvn_kernel, public jit_generator bool MKLDNNMVNNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if 
(op->get_output_partial_shape(0).rank().is_dynamic()) { errorMessage = "Unsupported dynamic input rank."; return false; @@ -720,13 +726,8 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { inputPrecision = outputPrecision = Precision::FP32; } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); - - input_prec = inputPrecision; - output_prec = outputPrecision; - src_data_size = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); - dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); + src_data_size = inputPrecision.size(); + dst_data_size = outputPrecision.size(); bool canBeInplace = (src_data_size == dst_data_size) && (getParentEdgeAt(0)->getParent()->getChildEdges().size() == 1) && @@ -742,14 +743,14 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { config.inConfs[0].inPlace = -1; config.outConfs[0].inPlace = canBeInplace ? 0 : -1; if (inputsNum == 2) { - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::s32, - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(1)->getShape().getRank())); + config.inConfs[1].desc = std::make_shared(InferenceEngine::Precision::I32, getInputShapeAtPort(1)); config.inConfs[1].constant = true; } - auto pushDesc = [&](memory::format_tag format, impl_desc_type impl_type) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, format); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), outputDataType, format); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto pushDesc = [&](LayoutType format, impl_desc_type impl_type) { + config.inConfs[0].desc = creatorsMap.at(format)->createSharedDesc(inputPrecision, getInputShapeAtPort(0)); + config.outConfs[0].desc = creatorsMap.at(format)->createSharedDesc(outputPrecision, getOutputShapeAtPort(0)); supportedPrimitiveDescriptors.push_back({config, impl_type}); }; @@ -766,23 +767,17 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { if (mayiuse(cpu::x64::sse41)) { // nspc - if (getParentEdgeAt(0)->getShape().getRank() == 4) { - pushDesc(memory::format_tag::nhwc, impl_type); - } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { - pushDesc(memory::format_tag::ndhwc, impl_type); + if (getInputShapeAtPort(0).getRank() == 4 || getInputShapeAtPort(0).getRank() == 5) { + pushDesc(LayoutType::nspc, impl_type); } // blk if (impl_desc_type::jit_avx512 == impl_type) { - if (getParentEdgeAt(0)->getShape().getRank() == 4) { - pushDesc(memory::format_tag::nChw16c, impl_type); - } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { - pushDesc(memory::format_tag::nCdhw16c, impl_type); + if (getInputShapeAtPort(0).getRank() == 4 || getInputShapeAtPort(0).getRank() == 5) { + pushDesc(LayoutType::nCsp16c, impl_type); } } else if (impl_desc_type::jit_avx2 == impl_type || impl_desc_type::jit_sse42 == impl_type) { - if (getParentEdgeAt(0)->getShape().getRank() == 4) { - pushDesc(memory::format_tag::nChw8c, impl_type); - } else if (getParentEdgeAt(0)->getShape().getRank() == 5) { - pushDesc(memory::format_tag::nCdhw8c, impl_type); + if (getInputShapeAtPort(0).getRank() == 4 || getInputShapeAtPort(0).getRank() == 5) { + pushDesc(LayoutType::nCsp8c, impl_type); } } } @@ -790,7 +785,7 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() { // planar if (canBeInplace) 
config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), impl_type); + pushDesc(LayoutType::ncsp, impl_type); } void MKLDNNMVNNode::createPrimitive() { @@ -803,7 +798,7 @@ void MKLDNNMVNNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - const SizeVector in_dims = getParentEdgeAt(0)->getShape().getStaticDims(); + const SizeVector in_dims = srcMemPtr->getStaticDims(); transformTo5DCase(in_dims); auto selectedPD = getSelectedPrimitiveDescriptor(); auto jcp = jit_mvn_config_params(); @@ -911,12 +906,12 @@ void MKLDNNMVNNode::execute(mkldnn::stream strm) { uint8_t *dst_data = reinterpret_cast(dstMemPtr->GetPtr()); uint8_t *src_data = reinterpret_cast(srcMemPtr->GetPtr()); - auto dim = getParentEdgeAt(0)->getShape().getStaticDims(); + auto dim = srcMemPtr->getStaticDims(); if (mayiuse(cpu::x64::sse41)) { if (!mvn_mean_kernel || (normalizeVariance_ && !mvn_variance_kernel) || !mvn_kernel) { IE_THROW() << "MVN layer with name '" << getName() << "' doesn't create kernel to execute on sse41 above platform."; } - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) { mvn_pln(src_data, dst_data, dim); } else { mvn_blk(src_data, dst_data, dim); @@ -1170,7 +1165,7 @@ void MKLDNNMVNNode::mvn_blk(const uint8_t* src_data, uint8_t* dst_data, const Si size_t N = 1; size_t C = 1; size_t D = 1; size_t H = 1; size_t W = 1; std::tie(N, C, D, H, W) = shape5D; - bool is_nhwc = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc); + bool is_nhwc = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc); size_t CB = div_up(C, blk_size); @@ -1401,7 +1396,7 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const { } // limit post ops to unary when shape transformed on channel // 1D only fused with unary - int inputRank = getParentEdgeAt(0)->getShape().getRank(); + int inputRank = getInputShapeAtPort(0).getRank(); bool unaryEltwise = one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh, EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven, EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp index a6c0bc07b2840e..c29f2e693e8e6f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp @@ -11,19 +11,37 @@ #include "mkldnn_non_max_suppression_node.h" #include "ie_parallel.hpp" +#include #include #include "utils/general_utils.h" using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNNonMaxSuppressionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNNonMaxSuppressionNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - const auto nms = std::dynamic_pointer_cast(op); - if (!nms) { - errorMessage = "Only internal NonMaxSuppression operation is supported"; + if (op->is_dynamic()) { + errorMessage = "Doesn't support op with dynamic input shapes"; return false; } + + using NonMaxSuppressionV5 = ngraph::op::v5::NonMaxSuppression; 
+ if (!one_of(op->get_type_info(), NonMaxSuppressionV5::type_info, ngraph::op::internal::NonMaxSuppressionIEInternal::type_info)) { + errorMessage = "Only NonMaxSuppression v5 and NonMaxSuppressionIEInternal are supported"; + return false; + } + if (op->get_input_size() > 2 && !dynamic_cast(op->get_input_node_ptr(2))) { + errorMessage = "Doesn't support NonMaxSuppression with undefined max_output_boxes_per_class"; + return false; + } + + if (const auto nms5 = std::dynamic_pointer_cast(op)) { + const auto boxEncoding = nms5->get_box_encoding(); + if (!one_of(boxEncoding, NonMaxSuppressionV5::BoxEncodingType::CENTER, NonMaxSuppressionV5::BoxEncodingType::CORNER)) { + errorMessage = "Supports only CENTER and CORNER box encoding type"; + return false; + } + } } catch (...) { return false; } @@ -38,19 +56,25 @@ MKLDNNNonMaxSuppressionNode::MKLDNNNonMaxSuppressionNode(const std::shared_ptrget_friendly_name() + "' "; - const auto nms = std::dynamic_pointer_cast(op); if (getOriginalInputsNumber() < 2 || getOriginalInputsNumber() > 6) IE_THROW() << errorPrefix << "has incorrect number of input edges: " << getOriginalInputsNumber(); - if (getOriginalOutputsNumber() < 1 || getOriginalOutputsNumber() > 3) + if (getOriginalOutputsNumber() != 3) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getOriginalOutputsNumber(); - boxEncodingType = nms->m_center_point_box ? boxEncoding::CENTER : boxEncoding::CORNER; - - sort_result_descending = nms->m_sort_result_descending; + if (const auto nms5 = std::dynamic_pointer_cast(op)) { + boxEncodingType = static_cast(nms5->get_box_encoding()); + sort_result_descending = nms5->get_sort_result_descending(); + } else if (const auto nmsIe = std::dynamic_pointer_cast(op)) { + boxEncodingType = nmsIe->m_center_point_box ? 
boxEncoding::CENTER : boxEncoding::CORNER; + sort_result_descending = nmsIe->m_sort_result_descending; + } else { + const auto &typeInfo = op->get_type_info(); + IE_THROW() << errorPrefix << " doesn't support NMS: " << typeInfo.name << " v" << typeInfo.version; + } - const SizeVector &boxes_dims = op->get_input_shape(NMS_BOXES); + const auto &boxes_dims = getInputShapeAtPort(NMS_BOXES).getStaticDims(); num_batches = boxes_dims[0]; num_boxes = boxes_dims[1]; if (boxes_dims.size() != 3) @@ -58,7 +82,7 @@ MKLDNNNonMaxSuppressionNode::MKLDNNNonMaxSuppressionNode(const std::shared_ptrget_input_shape(NMS_SCORES); + const auto &scores_dims = getInputShapeAtPort(NMS_SCORES).getStaticDims(); num_classes = scores_dims[1]; if (scores_dims.size() != 3) IE_THROW() << errorPrefix << "has unsupported 'scores' input rank: " << scores_dims.size(); @@ -72,21 +96,11 @@ MKLDNNNonMaxSuppressionNode::MKLDNNNonMaxSuppressionNode(const std::shared_ptrget_input_shape(NMS_MAXOUTPUTBOXESPERCLASS); - inputShape_IOUTHRESHOLD = op->get_input_shape(NMS_IOUTHRESHOLD); - inputShape_SCORETHRESHOLD = op->get_input_shape(NMS_SCORETHRESHOLD); - if (getOriginalInputsNumber() > NMS_SOFTNMSSIGMA) { - inputShape_SOFTNMSSIGMA = op->get_input_shape(NMS_SOFTNMSSIGMA); - } - - outputShape_SELECTEDINDICES = op->get_output_shape(NMS_SELECTEDINDICES); - outputShape_SELECTEDSCORES = op->get_output_shape(NMS_SELECTEDSCORES); - - const SizeVector &valid_outputs_dims = op->get_input_shape(NMS_VALIDOUTPUTS); - if (valid_outputs_dims.size() != 1) - IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_dims.size(); - if (valid_outputs_dims[0] != 1) - IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_dims[1]; + const Shape valid_outputs_shape = getOutputShapeAtPort(NMS_VALIDOUTPUTS); + if (valid_outputs_shape.getRank() != 1) + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output rank: " << valid_outputs_shape.getRank(); + if (valid_outputs_shape.getDims()[0] != 1) + IE_THROW() << errorPrefix << "has unsupported 'valid_outputs' output 1st dimension size: " << valid_outputs_shape.getDims()[1]; } void MKLDNNNonMaxSuppressionNode::initSupportedPrimitiveDescriptors() { @@ -98,32 +112,33 @@ void MKLDNNNonMaxSuppressionNode::initSupportedPrimitiveDescriptors() { checkPrecision(getOriginalInputPrecisionAtPort(NMS_BOXES), supportedFloatPrecision, "boxes", inType); checkPrecision(getOriginalInputPrecisionAtPort(NMS_SCORES), supportedFloatPrecision, "scores", inType); - checkPrecision(getOriginalInputPrecisionAtPort(NMS_VALIDOUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType); + checkPrecision(getOriginalOutputPrecisionAtPort(NMS_VALIDOUTPUTS), supportedIntOutputPrecision, "valid_outputs", outType); const std::vector supportedPrecision = {Precision::I16, Precision::U8, Precision::I8, Precision::U16, Precision::I32, Precision::U32, Precision::I64, Precision::U64}; - check1DInput(inputShape_MAXOUTPUTBOXESPERCLASS, supportedPrecision, "max_output_boxes_per_class", NMS_MAXOUTPUTBOXESPERCLASS); - check1DInput(inputShape_IOUTHRESHOLD, supportedFloatPrecision, "iou_threshold", NMS_IOUTHRESHOLD); - check1DInput(inputShape_SCORETHRESHOLD, supportedFloatPrecision, "score_threshold", NMS_SCORETHRESHOLD); - - if (getOriginalInputsNumber() > NMS_SOFTNMSSIGMA) { - check1DInput(inputShape_SOFTNMSSIGMA, supportedFloatPrecision, "soft_nms_sigma", NMS_SOFTNMSSIGMA); - } + if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) + 
check1DInput(getInputShapeAtPort(NMS_MAXOUTPUTBOXESPERCLASS), supportedPrecision, "max_output_boxes_per_class", NMS_MAXOUTPUTBOXESPERCLASS); + if (inputShapes.size() > NMS_IOUTHRESHOLD) + check1DInput(getInputShapeAtPort(NMS_IOUTHRESHOLD), supportedFloatPrecision, "iou_threshold", NMS_IOUTHRESHOLD); + if (inputShapes.size() > NMS_SCORETHRESHOLD) + check1DInput(getInputShapeAtPort(NMS_SCORETHRESHOLD), supportedFloatPrecision, "score_threshold", NMS_SCORETHRESHOLD); + if (inputShapes.size() > NMS_SOFTNMSSIGMA) + check1DInput(getInputShapeAtPort(NMS_SOFTNMSSIGMA), supportedFloatPrecision, "soft_nms_sigma", NMS_SOFTNMSSIGMA); - checkOutput(outputShape_SELECTEDINDICES, supportedIntOutputPrecision, "selected_indices", NMS_SELECTEDINDICES); - checkOutput(outputShape_SELECTEDSCORES, supportedFloatPrecision, "selected_scores", NMS_SELECTEDSCORES); + checkOutput(getOutputShapeAtPort(NMS_SELECTEDINDICES), supportedIntOutputPrecision, "selected_indices", NMS_SELECTEDINDICES); + checkOutput(getOutputShapeAtPort(NMS_SELECTEDSCORES), supportedFloatPrecision, "selected_scores", NMS_SELECTEDSCORES); std::vector inDataConf; - inDataConf.reserve(getOriginalInputsNumber()); - for (int i = 0; i < getOriginalInputsNumber(); ++i) { + inDataConf.reserve(inputShapes.size()); + for (int i = 0; i < inputShapes.size(); ++i) { Precision inPrecision = i == NMS_MAXOUTPUTBOXESPERCLASS ? Precision::I32 : Precision::FP32; inDataConf.emplace_back(LayoutType::ncsp, inPrecision); } std::vector outDataConf; - outDataConf.reserve(getOriginalOutputsNumber()); - for (int i = 0; i < getOriginalOutputsNumber(); ++i) { + outDataConf.reserve(outputShapes.size()); + for (int i = 0; i < outputShapes.size(); ++i) { Precision outPrecision = i == NMS_SELECTEDSCORES ? Precision::FP32 : Precision::I32; outDataConf.emplace_back(LayoutType::ncsp, outPrecision); } @@ -135,44 +150,35 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { const float *boxes = reinterpret_cast(getParentEdgeAt(NMS_BOXES)->getMemoryPtr()->GetPtr()); const float *scores = reinterpret_cast(getParentEdgeAt(NMS_SCORES)->getMemoryPtr()->GetPtr()); - max_output_boxes_per_class = outputShapes.size() > NMS_SELECTEDSCORES ? 0 : num_boxes; if (inputShapes.size() > NMS_MAXOUTPUTBOXESPERCLASS) { max_output_boxes_per_class = reinterpret_cast(getParentEdgeAt(NMS_MAXOUTPUTBOXESPERCLASS)->getMemoryPtr()->GetPtr())[0]; } + if (!isDynamicNode()) { + max_output_boxes_per_class = std::min(max_output_boxes_per_class, num_boxes); + } + if (max_output_boxes_per_class == 0) return; - iou_threshold = outputShapes.size() > NMS_SELECTEDSCORES ?
0.0f : 1.0f; if (inputShapes.size() > NMS_IOUTHRESHOLD) iou_threshold = reinterpret_cast(getParentEdgeAt(NMS_IOUTHRESHOLD)->getMemoryPtr()->GetPtr())[0]; - score_threshold = 0.0f; if (inputShapes.size() > NMS_SCORETHRESHOLD) score_threshold = reinterpret_cast(getParentEdgeAt(NMS_SCORETHRESHOLD)->getMemoryPtr()->GetPtr())[0]; - soft_nms_sigma = 0.0f; if (inputShapes.size() > NMS_SOFTNMSSIGMA) soft_nms_sigma = reinterpret_cast(getParentEdgeAt(NMS_SOFTNMSSIGMA)->getMemoryPtr()->GetPtr())[0]; scale = 0.0f; if (soft_nms_sigma > 0.0) { - scale = -0.5 / soft_nms_sigma; + scale = -0.5f / soft_nms_sigma; } - int *selected_indices = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr()); - - float *selected_scores = nullptr; - if (outputShapes.size() > NMS_SELECTEDSCORES) - selected_scores = reinterpret_cast(getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr()->GetPtr()); - - int *valid_outputs = nullptr; - if (outputShapes.size() > NMS_VALIDOUTPUTS) - valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); - - auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType().getStrides(); - auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType().getStrides(); + auto boxesStrides = getParentEdgeAt(NMS_BOXES)->getMemory().GetDescWithType()->getStrides(); + auto scoresStrides = getParentEdgeAt(NMS_SCORES)->getMemory().GetDescWithType()->getStrides(); - std::vector filtBoxes(max_output_boxes_per_class * num_batches * num_classes); + const auto maxNumberOfBoxes = max_output_boxes_per_class * num_batches * num_classes; + std::vector filtBoxes(maxNumberOfBoxes); if (soft_nms_sigma == 0.0f) { nmsWithoutSoftSigma(boxes, scores, boxesStrides, scoresStrides, filtBoxes); @@ -205,12 +211,20 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { }); } - const size_t selectedBoxesNum = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getShape().getStaticDims()[0]; - const size_t validOutputs = std::min(filtBoxes.size(), selectedBoxesNum); + auto indicesMemPtr = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr(); + auto scoresMemPtr = getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr(); + const size_t validOutputs = std::min(filtBoxes.size(), maxNumberOfBoxes); - int selectedIndicesStride = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemory().GetDescWithType().getStrides()[0]; - int *selectedIndicesPtr = selected_indices; - float *selectedScoresPtr = selected_scores; + if (isDynamicNode()) { + VectorDims newDims{validOutputs, 3}; + indicesMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDINDICES)->cloneWithNewDims(newDims)); + scoresMemPtr->redefineDesc(getBaseMemDescAtOutputPort(NMS_SELECTEDSCORES)->cloneWithNewDims(newDims)); + } + + int selectedIndicesStride = indicesMemPtr->GetDescWithType()->getStrides()[0]; + + int *selectedIndicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); + float *selectedScoresPtr = reinterpret_cast(scoresMemPtr->GetPtr()); size_t idx = 0lu; for (; idx < validOutputs; idx++) { @@ -218,19 +232,20 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) { selectedIndicesPtr[1] = filtBoxes[idx].class_index; selectedIndicesPtr[2] = filtBoxes[idx].box_index; selectedIndicesPtr += selectedIndicesStride; - if (outputShapes.size() > NMS_SELECTEDSCORES) { - selectedScoresPtr[0] = static_cast(filtBoxes[idx].batch_index); - selectedScoresPtr[1] = static_cast(filtBoxes[idx].class_index); - selectedScoresPtr[2] = 
static_cast(filtBoxes[idx].score); - selectedScoresPtr += selectedIndicesStride; - } + + selectedScoresPtr[0] = static_cast(filtBoxes[idx].batch_index); + selectedScoresPtr[1] = static_cast(filtBoxes[idx].class_index); + selectedScoresPtr[2] = static_cast(filtBoxes[idx].score); + selectedScoresPtr += selectedIndicesStride; } - std::fill(selectedIndicesPtr, selectedIndicesPtr + (selectedBoxesNum - idx) * selectedIndicesStride, -1); - if (outputShapes.size() > NMS_SELECTEDSCORES) { - std::fill(selectedScoresPtr, selectedScoresPtr + (selectedBoxesNum - idx) * selectedIndicesStride, -1.f); + + if (!isDynamicNode()) { + std::fill(selectedIndicesPtr, selectedIndicesPtr + (maxNumberOfBoxes - idx) * selectedIndicesStride, -1); + std::fill(selectedScoresPtr, selectedScoresPtr + (maxNumberOfBoxes - idx) * selectedIndicesStride, -1.f); } - if (outputShapes.size() > NMS_VALIDOUTPUTS) - *valid_outputs = static_cast(validOutputs); + + int *valid_outputs = reinterpret_cast(getChildEdgesAtPort(NMS_VALIDOUTPUTS)[0]->getMemoryPtr()->GetPtr()); + *valid_outputs = static_cast(validOutputs); } bool MKLDNNNonMaxSuppressionNode::created() const { @@ -272,8 +287,8 @@ float MKLDNNNonMaxSuppressionNode::intersectionOverUnion(const float *boxesI, co return intersection_area / (areaI + areaJ - intersection_area); } -void MKLDNNNonMaxSuppressionNode::nmsWithSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides, - const SizeVector &scoresStrides, std::vector &filtBoxes) { +void MKLDNNNonMaxSuppressionNode::nmsWithSoftSigma(const float *boxes, const float *scores, const VectorDims &boxesStrides, + const VectorDims &scoresStrides, std::vector &filtBoxes) { auto less = [](const boxInfo& l, const boxInfo& r) { return l.score < r.score || ((l.score == r.score) && (l.idx > r.idx)); }; @@ -333,8 +348,8 @@ void MKLDNNNonMaxSuppressionNode::nmsWithSoftSigma(const float *boxes, const flo }); } -void MKLDNNNonMaxSuppressionNode::nmsWithoutSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides, - const SizeVector &scoresStrides, std::vector &filtBoxes) { +void MKLDNNNonMaxSuppressionNode::nmsWithoutSoftSigma(const float *boxes, const float *scores, const VectorDims &boxesStrides, + const VectorDims &scoresStrides, std::vector &filtBoxes) { int max_out_box = static_cast(max_output_boxes_per_class); parallel_for2d(num_batches, num_classes, [&](int batch_idx, int class_idx) { const float *boxesPtr = boxes + batch_idx * boxesStrides[0]; @@ -375,31 +390,31 @@ void MKLDNNNonMaxSuppressionNode::nmsWithoutSoftSigma(const float *boxes, const }); } -void MKLDNNNonMaxSuppressionNode::checkPrecision(const Precision prec, const std::vector precList, - const std::string name, const std::string type) { +void MKLDNNNonMaxSuppressionNode::checkPrecision(const Precision& prec, const std::vector& precList, + const std::string& name, const std::string& type) { if (std::find(precList.begin(), precList.end(), prec) == precList.end()) IE_THROW() << errorPrefix << "has unsupported '" << name << "' " << type << " precision: " << prec; } -void MKLDNNNonMaxSuppressionNode::check1DInput(const SizeVector& dims, const std::vector precList, - const std::string name, const size_t port) { +void MKLDNNNonMaxSuppressionNode::check1DInput(const Shape& shape, const std::vector& precList, + const std::string& name, const size_t port) { checkPrecision(getOriginalInputPrecisionAtPort(port), precList, name, inType); - if (dims.size() != 0 && dims.size() != 1) - IE_THROW() << errorPrefix << "has unsupported '" << name 
<< "' input rank: " << dims.size(); - if (dims.size() == 1) - if (dims[0] != 1) - IE_THROW() << errorPrefix << "has unsupported '" << name << "' input 1st dimension size: " << dims[0]; + if (shape.getRank() != 0 && shape.getRank() != 1) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' input rank: " << shape.getRank(); + if (shape.getRank() == 1) + if (shape.getDims()[0] != 1) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' input 1st dimension size: " << MemoryDescUtils::dim2str(shape.getDims()[0]); } -void MKLDNNNonMaxSuppressionNode::checkOutput(const SizeVector& dims, const std::vector precList, - const std::string name, const size_t port) { +void MKLDNNNonMaxSuppressionNode::checkOutput(const Shape& shape, const std::vector& precList, + const std::string& name, const size_t port) { checkPrecision(getOriginalOutputPrecisionAtPort(port), precList, name, outType); - if (dims.size() != 2) - IE_THROW() << errorPrefix << "has unsupported '" << name << "' output rank: " << dims.size(); - if (dims[1] != 3) - IE_THROW() << errorPrefix << "has unsupported '" << name << "' output 2nd dimension size: " << dims[1]; + if (shape.getRank() != 2) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' output rank: " << shape.getRank(); + if (shape.getDims()[1] != 3) + IE_THROW() << errorPrefix << "has unsupported '" << name << "' output 2nd dimension size: " << MemoryDescUtils::dim2str(shape.getDims()[1]); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.h index 4651da1f2e795c..24c489981e8940 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.h @@ -24,7 +24,7 @@ class MKLDNNNonMaxSuppressionNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; struct filteredBoxes { float score; @@ -50,19 +50,30 @@ class MKLDNNNonMaxSuppressionNode : public MKLDNNNode { void nmsWithoutSoftSigma(const float *boxes, const float *scores, const SizeVector &boxesStrides, const SizeVector &scoresStrides, std::vector &filtBoxes); + void executeDynamicImpl(mkldnn::stream strm) override { execute(strm); } + + std::vector shapeInfer() const override { + return std::vector(); + } + private: // input - const size_t NMS_BOXES = 0; - const size_t NMS_SCORES = 1; - const size_t NMS_MAXOUTPUTBOXESPERCLASS = 2; - const size_t NMS_IOUTHRESHOLD = 3; - const size_t NMS_SCORETHRESHOLD = 4; - const size_t NMS_SOFTNMSSIGMA = 5; + enum : size_t { + NMS_BOXES, + NMS_SCORES, + NMS_MAXOUTPUTBOXESPERCLASS, + NMS_IOUTHRESHOLD, + NMS_SCORETHRESHOLD, + NMS_SOFTNMSSIGMA, + } InputNumber; // output - const size_t NMS_SELECTEDINDICES = 0; - const size_t NMS_SELECTEDSCORES = 1; - const size_t NMS_VALIDOUTPUTS = 2; + enum : size_t { + NMS_SELECTEDINDICES, + NMS_SELECTEDSCORES, + NMS_VALIDOUTPUTS + } OutputNumber; + enum class boxEncoding { CORNER, @@ -81,22 +92,14 @@ class MKLDNNNonMaxSuppressionNode : public MKLDNNNode { float soft_nms_sigma = 0.0f; float scale = 1.f; - SizeVector inputShape_MAXOUTPUTBOXESPERCLASS; - SizeVector inputShape_IOUTHRESHOLD; - SizeVector inputShape_SCORETHRESHOLD; - SizeVector inputShape_SOFTNMSSIGMA; - - SizeVector 
outputShape_SELECTEDINDICES; - SizeVector outputShape_SELECTEDSCORES; - std::string errorPrefix; std::vector> numFiltBox; const std::string inType = "input", outType = "output"; - void checkPrecision(const Precision prec, const std::vector precList, const std::string name, const std::string type); - void check1DInput(const SizeVector& dims, const std::vector precList, const std::string name, const size_t port); - void checkOutput(const SizeVector& dims, const std::vector precList, const std::string name, const size_t port); + void checkPrecision(const Precision& prec, const std::vector& precList, const std::string& name, const std::string& type); + void check1DInput(const Shape& shape, const std::vector& precList, const std::string& name, const size_t port); + void checkOutput(const Shape& shape, const std::vector& precList, const std::string& name, const size_t port); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp index 10b1be0dac532d..a10e42d6e72e8b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_normalize_node.cpp @@ -21,6 +21,7 @@ #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -662,6 +663,11 @@ MKLDNNNormalizeL2Node::MKLDNNNormalizeL2Node(const std::shared_ptr bool MKLDNNNormalizeL2Node::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto norm = std::dynamic_pointer_cast(op); if (!norm) { errorMessage = "Only opset1 NormalizeL2 operation is supported"; @@ -717,7 +723,7 @@ void MKLDNNNormalizeL2Node::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getShape().getRank() > 4 || getParentEdgeAt(0)->getShape().getRank() < 2) { + if (getInputShapeAtPort(0).getRank() > 4 || getInputShapeAtPort(0).getRank() < 2) { IE_THROW() << errorPrefix << "has invalid input shape. Normalize supports from 2D to 4D blobs."; } } @@ -749,13 +755,10 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { IE_THROW() << errorPrefix << "has unsupported output precision. " << getName(); } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); - input_prec = inputPrecision; output_prec = outputPrecision; - src_data_size = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); - dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); + src_data_size = inputPrecision.size(); + dst_data_size = outputPrecision.size(); bool canBeInplace = src_data_size == dst_data_size && getParentEdgeAt(DATA)->getParent()->getChildEdges().size() == 1; @@ -765,28 +768,31 @@ void MKLDNNNormalizeL2Node::initSupportedPrimitiveDescriptors() { config.outConfs.resize(1); config.outConfs[0].inPlace = canBeInplace ? 
0 : -1; - auto pushDesc = [&](memory::format_tag format) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), inputDataType, format); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXES)->getShape().getStaticDims(), memory::data_type::s32, - memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA)->getShape().getStaticDims(), outputDataType, format); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto pushDesc = [&](LayoutType format) { + auto a = creatorsMap.at(format)->createSharedDesc(inputPrecision, getInputShapeAtPort(DATA)); + config.inConfs[0].desc = std::move(a); + a = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(InferenceEngine::Precision::I32, getInputShapeAtPort(AXES)); + config.inConfs[1].desc = std::move(a); + a = creatorsMap.at(format)->createSharedDesc(outputPrecision, getOutputShapeAtPort(DATA)); + config.outConfs[0].desc = std::move(a); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); }; // only plain layout support when w/o sse42 - if (getParentEdgeAt(DATA)->getShape().getRank() == 4 && !cornerCase) { + if (getInputShapeAtPort(DATA).getRank() == 4 && !cornerCase) { if (mayiuse(cpu::x64::sse41)) { - pushDesc(memory::format_tag::nhwc); + pushDesc(LayoutType::nspc); if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::nChw16c); + pushDesc(LayoutType::nCsp16c); } else { - pushDesc(memory::format_tag::nChw8c); + pushDesc(LayoutType::nCsp8c); } } } if (canBeInplace) config.inConfs[0].inPlace = 0; - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(DATA)->getShape().getRank())); + pushDesc(LayoutType::ncsp); } bool MKLDNNNormalizeL2Node::canFuse(const MKLDNNNodePtr& node) const { @@ -833,17 +839,17 @@ void MKLDNNNormalizeL2Node::createPrimitive() { jcp.dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(jcp.dst_dt); jcp.is_nchw = jcp.is_nhwc = jcp.is_blk = false; - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp)) { jcp.is_nchw = true; - } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { + } else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c)) { jcp.is_blk = true; } else { jcp.is_nhwc = true; } jcp.across_spatial = across_spatial; - auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto dims = getParentEdgeAt(0)->getMemory().getStaticDims(); size_t dims_size = dims.size(); jcp.n = (dims_size > 0) ? dims[0] : 1lu; jcp.c = (dims_size > 1) ? 
dims[1] : 1lu; @@ -909,7 +915,7 @@ void MKLDNNNormalizeL2Node::execute(mkldnn::stream strm) { const uint8_t *src_ptr = reinterpret_cast(srcMemPtr->GetPtr()); uint8_t *dst_ptr = reinterpret_cast(dstMemPtr->GetPtr()); - auto dims = getParentEdgeAt(DATA)->getShape().getStaticDims(); + auto dims = getParentEdgeAt(DATA)->getMemory().getStaticDims(); NormalizeContext ctx = { *this, diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp index 350e86e556e85d..24c40f93329b82 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.cpp @@ -17,8 +17,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNOneHotNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNOneHotNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto oneHot = std::dynamic_pointer_cast(op); if (!oneHot) { errorMessage = "Only opset1 OneHot operation is supported"; @@ -125,13 +129,13 @@ void MKLDNNOneHotNode::one_hot(size_t prefix_size, size_t suffix_size) { void MKLDNNOneHotNode::execute(mkldnn::stream strm) { std::size_t prefix_size = 1; - auto input_dims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto input_dims = getParentEdgeAt(0)->getMemory().getStaticDims(); std::size_t actual_axis = (axis == -1) ? src_dims.size() : axis; for (size_t i = 0; i < actual_axis; ++i) prefix_size *= input_dims[i]; - std::size_t suffix_size = getParentEdgeAt(0)->getShape().getElementsCount() / prefix_size; + std::size_t suffix_size = getParentEdgeAt(0)->getMemory().GetShape().getElementsCount() / prefix_size; OneHotContext ctx = {this, prefix_size, suffix_size}; OV_SWITCH(MKLDNNPlugin, OneHotExecute, ctx, output_precision.size(), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.h index 9f39ff1232eeec..18367f21baf672 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_one_hot_node.h @@ -23,7 +23,7 @@ class MKLDNNOneHotNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: typedef InferenceEngine::PrecisionTrait::value_type in_type; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp index 584eb4bce79051..626d66921d1d59 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.cpp @@ -18,8 +18,13 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNPadNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNPadNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto pad = 
std::dynamic_pointer_cast(op); if (!pad) { errorMessage = "Only opset1 Pad operation is supported"; @@ -92,8 +97,8 @@ void MKLDNNPadNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "Incorrect number of output edges"; - const SizeVector srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); - const SizeVector dstDims = getChildEdgeAt(DATA_ID)->getShape().getStaticDims(); + const auto srcDims = getInputShapeAtPort(DATA_ID).getStaticDims(); + const auto dstDims = getOutputShapeAtPort(DATA_ID).getStaticDims(); if (srcDims.size() != dstDims.size() || padsBegin.size() != srcDims.size() || padsEnd.size() != srcDims.size()) IE_THROW() << errorPrefix << " has incorrect number of input/output dimensions!"; @@ -120,9 +125,8 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(DATA_ID); if (std::find(supportedPrecisions.begin(), supportedPrecisions.end(), precision) == supportedPrecisions.end()) precision = precision.is_float() ? InferenceEngine::Precision::FP32 : InferenceEngine::Precision::I32; - auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); + auto srcDims = getInputShapeAtPort(DATA_ID).getStaticDims(); int numOfDims = srcDims.size(); NodeConfig config; @@ -130,42 +134,33 @@ void MKLDNNPadNode::initSupportedPrimitiveDescriptors() { config.inConfs.resize(isPadValueSpecified ? 4 : 3); config.outConfs.resize(1); - auto pushSupportedPrimitiveDescriptor = [&](memory::format_tag memoryFormat) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, - memoryFormat); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_BEGIN_ID)->getShape().getStaticDims(), - memory::data_type::s32, memory::format_tag::x); - config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PADS_END_ID)->getShape().getStaticDims(), - memory::data_type::s32, memory::format_tag::x); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + auto pushSupportedPrimitiveDescriptor = [&](LayoutType memoryFormat) { + config.inConfs[0].desc = creatorsMap.at(memoryFormat)->createSharedDesc(precision, getInputShapeAtPort(DATA_ID)); + config.inConfs[1].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(InferenceEngine::Precision::I32, getInputShapeAtPort(PADS_BEGIN_ID)); + config.inConfs[2].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(InferenceEngine::Precision::I32, getInputShapeAtPort(PADS_END_ID)); if (isPadValueSpecified) - config.inConfs[3].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(PAD_VALUE_ID)->getShape().getStaticDims(), - memory::data_type::f32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, memoryFormat); + config.inConfs[3].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(InferenceEngine::Precision::FP32, getInputShapeAtPort(PAD_VALUE_ID)); + + config.outConfs[0].desc = creatorsMap.at(memoryFormat)->createSharedDesc(precision, getOutputShapeAtPort(DATA_ID)); supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); }; - if (numOfDims == 4) - pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::nhwc); - else if (numOfDims == 5) - pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::ndhwc); + if (numOfDims == 4 || numOfDims == 5) + 
pushSupportedPrimitiveDescriptor(LayoutType::nspc); - pushSupportedPrimitiveDescriptor(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(0)->getShape().getRank())); + pushSupportedPrimitiveDescriptor(LayoutType::ncsp); auto canUseBlocked = [=](const size_t blockSize) { return (padMode == CONSTANT && padsBegin[1] % blockSize == 0 && padsEnd[1] % blockSize == 0) || (padMode != CONSTANT && padsBegin[1] == 0 && padsEnd[1] == 0); }; - if (numOfDims == 4) { - if (srcDims[1] % 8 == 0 && canUseBlocked(8)) - pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::nChw8c); - if (srcDims[1] % 16 == 0 && canUseBlocked(16)) - pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::nChw16c); - } else if (numOfDims == 5) { + if (numOfDims == 4 || numOfDims == 5) { if (srcDims[1] % 8 == 0 && canUseBlocked(8)) - pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::nCdhw8c); + pushSupportedPrimitiveDescriptor(LayoutType::nCsp8c); if (srcDims[1] % 16 == 0 && canUseBlocked(16)) - pushSupportedPrimitiveDescriptor(mkldnn::memory::format_tag::nCdhw16c); + pushSupportedPrimitiveDescriptor(LayoutType::nCsp16c); } } @@ -182,8 +177,8 @@ void MKLDNNPadNode::createPrimitive() { params.sizeData = this->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->getPrecision().size(); const auto inBlkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - params.srcDims = inBlkDesc.getBlockDims(); - params.dstDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + params.srcDims = inBlkDesc->getBlockDims(); + params.dstDims = getChildEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims(); size_t nDims = params.srcDims.size(); params.srcStrides.resize(nDims, 1); @@ -193,14 +188,14 @@ void MKLDNNPadNode::createPrimitive() { params.dstStrides[i] = params.dstStrides[i + 1] * params.dstDims[i + 1]; } - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c)) { padsBegin[1] /= params.srcDims[params.srcDims.size() - 1]; padsEnd[1] /= params.srcDims[params.srcDims.size() - 1]; padsBegin.push_back(0); padsEnd.push_back(0); } else { - auto order = inBlkDesc.getOrder(); + auto order = inBlkDesc->getOrder(); std::vector newPadsBegin(padsBegin.size(), 0), newPadsEnd(padsEnd.size(), 0); for (size_t i = 0; i < padsBegin.size(); ++i) { newPadsBegin[i] = padsBegin[order[i]]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h index a4a1333914bb10..68af48dac0ba50 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pad_node.h @@ -20,7 +20,7 @@ class MKLDNNPadNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: enum PadMode { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp index 5d6e900d75d9fd..1e66a99616f160 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp +++ 
b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.cpp @@ -13,14 +13,36 @@ #include #include #include -#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; +bool MKLDNNPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (!ngraph::as_type_ptr(op) && !ngraph::as_type_ptr(op)) { + errorMessage = "Only opset1 MaxPool and AvgPool operations are supported"; + return false; + } + } catch (...) { + return false; + } + return true; +} + MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } + auto maxPoolOp = ngraph::as_type_ptr(op); auto avgPoolOp = ngraph::as_type_ptr(op); if (maxPoolOp) { @@ -55,9 +77,6 @@ MKLDNNPoolingNode::MKLDNNPoolingNode(const std::shared_ptr& op, co for (int i = 0; i < avgPoolOp->get_pads_end().size(); i++) { data_pad_end.push_back(static_cast(avgPoolOp->get_pads_end()[i])); } - } else { - IE_THROW(NotImplemented) - << "CPU Pooling node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); } } @@ -113,17 +132,17 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { effective_pad_begin = data_pad_begin; effective_pad_end.resize(data_pad_end.size()); - auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto childDims = getChildEdgeAt(0)->getShape().getStaticDims(); - const size_t inputRank = getParentEdgeAt(0)->getShape().getRank(); + auto parentShape = getInputShapeAtPort(0); + auto childShape = getOutputShapeAtPort(0); + const size_t inputRank = getInputShapeAtPort(0).getRank(); if ((inputRank < 4) || (inputRank > 5)) IE_THROW() << "Pooling layer. Unsupported mode. Only 4D and 5D blobs are supported as input."; for (int i = 0; i < effective_pad_end.size(); i++) { int krn = kernel[i]; - int src = getParentEdgeAt(0)->getShape().getStaticDims()[2 + i]; - int dst = getChildEdgeAt(0)->getShape().getStaticDims()[2 + i]; + int src = getInputShapeAtPort(0).getStaticDims()[2 + i]; + int dst = getOutputShapeAtPort(0).getStaticDims()[2 + i]; int calc_dst = (src - krn + data_pad_begin[i]) / stride[i] + 1; effective_pad_end[i] = (dst - calc_dst) * stride[i]; @@ -133,28 +152,28 @@ void MKLDNNPoolingNode::getSupportedDescriptors() { if (outputDataType == memory::data_type::bf16) outputDataType = memory::data_type::f32; // i8 layers supports only ndhwc and nhwc layouts - const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? - memory::format_tag::ndhwc : memory::format_tag::nhwc); - const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? - memory::format_tag::ndhwc : memory::format_tag::nhwc); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); - } else if ((inputRank == 4 || inputRank == 5) && parentDims[1] == 1) { + const auto in_candidate = std::make_shared(parentShape, inputDataType, inputRank == 5 ? + memory::format_tag::ndhwc : memory::format_tag::nhwc); + const auto out_candidate = std::make_shared(childShape, outputDataType, inputRank == 5 ? 
+ memory::format_tag::ndhwc : memory::format_tag::nhwc); + createDescriptor({ in_candidate }, { out_candidate }); + } else if ((inputRank == 4 || inputRank == 5) && parentShape.getStaticDims()[1] == 1) { // WA. We should force planar layout since it provides better performance - const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, inputRank == 5 ? - memory::format_tag::ncdhw : memory::format_tag::nchw); - const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, inputRank == 5 ? - memory::format_tag::ncdhw : memory::format_tag::nchw); - createDescriptor({ in_candidate.get() }, { out_candidate.get() }); + const auto in_candidate = std::make_shared(parentShape, inputDataType, inputRank == 5 ? + memory::format_tag::ncdhw : memory::format_tag::nchw); + const auto out_candidate = std::make_shared(childShape, outputDataType, inputRank == 5 ? + memory::format_tag::ncdhw : memory::format_tag::nchw); + createDescriptor({ in_candidate }, { out_candidate }); } else { if (inputDataType != memory::data_type::bf16) { inputDataType = memory::data_type::f32; outputDataType = memory::data_type::f32; } // It doesn't support any format - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { - const auto in_candidate = MKLDNNPlugin::make_unique(parentDims, inputDataType, format); - const auto out_candidate = MKLDNNPlugin::make_unique(childDims, outputDataType, format); - createDescriptor({in_candidate.get()}, {out_candidate.get()}); + for (auto format : getAvailableFormatsForDims(getInputShapeAtPort(0))) { + const auto in_candidate = std::make_shared(parentShape, inputDataType, format); + const auto out_candidate = std::make_shared(childShape, outputDataType, format); + createDescriptor({in_candidate}, {out_candidate}); } } } @@ -179,10 +198,10 @@ bool MKLDNNPoolingNode::created() const { return getType() == Pooling; } -void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); - MKLDNNMemoryDesc out_candidate = MemoryDescUtils::convertToMKLDNNMemoryDesc(*outputDesc[0]); +void MKLDNNPoolingNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + auto in_candidate = MemoryDescUtils::convertToDnnlMemoryDesc(inputDesc[0])->getDnnlDesc(); + auto out_candidate = MemoryDescUtils::convertToDnnlMemoryDesc(outputDesc[0])->getDnnlDesc(); mkldnn::algorithm alg; if (algorithm == PoolingAvg) { @@ -253,8 +272,12 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getSrcMemDesc(itpd, i)); - dataConfig.desc = getSrcMemDesc(itpd, i); + auto desc = getSrcMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked) { + dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + dataConfig.desc = std::move(desc); + } config.inConfs.push_back(dataConfig); } @@ -262,8 +285,12 @@ void MKLDNNPoolingNode::initSupportedPrimitiveDescriptors() { PortConfig dataConfig; dataConfig.inPlace = canBeInPlace() ? 
0 : -1; dataConfig.constant = false; - dataConfig.desc = MemoryDescUtils::applyUndefinedOffset(*getDstMemDesc(itpd, i)); - dataConfig.desc = getDstMemDesc(itpd, i); + auto desc = getDstMemDesc(itpd, i); + if (desc->getType() & MemoryDescType::Blocked) { + dataConfig.desc = MemoryDescUtils::cloneWithUndefStridesAndOffset(*desc); + } else { + dataConfig.desc = std::move(desc); + } config.outConfs.push_back(dataConfig); } impl_desc_type impl_type = parse_impl_name(itpd.impl_info_str()); @@ -280,13 +307,13 @@ void MKLDNNPoolingNode::initDescriptor(const NodeConfig& config) { if (!selectedPD) { return; } - std::vector inDescs; + std::vector inDescs; for (const auto& inConf : config.inConfs) - inDescs.push_back(inConf.desc.get()); - std::vector outDescs; + inDescs.push_back(inConf.desc); + std::vector outDescs; for (const auto& outConf : config.outConfs) - outDescs.push_back(outConf.desc.get()); - createDescriptor({inDescs}, {outDescs}); + outDescs.push_back(outConf.desc); + createDescriptor(inDescs, outDescs); mkldnn::primitive_attr attr; setPostOps(attr); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h index a594e774e47709..d131bed7857cad 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_pooling_node.h @@ -16,8 +16,8 @@ class MKLDNNPoolingNode : public MKLDNNNode { public: MKLDNNPoolingNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; std::vector getAvailableFormatsForDims(const Shape &dims) const override; void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -28,6 +28,8 @@ class MKLDNNPoolingNode : public MKLDNNNode { return false; } + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: void setPostOps(mkldnn::primitive_attr &attr, bool initWeights = false); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp index e7421d82f1270c..bcc37aebfe5305 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.cpp @@ -71,8 +71,12 @@ static std::vector generate_anchors(proposal_conf &conf) { using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNProposalNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNProposalNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto proposal0Op = ngraph::as_type_ptr(op); const auto proposal4Op = ngraph::as_type_ptr(op); if (!proposal0Op && !proposal4Op) { @@ -165,8 +169,8 @@ void MKLDNNProposalNode::execute(mkldnn::stream strm) { if (store_prob) outProbData = reinterpret_cast (getChildEdgesAtPort(PROBABILITIES_OUT_IDX)[0]->getMemoryPtr()->GetPtr()); - auto inProbDims = getParentEdgeAt(0)->getShape().getStaticDims(); - const size_t imgInfoSize = getParentEdgeAt(2)->getShape().getStaticDims()[0]; + auto inProbDims = 
getParentEdgeAt(0)->getMemory().getStaticDims(); + const size_t imgInfoSize = getParentEdgeAt(2)->getMemory().getStaticDims()[0]; // input image height & width const float imgHeight = imgInfoData[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.h index 4fdb333b25921b..c2da9756b7fc45 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_proposal_node.h @@ -22,7 +22,7 @@ class MKLDNNProposalNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: const size_t PROBABILITIES_IN_IDX = 0lu; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp index e56d6d2c245563..8ec2e2f67627fa 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.cpp @@ -21,8 +21,12 @@ using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; using namespace mkldnn::impl::utils; -bool MKLDNNPSROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNPSROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto psroi = std::dynamic_pointer_cast(op); const auto defPsroi = std::dynamic_pointer_cast(op); if (!psroi && !defPsroi) { @@ -495,8 +499,9 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { int numClasses = 1; int channelsEachClass = outputDim; if (!noTrans) { - bottomTrans = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); - numClasses = static_cast(getParentEdgeAt(2)->getShape().getStaticDims()[1]) / 2; + const auto mem = getParentEdgeAt(2)->getMemoryPtr(); + bottomTrans = reinterpret_cast(mem->GetPtr()); + numClasses = static_cast(mem->getStaticDims()[1]) / 2; channelsEachClass /= numClasses; } @@ -504,9 +509,9 @@ void MKLDNNPSROIPoolingNode::executeSpecified() { const float *bottomRois = bottomRoisBeginning + currentRoi * 5; int roiBatchInd = static_cast(bottomRois[0]); if (getAlgorithm() == Algorithm::PSROIPoolingAverage) { - executeAverage(srcData, dstData, bottomRois, currentRoi, roiBatchInd, srcDesc, dstDesc); + executeAverage(srcData, dstData, bottomRois, currentRoi, roiBatchInd, *srcDesc, *dstDesc); } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinear) { - executeBilinear(srcData, dstData, bottomRois, currentRoi, roiBatchInd, srcDesc, dstDesc); + executeBilinear(srcData, dstData, bottomRois, currentRoi, roiBatchInd, *srcDesc, *dstDesc); } else if (getAlgorithm() == Algorithm::PSROIPoolingBilinearDeformable) { executeBilinearDeformable(srcData, dstData, bottomRois, bottomTrans, numClasses, channelsEachClass, currentRoi, roiBatchInd); @@ -533,8 +538,8 @@ struct MKLDNNPSROIPoolingNode::PSROIPoolingExecute { }; void MKLDNNPSROIPoolingNode::execute(mkldnn::stream strm) { - auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); - auto outputPrec = getChildEdgesAtPort(0)[0]->getMemory().GetDesc().getPrecision(); + 
auto inputPrec = getParentEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); + auto outputPrec = getChildEdgesAtPort(0)[0]->getMemory().getDesc().getPrecision(); if (!((inputPrec == Precision::BF16 && outputPrec == Precision::BF16) || (inputPrec == Precision::FP32 && outputPrec == Precision::FP32))) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h index 45f275fe1ddff5..c79382a393d0df 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_psroi_pooling_node.h @@ -22,7 +22,7 @@ class MKLDNNPSROIPoolingNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: size_t outputDim = 0; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp index 86818d36140967..c4243b89c63825 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.cpp @@ -12,8 +12,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNRangeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNRangeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } if (!MKLDNNPlugin::one_of(op->get_type_info(), ngraph::op::v0::Range::type_info, ngraph::op::v4::Range::type_info)) { errorMessage = "Only opset1 and opset4 Range operation is supported"; return false; @@ -74,15 +78,15 @@ void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { getOriginalInputPrecisionAtPort(RANGE_LIMIT) == Precision::FP32 && getOriginalInputPrecisionAtPort(RANGE_DELTA) == Precision::FP32 && getOriginalOutputPrecisionAtPort(0) == Precision::FP32)) { - inDataConf.reserve(getOriginalInputsNumber()); - for (int i = 0; i < getOriginalInputsNumber(); ++i) + inDataConf.reserve(inputShapes.size()); + for (int i = 0; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); outDataConf.reserve(1); outDataConf.emplace_back(LayoutType::ncsp, Precision::FP32); addSupportedPrimDesc(inDataConf, outDataConf, impl_desc_type::ref_any); } else { - inDataConf.reserve(getOriginalInputsNumber()); - for (int i = 0; i < getOriginalInputsNumber(); ++i) + inDataConf.reserve(inputShapes.size()); + for (int i = 0; i < inputShapes.size(); ++i) inDataConf.emplace_back(LayoutType::ncsp); outDataConf.reserve(1); outDataConf.emplace_back(LayoutType::ncsp); @@ -92,7 +96,7 @@ void MKLDNNRangeNode::initSupportedPrimitiveDescriptors() { void MKLDNNRangeNode::execute(mkldnn::stream strm) { StatusCode retcode = OK; - switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision()) { + switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision()) { case Precision::FP32: retcode = rangeKernel(); break; @@ -110,7 +114,7 @@ void MKLDNNRangeNode::execute(mkldnn::stream strm) { template InferenceEngine::StatusCode MKLDNNRangeNode::rangeKernel() noexcept { - size_t dst_size = (getChildEdgesAtPort(0)[0]->getShape().getStaticDims())[0]; + 
size_t dst_size = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims()[0]; data_t* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); data_t start = reinterpret_cast(getParentEdgeAt(RANGE_START)->getMemoryPtr()->GetPtr())[0]; data_t limit = reinterpret_cast(getParentEdgeAt(RANGE_LIMIT)->getMemoryPtr()->GetPtr())[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.h index b5584be6aa949c..36b3f4aaeefab0 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_range_node.h @@ -19,7 +19,7 @@ class MKLDNNRangeNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; template InferenceEngine::StatusCode rangeKernel() noexcept; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp index c76156ec4ae464..62ed485c63a635 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.cpp @@ -1359,10 +1359,14 @@ std::map& op, std::string& errorMessage) noexcept { +bool MKLDNNReduceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - if (std::dynamic_pointer_cast(op) == nullptr && - std::dynamic_pointer_cast(op) == nullptr) { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (std::dynamic_pointer_cast(op) == nullptr && + std::dynamic_pointer_cast(op) == nullptr) { errorMessage = "Reduce node with name " + op->get_friendly_name() + " is not derived from ArithmeticReductionKeepDims or LogicalReductionKeepDims"; return false; } @@ -1405,18 +1409,18 @@ void MKLDNNReduceNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " gets incorrect number of output edges!"; - if (getParentEdgeAt(REDUCE_INDEXES)->getShape().getRank() != 1) { + if (getInputShapeAtPort(REDUCE_INDEXES).getRank() != 1) { IE_THROW() << errorPrefix << " gets incorrect index vector dimension! Index vector should be 1 dimension."; } if (keep_dims) { - if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() != getChildEdgeAt(0)->getShape().getRank()) + if (getInputShapeAtPort(REDUCE_DATA).getRank() != getOutputShapeAtPort(0).getRank()) IE_THROW() << errorPrefix << " gets incorrect number of input/output dimensions!"; } else { // In fact, after the Reduce operation, the shape must be a scalar if the previous one was 1d. // But for now, 0d tensor (scalar) is emulated as 1d tensor. Skip checking in such cases. 
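As context for the rank checks in this hunk: with keep_dims the reduced axes collapse to 1 and the output rank equals the input rank; without keep_dims the reduced axes are dropped, except that a would-be 0-d scalar is emulated as a 1-d tensor. A small illustrative helper expressing that relation (assumed names, not code from this patch):

#include <cstddef>
#include <set>
#include <vector>

using Dims = std::vector<std::size_t>;

// Illustrative only: expected Reduce output dims for a static input shape.
Dims expectedReduceDims(const Dims& src, const std::set<std::size_t>& axes, bool keep_dims) {
    Dims dst;
    for (std::size_t i = 0; i < src.size(); ++i) {
        if (axes.count(i) == 0)
            dst.push_back(src[i]);   // untouched axis keeps its size
        else if (keep_dims)
            dst.push_back(1);        // reduced axis collapses to 1, rank is preserved
    }
    if (dst.empty())
        dst.push_back(1);            // a would-be 0-d scalar is emulated as a 1-d tensor
    return dst;
}

For example, reducing a {2, 3, 4} input over axis 1 yields {2, 1, 4} with keep_dims and {2, 4} without.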
- bool is_emulated_0d_as_1d = getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 1 && getChildEdgeAt(0)->getShape().getRank() == 1; - if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() <= getChildEdgeAt(0)->getShape().getRank() && !is_emulated_0d_as_1d) + bool is_emulated_0d_as_1d = getInputShapeAtPort(REDUCE_DATA).getRank() == 1 && getOutputShapeAtPort(0).getRank() == 1; + if (getInputShapeAtPort(REDUCE_DATA).getRank() <= getOutputShapeAtPort(0).getRank() && !is_emulated_0d_as_1d) IE_THROW() << errorPrefix << "gets incorrect number of input/output dimensions!"; } } @@ -1436,7 +1440,7 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { Precision inputPrecision = getOriginalInputPrecisionAtPort(REDUCE_DATA); Precision outputPrecision = getOriginalOutputPrecisionAtPort(0); - jit_mode = (mayiuse(cpu::x64::sse41)) && getParentEdgeAt(REDUCE_DATA)->getShape().getRank() <= 5 && + jit_mode = (mayiuse(cpu::x64::sse41)) && getInputShapeAtPort(REDUCE_DATA).getRank() <= 5 && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), inputPrecision) != std::end(supportedPrecisions) && std::find(std::begin(supportedPrecisions), std::end(supportedPrecisions), outputPrecision) != std::end(supportedPrecisions); @@ -1453,13 +1457,10 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { } } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrecision); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrecision); - input_prec = inputPrecision; output_prec = outputPrecision; - src_data_size = MKLDNNExtensionUtils::sizeOfDataType(inputDataType); - dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(outputDataType); + src_data_size = inputPrecision.size(); + dst_data_size = outputPrecision.size(); NodeConfig config; config.dynBatchSupport = false; @@ -1472,13 +1473,14 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { config.inConfs[REDUCE_INDEXES].inPlace = -1; config.outConfs[0].inPlace = -1; - auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag outFormat, memory::data_type inDataType, - memory::data_type outDataType, impl_desc_type impl_type) { - config.inConfs[REDUCE_DATA].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(), - inDataType, inFormat); - config.inConfs[REDUCE_INDEXES].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims(), - memory::data_type::s32, memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outDataType, outFormat); + auto& creatorsMap = BlockedDescCreator::getCommonCreators(); + + auto pushDesc = [&](LayoutType inFormat, LayoutType outFormat, InferenceEngine::Precision inDataType, + InferenceEngine::Precision outDataType, impl_desc_type impl_type) { + config.inConfs[REDUCE_DATA].desc = creatorsMap.at(inFormat)->createSharedDesc(inDataType, getInputShapeAtPort(REDUCE_DATA)); + config.inConfs[REDUCE_INDEXES].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(InferenceEngine::Precision::I32, + getInputShapeAtPort(REDUCE_INDEXES)); + config.outConfs[0].desc = creatorsMap.at(outFormat)->createSharedDesc(outDataType, getOutputShapeAtPort(0)); supportedPrimitiveDescriptors.push_back({config, impl_type}); }; @@ -1490,27 +1492,19 @@ void MKLDNNReduceNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::jit_avx2; } - 
pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), inputDataType, outputDataType, impl_type); + pushDesc(LayoutType::ncsp, LayoutType::ncsp, inputPrecision, outputPrecision, impl_type); if (keep_dims) { - if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 4 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { - if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::nChw16c, memory::format_tag::nChw16c, inputDataType, outputDataType, impl_type); - } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) { - pushDesc(memory::format_tag::nChw8c, memory::format_tag::nChw8c, inputDataType, outputDataType, impl_type); - } - } else if (getParentEdgeAt(REDUCE_DATA)->getShape().getRank() == 5 && getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims()[1] > 1) { + if ((getInputShapeAtPort(REDUCE_DATA).getRank() == 4 || getInputShapeAtPort(REDUCE_DATA).getRank() == 5) && + getInputShapeAtPort(REDUCE_DATA).getStaticDims()[1] > 1) { if (mayiuse(cpu::x64::avx512_common)) { - pushDesc(memory::format_tag::nCdhw16c, memory::format_tag::nCdhw16c, inputDataType, outputDataType, impl_type); + pushDesc(LayoutType::nCsp16c, LayoutType::nCsp16c, inputPrecision, outputPrecision, impl_type); } else if (mayiuse(cpu::x64::avx2) || mayiuse(cpu::x64::sse41)) { - pushDesc(memory::format_tag::nCdhw8c, memory::format_tag::nCdhw8c, inputDataType, outputDataType, impl_type); + pushDesc(LayoutType::nCsp8c, LayoutType::nCsp8c, inputPrecision, outputPrecision, impl_type); } } } } else { - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(REDUCE_DATA)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank()), - memory::data_type::f32, memory::data_type::f32, impl_desc_type::ref); + pushDesc(LayoutType::ncsp, LayoutType::ncsp, InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP32, impl_desc_type::ref); } } @@ -1526,7 +1520,7 @@ void MKLDNNReduceNode::createPrimitive() { IE_THROW() << errorPrefix << " has nullable preferable primitive descriptor"; auto selectedPD = getSelectedPrimitiveDescriptor(); - planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp); + planar_layout = getParentEdgeAt(REDUCE_DATA)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp); auto jcp = jit_reduce_config_params(); jcp.src_dt = MKLDNNExtensionUtils::IEPrecisionToDataType(selectedPD->getConfig().inConfs[REDUCE_DATA].desc->getPrecision()); @@ -1566,8 +1560,8 @@ void MKLDNNReduceNode::execute(mkldnn::stream strm) { const auto idx_data = reinterpret_cast(srcIndexesMemPtr->GetData()); size_t dst_size = dstMemPtr->GetSize(); - src_dims = getParentEdgeAt(REDUCE_DATA)->getShape().getStaticDims(); - src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDescWithType().getStrides(); + src_dims = getParentEdgeAt(REDUCE_DATA)->getMemory().getStaticDims(); + src_strides = getParentEdgeAt(REDUCE_DATA)->getMemory().GetDescWithType()->getStrides(); dims_size = src_dims.size(); calc_process_dst_dims(idx_data); @@ -1932,9 +1926,9 @@ inline void MKLDNNReduceNode::init_dst_data(uint8_t *out_ptr, size_t dst_size) { inline void MKLDNNReduceNode::calc_process_dst_dims(const int32_t *idx_data) { SizeVector out_dims; - SizeVector dst_dims = getChildEdgeAt(0)->getShape().getStaticDims(); + SizeVector dst_dims = getOutputShapeAtPort(0).getStaticDims(); std::set axes; - for (size_t i = 0; i < 
getParentEdgeAt(REDUCE_INDEXES)->getShape().getStaticDims()[0]; i++) { + for (size_t i = 0; i < getParentEdgeAt(REDUCE_INDEXES)->getMemory().getStaticDims()[0]; i++) { int32_t axis = idx_data[i]; if (axis < 0) axis += src_dims.size(); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h index 07e2724d98a5c1..566277f06ee895 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reduce_node.h @@ -75,7 +75,7 @@ class MKLDNNReduceNode : public MKLDNNNode { return false; } - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: void reduce_type(const uint8_t *in_ptr, uint8_t *out_ptr, size_t dst_size); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp index f7ddad8b6794a4..031c4c0dd8babe 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.cpp @@ -7,6 +7,7 @@ #include #include #include "common/blocked_desc_creator.h" +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -21,6 +22,19 @@ MKLDNNReferenceNode::MKLDNNReferenceNode(const std::shared_ptr& op } setType(Reference); setTypeStr("Reference"); + + if (isDynamicNode()) { + ngraph::OutputVector inputsForShapeInfer; + for (size_t i = 0; i < inputShapes.size(); i++) { + if (dynamic_cast(ngraphOp->get_input_node_ptr(i))) { + inputsForShapeInfer.push_back(ngraphOp->get_input_node_shared_ptr(i)); + } else { + inputsForShapeInfer.push_back(std::make_shared(ngraphOp->get_input_element_type(i), + ngraphOp->get_input_partial_shape(i))); + } + } + opToShapeInfer = ngraphOp->clone_with_new_inputs(inputsForShapeInfer); + } } void MKLDNNReferenceNode::getSupportedDescriptors() {} @@ -46,17 +60,41 @@ void MKLDNNReferenceNode::initSupportedPrimitiveDescriptors() { void MKLDNNReferenceNode::createPrimitive() {} +std::vector> MKLDNNReferenceNode::shapeInfer() const { + for (size_t i = 0; i < opToShapeInfer->get_input_size(); i++) { + if (!dynamic_cast(opToShapeInfer->get_input_node_ptr(i))) { + opToShapeInfer->get_input_tensor(i).set_partial_shape( + getParentEdgesAtPort(i)[0]->getMemory().getDesc().getShape().toPartialShape()); + } + } + + opToShapeInfer->validate_and_infer_types(); + + IE_ASSERT(opToShapeInfer->get_output_size() == outputShapes.size()); + + std::vector newShapes(outputShapes.size()); + for (size_t i = 0; i < newShapes.size(); i++) { + const auto &partShape = opToShapeInfer->get_output_partial_shape(i); + if (partShape.is_dynamic()) + IE_THROW(NotImplemented) << "MKLDNNReferenceNode doesn't support nodes with internal dynamism"; + newShapes[i] = partShape.get_shape(); + } + return newShapes; +} + void MKLDNNReferenceNode::execute(mkldnn::stream strm) { ngraph::HostTensorVector inputs; for (size_t i = 0; i < inputShapes.size(); i++) { void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().GetPtr(); - inputs.push_back(std::make_shared(ngraphOp->get_input_element_type(i), ngraphOp->get_input_shape(i), srcDataPtr)); + inputs.push_back(std::make_shared(ngraphOp->get_input_element_type(i), + getParentEdgesAtPort(i)[0]->getMemory().getStaticDims(), srcDataPtr)); } ngraph::HostTensorVector outputs; for (size_t i = 0; i < 
outputShapes.size(); i++) { void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().GetPtr(); - outputs.push_back(std::make_shared(ngraphOp->get_output_element_type(i), ngraphOp->get_output_shape(i), dstDataPtr)); + outputs.push_back(std::make_shared(ngraphOp->get_output_element_type(i), + getChildEdgesAtPort(i)[0]->getMemory().getStaticDims(), dstDataPtr)); } if (!ngraphOp->evaluate(outputs, inputs)) { @@ -64,6 +102,10 @@ void MKLDNNReferenceNode::execute(mkldnn::stream strm) { } } +void MKLDNNReferenceNode::executeDynamicImpl(mkldnn::stream strm) { + execute(strm); +} + bool MKLDNNReferenceNode::created() const { return getType() == Reference; } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h index ce27028aa56700..8c60ff7b45fbf2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reference_node.h @@ -20,8 +20,12 @@ class MKLDNNReferenceNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; + std::vector> shapeInfer() const override; + void executeDynamicImpl(mkldnn::stream strm) override; + private: const std::shared_ptr ngraphOp; + std::shared_ptr opToShapeInfer; const std::string additionalErrorMessage; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp index c140baa88c533c..fe7adbb9552ecd 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.cpp @@ -227,8 +227,12 @@ struct jit_uni_logistic_kernel_f32 : public jit_uni_logistic_kernel, public jit_ } }; -bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNRegionYoloNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto regionYolo = std::dynamic_pointer_cast(op); if (!regionYolo) { errorMessage = "Only opset1 RegionYolo operation is supported"; @@ -367,10 +371,12 @@ inline void MKLDNNRegionYoloNode::calculate_logistic(size_t start_index, int cou } void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { - size_t B = (getParentEdgeAt(0)->getShape().getRank() > 0) ? getParentEdgeAt(0)->getShape().getStaticDims()[0] : 1; - size_t IC = (getParentEdgeAt(0)->getShape().getRank() > 1) ? getParentEdgeAt(0)->getShape().getStaticDims()[1] : 1; - size_t IH = (getParentEdgeAt(0)->getShape().getRank() > 2) ? getParentEdgeAt(0)->getShape().getStaticDims()[2] : 1; - size_t IW = (getParentEdgeAt(0)->getShape().getRank() > 3) ? getParentEdgeAt(0)->getShape().getStaticDims()[3] : 1; + const auto &inShape = getParentEdgeAt(0)->getMemory().GetShape(); + const auto &inDims = inShape.getStaticDims(); + size_t B = (inShape.getRank() > 0) ? inDims[0] : 1; + size_t IC = (inShape.getRank() > 1) ? inDims[1] : 1; + size_t IH = (inShape.getRank() > 2) ? inDims[2] : 1; + size_t IW = (inShape.getRank() > 3) ? 
inDims[3] : 1; size_t mask_size = mask.size(); int end_index = 0; @@ -388,8 +394,9 @@ void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { output_size = B * IH * IW * mask_size * (classes + coords + 1); } - if (output_size != getChildEdgeAt(0)->getMemoryPtr()->GetElementsCount()) - IE_THROW() << "Incorrect layer configuration or output dimensions. " << output_size << " != " << getChildEdgeAt(0)->getMemoryPtr()->GetElementsCount(); + if (output_size != getChildEdgeAt(0)->getMemoryPtr()->GetShape().getElementsCount()) + IE_THROW() << "Incorrect layer configuration or output dimensions. " << output_size << " != " + << getChildEdgeAt(0)->getMemoryPtr()->GetShape().getElementsCount(); size_t inputs_size = IH * IW * num_ * (classes + coords + 1); size_t total_size = 2 * IH * IW; @@ -397,8 +404,8 @@ void MKLDNNRegionYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - cpu_convert(src_data, dst_data, getParentEdgeAt(0)->getMemory().GetDesc().getPrecision(), - getChildEdgeAt(0)->getMemory().GetDesc().getPrecision(), output_size); + cpu_convert(src_data, dst_data, getParentEdgeAt(0)->getMemory().getDesc().getPrecision(), + getChildEdgeAt(0)->getMemory().getDesc().getPrecision(), output_size); for (int b = 0; b < B; b++) { for (int n = 0; n < num_; n++) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.h index 31404a5be11c9c..fda76d7b33a697 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_region_yolo_node.h @@ -47,7 +47,7 @@ class MKLDNNRegionYoloNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: int classes; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp index c1701deacdeeb4..faaac99926b6ed 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.cpp @@ -11,6 +11,9 @@ #include "ie_parallel.hpp" #include "utils/general_utils.h" #include +#include "nodes/common/cpu_memcpy.h" +#include "nodes/common/cpu_convert.h" +#include "mkldnn_convert_node.h" using namespace mkldnn; using namespace MKLDNNPlugin; @@ -51,12 +54,12 @@ void MKLDNNReorderNode::initSupportedPrimitiveDescriptors() { config.outConfs[0].inPlace = 0; } if (input && output) { - config.inConfs[0].desc = input->clone(); - config.outConfs[0].desc = output->clone(); + config.inConfs[0].desc = input; + config.outConfs[0].desc = output; } else if (parent->getSelectedPrimitiveDescriptor() != nullptr && child->getSelectedPrimitiveDescriptor() != nullptr) { - config.inConfs[0].desc = parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc->clone(); - config.outConfs[0].desc = child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc->clone(); + config.inConfs[0].desc = parent->getSelectedPrimitiveDescriptor()->getConfig().outConfs[0].desc; + config.outConfs[0].desc = child->getSelectedPrimitiveDescriptor()->getConfig().inConfs[0].desc; } else { 
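// NOTE: the MKLDNNReferenceNode changes a few hunks above form the generic fallback for dynamic
// shapes: the constructor clones the op with Parameter stand-ins for its non-constant inputs, and
// shapeInfer() re-runs ngraph shape inference against the shapes currently held by the input memory.
// A condensed sketch of that flow; currentInputShape(i) is a placeholder for querying
// getParentEdgesAtPort(i)[0]->getMemory().getDesc().getShape().toPartialShape():
std::vector<ngraph::Shape> inferOutputShapes(const std::shared_ptr<ngraph::Node>& opToShapeInfer) {
    for (size_t i = 0; i < opToShapeInfer->get_input_size(); ++i) {
        // Refresh every non-constant input with the shape observed at runtime.
        if (!dynamic_cast<ngraph::op::v0::Constant*>(opToShapeInfer->get_input_node_ptr(i)))
            opToShapeInfer->get_input_tensor(i).set_partial_shape(currentInputShape(i));
    }
    opToShapeInfer->validate_and_infer_types();  // re-run ngraph shape inference

    std::vector<ngraph::Shape> shapes;
    for (size_t i = 0; i < opToShapeInfer->get_output_size(); ++i) {
        const auto& p = opToShapeInfer->get_output_partial_shape(i);
        if (p.is_dynamic())
            IE_THROW(NotImplemented) << "doesn't support nodes with internal dynamism";
        shapes.push_back(p.get_shape());
    }
    return shapes;
}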
IE_THROW() << "Cannot initialize supported PDs for Reorder node with name `" << getName() << "`"; } @@ -74,40 +77,41 @@ void MKLDNNReorderNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) IE_THROW() << "Preferable primitive descriptor is not set."; - auto inDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto inDims = srcMemPtr->GetShape().getStaticDims(); if (!isOptimized) { + const auto &parentMem = getParentEdgeAt(0)->getMemory(); if (MKLDNNPlugin::one_of(inDims.size(), 4, 5) && inDims[1] <= 64 && inDims[1] >= 16 && - (getParentEdgeAt(0)->getMemory().GetElementsCount() / inDims[1]) >= 128 && - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && - getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && - getParentEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32 && - getChildEdgeAt(0)->getMemory().GetDesc().getPrecision() == Precision::FP32) { + parentMem.getDesc().hasLayoutType(LayoutType::nspc) && + (parentMem.GetDescWithType()->getPaddedElementsCount() / inDims[1]) >= 128 && + getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp) && + parentMem.getDesc().getPrecision() == Precision::FP32 && + getChildEdgeAt(0)->getMemory().getDesc().getPrecision() == Precision::FP32) { // oneDNN JIT reorder shows bad perf for nspc to ncsp reorder case so we fallback on simple c++ implementation canUseOptimizedNspc2Ncsp = true; } else if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2) && MKLDNNPlugin::one_of(inDims.size(), 4, 5) && - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp) && - getChildEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc) && - getParentEdgeAt(0)->getMemory().GetDataType() == getChildEdgeAt(0)->getMemory().GetDataType() && - MKLDNNExtensionUtils::sizeOfDataType(getParentEdgeAt(0)->getMemory().GetDataType()) == 1) { + parentMem.getDesc().hasLayoutType(LayoutType::ncsp) && + getChildEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc) && + parentMem.GetDataType() == getChildEdgeAt(0)->getMemory().GetDataType() && + MKLDNNExtensionUtils::sizeOfDataType(parentMem.GetDataType()) == 1) { // oneDNN doesn't provide JIT reorder impl for non-avx2 targets so we fallback on simple c++ implementation which shows better perf canUseOptimizedNcsp2Nspc = true; } else { - createReorderPrimitive(srcMemPtr->GetDescriptor(), srcMemPtr->GetPrimitive().get_data_handle(), - dstMemPtr->GetDescriptor(), dstMemPtr->GetPrimitive().get_data_handle()); + createReorderPrimitive(srcMemPtr->GetDescWithType()->getDnnlDesc(), srcMemPtr->GetPrimitive().get_data_handle(), + dstMemPtr->GetDescWithType()->getDnnlDesc(), dstMemPtr->GetPrimitive().get_data_handle()); } } } void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDesc, void* srcPtr, const mkldnn::memory::desc &dstDesc, void* dstPtr) { src_blocked = std::make_shared(getEngine()); - src_blocked->Create(MKLDNNMemoryDesc(srcDesc), srcPtr, false); + src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(srcDesc), srcPtr, false); dst_blocked = std::make_shared(getEngine()); - dst_blocked->Create(MKLDNNMemoryDesc(dstDesc), dstPtr, false); + dst_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(dstDesc), dstPtr, false); mkldnn::primitive_attr attr; auto createReorder = [&]() -> bool { @@ -137,13 +141,13 @@ void MKLDNNReorderNode::createReorderPrimitive(const mkldnn::memory::desc &srcDe // perform such conversion if the source tensor can be reshaped to the destination rank. 
This is // useful in situations when rank in IR does not much rank that is required by the oneDNN primitive, // but the input tensor can be reshaped (e.g. weights for grouped convolutions, biases etc.) - if (src_blocked->GetDesc().hasLayoutType(LayoutType::ncsp) && - src_blocked->GetDims().size() != dst_blocked->GetDims().size()) { - const auto newDims = dst_blocked->GetDims(); - const auto newFormat = MKLDNNMemory::GetPlainFormatByRank(newDims.size()); + if (src_blocked->getDesc().hasLayoutType(LayoutType::ncsp) && + src_blocked->GetShape().getRank() != dst_blocked->GetShape().getRank()) { + const auto newDims = dst_blocked->getStaticDims(); + const auto newFormat = MKLDNNExtensionUtils::GetPlainFormatByRank(newDims.size()); - auto newDesc = mkldnn::memory::desc(newDims, src_blocked->GetDataType(), newFormat); - src_blocked->Create(MKLDNNMemoryDesc(newDesc), srcPtr, false); + auto newDesc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(newDims), src_blocked->GetDataType(), newFormat); + src_blocked->Create(MKLDNNExtensionUtils::makeDescriptor(newDesc), srcPtr, false); success = createReorder(); } @@ -171,7 +175,7 @@ void MKLDNNReorderNode::optimizedNcsp2Nspc() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - auto inDims = parentEdge->getShape().getStaticDims(); + auto inDims = parentEdge->getMemory().GetShape().getStaticDims(); const size_t ndims = inDims.size(); const size_t DIM0 = inDims[0]; const size_t DIM1 = inDims[1]; @@ -202,7 +206,7 @@ void MKLDNNReorderNode::optimizedNspc2Ncsp() { auto parentEdge = getParentEdgeAt(0); auto childEdge = getChildEdgeAt(0); - auto inDims = parentEdge->getShape().getStaticDims(); + auto inDims = parentEdge->getMemory().GetShape().getStaticDims(); const size_t ndims = inDims.size(); const size_t DIM0 = inDims[0]; const size_t DIM1 = inDims[1]; @@ -247,8 +251,8 @@ void MKLDNNReorderNode::setDynamicBatchLim(int lim) { if (prim) { auto &dstMemPtr = getChildEdgeAt(0)->getMemoryPtr(); auto &srcMemPtr = getParentEdgeAt(0)->getMemoryPtr(); - memory::desc src_d = srcMemPtr->GetDescriptor(); - memory::desc dst_d = dstMemPtr->GetDescriptor(); + memory::desc src_d = srcMemPtr->GetDescWithType()->getDnnlDesc(); + memory::desc dst_d = dstMemPtr->GetDescWithType()->getDnnlDesc(); void *src_data_hdl = srcMemPtr->GetPrimitive().get_data_handle(); void *dst_data_hdl = dstMemPtr->GetPrimitive().get_data_handle(); @@ -277,4 +281,53 @@ std::string MKLDNNReorderNode::getReorderArgs(const MemoryDesc &parentDesc, cons return inArgs + "_" + outArgs; } +void MKLDNNReorderNode::reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size) { + if (size != 0) + IE_ASSERT(size <= output.GetSize()); + if (input.getDesc().isCompatible(output.getDesc())) { + auto srcPtr = static_cast(input.GetPtr()); + auto dstPtr = static_cast(output.GetPtr()); + + auto copySize = size == 0 ? 
output.GetSize() : size; + cpu_memcpy(dstPtr, srcPtr, copySize); + } else { + std::unique_ptr pReorder; + std::shared_ptr srcMemoryPtr; + std::vector tmpBuff; + + try { + pReorder = std::unique_ptr(new mkldnn::reorder(input.GetPrimitive(), output.GetPrimitive())); + srcMemoryPtr = input.GetPrimitivePtr(); + } + catch (const mkldnn::error& err) { + if (mkldnn_unimplemented == err.status && output.GetDataType() != input.GetDataType() && MKLDNNConvertNode::isSupportedDesc(input.getDesc()) && + MKLDNNConvertNode::isSupportedDesc(output.getDesc())) { + //we probably could not make the reorder because there is no one supporting this precision conversion + //lets try to convert data first using cpu_convert + auto data = static_cast(input.GetPtr()); + tmpBuff.resize(input.GetSize()); + + const auto outPrc = MKLDNNExtensionUtils::DataTypeToIEPrecision(output.GetDataType()); + cpu_convert(data, tmpBuff.data(), MKLDNNExtensionUtils::DataTypeToIEPrecision(input.GetDataType()), + outPrc, input.GetSize() / input.getDesc().getPrecision().size()); + + MKLDNNMemory tmpMem(output.getEngine()); + auto tmpDesc = MemoryDescUtils::cloneWithNewPrecision(input.getDesc(), outPrc); + tmpMem.Create(std::move(tmpDesc), tmpBuff.data()); + + pReorder = std::unique_ptr(new mkldnn::reorder(tmpMem.GetPrimitive(), output.GetPrimitive())); + srcMemoryPtr = tmpMem.GetPrimitivePtr(); + } else { + throw; + } + } + if (pReorder) { + mkldnn::stream loc_stream(output.getEngine(), mkldnn::stream::flags::default_order); + pReorder->execute(loc_stream, *srcMemoryPtr, *output.GetPrimitivePtr()); + } else { + IE_THROW() << "Could not make mkldnn reorder."; + } + } +} + REG_MKLDNN_PRIM_FOR(MKLDNNReorderNode, Reorder); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h index da821878035e37..51ee28a5614f1a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorder_node.h @@ -50,9 +50,11 @@ class MKLDNNReorderNode : public MKLDNNNode { static std::string getReorderArgs(const MemoryDesc &parentDesc, const MemoryDesc &childDesc); + static void reorderData(const MKLDNNMemory &input, const MKLDNNMemory &output, size_t size = 0); + private: - std::unique_ptr input; - std::unique_ptr output; + std::shared_ptr input; + std::shared_ptr output; MKLDNNMemoryPtr dst_blocked; MKLDNNMemoryPtr src_blocked; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp index 48e2eaf9992fc9..194ddca390c416 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.cpp @@ -11,8 +11,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNReorgYoloNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNReorgYoloNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto reorgYolo = std::dynamic_pointer_cast(op); if (!reorgYolo) { errorMessage = "Only opset2 ReorgYolo operation is supported"; @@ -55,10 +59,11 @@ void MKLDNNReorgYoloNode::execute(mkldnn::stream strm) { const auto *src_data = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dst_data = 
reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - int IW = (getParentEdgeAt(0)->getShape().getRank() > 3) ? getParentEdgeAt(0)->getShape().getStaticDims()[3] : 1; - int IH = (getParentEdgeAt(0)->getShape().getRank() > 2) ? getParentEdgeAt(0)->getShape().getStaticDims()[2] : 1; - int IC = (getParentEdgeAt(0)->getShape().getRank() > 1) ? getParentEdgeAt(0)->getShape().getStaticDims()[1] : 1; - int B = (getParentEdgeAt(0)->getShape().getRank() > 0) ? getParentEdgeAt(0)->getShape().getStaticDims()[0] : 1; + const auto &inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); + int IW = (inDims.size() > 3) ? inDims[3] : 1; + int IH = (inDims.size() > 2) ? inDims[2] : 1; + int IC = (inDims.size() > 1) ? inDims[1] : 1; + int B = (inDims.size() > 0) ? inDims[0] : 1; int ic_off = IC / (stride * stride); int ih_off = IH * stride; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.h index b88f19010e0491..d22147c48e0a4a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reorg_yolo_node.h @@ -19,7 +19,7 @@ class MKLDNNReorgYoloNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: int stride; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp index f4256042379500..9bc94c59cd5c42 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.cpp @@ -6,13 +6,37 @@ #include #include #include +#include using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; +bool MKLDNNReshapeNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (!std::dynamic_pointer_cast(op) && + !std::dynamic_pointer_cast(op) && + !std::dynamic_pointer_cast(op)) { + errorMessage = "Only opset1 Reshape, Squeeze, Unsqueeze operations are supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + MKLDNNReshapeNode::MKLDNNReshapeNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : - MKLDNNNode(op, eng, cache) {} + MKLDNNNode(op, eng, cache) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; + } +} MKLDNNReshapeNode::MKLDNNReshapeNode(const std::string& name, const Shape& inDims, const Shape& outDims, Precision precision, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &wCache) @@ -34,28 +58,27 @@ void MKLDNNReshapeNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - precision = getOriginalOutputPrecisionAtPort(0); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + InferenceEngine::Precision inPrec = getOriginalInputPrecisionAtPort(0); + InferenceEngine::Precision outPrec = getOriginalOutputPrecisionAtPort(0); // Current reshape implementation is simple memory reinterpret, // same precision on input and output is required - if (inputDataType != outputDataType) - inputDataType = outputDataType; + if (inPrec != outPrec) + inPrec = outPrec; NodeConfig config; config.dynBatchSupport = true; config.inConfs.resize(getParentEdges().size()); - for (size_t i = 0; i (getParentEdgeAt(i)->getShape().getStaticDims(), inputDataType); + config.inConfs[i].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(inPrec, getInputShapeAtPort(i)); } config.outConfs.resize(1); config.outConfs[0].inPlace = 0; config.outConfs[0].constant = false; - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType); + config.outConfs[0].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(outPrec, getOutputShapeAtPort(0)); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h index cf88872f195701..c262e02a87ba8e 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reshape_node.h @@ -26,6 +26,8 @@ class MKLDNNReshapeNode : public MKLDNNNode { void initSupportedPrimitiveDescriptors() override; void createPrimitive() override; bool created() const override; + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; }; } // namespace MKLDNNPlugin diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp index ffa831a670db77..25f912b4f3832b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.cpp @@ -12,8 +12,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNReverseSequenceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNReverseSequenceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto revSeq = std::dynamic_pointer_cast(op); if 
(!revSeq) { errorMessage = "Only opset1 ReverseSequence operation is supported"; @@ -94,7 +98,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { const float *src_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_DATA)->getMemoryPtr()->GetPtr()); float* dst_data = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr()); - switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision()) { + switch (getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().getDesc().getPrecision()) { case Precision::FP32: { float *seq_lengths_data = reinterpret_cast(getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemoryPtr()->GetPtr()); for (i = 0; i < src_dims[batch_axis]; i++) { @@ -169,7 +173,7 @@ void MKLDNNReverseSequenceNode::execute(mkldnn::stream strm) { break; default: IE_THROW() << "ReverseSequence layer does not support " - << getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().GetDesc().getPrecision() << " precision"; + << getParentEdgeAt(REVERSESEQUENCE_LENGTHS)->getMemory().getDesc().getPrecision() << " precision"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.h index 4b3cf056c63afa..10fb8774abfa22 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_reverse_sequence_node.h @@ -19,7 +19,7 @@ class MKLDNNReverseSequenceNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: const size_t REVERSESEQUENCE_DATA = 0; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp index 6e9086274d5a7d..2f7bd432cfc54f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.cpp @@ -9,6 +9,7 @@ #include "utils/bfloat16.hpp" #include "mkldnn_input_node.h" #include +#include "memory_desc/dnnl_blocked_memory_desc.h" #include @@ -109,6 +110,11 @@ const std::map MKLDNNRNN bool MKLDNNRNN::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (!one_of(op->get_type_info(), ngraph::op::v3::GRUCell::type_info, ngraph::op::v0::LSTMCell::type_info, @@ -260,7 +266,7 @@ void MKLDNNRNN::initCell(const std::shared_ptr& op) { Gb = (cell_type != mkldnn::algorithm::lbr_gru) ? G : G + 1; // Expected shapes - std::vector D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; + VectorDims D_shape {N, DC}, S_shape {N, SC}, S_4D_shape {L, D, N, SC}; if (in_data_dims != D_shape || in_h_state_dims != S_shape @@ -281,15 +287,15 @@ void MKLDNNRNN::fillCellDesc() { runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - std::vector S_4D_shape {L, D, N, SC}; + Shape S_4D_shape(VectorDims{L, D, N, SC}); // layer input plus states in_data_d.reserve(S + 1); out_data_d.reserve(S + 1); // Shapes and Attributes are correct. Can start internal stuff initialization. 
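// NOTE: the RNN hunks below repeatedly build shared memory descriptors from a Shape, a oneDNN data
// type and a format tag; the template argument is easy to lose in the diff, so here is the pattern
// in isolation. DnnlBlockedMemoryDesc is inferred from the header change to DnnlBlockedMemoryDescPtr,
// and the dimension values are placeholders rather than the ones the node actually computes:
using namespace mkldnn;
const size_t L = 1, D = 1, N = 2, T = 10, DC = 16, SC = 32, G = 4;
const auto dataType = memory::data_type::f32;

// Layer input [T, N, DC] in tnc layout.
auto in_layer = std::make_shared<DnnlBlockedMemoryDesc>(
        Shape(VectorDims{T, N, DC}), dataType, memory::format_tag::tnc);
// Hidden state [L, D, N, SC] in ldnc layout.
auto in_state = std::make_shared<DnnlBlockedMemoryDesc>(
        Shape(VectorDims{L, D, N, SC}), dataType, memory::format_tag::ldnc);
// Weights [L, D, DC, G, SC] in ldigo layout; handed to oneDNN via ->getDnnlDesc().
auto w_data = std::make_shared<DnnlBlockedMemoryDesc>(
        Shape(VectorDims{L, D, DC, G, SC}), dataType, memory::format_tag::ldigo);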
- in_data_d.emplace_back(std::vector{T, N, DC}, dataType, memory::format_tag::tnc); - out_data_d.emplace_back(std::vector{T, N, SC}, dataType, memory::format_tag::tnc); + in_data_d.emplace_back(Shape(VectorDims{T, N, DC}), dataType, memory::format_tag::tnc); + out_data_d.emplace_back(Shape(VectorDims{T, N, SC}), dataType, memory::format_tag::tnc); in_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); out_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); @@ -299,39 +305,34 @@ void MKLDNNRNN::fillCellDesc() { out_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); - w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); + w_data_d = std::make_shared(Shape(VectorDims{L, D, DC, G, SC}), dataType, memory::format_tag::ldigo); + w_state_d = std::make_shared(Shape(VectorDims{L, D, SC, G, SC}), dataType, memory::format_tag::ldigo); // Add 5th input - w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); + w_bias_d = std::make_shared(Shape(VectorDims{L, D, Gb, SC}), memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); // Expected shapes - std::vector D_shape {N, DC}, S_shape {N, SC}, WShape {SC * G, DC}, RShape {SC * G, SC}, BShape {SC * Gb}; - std::vector in_candidate, out_candidate; + Shape D_shape(VectorDims{N, DC}), S_shape(VectorDims{N, SC}), WShape(VectorDims{SC * G, DC}), RShape(VectorDims{SC * G, SC}), BShape(VectorDims{SC * Gb}); + std::vector in_candidate, out_candidate; in_candidate.reserve(6); - in_candidate.emplace_back(D_shape, dataType, memory::format_tag::nc); - in_candidate.emplace_back(S_shape, dataType, memory::format_tag::nc); - out_candidate.emplace_back(S_shape, dataType, memory::format_tag::nc); + in_candidate.emplace_back(std::make_shared(D_shape, dataType, memory::format_tag::nc)); + in_candidate.emplace_back(std::make_shared(S_shape, dataType, memory::format_tag::nc)); + out_candidate.emplace_back(std::make_shared(S_shape, dataType, memory::format_tag::nc)); if (haveCellState(cell_type)) { - in_candidate.emplace_back(S_shape, memory::data_type::f32, memory::format_tag::nc); - out_candidate.emplace_back(S_shape, memory::data_type::f32, memory::format_tag::nc); + in_candidate.emplace_back(std::make_shared(S_shape, memory::data_type::f32, memory::format_tag::nc)); + out_candidate.emplace_back(std::make_shared(S_shape, memory::data_type::f32, memory::format_tag::nc)); } if (one_of(cell_type, mkldnn::algorithm::vanilla_rnn, mkldnn::algorithm::vanilla_gru, mkldnn::algorithm::lbr_gru, mkldnn::algorithm::vanilla_lstm)) { - in_candidate.emplace_back(WShape, memory::data_type::f32, memory::format_tag::nc); - in_candidate.emplace_back(RShape, memory::data_type::f32, memory::format_tag::nc); - in_candidate.emplace_back(BShape, memory::data_type::f32, memory::format_tag::x); + in_candidate.emplace_back(std::make_shared(WShape, memory::data_type::f32, memory::format_tag::nc)); + in_candidate.emplace_back(std::make_shared(RShape, memory::data_type::f32, memory::format_tag::nc)); + in_candidate.emplace_back(std::make_shared(BShape, memory::data_type::f32, memory::format_tag::x)); } - std::vector in_candidate_ptrs(in_candidate.size()); - std::vector out_candidate_ptrs(out_candidate.size()); - std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const 
MKLDNNMemoryDesc& item) { return &item; }); - std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); - - createDescriptor(in_candidate_ptrs, out_candidate_ptrs); + createDescriptor(in_candidate, out_candidate); } void MKLDNNRNN::initSeq(const std::shared_ptr& op) { @@ -386,68 +387,84 @@ void MKLDNNRNN::fillSeqDesc() { runtimePrecision = getOriginalInputPrecisionAtPort(0); auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - std::vector S_4D_shape {L, D, N, SC}; + Shape S_4D_shape(VectorDims{L, D, N, SC}); // Try to create descriptor and corresponding configuration - in_data_d.emplace_back(std::vector{in_data_dims}, dataType, memory::format_tag::tnc); - out_data_d.emplace_back(std::vector{out_data_dims}, dataType, memory::format_tag::tnc); + in_data_d.emplace_back(Shape(VectorDims{in_data_dims}), dataType, memory::format_tag::tnc); + out_data_d.emplace_back(Shape(VectorDims{out_data_dims}), dataType, memory::format_tag::tnc); - in_data_d.emplace_back(std::vector{S_4D_shape}, dataType, memory::format_tag::ldnc); - out_data_d.emplace_back(std::vector{S_4D_shape}, dataType, memory::format_tag::ldnc); + in_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); + out_data_d.emplace_back(S_4D_shape, dataType, memory::format_tag::ldnc); if (haveCellState(cell_type)) { - in_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); - out_data_d.emplace_back(std::vector{S_4D_shape}, memory::data_type::f32, memory::format_tag::ldnc); + in_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); + out_data_d.emplace_back(S_4D_shape, memory::data_type::f32, memory::format_tag::ldnc); } - w_data_d = MKLDNNPlugin::make_unique(std::vector{L, D, DC, G, SC}, dataType, memory::format_tag::ldigo); - w_state_d = MKLDNNPlugin::make_unique(std::vector{L, D, SC, G, SC}, dataType, memory::format_tag::ldigo); + w_data_d = std::make_shared(Shape(VectorDims{L, D, DC, G, SC}), dataType, memory::format_tag::ldigo); + w_state_d = std::make_shared(Shape(VectorDims{L, D, SC, G, SC}), dataType, memory::format_tag::ldigo); - w_bias_d = MKLDNNPlugin::make_unique(std::vector{L, D, Gb, SC}, memory::data_type::f32, memory::format_tag::ldgo); + w_bias_d = std::make_shared(Shape(VectorDims{L, D, Gb, SC}), memory::data_type::f32, memory::format_tag::ldgo); copyWeightsData(); - std::vector in_candidate; + std::vector in_candidate; in_candidate.reserve(7); if (nativeOrder) - in_candidate.emplace_back(inputShapes[RNNInOutKind::Layer].getStaticDims(), dataType, memory::format_tag::tnc); + in_candidate.emplace_back(std::make_shared(inputShapes[RNNInOutKind::Layer], dataType, memory::format_tag::tnc)); else if (N == 1) // WA to avoid reorder before sequence for some models - in_candidate.emplace_back(std::vector{N, T, DC}, dataType, memory::format_tag::tnc); + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, T, DC}), dataType, memory::format_tag::tnc)); + else + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, T, DC}), dataType, memory::format_tag::ntc)); + + // initial hidden state + // WA to avoid reorder before + if (D == 1) + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, D, SC}), dataType, memory::format_tag::tnc)); else - in_candidate.emplace_back(std::vector{N, T, DC}, dataType, memory::format_tag::ntc); + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, D, SC}), dataType, 
memory::format_tag::ntc)); + + // initial cell state + if (haveCellState(cell_type)) { + if (D == 1) + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, D, SC}), memory::data_type::f32, memory::format_tag::tnc)); + else + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, D, SC}), memory::data_type::f32, memory::format_tag::ntc)); + } - in_candidate.emplace_back(std::vector{N, D, SC}, dataType, memory::format_tag::ntc); // initial hidden state - if (haveCellState(cell_type)) - in_candidate.emplace_back(std::vector{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc); // initial cell state - in_candidate.emplace_back(std::vector{N}, memory::data_type::s32, memory::format_tag::x); // sequence lengths - in_candidate.emplace_back(std::vector{D, G * SC, DC}, memory::data_type::f32, memory::format_tag::ntc); // W - in_candidate.emplace_back(std::vector{D, G * SC, SC}, memory::data_type::f32, memory::format_tag::ntc); // R - in_candidate.emplace_back(std::vector{D, Gb * SC}, memory::data_type::f32, memory::format_tag::nc); // B + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{N}), memory::data_type::s32, memory::format_tag::x)); // sequence lengths + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{D, G * SC, DC}), memory::data_type::f32, memory::format_tag::ntc)); // W + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{D, G * SC, SC}), memory::data_type::f32, memory::format_tag::ntc)); // R + in_candidate.emplace_back(std::make_shared(Shape(VectorDims{D, Gb * SC}), memory::data_type::f32, memory::format_tag::nc)); // B - std::vector out_candidate; + std::vector out_candidate; out_candidate.reserve(3); if (nativeOrder) { - out_candidate.emplace_back(out_data_d[RNNInOutKind::Layer]); + out_candidate.emplace_back(std::make_shared(out_data_d[RNNInOutKind::Layer])); } else if (N == 1) { // WA to avoid reorder after sequence for some models - out_candidate.emplace_back(std::vector{N, T, SC}, dataType, memory::format_tag::tnc); + out_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, T, SC}), dataType, memory::format_tag::tnc)); } else { - out_candidate.emplace_back(std::vector{N, T, SC}, dataType, memory::format_tag::ntc); + out_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, T, SC}), dataType, memory::format_tag::ntc)); } - out_candidate.emplace_back(std::vector{N, D, SC}, dataType, memory::format_tag::ntc); - if (haveCellState(cell_type)) - out_candidate.emplace_back(std::vector{N, D, SC}, memory::data_type::f32, memory::format_tag::ntc); + // WA to avoid reorder after + if (D == 1) + out_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, D, SC}), dataType, memory::format_tag::tnc)); + else + out_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, D, SC}), dataType, memory::format_tag::ntc)); - std::vector in_candidate_ptrs(in_candidate.size()); - std::vector out_candidate_ptrs(out_candidate.size()); - std::transform(in_candidate.begin(), in_candidate.end(), in_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); - std::transform(out_candidate.begin(), out_candidate.end(), out_candidate_ptrs.begin(), [](const MKLDNNMemoryDesc& item) { return &item; }); + if (haveCellState(cell_type)) { + if (D == 1) + out_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, D, SC}), memory::data_type::f32, memory::format_tag::tnc)); + else + out_candidate.emplace_back(std::make_shared(Shape(VectorDims{N, D, SC}), memory::data_type::f32, memory::format_tag::ntc)); + } - 
createDescriptor(in_candidate_ptrs, out_candidate_ptrs); + createDescriptor(in_candidate, out_candidate); } bool MKLDNNRNN::verifyWeightsPrecision(const Precision &layerPrec, const Precision &weightsPrec) { @@ -470,8 +487,8 @@ void MKLDNNRNN::fillWeights(const int *gate_map, const size_t wIdx, const size_t w_state_mem->Create(*w_state_d); internalBlobMemory.push_back(w_state_mem); - const size_t ie_w_vec_size = getParentEdgesAtPort(wIdx)[0]->getShape().getElementsCount(); - const size_t ie_r_vec_size = getParentEdgesAtPort(rIdx)[0]->getShape().getElementsCount(); + const size_t ie_w_vec_size = getInputShapeAtPort(wIdx).getElementsCount(); + const size_t ie_r_vec_size = getInputShapeAtPort(rIdx).getElementsCount(); auto *wInputNode = dynamic_cast(getParentEdgesAtPort(wIdx)[0]->getParent().get()); auto wConstBlob = wInputNode->getMemoryPtr(); @@ -526,7 +543,7 @@ void MKLDNNRNN::fillBiases(const int *gate_map) { auto *constInputNode = dynamic_cast(getParentEdgesAtPort(bIdx)[0]->getParent().get()); auto constBlob = constInputNode->getMemoryPtr(); - auto const elementsCount = constBlob->GetElementsCount(); + auto const elementsCount = constBlob->GetSize() / constBlob->getDesc().getPrecision().size(); std::vector ie_b_vec(elementsCount); cpu_convert(constBlob->GetPtr(), @@ -607,57 +624,57 @@ void MKLDNNRNN::copyWeightsData() { if (runtimePrecision == Precision::BF16 || runtimePrecision == Precision::FP32) fillBiases(gate_map); } -void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { +void MKLDNNRNN::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { switch (cell_type) { case mkldnn::algorithm::vanilla_rnn: { MKLDNNDescriptor desc(std::shared_ptr( new vanilla_rnn_forward::desc(prop_kind::forward_scoring, cell_act, direction, - /* In Data */ in_data_d[RNNInOutKind::Layer], - /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ *w_data_d, - /* Weights state */ *w_state_d, - /* Bias */ *w_bias_d, - /* Out Data */ out_data_d[RNNInOutKind::Layer], - /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); + /* In Data */ in_data_d[RNNInOutKind::Layer].getDnnlDesc(), + /* In State */ in_data_d[RNNInOutKind::HiddenState].getDnnlDesc(), + /* Weights data */ w_data_d->getDnnlDesc(), + /* Weights state */ w_state_d->getDnnlDesc(), + /* Bias */ w_bias_d->getDnnlDesc(), + /* Out Data */ out_data_d[RNNInOutKind::Layer].getDnnlDesc(), + /* Out State */ out_data_d[RNNInOutKind::HiddenState].getDnnlDesc()))); descs.push_back(desc); } break; case mkldnn::algorithm::vanilla_gru: { MKLDNNDescriptor desc(std::shared_ptr( new gru_forward::desc(prop_kind::forward_scoring, direction, - /* In Data */ in_data_d[RNNInOutKind::Layer], - /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ *w_data_d, - /* Weights state */ *w_state_d, - /* Bias */ *w_bias_d, - /* Out Data */ out_data_d[RNNInOutKind::Layer], - /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); + /* In Data */ in_data_d[RNNInOutKind::Layer].getDnnlDesc(), + /* In State */ in_data_d[RNNInOutKind::HiddenState].getDnnlDesc(), + /* Weights data */ w_data_d->getDnnlDesc(), + /* Weights state */ w_state_d->getDnnlDesc(), + /* Bias */ w_bias_d->getDnnlDesc(), + /* Out Data */ out_data_d[RNNInOutKind::Layer].getDnnlDesc(), + /* Out State */ out_data_d[RNNInOutKind::HiddenState].getDnnlDesc()))); descs.push_back(desc); } break; case mkldnn::algorithm::lbr_gru: { MKLDNNDescriptor desc(std::shared_ptr( new 
lbr_gru_forward::desc(prop_kind::forward_scoring, direction, - /* In Data */ in_data_d[RNNInOutKind::Layer], - /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* Weights data */ *w_data_d, - /* Weights state */ *w_state_d, - /* Bias */ *w_bias_d, - /* Out Data */ out_data_d[RNNInOutKind::Layer], - /* Out State */ out_data_d[RNNInOutKind::HiddenState]))); + /* In Data */ in_data_d[RNNInOutKind::Layer].getDnnlDesc(), + /* In State */ in_data_d[RNNInOutKind::HiddenState].getDnnlDesc(), + /* Weights data */ w_data_d->getDnnlDesc(), + /* Weights state */ w_state_d->getDnnlDesc(), + /* Bias */ w_bias_d->getDnnlDesc(), + /* Out Data */ out_data_d[RNNInOutKind::Layer].getDnnlDesc(), + /* Out State */ out_data_d[RNNInOutKind::HiddenState].getDnnlDesc()))); descs.push_back(desc); } break; case mkldnn::algorithm::vanilla_lstm: { MKLDNNDescriptor desc(std::shared_ptr( new lstm_forward::desc(prop_kind::forward_scoring, direction, - /* In Data */ in_data_d[RNNInOutKind::Layer], - /* In State */ in_data_d[RNNInOutKind::HiddenState], - /* In State C */ in_data_d[RNNInOutKind::CellState], - /* Weights data */ *w_data_d, - /* Weights state */ *w_state_d, - /* Bias */ *w_bias_d, - /* Out Data */ out_data_d[RNNInOutKind::Layer], - /* Out State */ out_data_d[RNNInOutKind::HiddenState], - /* Out State C */ out_data_d[RNNInOutKind::CellState]))); + /* In Data */ in_data_d[RNNInOutKind::Layer].getDnnlDesc(), + /* In State */ in_data_d[RNNInOutKind::HiddenState].getDnnlDesc(), + /* In State C */ in_data_d[RNNInOutKind::CellState].getDnnlDesc(), + /* Weights data */ w_data_d->getDnnlDesc(), + /* Weights state */ w_state_d->getDnnlDesc(), + /* Bias */ w_bias_d->getDnnlDesc(), + /* Out Data */ out_data_d[RNNInOutKind::Layer].getDnnlDesc(), + /* Out State */ out_data_d[RNNInOutKind::HiddenState].getDnnlDesc(), + /* Out State C */ out_data_d[RNNInOutKind::CellState].getDnnlDesc()))); descs.push_back(desc); } break; default: @@ -671,7 +688,7 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = inputDesc[i]->clone(); + dataConfig.desc = inputDesc[i]; config.inConfs.push_back(dataConfig); } @@ -679,7 +696,7 @@ void MKLDNNRNN::createDescriptor(const std::vector &inputDesc PortConfig dataConfig; dataConfig.inPlace = -1; dataConfig.constant = false; - dataConfig.desc = outputDesc[i]->clone(); + dataConfig.desc = outputDesc[i]; config.outConfs.push_back(dataConfig); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h index 9e47637235f583..2dd4fe436ef529 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_rnn.h @@ -8,6 +8,7 @@ #include #include #include +#include "memory_desc/dnnl_blocked_memory_desc.h" namespace MKLDNNPlugin { @@ -19,8 +20,8 @@ class MKLDNNRNN : public MKLDNNNode { void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void execute(mkldnn::stream strm) override; @@ -44,8 +45,6 @@ class MKLDNNRNN : public MKLDNNNode { void copyWeightsData(); private: - using MKLDNNMemoryDescPtr = std::unique_ptr; - InferenceEngine::Precision runtimePrecision; /** Specify mode Cell or Seq. 
true - Cell, false - Seq */ bool is_cell = false; @@ -73,8 +72,8 @@ class MKLDNNRNN : public MKLDNNNode { const size_t L = 1; /**< What is it??. Constant for mkldnn impl */ const size_t D = 1; /**< Num of direction. 1 or 2 */ - std::vector in_data_d; - std::vector out_data_d; + std::vector in_data_d; + std::vector out_data_d; enum RNNInOutKind { Layer = 0, @@ -82,9 +81,9 @@ class MKLDNNRNN : public MKLDNNNode { CellState = 2 }; - MKLDNNMemoryDescPtr w_data_d; - MKLDNNMemoryDescPtr w_state_d; - MKLDNNMemoryDescPtr w_bias_d; + DnnlBlockedMemoryDescPtr w_data_d; + DnnlBlockedMemoryDescPtr w_state_d; + DnnlBlockedMemoryDescPtr w_bias_d; std::vector in_data_dims; std::vector out_data_dims; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp index 0517350e09c6c1..5e258a4d765c01 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.cpp @@ -23,8 +23,12 @@ using namespace mkldnn::impl::cpu::x64; using ngPoolingMode = ngraph::op::v3::ROIAlign::PoolingMode; -bool MKLDNNROIAlignNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNROIAlignNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto roiAlign = std::dynamic_pointer_cast(op); if (!roiAlign) { errorMessage = "Only opset3 ROIAlign operation is supported"; @@ -73,31 +77,31 @@ void MKLDNNROIAlignNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); + if (getInputShapeAtPort(0).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getInputShapeAtPort(0).getRank(); } - if (getParentEdgeAt(1)->getShape().getRank() != 2) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); + if (getInputShapeAtPort(1).getRank() != 2) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getInputShapeAtPort(1).getRank(); } - if (getParentEdgeAt(2)->getShape().getRank() != 1) { - IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getParentEdgeAt(2)->getShape().getRank(); + if (getInputShapeAtPort(2).getRank() != 1) { + IE_THROW() << errorPrefix << "doesn't support 2nd input with rank: " << getInputShapeAtPort(2).getRank(); } - if (getChildEdgeAt(0)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); + if (getOutputShapeAtPort(0).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getOutputShapeAtPort(0).getRank(); } - if (getParentEdgeAt(1)->getShape().getStaticDims()[1] != 4) { + if (getInputShapeAtPort(1).getStaticDims()[1] != 4) { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" - << getParentEdgeAt(1)->getShape().getStaticDims()[0] << "," << getParentEdgeAt(1)->getShape().getStaticDims()[1] << "]"; + << getInputShapeAtPort(1).getStaticDims()[0] << "," << getInputShapeAtPort(1).getStaticDims()[1] << "]"; } - if 
(getParentEdgeAt(1)->getShape().getStaticDims()[0] != getParentEdgeAt(2)->getShape().getStaticDims()[0]) { + if (getInputShapeAtPort(1).getStaticDims()[0] != getInputShapeAtPort(2).getStaticDims()[0]) { IE_THROW() << errorPrefix << "has different sizes of inputs for proposals (" - << getParentEdgeAt(1)->getShape().getStaticDims()[0] << ") and indexes (" - << getParentEdgeAt(2)->getShape().getStaticDims()[0] << ")"; + << getInputShapeAtPort(1).getStaticDims()[0] << ") and indexes (" + << getInputShapeAtPort(2).getStaticDims()[0] << ")"; } } @@ -113,29 +117,24 @@ void MKLDNNROIAlignNode::initSupportedPrimitiveDescriptors() { outputPrec = inputPrec0 = Precision::FP32; } - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inputPrec0); - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outputPrec); - NodeConfig config; config.dynBatchSupport = false; config.inConfs.resize(3); config.outConfs.resize(1); - std::vector> supportedFormats { - {memory::format_tag::nchw, memory::format_tag::nchw}, - {memory::format_tag::nhwc, memory::format_tag::nhwc}, - {memory::format_tag::nChw16c, memory::format_tag::nChw16c}, - {memory::format_tag::nChw8c, memory::format_tag::nChw8c} + std::vector> supportedFormats { + {LayoutType::ncsp, LayoutType::ncsp}, + {LayoutType::nspc, LayoutType::nspc}, + {LayoutType::nCsp16c, LayoutType::nCsp16c}, + {LayoutType::nCsp8c, LayoutType::nCsp8c} }; for (auto fmts : supportedFormats) { - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, fmts.first); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), memory::data_type::f32, - memory::format_tag::nc); - config.inConfs[2].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(2)->getShape().getStaticDims(), memory::data_type::s32, - memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), outputDataType, fmts.second); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); + addSupportedPrimDesc({{fmts.first, inputPrec0}, + {LayoutType::ncsp, Precision::FP32}, + {LayoutType::ncsp, Precision::I32}}, + {{fmts.second, outputPrec}}, + impl_desc_type::unknown); } } @@ -155,8 +154,8 @@ struct MKLDNNROIAlignNode::ROIAlignExecute { } }; void MKLDNNROIAlignNode::execute(mkldnn::stream strm) { - auto inputPrec = getParentEdgeAt(0)->getMemory().GetDescriptor().data.data_type; - auto outputPrec = getChildEdgeAt(0)->getMemory().GetDescriptor().data.data_type; + auto inputPrec = getParentEdgeAt(0)->getMemory().GetDataType(); + auto outputPrec = getChildEdgeAt(0)->getMemory().GetDataType(); if (!((inputPrec == mkldnn_bf16 && outputPrec == mkldnn_bf16) || (inputPrec == mkldnn_f32 && outputPrec == mkldnn_f32))) IE_THROW() <<"ROIAlign doesn't support demanded precisions"; @@ -176,32 +175,37 @@ void MKLDNNROIAlignNode::executeSpecified() { auto &srcMemory1 = getParentEdgeAt(1)->getMemory(); auto &dstMemory = getChildEdgeAt(0)->getMemory(); - auto srcBlockDesc = srcMemory0.GetDescriptor().data.format_desc.blocking; - auto dstBlockDesc = dstMemory.GetDescriptor().data.format_desc.blocking; + auto srcBlockDesc = srcMemory0.GetDescWithType(); + auto dstBlockDesc = dstMemory.GetDescWithType(); + + auto isPlainFmt = srcBlockDesc->hasLayoutType(LayoutType::ncsp); + auto isNhwcFmt = srcBlockDesc->hasLayoutType(LayoutType::nspc); + auto isBlkFmt = srcBlockDesc->hasLayoutType(LayoutType::nCsp16c) || 
srcBlockDesc->hasLayoutType(LayoutType::nCsp8c); - int blockSize = srcBlockDesc.inner_nblks > 0 ? srcBlockDesc.inner_blks[0] : 1; - auto isPlainFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::ncsp); - auto isNhwcFmt = srcMemory0.GetDesc().hasLayoutType(LayoutType::nspc); + int blockSize = isBlkFmt ? srcBlockDesc->getBlockDims().back() : 1; const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); const auto *srcRoi = reinterpret_cast(getParentEdgeAt(1)->getMemoryPtr()->GetPtr()); const auto *srcRoiIdx = reinterpret_cast(getParentEdgeAt(2)->getMemoryPtr()->GetPtr()); auto *dst = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - auto nominalRoiCount = static_cast(srcMemory1.GetDims()[0]); + auto nominalRoiCount = static_cast(srcMemory1.getStaticDims()[0]); int realRois = 0; - auto inputDimVector = srcMemory0.GetDims(); + auto inputDimVector = srcMemory0.getStaticDims(); const int C = static_cast(inputDimVector[1]); const int H = static_cast(inputDimVector[2]); const int W = static_cast(inputDimVector[3]); const int binCount = pooledH * pooledW; - const int hInputStride = srcBlockDesc.strides[2]; - const int wInputStride = srcBlockDesc.strides[3]; - const int hOutputStride = dstBlockDesc.strides[2]; - const int wOutputStride = dstBlockDesc.strides[3]; - const int chPadding = srcMemory0.GetDescriptor().data.padded_dims[1]; + const size_t tailDimsOffset = (isNhwcFmt ? -1 : 0); + const auto &srcStrides = srcBlockDesc->getStrides(); + const auto &dstStrides = dstBlockDesc->getStrides(); + const int hInputStride = srcStrides[2 + tailDimsOffset]; + const int wInputStride = srcStrides[3 + tailDimsOffset]; + const int hOutputStride = dstStrides[2 + tailDimsOffset]; + const int wOutputStride = dstStrides[3 + tailDimsOffset]; + const int chPadding = blockSize * srcBlockDesc->getBlockDims()[1]; const int blockCount = chPadding / blockSize; for (; realRois < nominalRoiCount; realRois++) { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h index 9f3d31c478359d..4e6b835c95f8b7 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_align_node.h @@ -23,7 +23,7 @@ class MKLDNNROIAlignNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: int pooledH = 7; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp index 23fd252ae2ba38..5f5b2c8641c268 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.cpp @@ -306,8 +306,12 @@ struct jit_uni_roi_pooling_kernel_f32 : public jit_uni_roi_pooling_kernel, publi } }; -bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNROIPoolingNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto roiPooling = std::dynamic_pointer_cast(op); if (!roiPooling) { errorMessage = "Only opset2 
ROIPooling operation is supported"; @@ -354,21 +358,21 @@ void MKLDNNROIPoolingNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << "has incorrect number of output edges: " << getChildEdges().size(); - if (getParentEdgeAt(0)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getParentEdgeAt(0)->getShape().getRank(); + if (getInputShapeAtPort(0).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support 0th input with rank: " << getInputShapeAtPort(0).getRank(); } - if (getParentEdgeAt(1)->getShape().getRank() != 2) { - IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getParentEdgeAt(1)->getShape().getRank(); + if (getInputShapeAtPort(1).getRank() != 2) { + IE_THROW() << errorPrefix << "doesn't support 1st input with rank: " << getInputShapeAtPort(1).getRank(); } - if (getChildEdgeAt(0)->getShape().getRank() != 4) { - IE_THROW() << errorPrefix << "doesn't support output with rank: " << getChildEdgeAt(0)->getShape().getRank(); + if (getOutputShapeAtPort(0).getRank() != 4) { + IE_THROW() << errorPrefix << "doesn't support output with rank: " << getOutputShapeAtPort(0).getRank(); } - if (getParentEdgeAt(1)->getShape().getStaticDims()[1] != 5) { + if (getInputShapeAtPort(1).getStaticDims()[1] != 5) { IE_THROW() << errorPrefix << "has invalid shape on 1st input: [" - << getParentEdgeAt(1)->getShape().getStaticDims()[0] << "," << getParentEdgeAt(1)->getShape().getStaticDims()[1] << "]"; + << getInputShapeAtPort(1).getStaticDims()[0] << "," << getInputShapeAtPort(1).getStaticDims()[1] << "]"; } } @@ -383,25 +387,10 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { runtimePrecision = Precision::FP32; } - auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(runtimePrecision); - - src_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); - dst_data_size = MKLDNNExtensionUtils::sizeOfDataType(dataType); - - NodeConfig config; - config.dynBatchSupport = false; - config.inConfs.resize(2); - config.inConfs[0].constant = false; - config.inConfs[0].inPlace = -1; - config.inConfs[1].constant = false; - config.inConfs[1].inPlace = -1; - - config.outConfs.resize(1); - config.outConfs[0].constant = false; - config.outConfs[0].inPlace = -1; + src_data_size = dst_data_size = runtimePrecision.size(); - auto parentDims = getParentEdgeAt(0)->getShape().getStaticDims(); - auto format = mayiuse(avx512_common) ? memory::format_tag::nChw16c : memory::format_tag::nChw8c; + auto parentDims = getInputShapeAtPort(0).getStaticDims(); + auto format = mayiuse(avx512_common) ? 
LayoutType::nCsp16c : LayoutType::nCsp8c; impl_desc_type impl_type; if (mayiuse(cpu::x64::avx512_common)) { impl_type = impl_desc_type::jit_avx512; @@ -413,10 +402,10 @@ void MKLDNNROIPoolingNode::initSupportedPrimitiveDescriptors() { impl_type = impl_desc_type::ref; } - config.inConfs[0].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), dataType, format); - config.inConfs[1].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(1)->getShape().getStaticDims(), dataType, memory::format_tag::nc); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, format); - supportedPrimitiveDescriptors.push_back({config, impl_type}); + addSupportedPrimDesc({{format, runtimePrecision}, + {LayoutType::ncsp, runtimePrecision}}, + {{format, runtimePrecision}}, + impl_type); } void MKLDNNROIPoolingNode::createPrimitive() { @@ -428,8 +417,8 @@ void MKLDNNROIPoolingNode::createPrimitive() { const int simd_w = mayiuse(cpu::x64::avx512_common) ? 16 : 8; jpp.c_block = simd_w; - auto inDims = config.inConfs[0].desc->getShape().getStaticDims(); - auto outDims = config.outConfs[0].desc->getShape().getStaticDims(); + auto inDims = getParentEdgeAt(0)->getMemory().getStaticDims(); + auto outDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims(); jpp.mb = outDims[0]; jpp.c = rnd_up(inDims[1], simd_w); @@ -481,9 +470,9 @@ void MKLDNNROIPoolingNode::execute() { IE_THROW() << "CPU ROI Pooling node with name '" << getName() << "' doesn't have primitive descriptors."; auto config = selectedPrimitiveDescriptor->getConfig(); - auto src_strides = srcMemory0.GetDescWithType().getStrides(); - auto dst_strides = dstMemory.GetDescWithType().getStrides(); - size_t src_roi_step = srcMemory1.GetDescWithType().getStrides()[0]; + auto src_strides = srcMemory0.GetDescWithType()->getStrides(); + auto dst_strides = dstMemory.GetDescWithType()->getStrides(); + size_t src_roi_step = srcMemory1.GetDescWithType()->getStrides()[0]; int cb_work = impl::utils::div_up(jpp.nb_c, jpp.nb_c_blocking); int MB = jpp.mb; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h index 512616c60e486c..78f4dc146ffb66 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roi_pooling_node.h @@ -75,7 +75,7 @@ class MKLDNNROIPoolingNode : public MKLDNNNode { bool created() const override; private: - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; template void execute(); template struct ROIPoolingExecute; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp index 410051c7be4b78..8150ae9b378a1f 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.cpp @@ -19,8 +19,12 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNRollNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNRollNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto 
interp = std::dynamic_pointer_cast(op); if (!interp) { errorMessage = "Only opset7 Roll operation is supported"; @@ -90,33 +94,18 @@ void MKLDNNRollNode::initSupportedPrimitiveDescriptors() { InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - - auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); - - NodeConfig config; - config.dynBatchSupport = false; - - auto createDataConfig = [](const Shape& dims, memory::data_type dataType) -> PortConfig { - PortConfig dataConfig; - dataConfig.inPlace = -1; - dataConfig.constant = false; - dataConfig.desc = MKLDNNPlugin::make_unique(dims.getStaticDims(), dataType, MKLDNNMemory::GetPlainFormatByRank(dims.getRank())); - return dataConfig; - }; - - config.inConfs.push_back(createDataConfig(getParentEdgeAt(0)->getShape(), dataType)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(1)->getShape(), memory::data_type::s32)); - config.inConfs.push_back(createDataConfig(getParentEdgeAt(2)->getShape(), memory::data_type::s32)); - - config.outConfs.push_back(createDataConfig(getChildEdgeAt(0)->getShape(), dataType)); + auto srcDims = getInputShapeAtPort(0).getStaticDims(); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::ref}); + addSupportedPrimDesc({{LayoutType::ncsp, precision}, + {LayoutType::ncsp, InferenceEngine::Precision::I32}, + {LayoutType::ncsp, InferenceEngine::Precision::I32}}, + {{LayoutType::ncsp, precision}}, + impl_desc_type::ref); } void MKLDNNRollNode::execute(mkldnn::stream strm) { - const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().GetDesc().getPrecision(); + const auto dataPrecision = getParentEdgeAt(DATA_INDEX)->getMemory().getDesc().getPrecision(); const auto& dataTypeSize = dataPrecision.size(); switch (dataTypeSize) { case sizeof(PrecisionTrait::value_type): { @@ -155,7 +144,7 @@ void MKLDNNRollNode::rollImpl() { auto *output = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); std::vector shiftsVector(numOfDims, 0); - const size_t axesLength = axesEdge->getShape().getStaticDims()[0]; + const size_t axesLength = axesEdge->getMemory().getStaticDims()[0]; for (size_t dim = 0; dim < axesLength ; ++dim) { int32_t currentAxis = axes[dim] < 0 ? 
axes[dim] + numOfDims : axes[dim]; int32_t shiftSum = shiftsVector[currentAxis] + shifts[dim]; @@ -170,7 +159,7 @@ void MKLDNNRollNode::rollImpl() { const size_t elementSize = sizeof(DataType); const size_t nIterations = totalElements / blockSize; - const auto strides = dataEdge->getMemory().GetDescWithType().getStrides(); + const auto strides = dataEdge->getMemory().GetDescWithType()->getStrides(); parallel_for(nIterations, [&](size_t iter) { size_t start = iter * blockSize; size_t leftBlockStartOffset = start; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h index da597d4d9819ec..bb04dcd86fcd2c 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_roll_node.h @@ -20,7 +20,7 @@ class MKLDNNRollNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: size_t calculateShiftOffset(size_t dataOffset, size_t dimShift, size_t segmentSize, size_t dimSize); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp index af7b36dd7f361b..1bce5ea3ea9425 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.cpp @@ -19,8 +19,12 @@ using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNScatterUpdateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNScatterUpdateNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto scatterElemUpd = std::dynamic_pointer_cast(op); const auto scatterUpd = std::dynamic_pointer_cast(op); const auto scatterNdUpd = std::dynamic_pointer_cast(op); @@ -52,9 +56,9 @@ void MKLDNNScatterUpdateNode::getSupportedDescriptors() { if (getChildEdges().empty()) IE_THROW() << errorPrefix << " has incorrect number of output edges"; - if (getParentEdgeAt(DATA_ID)->getShape().getRank() < 1 || - getParentEdgeAt(INDICES_ID)->getShape().getRank() < 1 || - getParentEdgeAt(UPDATE_ID)->getShape().getRank() < 1) { + if (getInputShapeAtPort(DATA_ID).getRank() < 1 || + getInputShapeAtPort(INDICES_ID).getRank() < 1 || + getInputShapeAtPort(UPDATE_ID).getRank() < 1) { IE_THROW() << errorPrefix << " do not support scalar input"; } @@ -77,10 +81,10 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); - auto indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); - auto updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); - auto dstDataDim = getChildEdgeAt(0)->getShape().getStaticDims(); + auto srcDataDim = getInputShapeAtPort(DATA_ID).getStaticDims(); + auto indicesDim = getInputShapeAtPort(INDICES_ID).getStaticDims(); + auto updateDim = getInputShapeAtPort(UPDATE_ID).getStaticDims(); + auto dstDataDim = getOutputShapeAtPort(0).getStaticDims(); size_t srcRank = 
srcDataDim.size(); size_t indicesRank = indicesDim.size(); @@ -157,7 +161,6 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { indicesPrec = Precision::I32; indicesSize = 4; } - indicesType = MKLDNNExtensionUtils::IEPrecisionToDataType(indicesPrec); if (axisRelaxed) { axisPrec = getOriginalInputPrecisionAtPort(AXIS_ID); @@ -173,8 +176,7 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { } dataPrec = getOriginalInputPrecisionAtPort(DATA_ID); - auto dataType = MKLDNNExtensionUtils::IEPrecisionToDataType(dataPrec); - dataSize = MKLDNNExtensionUtils::sizeOfDataType(dataType); + dataSize = dataPrec.size(); bool canBeInplace = getParentEdgeAt(DATA_ID)->getParent()->getChildEdges().size() == 1 && !getParentEdgeAt(DATA_ID)->getParent()->isConstant(); @@ -200,23 +202,12 @@ void MKLDNNScatterUpdateNode::initSupportedPrimitiveDescriptors() { config.inConfs[AXIS_ID].inPlace = -1; } - auto pushDesc = [&](memory::format_tag inFormat, memory::format_tag idxFormat, memory::format_tag updateFormat, memory::format_tag outFormat) { - config.inConfs[DATA_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(DATA_ID)->getShape().getStaticDims(), dataType, inFormat); - config.inConfs[INDICES_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(), indicesType, - idxFormat); - config.inConfs[UPDATE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(), dataType, - updateFormat); - if (axisRelaxed) - config.inConfs[AXIS_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(AXIS_ID)->getShape().getStaticDims(), - MKLDNNExtensionUtils::IEPrecisionToDataType(axisPrec), memory::format_tag::x); - config.outConfs[0].desc = MKLDNNPlugin::make_unique(getChildEdgeAt(0)->getShape().getStaticDims(), dataType, outFormat); - supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown}); - }; - - pushDesc(MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(DATA_ID)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(INDICES_ID)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getParentEdgeAt(UPDATE_ID)->getShape().getRank()), - MKLDNNMemory::GetPlainFormatByRank(getChildEdgeAt(0)->getShape().getRank())); + std::vector inPortConfig{{LayoutType::ncsp, dataPrec}, {LayoutType::ncsp, indicesPrec}, {LayoutType::ncsp, dataPrec}}; + if (axisRelaxed) + inPortConfig.emplace_back(LayoutType::ncsp, axisPrec); + addSupportedPrimDesc(inPortConfig, + {{LayoutType::ncsp, dataPrec}}, + impl_desc_type::unknown); } void MKLDNNScatterUpdateNode::createPrimitive() { @@ -274,14 +265,13 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { uint8_t *indicesPtr = reinterpret_cast(indicesMemPtr->GetPtr()); uint8_t *updatePtr = reinterpret_cast(updateMemPtr->GetPtr()); - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); size_t srcRank = srcDataDim.size(); int axis = 0; if (axisRelaxed) { auto &axisMemPtr = getParentEdgeAt(AXIS_ID)->getMemoryPtr(); - uint8_t *axisPtr = reinterpret_cast(axisMemPtr->GetData()) + - axisMemPtr->GetDescriptor().data.offset0 * axisSize; + uint8_t *axisPtr = reinterpret_cast(axisMemPtr->GetPtr()); if (axisSize == 4) { auto *axisPtr32 = reinterpret_cast(axisPtr); axis = *axisPtr32; @@ 
-311,8 +301,8 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { }); if (scatterUpdateMode == ScatterUpdateMode::ScatterUpdate) { - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getMemory().getStaticDims(); size_t indicesRank = indicesDim.size(); size_t updateRank = updateDim.size(); SizeVector expectUpdateShape = {}; @@ -372,9 +362,9 @@ void MKLDNNScatterUpdateNode::execute(mkldnn::stream strm) { // and indices tensor of shape [i_0, i_1, ..., i_k]. // Updates tensor shape should be [d_0, d_1, ... d_(axis - 1), i_0, i_1, ..., i_k, d_(axis + 1), ..., d_n]. void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getMemory().getStaticDims(); size_t indicesRank = indicesDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); @@ -405,8 +395,8 @@ void MKLDNNScatterUpdateNode::scatterUpdate(uint8_t *indices, uint8_t *update, i // k is indices.shape[-1] and should not be greater than rank of input, q is rank of indicies. // updates is a (q-1)-dimension tensor of replacement-slice-values void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); size_t indicesRank = indicesDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); @@ -435,9 +425,9 @@ void MKLDNNScatterUpdateNode::scatterNDUpdate(uint8_t *indices, uint8_t *update, // output[i][indices[i][j][k]][k] = updates[i][j][k] if axis = 1, // output[i][j][indices[i][j][k]] = updates[i][j][k] if axis = 2. 
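// Illustrative sketch only (not part of this patch): the 1-D special case of the
// ScatterElementsUpdate rule documented above, i.e. output[indices[i]] = updates[i]
// along the single axis. The helper below is hypothetical, named purely for this
// example, and assumes <vector> and <cstdint> are available in this translation unit.
static void scatterElements1DExample(std::vector<float>& data,
                                     const std::vector<int32_t>& indices,
                                     const std::vector<float>& updates) {
    // For every position i, overwrite the element of 'data' addressed by indices[i]
    // with updates[i]; for higher-rank tensors the same rule is applied element-wise
    // along the chosen axis, as in the index formulas above.
    for (size_t i = 0; i < indices.size(); ++i)
        data[indices[i]] = updates[i];
}
// Example: data = {1, 2, 3, 4}, indices = {3, 1}, updates = {9, 8}  ->  data = {1, 8, 3, 9}.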
void MKLDNNScatterUpdateNode::scatterElementsUpdate(uint8_t *indices, uint8_t *update, int axis, uint8_t *dstData) { - SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getShape().getStaticDims(); - SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getShape().getStaticDims(); - SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getShape().getStaticDims(); + SizeVector srcDataDim = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims(); + SizeVector updateDim = getParentEdgeAt(UPDATE_ID)->getMemory().getStaticDims(); + SizeVector indicesDim = getParentEdgeAt(INDICES_ID)->getMemory().getStaticDims(); size_t updateRank = updateDim.size(); std::vector srcBlockND = getBlockND(srcDataDim); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h index 41519bc6346c5a..7fc9d96818fa03 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_scatter_update_node.h @@ -31,7 +31,7 @@ class MKLDNNScatterUpdateNode : public MKLDNNNode { return false; } - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: void scatterUpdate(uint8_t *indicesPtr, uint8_t *updatePtr, int axis, uint8_t *dstDataPtr); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp index 093ee7e82557b4..dc6001ad74aa73 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.cpp @@ -15,8 +15,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNSelectNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNSelectNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto select = std::dynamic_pointer_cast(op); if (!select) { errorMessage = "Only opset1 Select operation is supported"; @@ -180,8 +184,8 @@ void MKLDNNSelectNode::execute_impl() { } void MKLDNNSelectNode::execute(mkldnn::stream strm) { - const size_t condPrecSize = getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().size(); - const size_t inputsPrecSize = getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().size(); + const size_t condPrecSize = getParentEdgeAt(CONDITION)->getMemory().getDesc().getPrecision().size(); + const size_t inputsPrecSize = getParentEdgeAt(THEN)->getMemory().getDesc().getPrecision().size(); switch (condPrecSize) { case 1: { @@ -192,7 +196,7 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); + + std::string(getParentEdgeAt(THEN)->getMemory().getDesc().getPrecision().name()); } break; } @@ -204,13 +208,13 @@ void MKLDNNSelectNode::execute(mkldnn::stream strm) { case 8: { execute_impl(); break; } default: IE_THROW() << "Select layer doesn't support 'Then' and 'Else' inputs' precision: " - + std::string(getParentEdgeAt(THEN)->getMemory().GetDesc().getPrecision().name()); + + 
std::string(getParentEdgeAt(THEN)->getMemory().getDesc().getPrecision().name()); } break; } default: { IE_THROW() << "Select layer doesn't support 'Condition' inputs' precision: " - + std::string(getParentEdgeAt(CONDITION)->getMemory().GetDesc().getPrecision().name()); + + std::string(getParentEdgeAt(CONDITION)->getMemory().getDesc().getPrecision().name()); } } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.h index 1fc6adebb60e27..f6e84a34de9410 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_select_node.h @@ -22,7 +22,7 @@ class MKLDNNSelectNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: enum { CONDITION, THEN, ELSE, numOfInputs }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp index f83ddfed0d0a67..370dc9296bc329 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.cpp @@ -23,8 +23,12 @@ using namespace InferenceEngine; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; -bool MKLDNNShuffleChannelsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNShuffleChannelsNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto shuffleChannels = std::dynamic_pointer_cast(op); if (!shuffleChannels) { errorMessage = "Only opset1 ShuffleChannels operation is supported"; @@ -127,8 +131,8 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_SHCH_ERROR << "has unidentified preferable primitive descriptor"; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + const bool isBlocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c); int batchRank = axis_; int spatialRank = dataRank_ - axis_ - 1; @@ -160,9 +164,9 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { const int channelDim = 1; if (isBlocked) { const auto blkDesc = getParentEdgeAt(0)->getMemory().GetDescWithType(); - size_t blkSize = blkDesc.getBlockDims().back(); + size_t blkSize = blkDesc->getBlockDims().back(); size_t CB = div_up(inShape_[1], blkSize); - SizeVector srcBlockedDims = blkDesc.getBlockDims(); + SizeVector srcBlockedDims = blkDesc->getBlockDims(); if (axis_ > channelDim) { // axis on spatial for (int i = 0; i < batchRank; i++) { params.order[i] = i; @@ -181,7 +185,7 @@ void MKLDNNShuffleChannelsNode::createPrimitive() { params.order[2] = 2; params.src_block_dims[2] = spatialShapeSize; } - } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + } else if 
(getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { if (axis_ == channelDim) { // axis on channel params.order[0] = 0; params.src_block_dims[0] = inShape_[0]; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.h index 7206b1ae3637bc..3b5c52c6b3fd5b 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_shuffle_channels_node.h @@ -24,7 +24,7 @@ class MKLDNNShuffleChannelsNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: ngraph::Shape inShape_; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp index 9fe05e475fc1dc..21c1467507907a 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.cpp @@ -7,21 +7,37 @@ #include #include #include -#include +#include +#include +#include "memory_desc/dnnl_blocked_memory_desc.h" using namespace mkldnn; using namespace MKLDNNPlugin; using namespace InferenceEngine; +bool MKLDNNSoftMaxNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (!std::dynamic_pointer_cast(op)) { + errorMessage = "Only opset1 Softmax operation is supported"; + return false; + } + } catch (...) 
{ + return false; + } + return true; +} + MKLDNNSoftMaxNode::MKLDNNSoftMaxNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { - const auto softmaxOp = ngraph::as_type_ptr(op); - if (softmaxOp) { - axis = softmaxOp->get_axis(); - } else { - IE_THROW(NotImplemented) - << "CPU Softmax node doesn't support ngraph operation " << op->get_type_name() << " with name " << op->get_friendly_name(); + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + IE_THROW(NotImplemented) << errorMessage; } + axis = ngraph::as_type_ptr(op)->get_axis(); } void MKLDNNSoftMaxNode::getSupportedDescriptors() { @@ -38,20 +54,19 @@ void MKLDNNSoftMaxNode::getSupportedDescriptors() { if (!getChildEdges().size()) IE_THROW() << "Incorrect number of output edges for layer " << getName(); - if (getParentEdgeAt(0)->getShape().getRank() == 3) { - MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(getParentEdgeAt(0)->getShape().getStaticDims(), inputDataType, - memory::format_tag::abc); - createDescriptor({in_candidate.get()}, {}); + const auto &inShape = getInputShapeAtPort(0); + if (inShape.getRank() == 3) { + auto in_candidate = std::make_shared(inShape, inputDataType, memory::format_tag::abc); + createDescriptor({in_candidate}, {}); } - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getShape())) { - const auto dims = getParentEdgeAt(0)->getShape().getStaticDims(); - if (MKLDNNMemoryDesc(dims, inputDataType, format).blocksExtended()) - continue; + for (auto format : getAvailableFormatsForDims(inShape)) { + auto in_candidate = std::make_shared(inShape, inputDataType, format); - MemoryDescPtr in_candidate = MKLDNNPlugin::make_unique(dims, inputDataType, format); + if (in_candidate->blocksExtended()) + continue; - createDescriptor({in_candidate.get()}, {}); + createDescriptor({in_candidate}, {}); } } @@ -59,7 +74,7 @@ void MKLDNNSoftMaxNode::createPrimitive() { if (prim) return; - memory::desc in_candidate = getParentEdgeAt(0)->getMemory().GetDescriptor(); + auto in_candidate = getParentEdgeAt(0)->getMemory().GetDescWithType()->getDnnlDesc(); MKLDNNDescriptor desc(std::shared_ptr( new softmax_forward::desc(prop_kind::forward_scoring, in_candidate, axis))); descs[0] = desc; @@ -94,34 +109,34 @@ bool MKLDNNSoftMaxNode::created() const { return getType() == Softmax; } - void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { - auto selected_pd = getSelectedPrimitiveDescriptor(); - if (selected_pd == nullptr) - IE_THROW() << "Preferable primitive descriptor is not set."; - auto config = selected_pd->getConfig(); - if (isConfigDefined(config)) - return; - - if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || - (config.inConfs[0].desc->isDefined() && - config.outConfs[0].desc->isDefined() && !config.inConfs[0].desc->isCompatible(*config.outConfs[0].desc))) - IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; - - if (config.inConfs[0].desc->isDefined()) { - config.outConfs[0].desc = config.inConfs[0].desc->clone(); - } else if (config.outConfs[0].desc->isDefined()) { - config.inConfs[0].desc = config.outConfs[0].desc->clone(); - } else { - config.inConfs[0].desc = getDefinedInputDesc(config, 0); - config.outConfs[0].desc = config.inConfs[0].desc->clone(); - } - - initDescriptor(config); - } - -void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, - const std::vector &outputDesc) { - MKLDNNMemoryDesc in_candidate = 
MemoryDescUtils::convertToMKLDNNMemoryDesc(*inputDesc[0]); +void MKLDNNSoftMaxNode::initOptimalPrimitiveDescriptor() { + auto selected_pd = getSelectedPrimitiveDescriptor(); + if (selected_pd == nullptr) + IE_THROW() << "Preferable primitive descriptor is not set."; + auto config = selected_pd->getConfig(); + if (isConfigDefined(config)) + return; + + if (config.inConfs.size() != 1 || config.outConfs.size() != 1 || + (config.inConfs[0].desc->isDefined() && + config.outConfs[0].desc->isDefined() && !config.inConfs[0].desc->isCompatible(*config.outConfs[0].desc))) + IE_THROW() << "Layer " << getName() << " has incorrect selected config!"; + + if (config.inConfs[0].desc->isDefined()) { + config.outConfs[0].desc = config.inConfs[0].desc; + } else if (config.outConfs[0].desc->isDefined()) { + config.inConfs[0].desc = config.outConfs[0].desc; + } else { + config.inConfs[0].desc = getDefinedInputDesc(config, 0); + config.outConfs[0].desc = config.inConfs[0].desc; + } + + initDescriptor(config); +} + +void MKLDNNSoftMaxNode::createDescriptor(const std::vector &inputDesc, + const std::vector &outputDesc) { + auto in_candidate = MemoryDescUtils::convertToDnnlMemoryDesc(inputDesc[0])->getDnnlDesc(); MKLDNNDescriptor desc(std::shared_ptr( new softmax_forward::desc(prop_kind::forward_scoring, in_candidate, axis))); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h index fd200cdb1457fa..81944c829f12c2 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_softmax_node.h @@ -17,12 +17,14 @@ class MKLDNNSoftMaxNode : public MKLDNNNode { MKLDNNSoftMaxNode(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache); void initOptimalPrimitiveDescriptor() override; - void createDescriptor(const std::vector& inputDesc, - const std::vector& outputDesc) override; + void createDescriptor(const std::vector& inputDesc, + const std::vector& outputDesc) override; void getSupportedDescriptors() override; void createPrimitive() override; bool created() const override; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + private: size_t axis = 0; }; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp index 1861799f97c32b..486c1bc4a411c5 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.cpp @@ -16,8 +16,12 @@ using namespace MKLDNNPlugin; using namespace InferenceEngine; -bool MKLDNNSpaceToBatchNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNSpaceToBatchNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto spaceToBatch = std::dynamic_pointer_cast(op); if (!spaceToBatch) { errorMessage = "Only opset2 SpaceToBatch operation is supported"; @@ -112,15 +116,15 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { const auto *srcData = reinterpret_cast(getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); auto *dstData = reinterpret_cast(getChildEdgeAt(0)->getMemoryPtr()->GetPtr()); - const bool blocked = 
getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c); + const bool blocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c); const auto dimsSize = inDims.size(); auto inShape5D = getShape5D(outDims); auto outShape5D = getShape5D(inDims); auto blockShape = getShape5D(blockShapeIn); - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { inShape5D.push_back(inShape5D[1]); inShape5D.erase(inShape5D.begin() + 1); outShape5D.push_back(outShape5D[1]); @@ -129,10 +133,10 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { blockShape.erase(blockShape.begin() + 1); } - const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + const auto outBlkDims = getChildEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims(); const size_t blockSize = blocked ? outBlkDims.back() : 1lu; const size_t blockCountInput = outBlkDims[1]; - const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims()[1]; + const size_t blockCountOutput = getParentEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims()[1]; const auto blockRemainder = inShape5D[1] % blockSize; const auto lastBlock = blockRemainder == 0 ? blockSize : blockRemainder; @@ -173,7 +177,7 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { oAdd[2] = dimsSize == 5 ? bIdx % blockShapeIn[2] - padsBeginIn[2] : 0lu; bIdx = dimsSize == 5 ? bIdx / blockShapeIn[2] : bIdx; oAdd[1] = bIdx % blockShapeIn[1] - padsBeginIn[1]; - if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { oAdd.push_back(oAdd[1]); oAdd.erase(oAdd.begin() + 1); } @@ -227,12 +231,12 @@ void MKLDNNSpaceToBatchNode::SpaceToBatchKernel() { } void MKLDNNSpaceToBatchNode::execute(mkldnn::stream strm) { - switch (getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().size()) { + switch (getParentEdgeAt(0)->getMemory().getDesc().getPrecision().size()) { case 1: SpaceToBatchKernel::value_type>(); break; case 2: SpaceToBatchKernel::value_type>(); break; case 4: SpaceToBatchKernel::value_type>(); break; default: - IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getMemory().GetDesc().getPrecision().name()) + IE_THROW() << "SpaceToBatch layer does not support precision '" + std::string(getParentEdgeAt(0)->getMemory().getDesc().getPrecision().name()) + "'"; } } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.h index f5c9fd1ec9d05b..128e1b71226339 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_batch_node.h @@ -22,7 +22,7 @@ class MKLDNNSpaceToBatchNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: InferenceEngine::SizeVector inDims; diff --git 
a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp index 25003088139af9..7097a88df5d324 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.cpp @@ -21,8 +21,12 @@ using namespace mkldnn; using namespace mkldnn::impl; using namespace mkldnn::impl::cpu::x64; -bool MKLDNNSpaceToDepthNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNSpaceToDepthNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } const auto spaceToDepth = std::dynamic_pointer_cast(op); if (!spaceToDepth) { errorMessage = "Only opset1 SpaceToDepth operation is supported"; @@ -98,7 +102,7 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { return; InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - auto srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); + auto srcDims = getInputShapeAtPort(0).getStaticDims(); const size_t nDims = srcDims.size(); impl_desc_type impl_type; @@ -138,8 +142,8 @@ void MKLDNNSpaceToDepthNode::initSupportedPrimitiveDescriptors() { auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes); for (auto itr = range.first; itr != range.second; ++itr) { - config.inConfs[0].desc = itr->second->createUniqueDesc(precision, getParentEdgeAt(0)->getShape().getStaticDims()); - config.outConfs[0].desc = itr->second->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims()); + config.inConfs[0].desc = itr->second->createSharedDesc(precision, getInputShapeAtPort(0)); + config.outConfs[0].desc = itr->second->createSharedDesc(precision, getOutputShapeAtPort(0)); supportedPrimitiveDescriptors.emplace_back(config, impl_type); } } @@ -154,13 +158,13 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "has unidentified preferable primitive descriptor"; - SizeVector srcDims = getParentEdgeAt(0)->getShape().getStaticDims(); - SizeVector dstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + VectorDims srcDims = srcMemPtr->getStaticDims(); + VectorDims dstDims = dstMemPtr->getStaticDims(); size_t nDims = srcDims.size(); const size_t nSpatialDims = nDims - 2; - const bool isBlocked = getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) || - getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c); + const bool isBlocked = getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) || + getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c); const size_t reshapedRank = nDims + nSpatialDims + static_cast(isBlocked) + static_cast(isBlocked && mode == Mode::DEPTH_FIRST); const size_t lastIdx = reshapedRank - 1; size_t firstSpatialOrder = 2; @@ -191,8 +195,8 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { }; if (isBlocked) { - SizeVector srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); - SizeVector dstBlockedDims = getChildEdgeAt(0)->getMemory().GetDescWithType().getBlockDims(); + VectorDims srcBlockedDims = getParentEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims(); + VectorDims dstBlockedDims = 
getChildEdgeAt(0)->getMemory().GetDescWithType()->getBlockDims(); size_t orderShiftForBlocks, orderShiftForDims; if (mode == Mode::BLOCKS_FIRST) { @@ -219,7 +223,7 @@ void MKLDNNSpaceToDepthNode::createPrimitive() { } reshapeAndSetPermOrder(orderShiftForBlocks, orderShiftForDims, firstSpatialOrder, dstBlockedDims); - } else if (getParentEdgeAt(0)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc)) { + } else if (getParentEdgeAt(0)->getMemory().getDesc().hasLayoutType(LayoutType::nspc)) { srcDims.push_back(srcDims[1]); dstDims.push_back(dstDims[1]); srcDims.erase(srcDims.begin() + 1); diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.h index b7639b90a18029..11a39670654140 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_space_to_depth_node.h @@ -21,7 +21,7 @@ class MKLDNNSpaceToDepthNode : public MKLDNNNode { void execute(mkldnn::stream strm) override; bool created() const override; - static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; private: enum Mode { diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp index a95bd0c4f758e3..b815b02111abf8 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_split_node.cpp @@ -10,7 +10,8 @@ #include #include #include "utils/general_utils.h" -#include +#include +#include "utils/ngraph_utils.hpp" #define THROW_ERROR IE_THROW() << "Split layer with name '" << getName() <<"' " @@ -20,6 +21,11 @@ using namespace InferenceEngine; bool MKLDNNSplitNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + if (!MKLDNNPlugin::one_of(op->get_type_info(), ngraph::op::v1::Split::type_info, ngraph::op::v1::VariadicSplit::type_info)) { errorMessage = "Only opset1 Split and VariadicSplit operations are supported"; return false; @@ -75,9 +81,9 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { if (!supportedPrimitiveDescriptors.empty()) return; - auto srcShape = getParentEdgeAt(0)->getShape(); + auto srcShape = getInputShapeAtPort(0); auto axis_size = 0; - auto dstFirstDims = getChildEdgeAt(0)->getShape().getStaticDims(); + auto dstFirstDims = getOutputShapeAtPort(0).getStaticDims(); for (size_t i = 0; i < outputShapes.size(); i++) { auto o_Dims = outputShapes[i].getStaticDims(); if (dstFirstDims.size() != o_Dims.size()) { @@ -139,12 +145,12 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, srcShape.getStaticDims())); + config.inConfs[0].desc = std::make_shared(itr->second->createDesc(inpPrecision, srcShape)); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{1}); + config.inConfs[1].desc = std::make_shared(axisPrecision, Shape(SizeVector {1})); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = 
MKLDNNPlugin::make_unique(axisPrecision, SizeVector{outputShapes.size()}); + config.inConfs[2].desc = std::make_shared(axisPrecision, Shape(SizeVector{outputShapes.size()})); config.inConfs[2].constant = true; } @@ -153,7 +159,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = MKLDNNPlugin::make_unique(itr->second->createDesc(inpPrecision, outputShapes[i].getStaticDims())); + config.outConfs[i].desc = std::make_shared(itr->second->createDesc(inpPrecision, outputShapes[i])); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); @@ -171,7 +177,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { for (auto refPdIndex : pdIndexesToReuse) { const auto& refConfig = supportedPrimitiveDescriptors[refPdIndex].getConfig(); auto config = refConfig; - const auto inBlockingDesc = refConfig.inConfs[0].desc->as(); + const auto inBlockingDesc = refConfig.inConfs[0].desc->as(); const auto& order = inBlockingDesc->getOrder(); const auto& blkDims = inBlockingDesc->getBlockDims(); auto numOfDim = blkDims.size(); @@ -189,15 +195,15 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { } } - config.inConfs[0].desc = MKLDNNPlugin::make_unique(inpPrecision, srcShape.getStaticDims(), blkDims, order, offset, offsets, strides); + config.inConfs[0].desc = std::make_shared(inpPrecision, srcShape, blkDims, order, offset, offsets, strides); for (size_t i = 0; i < outputShapes.size(); i++) { - auto outBlockingDesc = refConfig.outConfs[i].desc->as(); + auto outBlockingDesc = refConfig.outConfs[i].desc->as(); const auto& outBlkDims = outBlockingDesc->getBlockDims(); const auto& dims = outBlockingDesc->getShape().getStaticDims(); config.outConfs[i].inPlace = 0; - config.outConfs[i].desc = MKLDNNPlugin::make_unique(outPrecision, dims, outBlkDims, order, offset, offsets, strides); + config.outConfs[i].desc = std::make_shared(outPrecision, Shape(dims), outBlkDims, order, offset, offsets, strides); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); } @@ -210,12 +216,12 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { config.inConfs.resize(INPUTS_NUM); config.inConfs[0].inPlace = -1; config.inConfs[0].constant = false; - config.inConfs[0].desc = creatorsMap.at(LayoutType::nspc)->createUniqueDesc(inpPrecision, srcShape.getStaticDims()); + config.inConfs[0].desc = creatorsMap.at(LayoutType::nspc)->createSharedDesc(inpPrecision, srcShape); config.inConfs[1].inPlace = -1; config.inConfs[1].constant = true; - config.inConfs[1].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{1}); + config.inConfs[1].desc = std::make_shared(axisPrecision, Shape(SizeVector{1})); if (INPUTS_NUM == 3) { - config.inConfs[2].desc = MKLDNNPlugin::make_unique(axisPrecision, SizeVector{outputShapes.size()}); + config.inConfs[2].desc = std::make_shared(axisPrecision, Shape(SizeVector{outputShapes.size()})); config.inConfs[2].constant = true; } config.outConfs.resize(outputShapes.size()); @@ -223,7 +229,7 @@ void MKLDNNSplitNode::initSupportedPrimitiveDescriptors() { for (size_t i = 0; i < outputShapes.size(); i++) { config.outConfs[i].inPlace = -1; config.outConfs[i].constant = false; - config.outConfs[i].desc = creatorsMap.at(LayoutType::ncsp)->createUniqueDesc(inpPrecision, outputShapes[i].getStaticDims()); + config.outConfs[i].desc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(inpPrecision, 
outputShapes[i]); } supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref); } @@ -240,13 +246,13 @@ void MKLDNNSplitNode::createPrimitive() { if (getSelectedPrimitiveDescriptor() == nullptr) THROW_ERROR << "Preferable primitive descriptor is not set."; - auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->GetDesc(); + auto& memDesc = getParentEdgeAt(0)->getMemoryPtr()->getDesc(); canUseOptimizedNspc2Ncsp = false; if (axis == 1 && one_of(memDesc.getShape().getRank(), 4, 5) && memDesc.hasLayoutType(LayoutType::nspc)) { canUseOptimizedNspc2Ncsp = true; for (size_t i = 0; i < getChildEdges().size(); i++) { - auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->GetDesc(); + auto& childMemDesc = getChildEdgeAt(i)->getMemoryPtr()->getDesc(); if (!childMemDesc.hasLayoutType(LayoutType::ncsp)) canUseOptimizedNspc2Ncsp = false; } @@ -274,7 +280,7 @@ void MKLDNNSplitNode::execute(mkldnn::stream strm) { } uint8_t* srcData = reinterpret_cast(this->getParentEdgeAt(0)->getMemoryPtr()->GetPtr()); - size_t batch = this->getParentEdgeAt(0)->getShape().getStaticDims()[0]; + size_t batch = getParentEdgesAtPort(0)[0]->getMemory().getStaticDims()[0]; if (batch != MB) optimizedParams.countStrides = optimizedParams.countStrides / batch * MB; @@ -320,33 +326,34 @@ void MKLDNNSplitNode::initOptimalPrimitiveDescriptor() { if (!parentConfig.desc->isDefined() && parentConfig.inPlace >= 0) getParentEdgeAt(i)->getParent()->initOptimalPrimitiveDescriptor(); if (parentConfig.desc->isDefined() && parentConfig.desc->isCompatible(*config.inConfs[i].desc)) { - config.inConfs[i].desc = parentConfig.desc->clone(); + config.inConfs[i].desc = parentConfig.desc; continue; } } } // reset undefined offsets - config.inConfs[i].desc = MemoryDescUtils::resetOffset(config.inConfs[i].desc.get()); + config.inConfs[i].desc = MemoryDescUtils::cloneWithDefaultStridesAndOffset(*config.inConfs[i].desc); } if (config.outConfs.size() != outputShapes.size()) THROW_ERROR << "has invalid config"; - auto firstInBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.inConfs[0].desc); + auto firstInBlockingDesc = config.inConfs[0].desc->as(); size_t offset = 0; for (size_t i = 0; i < outputShapes.size(); i++) { - auto outBlockingDesc = MemoryDescUtils::convertToBlockedDescriptor(*config.outConfs[i].desc); - config.outConfs[i].desc = MKLDNNPlugin::make_unique(outBlockingDesc.getPrecision(), - outBlockingDesc.getShape().getStaticDims(), - outBlockingDesc.getBlockDims(), - outBlockingDesc.getOrder(), - firstInBlockingDesc.getOffsetPadding() + offset, - firstInBlockingDesc.getOffsetPaddingToData(), - firstInBlockingDesc.getStrides()); + auto oldDesc = config.outConfs[i].desc; + auto outBlockingDesc = oldDesc->as(); + config.outConfs[i].desc = std::make_shared(outBlockingDesc->getPrecision(), + outBlockingDesc->getShape(), + outBlockingDesc->getBlockDims(), + outBlockingDesc->getOrder(), + firstInBlockingDesc->getOffsetPadding() + offset, + firstInBlockingDesc->getOffsetPaddingToData(), + firstInBlockingDesc->getStrides()); size_t axisSize = 1; - for (size_t j = axis; j < outBlockingDesc.getBlockDims().size(); j++) { - axisSize *= outBlockingDesc.getBlockDims()[j]; + for (size_t j = axis; j < outBlockingDesc->getBlockDims().size(); j++) { + axisSize *= outBlockingDesc->getBlockDims()[j]; } offset += axisSize; } @@ -464,7 +471,7 @@ void MKLDNNSplitNode::prepareOptimizedParams() { const auto outputPortsCount = outputShapes.size(); //find axis order position - const auto& order = inpTensorDesc.getOrder(); + const auto& order = 
inpTensorDesc->getOrder(); unsigned axisOrderPos = std::numeric_limits::max(); for (size_t i = 0; i < order.size(); ++i) { if (order[i] == axis) { @@ -476,8 +483,8 @@ void MKLDNNSplitNode::prepareOptimizedParams() { THROW_ERROR << "Can't find the axis in the input tensor order list"; } - uint8_t srcDataSize = inpTensorDesc.getPrecision().size(); - const auto& srcDims = inpTensorDesc.getBlockDims(); + uint8_t srcDataSize = inpTensorDesc->getPrecision().size(); + const auto& srcDims = inpTensorDesc->getBlockDims(); const auto getRank = srcDims.size(); optimizedParams.countStrides = 1; @@ -491,7 +498,7 @@ void MKLDNNSplitNode::prepareOptimizedParams() { auto outputEdge = this->getChildEdgesAtPort(i).front(); optimizedParams.dataSize[i] = srcDataSize; - auto desc = outputEdge->getMemory().GetDesc().as(); + auto desc = outputEdge->getMemory().getDesc().as(); for (size_t j = axisOrderPos; j < getRank; j++) optimizedParams.dataSize[i] *= desc->getBlockDims()[j]; @@ -507,8 +514,8 @@ void MKLDNNSplitNode::prepareOptimizedParams() { void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { auto parentEdge = getParentEdgeAt(0); - const int rank = parentEdge->getShape().getRank(); - const auto parentDims = parentEdge->getShape().getStaticDims(); + const int rank = parentEdge->getMemory().GetShape().getRank(); + const auto parentDims = parentEdge->getMemory().getStaticDims(); const size_t IC = parentDims[1]; const size_t D = rank == 5 ? parentDims[rank - 3] : 1; const size_t H = parentDims[rank - 2]; @@ -516,7 +523,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { auto& srcMem = parentEdge->getMemory(); auto srcData = reinterpret_cast(srcMem.GetData()); - const auto dataSize = srcMem.GetDesc().getPrecision().size(); + const auto dataSize = srcMem.getDesc().getPrecision().size(); const size_t DHW = D*H*W; const size_t strideIB = DHW * IC * dataSize; @@ -532,7 +539,7 @@ void MKLDNNSplitNode::optimizedNspc2Ncsp(size_t MB) { for (size_t j = axis; j < dims.size(); j++) { innerSize *= dims[j]; } - auto srcPtr = srcData + srcMem.GetDesc().getElementOffset(sIdx) * dataSize; + auto srcPtr = srcData + srcMem.getDesc().getElementOffset(sIdx) * dataSize; const size_t OC = dims[1]; const size_t strideOB = OC * strideOC; diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp index 8e87617d3692b4..54d6a99dd75017 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp @@ -35,8 +35,13 @@ static inline size_t parallel_init(size_t start, size_t nDims, const SizeVector& return start; } -bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { +bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { + if (isDynamicNgraphNode(op)) { + errorMessage = "Doesn't support op with dynamic shapes"; + return false; + } + const auto ss = std::dynamic_pointer_cast(op); if (!ss) { errorMessage = "Only opset1 StridedSlice operation is supported"; @@ -199,15 +204,13 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() { const bool hasStrides = getParentEdges().size() > 3; InferenceEngine::Precision dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID); InferenceEngine::Precision beginPrecision = getOriginalInputPrecisionAtPort(BEGIN_ID); - auto beginDataType = 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp
index 8e87617d3692b4..54d6a99dd75017 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.cpp
@@ -35,8 +35,13 @@ static inline size_t parallel_init(size_t start, size_t nDims, const SizeVector&
     return start;
 }
 
-bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
+bool MKLDNNStridedSliceNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
     try {
+        if (isDynamicNgraphNode(op)) {
+            errorMessage = "Doesn't support op with dynamic shapes";
+            return false;
+        }
+
         const auto ss = std::dynamic_pointer_cast(op);
         if (!ss) {
             errorMessage = "Only opset1 StridedSlice operation is supported";
@@ -199,15 +204,13 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
     const bool hasStrides = getParentEdges().size() > 3;
     InferenceEngine::Precision dataPrecision = getOriginalInputPrecisionAtPort(DATA_ID);
     InferenceEngine::Precision beginPrecision = getOriginalInputPrecisionAtPort(BEGIN_ID);
-    auto beginDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(beginPrecision);
     InferenceEngine::Precision endPrecision = getOriginalInputPrecisionAtPort(END_ID);
-    auto endDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(endPrecision);
     InferenceEngine::Precision stridePrecision;
     if (hasStrides)
         stridePrecision = getOriginalInputPrecisionAtPort(STRIDE_ID);
 
-    auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims();
-    auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims();
+    auto srcDims = getInputShapeAtPort(DATA_ID).getStaticDims();
+    auto dstDims = getOutputShapeAtPort(0).getStaticDims();
     size_t nDims = srcDims.size();
 
     NodeConfig config;
@@ -242,17 +245,13 @@ void MKLDNNStridedSliceNode::initSupportedPrimitiveDescriptors() {
     auto range = BlockedDescCreator::makeFilteredRange(creators, nDims, supportedTypes);
     for (auto itr = range.first; itr != range.second; ++itr) {
-        config.inConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getParentEdgeAt(DATA_ID)->getShape().getStaticDims());
-        config.inConfs[BEGIN_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(BEGIN_ID)->getShape().getStaticDims(), beginDataType,
-                                                                  mkldnn::memory::format_tag::x);
-        config.inConfs[END_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(END_ID)->getShape().getStaticDims(), endDataType,
-                                                                mkldnn::memory::format_tag::x);
+        config.inConfs[0].desc = itr->second->createSharedDesc(dataPrecision, getInputShapeAtPort(DATA_ID));
+        config.inConfs[BEGIN_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(beginPrecision, getInputShapeAtPort(BEGIN_ID));
+        config.inConfs[END_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(endPrecision, getInputShapeAtPort(END_ID));
         if (hasStrides)
-            config.inConfs[STRIDE_ID].desc = MKLDNNPlugin::make_unique(getParentEdgeAt(STRIDE_ID)->getShape().getStaticDims(),
-                                                                       MKLDNNExtensionUtils::IEPrecisionToDataType(stridePrecision),
-                                                                       mkldnn::memory::format_tag::x);
+            config.inConfs[STRIDE_ID].desc = creators.at(LayoutType::ncsp)->createSharedDesc(stridePrecision, getInputShapeAtPort(STRIDE_ID));
 
-        config.outConfs[0].desc = itr->second->createUniqueDesc(dataPrecision, getChildEdgeAt(DATA_ID)->getShape().getStaticDims());
+        config.outConfs[0].desc = itr->second->createSharedDesc(dataPrecision, getOutputShapeAtPort(DATA_ID));
         supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::ref);
     }
 }
@@ -269,16 +268,16 @@ void MKLDNNStridedSliceNode::createPrimitive() {
     auto srcBlockingDesc = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType();
     auto dstBlockingDesc = getChildEdgeAt(0)->getMemory().GetDescWithType();
-    auto srcOrder = srcBlockingDesc.getOrder();
-    params.srcDims = srcBlockingDesc.getBlockDims();
-    params.dstDims = dstBlockingDesc.getBlockDims();
+    auto srcOrder = srcBlockingDesc->getOrder();
+    params.srcDims = srcBlockingDesc->getBlockDims();
+    params.dstDims = dstBlockingDesc->getBlockDims();
     params.srcMemPtr = srcMemPtr;
     params.dstMemPtr = dstMemPtr;
     params.dataSize = getSelectedPrimitiveDescriptor()->getConfig().inConfs[DATA_ID].desc->getPrecision().size();
 
     if (params.parametersAreConstant) {
         size_t realNDims = params.dstDims.size();
-        if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp))
+        if (!getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp))
             orderParametersByLayouts();
 
         SizeVector newSrcDims, newDstDims;
@@ -289,10 +288,10 @@ void MKLDNNStridedSliceNode::createPrimitive() {
 }
 
 void MKLDNNStridedSliceNode::orderParametersByLayouts() {
-    const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nspc);
-    const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp8c) ||
-                                 getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::nCsp16c);
-    auto srcOrder = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType().getOrder();
+    const bool isPerChannelLayout = getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::nspc);
+    const bool isBlockedLayout = getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp8c) ||
+                                 getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::nCsp16c);
+    auto srcOrder = getParentEdgeAt(DATA_ID)->getMemory().GetDescWithType()->getOrder();
 
     if (isBlockedLayout) {
         const size_t blk = params.srcDims.back();
@@ -596,8 +595,8 @@ void MKLDNNStridedSliceNode::indicesCalculationForOptimized() {
 
 void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) {
     if (!params.parametersAreConstant) {
-        auto srcDims = getParentEdgeAt(DATA_ID)->getShape().getStaticDims();
-        auto dstDims = getChildEdgeAt(0)->getShape().getStaticDims();
+        auto srcDims = getParentEdgeAt(DATA_ID)->getMemory().getStaticDims();
+        auto dstDims = getChildEdgesAtPort(DATA_ID)[0]->getMemory().getStaticDims();
         const size_t nDims = std::max(srcDims.size(), dstDims.size());
         const size_t ellipsisMaskCounter = std::accumulate(ellipsisMask.begin(), ellipsisMask.end(), 0);
@@ -620,7 +619,7 @@ void MKLDNNStridedSliceNode::execute(mkldnn::stream strm) {
         if (srcDims.size() > 3 && params.equalDims && ellipsisMaskCounter != 0)
             addHiddenDims(srcDims.size());
 
-        if (!getParentEdgeAt(DATA_ID)->getMemory().GetDesc().hasLayoutType(LayoutType::ncsp))
+        if (!getParentEdgeAt(DATA_ID)->getMemory().getDesc().hasLayoutType(LayoutType::ncsp))
             orderParametersByLayouts();
 
         SizeVector newSrcDims, newDstDims;
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.h
index 672bc0b6ce9c9b..8883311903d010 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_strided_slice_node.h
@@ -24,7 +24,7 @@ class MKLDNNStridedSliceNode : public MKLDNNNode {
         return false;
     }
 
-    static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
 
 private:
     inline void stridedSlice();
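
For reference, the StridedSlice hunks above (and the TensorIterator, Tile, and TopK hunks below) gate isSupportedOperation with an isDynamicNgraphNode() early return. The patch does not show that helper's body; the sketch below is a hedged guess at what such a dynamic-shape check could look like if it only inspected the op's partial shapes, with a hypothetical name so it is not confused with the plugin's actual utility:

// Illustrative sketch only, not part of the patch; hasDynamicShapes() is a hypothetical
// stand-in for the plugin's isDynamicNgraphNode() helper.
#include <memory>
#include <ngraph/node.hpp>

static bool hasDynamicShapes(const std::shared_ptr<const ngraph::Node>& op) {
    // Reject the op if any input or output has a shape that is not fully defined.
    for (size_t i = 0; i < op->get_input_size(); ++i)
        if (op->get_input_partial_shape(i).is_dynamic())
            return true;
    for (size_t i = 0; i < op->get_output_size(); ++i)
        if (op->get_output_partial_shape(i).is_dynamic())
            return true;
    return false;
}
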
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp
index 2e1a9f426ef55a..d42d168b4e4786 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tensoriterator_node.cpp
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include "common/blocked_desc_creator.h"
+#include "utils/ngraph_utils.hpp"
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -22,22 +23,24 @@ static NodeConfig make_plain_config(const std::shared_ptr& op) {
     NodeConfig config;
 
     for (size_t i = 0; i < op->get_input_size(); i++) {
-        const auto& dims = op->get_input_shape(i);
+        const auto &origShape = op->get_input_partial_shape(i);
+        const auto& shape = Shape(origShape.rank().get_length() == 0 ? ngraph::PartialShape{1} : origShape);
         const auto prec = InferenceEngine::details::convertPrecision(op->get_input_element_type(i));
 
         PortConfig data_conf {};
         auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp);
-        data_conf.desc = descCreator->createUniqueDesc(prec, dims);
+        data_conf.desc = descCreator->createSharedDesc(prec, shape);
         config.inConfs.push_back(data_conf);
     }
 
     for (size_t i = 0; i < op->get_output_size(); i++) {
-        const auto& dims = op->get_output_shape(i);
+        const auto &origShape = op->get_output_partial_shape(i);
+        const auto& shape = Shape(origShape.rank().get_length() == 0 ? ngraph::PartialShape{1} : origShape);
         const auto prec = InferenceEngine::details::convertPrecision(op->get_output_element_type(i));
 
         PortConfig data_conf {};
         auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp);
-        data_conf.desc = descCreator->createUniqueDesc(prec, dims);
+        data_conf.desc = descCreator->createSharedDesc(prec, shape);
         config.outConfs.push_back(data_conf);
     }
 
@@ -56,8 +59,8 @@ class PortIteratorHelper : public PortMapHelper {
         auto axis = slice_rule.axis;
         auto stride = slice_rule.stride;
 
-        auto full_dims = full_blob->GetDims();
-        auto part_dims = part_blob->GetDims();
+        auto full_dims = full_blob->GetShape().getStaticDims();
+        auto part_dims = part_blob->GetShape().getStaticDims();
 
         auto abs_stride = std::abs(stride);
         auto sign_of_stride = stride < 0.0f ? -1 : 1;
@@ -68,7 +71,7 @@ class PortIteratorHelper : public PortMapHelper {
         IE_ASSERT(full_dims == part_dims) << "Shape mismatch for tensor iterator port";
 
         // make chunk view
-        auto chunk_desc = full_blob->GetDescriptor();
+        auto chunk_desc = full_blob->GetDescWithType()->getDnnlDesc();
         chunk_desc.data.dims[axis] = abs_stride;
         chunk_desc.data.padded_dims[axis] = abs_stride;  // TODO: asamption that plain tensor
@@ -132,7 +135,7 @@ class IterCountPortHelper : public PortMapHelper {
     IterCountPortHelper(const MKLDNNMemoryPtr &to, const mkldnn::engine& eng) {
         // Only scalar I32 tensor is supported
         IE_ASSERT(to->GetDataType() == memory::data_type::s32);
-        IE_ASSERT(to->GetDims() == memory::dims{1});
+        IE_ASSERT(to->GetShape() == Shape(InferenceEngine::SizeVector{1}));
         mem_holder_dst = to->GetPrimitive();
     }
 
@@ -150,7 +153,7 @@ class asBoolCheck : public PortChecker {
 public:
     asBoolCheck(const MKLDNNMemoryPtr &mem) {
         IE_ASSERT(mem->GetDataType() == memory::data_type::u8);
-        IE_ASSERT(mem->GetDims() == memory::dims{1});
+        IE_ASSERT(mem->GetShape() == Shape(InferenceEngine::SizeVector{1}));
         mem_holder = mem->GetPrimitive();
     }
 
@@ -167,7 +170,8 @@ class asIntCheck : public PortChecker {
 public:
     asIntCheck(const MKLDNNMemoryPtr &mem) {
         IE_ASSERT(mem->GetDataType() == memory::data_type::s32);
-        IE_ASSERT(mem->GetDims() == memory::dims{1});
+        const auto a = Shape(InferenceEngine::SizeVector{1});
+        IE_ASSERT(mem->GetShape() == a);
         mem_holder = mem->GetPrimitive();
     }
 
@@ -273,6 +277,11 @@ int getNumIteration(const std::shared_ptr& op, const std::ve
 
 bool MKLDNNTensorIteratorNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
     try {
+        if (isDynamicNgraphNode(op)) {
+            errorMessage = "Doesn't support op with dynamic shapes";
+            return false;
+        }
+
         if (!one_of(op->get_type_info(),
                 ngraph::op::v0::TensorIterator::type_info,
                 ngraph::op::v5::Loop::type_info)) {
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp
index c92193c6e927c0..00fc9ba787323f 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.cpp
@@ -13,8 +13,12 @@ using namespace mkldnn;
 using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
 
-bool MKLDNNTileNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
+bool MKLDNNTileNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
     try {
+        if (isDynamicNgraphNode(op)) {
+            errorMessage = "Doesn't support op with dynamic shapes";
+            return false;
+        }
         const auto tile = std::dynamic_pointer_cast(op);
         if (!tile) {
             errorMessage = "Only opset1 Tile operation is supported";
@@ -86,17 +90,12 @@ void MKLDNNTileNode::initSupportedPrimitiveDescriptors() {
         IE_THROW() << errorPrefix << " has unsupported input precision: " << precision;
     }
 
-    auto descCreator = BlockedDescCreator::getCommonCreators().at(LayoutType::ncsp);
-
-    NodeConfig config;
-    config.dynBatchSupport = true;
-    config.inConfs.resize(2);
-    config.outConfs.resize(1);
-    config.inConfs[TILE_INPUT].desc = descCreator->createUniqueDesc(precision, getParentEdgeAt(TILE_INPUT)->getShape().getStaticDims());
-    config.inConfs[TILE_REPEATS].desc = descCreator->createUniqueDesc(Precision::I32, getParentEdgeAt(TILE_REPEATS)->getShape().getStaticDims());
-    config.outConfs[0].desc = descCreator->createUniqueDesc(precision, getChildEdgeAt(0)->getShape().getStaticDims());
-    config.outConfs[0].inPlace = noTiling ? 0 : -1;
-    supportedPrimitiveDescriptors.push_back({config, impl_desc_type::unknown});
+    int inPlace = noTiling ? 0 : -1;
+    addSupportedPrimDesc({{LayoutType::ncsp, precision},
+                          {LayoutType::ncsp, Precision::I32}},
+                         {{LayoutType::ncsp, precision, false, inPlace}},
+                         impl_desc_type::unknown,
+                         true);
 }
 
 void MKLDNNTileNode::createPrimitive() {
@@ -122,7 +121,7 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) {
     int m_inner_dim = 1;
     int m_outer_dim = 1;
-    memory::dims inDims = srcMemory.GetDims();
+    auto inDims = srcMemory.getStaticDims();
     for (int i=0; i < axis; i++ ) m_outer_dim *= inDims[i];
     for (int i=axis; i < inDims.size(); i++ ) m_inner_dim *= inDims[i];
 
     if (axis > 0) {
@@ -133,13 +132,13 @@ void MKLDNNTileNode::execute(mkldnn::stream strm) {
         m_inner_dim *= batchToProcess();
     }
 
-    if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp8c)) {
+    if (m_inner_dim == 1 && m_outer_dim % 8 == 0 && srcMemory.getDesc().hasLayoutType(LayoutType::nCsp8c)) {
         /*
          * We may enable tile processing directly to appropriate output format (nChw8c)
         */
         m_inner_dim *= 8;
         m_outer_dim /= 8;
-    } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.GetDesc().hasLayoutType(LayoutType::nCsp16c)) {
+    } else if (m_inner_dim == 1 && m_outer_dim % 16 == 0 && srcMemory.getDesc().hasLayoutType(LayoutType::nCsp16c)) {
         /*
          * We may enable tile processing directly to appropriate output format (nChw16c)
         */
@@ -147,7 +146,7 @@
         m_outer_dim /= 16;
     }
 
-    m_inner_dim *= srcMemory.GetDesc().getPrecision().size();
+    m_inner_dim *= srcMemory.getDesc().getPrecision().size();
     for (int i = 0; i < m_outer_dim; ++i) {
         for (int t = 0; t < tiles; ++t) {
             cpu_memcpy(dst_ptr, src_ptr, m_inner_dim);
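
For reference, make_plain_config in the TensorIterator hunks above maps a rank-0 (scalar) partial shape to {1} before building a port descriptor. A minimal sketch of that normalization using ngraph::PartialShape; the free function below is illustrative only and not part of the patch:

// Illustrative sketch only, not part of the patch: scalar (rank-0) shapes are widened
// to {1} so every port ends up with at least one dimension.
#include <ngraph/partial_shape.hpp>

ngraph::PartialShape normalizeScalarShape(const ngraph::PartialShape& origShape) {
    if (origShape.rank().is_static() && origShape.rank().get_length() == 0)
        return ngraph::PartialShape{1};
    return origShape;
}
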
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h
index a6fd6e51168ff1..2eeb96ae3d8687 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_tile_node.h
@@ -20,7 +20,7 @@ class MKLDNNTileNode : public MKLDNNNode {
     void execute(mkldnn::stream strm) override;
     bool created() const override;
 
-    static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
+    static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept;
 
 private:
     static const size_t TILE_INPUT = 0;
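
For reference, the TopK hunks below size the output-port configuration from outputShapes: one FP32 port for the values output and I32 ports for the remaining (index) outputs. A simplified sketch of that configuration step with stand-in types, not the plugin's actual PortConfig machinery:

// Illustrative sketch only, not part of the patch: stand-in types for the per-output
// layout/precision list that the TopK node builds before addSupportedPrimDesc.
#include <cstddef>
#include <vector>

enum class Layout { ncsp };
enum class Prec { FP32, I32 };
struct PortDesc { Layout layout; Prec precision; };

std::vector<PortDesc> makeTopKOutConf(size_t outputPortCount) {
    std::vector<PortDesc> outDataConf;
    outDataConf.reserve(outputPortCount);
    outDataConf.push_back({Layout::ncsp, Prec::FP32});     // TopK values
    for (size_t i = 1; i < outputPortCount; ++i)
        outDataConf.push_back({Layout::ncsp, Prec::I32});  // TopK indices
    return outDataConf;
}
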
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp
index f3fa2e69b5fa8d..ffef15396cdb84 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_topk_node.cpp
@@ -16,8 +16,12 @@ using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
 
-bool MKLDNNTopKNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
+bool MKLDNNTopKNode::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept {
     try {
+        if (isDynamicNgraphNode(op)) {
+            errorMessage = "Doesn't support op with dynamic shapes";
+            return false;
+        }
         const auto topKOp = ngraph::as_type_ptr(op);
         if (!topKOp) {
             errorMessage = "Node is not an instance of the TopK from the operations set v1 or v3";
@@ -48,7 +52,7 @@ MKLDNNTopKNode::MKLDNNTopKNode(const std::shared_ptr& op, const mk
     }
 
     auto topK1Op = ngraph::as_type_ptr(op);
-    SizeVector dstDims = topK1Op->get_output_shape(TOPK_VALUE);
+    VectorDims dstDims = topK1Op->get_output_shape(TOPK_VALUE);
     src_dims = topK1Op->get_input_shape(TOPK_DATA);
 
     axis = topK1Op->get_axis();
@@ -85,9 +89,9 @@ void MKLDNNTopKNode::initSupportedPrimitiveDescriptors() {
         return;
 
     std::vector outDataConf;
-    outDataConf.reserve(getOriginalOutputsNumber());
+    outDataConf.reserve(outputShapes.size());
     outDataConf.emplace_back(LayoutType::ncsp, Precision::FP32);
-    for (int i = 1; i < getOriginalOutputsNumber(); ++i)
+    for (int i = 1; i < outputShapes.size(); ++i)
         outDataConf.emplace_back(LayoutType::ncsp, Precision::I32);
 
     addSupportedPrimDesc({{LayoutType::ncsp, Precision::FP32},
@@ -108,7 +112,7 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) {
         } else {
             dst_idx = reinterpret_cast(getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPtr());
         }
-        SizeVector dstDims = getChildEdgesAtPort(0)[0]->getShape().getStaticDims();
+        const VectorDims& dstDims = getChildEdgesAtPort(0)[0]->getMemory().getStaticDims();
 
         if (dstDims[axis] != static_cast(src_k)) {
             std::string errorMsg = "Output tensor dimension mismatch";
@@ -116,10 +120,10 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) {
     } else if (outputShapes.size() == 2) {
         dst_data = reinterpret_cast(getChildEdgesAtPort(TOPK_VALUE)[0]->getMemoryPtr()->GetPtr());
-        SizeVector dst_data_dims = getChildEdgesAtPort(TOPK_VALUE)[0]->getShape().getStaticDims();
+        const VectorDims& dst_data_dims = getChildEdgesAtPort(TOPK_VALUE)[0]->getMemory().getStaticDims();
 
         dst_idx = reinterpret_cast(getChildEdgesAtPort(TOPK_INDEX)[0]->getMemoryPtr()->GetPtr());
-        SizeVector dst_idx_dims = getChildEdgesAtPort(TOPK_INDEX)[0]->getShape().getStaticDims();
+        const VectorDims& dst_idx_dims = getChildEdgesAtPort(TOPK_INDEX)[0]->getMemory().getStaticDims();
 
         if (dst_idx_dims[axis] != static_cast(src_k) || dst_data_dims[axis] != static_cast(src_k)) {
             std::string errorMsg = "Output tensors dimension mismatch";
@@ -133,7 +137,7 @@ void MKLDNNTopKNode::execute(mkldnn::stream strm) {
     if (src_dims[axis] < static_cast(src_k))
         src_k = src_dims[axis];
 
-    SizeVector in_dims = getParentEdgeAt(TOPK_DATA)->getShape().getStaticDims();
+    const VectorDims& in_dims = getParentEdgeAt(TOPK_DATA)->getMemory().getStaticDims();
 
     if (src_k == 1) {
         if (is_last_dim) {
@@ -167,7 +171,7 @@ bool MKLDNNTopKNode::created() const {
 }
 
 template  class Compare2>
-void MKLDNNTopKNode::top1_axis(const float* src_data, float* dst_data, int* dst_idx, SizeVector in_dims) {
+void MKLDNNTopKNode::top1_axis(const float* src_data, float* dst_data, int* dst_idx, VectorDims in_dims) {
     int after_num = count(in_dims, axis + 1, in_dims.size());
     int first_index = 0;
 
@@ -216,7 +220,7 @@ void MKLDNNTopKNode::top1_axis(const float* src_data, float* dst_data, int* dst_
 }
 
 template