From 17ab2b8718acd713ff30149aa7d2bd7ae07612d9 Mon Sep 17 00:00:00 2001 From: Maksim Kutakov Date: Thu, 22 Jul 2021 12:47:44 +0300 Subject: [PATCH] MKLDNNMemoryDesc undefined state has been implemented --- .../mkldnn_plugin/cpu_blocked_memory_desc.cpp | 23 +- .../mkldnn_plugin/cpu_blocked_memory_desc.h | 6 +- .../src/mkldnn_plugin/cpu_memory_desc.h | 28 +- .../mkldnn_plugin/cpu_memory_desc_utils.cpp | 164 +---- .../src/mkldnn_plugin/cpu_shape.cpp | 48 ++ .../src/mkldnn_plugin/cpu_shape.h | 30 +- .../mkldnn_plugin/mkldnn_extension_utils.cpp | 22 +- .../src/mkldnn_plugin/mkldnn_memory.cpp | 580 +++++++++++++----- .../src/mkldnn_plugin/mkldnn_memory.h | 54 +- .../src/mkldnn_plugin/mkldnn_node.h | 22 + .../nodes/mkldnn_non_max_suppression_node.cpp | 15 +- .../src/mkldnn_plugin/utils/general_utils.h | 17 + .../tests/unit/cpu/CMakeLists.txt | 1 + .../unit/cpu/mkldnn_memory_desc_test.cpp | 196 +++++- 14 files changed, 852 insertions(+), 354 deletions(-) create mode 100644 inference-engine/src/mkldnn_plugin/cpu_shape.cpp diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp index fc93409fb7ef3b..96b32b52a75a88 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp +++ b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.cpp @@ -4,7 +4,6 @@ #include "cpu_blocked_memory_desc.h" #include "mkldnn_memory.h" -#include "utils/cpu_utils.hpp" using namespace MKLDNNPlugin; @@ -66,7 +65,7 @@ BlockedMemoryDesc::BlockedMemoryDesc(InferenceEngine::Precision prc, const Shape } } -bool BlockedMemoryDesc::isDefined() const { +bool BlockedMemoryDesc::isDefinedImp() const { bool defined = true; defined = defined && std::none_of(blockedDims.cbegin(), blockedDims.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); defined = defined && std::none_of(strides.cbegin(), strides.cend(), [](size_t val) { return val == Shape::UNDEFINED_DIM; }); @@ -261,25 +260,7 @@ std::string BlockedMemoryDesc::serializeFormat() const { return result.str(); } -std::unique_ptr BlockedMemoryDesc::cloneWithNewDims(const std::vector &dims) const { - // TODO [DS]: phase 2 : move to the base class - // TODO [DS]: phase 2 : to discuss the behaviour, should we check the upper bound? - if (getShape().getRank() != dims.size()) { - IE_THROW(ParameterMismatch) << "Can not clone descriptor since it has rank = " << getShape().getRank() << - ", but dims with size=" << dims.size() << "were provided."; - } - - auto comparator = [](size_t lhs, size_t rhs) { - return (lhs == rhs) || (lhs == Shape::UNDEFINED_DIM); - }; - - if (!std::equal(getShape().getDims().begin(), getShape().getDims().end(), dims.begin(), comparator)) { - IE_THROW(ParameterMismatch) << "Can not clone descriptor! 
Incompatible dims, shape: " << dims2str(getShape().getDims()) << " provided dims: " << dims2str(dims);
-    }
-
-    // TODO [DS]: phase 2 : end code frame to be moved
-
+std::unique_ptr<MemoryDesc> BlockedMemoryDesc::cloneWithNewDimsImp(const std::vector<size_t> &dims) const {
     std::vector<size_t> newBlockedDims(order.size());
 
     for (size_t i = 0; i < dims.size(); ++i) {
diff --git a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h
index 3458d2a8137e6a..c85e8968be6da2 100644
--- a/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h
+++ b/inference-engine/src/mkldnn_plugin/cpu_blocked_memory_desc.h
@@ -22,8 +22,6 @@ class BlockedMemoryDesc : public MemoryDesc {
         return MKLDNNPlugin::make_unique<BlockedMemoryDesc>(*this);
     }
 
-    bool isDefined() const override;
-
     bool isCompatible(const MemoryDesc& rhs) const override;
 
     bool isCompatible(const BlockedMemoryDesc& rhs) const;
@@ -83,8 +81,6 @@ class BlockedMemoryDesc : public MemoryDesc {
 
     size_t getMaxMemSize() const override;
 
-    std::unique_ptr<MemoryDesc> cloneWithNewDims(const std::vector<size_t>& dims) const override;
-
 private:
     size_t getElementOffset(size_t elemNumber) const override;
     size_t getMemSizeImp() const override;
@@ -92,6 +88,8 @@ class BlockedMemoryDesc : public MemoryDesc {
     bool isPlainFormat() const;
     bool isBlockedCFormat(size_t blk_size) const;
     bool isTailCFormat() const;
+    bool isDefinedImp() const override;
+    std::unique_ptr<MemoryDesc> cloneWithNewDimsImp(const std::vector<size_t>& dims) const override;
 
 private:
     InferenceEngine::Precision precision;
diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h b/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h
index 6012ad49be2e27..66f7cd20c2d5b5 100644
--- a/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h
+++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc.h
@@ -41,13 +41,25 @@ class MemoryDesc {
 
     virtual std::unique_ptr<MemoryDesc> clone() const = 0;
 
-    // clone descriptor with new dims. Throws an exception if some of the new dims conflicts with the internal shape (i.e. its defined dims and rank)
-    virtual std::unique_ptr<MemoryDesc> cloneWithNewDims(const std::vector<size_t>& dims) const = 0;
+    // Clone descriptor with new dims. Throws an exception if some of the new dims conflict with the internal shape (i.e. its defined dims, rank, upper bounds)
+    std::unique_ptr<MemoryDesc> cloneWithNewDims(const std::vector<size_t>& dims) const {
+        if (!getShape().isCompatible(dims)) {
+            IE_THROW(ParameterMismatch) << "Can not clone with new dims. Descriptor's shape: " << getShape().toString() <<
+                                        " is incompatible with provided dimensions: " << dims2str(dims) << ".";
+        }
+
+        return cloneWithNewDimsImp(dims);
+    }
 
     virtual bool isCompatible(const MemoryDesc& rhs) const = 0;
 
     // Checks that all dimensions, offsets, strides, etc. are defined (!= UNDEFINED_DIM)
-    virtual bool isDefined() const = 0;
+    bool isDefined() const {
+        if (descStatus::Unknown == status) {
+            status = isDefinedImp() ? descStatus::Defined : descStatus::Undefined;
+        }
+        return descStatus::Defined == status;
+    }
 
     virtual bool hasLayoutType(LayoutType layoutType) const = 0;
 
@@ -102,9 +114,19 @@ class MemoryDesc {
 
     // Get offset to the n'th element. Returns physical index of the element by the logical one considering padding, layout, blocking etc.
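    // Editor's note, illustration only (not part of the original change): for an nChw8c blocked
    // layout the logical index (n, c, h, w) maps to the physical offset
    //     offset0 + n * strides[0] + (c / 8) * strides[1] + h * strides[2] + w * strides[3] + (c % 8)
    // i.e. the implementation first restores the logical indices from elemNumber and then applies
    // the strides of the particular blocked layout.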
virtual size_t getElementOffset(size_t elemNumber) const = 0; + virtual bool isDefinedImp() const = 0; + + virtual std::unique_ptr cloneWithNewDimsImp(const std::vector& dims) const = 0; + MemoryDescType type; Shape shape; + mutable enum class descStatus : uint8_t { + Unknown, + Defined, + Undefined, + } status = descStatus::Unknown; + friend class BlobDumper; // WA: optimizedNspc2Ncsp used getElementOffset inside implementation friend class MKLDNNSplitNode; diff --git a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp index 693eb63d5f221a..42264fa49f126e 100644 --- a/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp +++ b/inference-engine/src/mkldnn_plugin/cpu_memory_desc_utils.cpp @@ -40,57 +40,13 @@ BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemory if (desc.data.format_kind != dnnl_blocked) IE_THROW() << "Conversion is not possible"; - const auto &blk_desc = desc.data.format_desc.blocking; - - const size_t outer_ndims = dims.size(); - const size_t inner_ndims = blk_desc.inner_nblks; - const size_t total_ndims = outer_ndims + inner_ndims; - - // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} - std::vector inner_strides(inner_ndims, 1); - for (size_t i = 1; i < blk_desc.inner_nblks; i++) { - inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + if (desc.data.extra.flags != dnnl_memory_extra_flag_none) { + IE_THROW() << "Conversion is not possible"; } - // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} - std::vector total_block_per_dim(outer_ndims, 1); - for (int i = 0; i < inner_ndims; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } + const auto &blk_desc = desc.data.format_desc.blocking; - // order of outer dims. 
In case of IOhw_ will be {1, 0, 2, 3} - std::vector outer_order(outer_ndims); - std::iota(outer_order.begin(), outer_order.end(), 0); - std::sort(outer_order.begin(), outer_order.end(), - [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || - (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - - // IE blocked order - // [new_outer_order] U [inner_idxs] - SizeVector ie_blk_order(total_ndims, 0); - std::copy(outer_order.begin(), outer_order.end(), ie_blk_order.begin()); - std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, ie_blk_order.begin() + dims.size()); - - // IE blocked strides - // [outer_strides via new_outer_order] U [inner_strides] - SizeVector ie_blk_strides(total_ndims, 0); - std::copy(inner_strides.rbegin(), inner_strides.rend(), ie_blk_strides.rbegin()); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_strides.begin(), - [&] (size_t i) { return blk_desc.strides[i]; }); - - // IE blocked dims - // [dims via new_outer_order with auto pad] U [inner_blk_dims] - SizeVector ie_blk_dims(total_ndims, 0); - std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, - ie_blk_dims.end() - blk_desc.inner_nblks); - std::transform(outer_order.begin(), outer_order.end(), ie_blk_dims.begin(), - [&] (size_t i) { return outer_block_dims[i]; }); + const size_t inner_ndims = blk_desc.inner_nblks; // IE offset padded to data. Same as for oneDNN SizeVector ie_blk_offset_to_data {desc.data.padded_offsets, desc.data.padded_offsets + desc.data.ndims}; @@ -101,8 +57,8 @@ BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemory // fill it with zero. ie_blk_offset_to_data.insert(ie_blk_offset_to_data.end(), inner_ndims, 0); - BlockedMemoryDesc res(MKLDNNMemory::convertToIePrec(desc.data_type()), Shape({begin(dims), end(dims)}), ie_blk_dims, - ie_blk_order, ie_blk_offset0, ie_blk_offset_to_data, ie_blk_strides); + BlockedMemoryDesc res(inpDesc.getPrecision(), inpDesc.getShape(), inpDesc.getBlockDims(), + inpDesc.getOrder(), ie_blk_offset0, ie_blk_offset_to_data, inpDesc.getStrides()); return res; } @@ -135,90 +91,8 @@ MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const MemoryDesc& de } MKLDNNMemoryDesc MemoryDescUtils::convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc) { - dnnl_memory_desc_t mkldnnDesc; - - // scalar case - if (desc.getShape().getRank() == 0) { - mkldnn::memory::desc convertedDesc; - convertedDesc.data.format_kind = dnnl_blocked; - convertedDesc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); - convertedDesc.data.ndims = 1; - convertedDesc.data.dims[0] = 1; - convertedDesc.data.padded_dims[0] = 1; - convertedDesc.data.format_desc.blocking.strides[0] = 1; - convertedDesc.data.padded_offsets[0] = 0; - convertedDesc.data.offset0 = desc.getOffsetPadding(); - return MKLDNNMemoryDesc(convertedDesc); - } - - auto dims = desc.getShape().getStaticDims(); - - auto ie_blkdDims = desc.getBlockDims(); - auto ie_order = desc.getOrder(); - auto ie_offsetsToData = desc.getOffsetPaddingToData(); - auto ie_strides = desc.getStrides(); - - size_t outer_ndims = dims.size(); - size_t inner_ndims = ie_order.size() - dims.size(); - - bool is_descending_strides = true; - for (int i = 1; i < ie_strides.size(); i++) { - is_descending_strides &= (ie_strides[i-1] >= ie_strides[i]); - } - - // TODO: That's strong constrains and can be 
mitigated. IE::TensorDesc allow to transpose blocked dims - // and may be we can achieve correct "descending strides" form which allow conversion. - if (!is_descending_strides) - IE_THROW() << "Unsupported case for conversion"; - - std::vector outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension - for (size_t i = 0; i < outer_ndims; i++) { - outer_order[ie_order[i]] = i; - } - bool outer_is_correct_permutation_of_n = - std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end(); - - if (!outer_is_correct_permutation_of_n) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_block_are_dense = one_of(ie_strides.back(), 0, 1); // stride 1 - is dense case, 0 - broad casted - for (int i = outer_ndims; i < ie_strides.size() - 1; i++) { - inner_block_are_dense &= (ie_strides[i] == ie_strides[i+1] * ie_blkdDims[i+1]); - } - - if (!inner_block_are_dense) - IE_THROW() << "Unsupported case for conversion"; - - bool inner_pad_offsets_is_zero = std::all_of(ie_offsetsToData.begin() + outer_ndims, ie_offsetsToData.end(), - [](size_t pad) { return pad == 0; }); - - if (!inner_pad_offsets_is_zero) - IE_THROW() << "Unsupported case for conversion"; - - // Fill general memory desc fields - mkldnnDesc.format_kind = dnnl_blocked; - mkldnnDesc.extra.flags = 0; - mkldnnDesc.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(desc.getPrecision())); - mkldnnDesc.ndims = dims.size(); - mkldnnDesc.offset0 = desc.getOffsetPadding(); - std::copy(dims.begin(), dims.end(), mkldnnDesc.dims); - std::copy(ie_offsetsToData.begin(), ie_offsetsToData.begin() + outer_ndims, mkldnnDesc.padded_offsets); - std::fill(mkldnnDesc.padded_dims, mkldnnDesc.padded_dims + outer_ndims, 1); - for (size_t i = 0; i < ie_order.size(); i++) { - auto idx = ie_order[i]; - mkldnnDesc.padded_dims[idx] *= ie_blkdDims[i]; - } - - // Fill blocking desc - auto &dnn_blk_desc = mkldnnDesc.format_desc.blocking; - dnn_blk_desc.inner_nblks = inner_ndims; - std::copy(ie_blkdDims.end() - inner_ndims, ie_blkdDims.end(), dnn_blk_desc.inner_blks); - std::copy(ie_order.end() - inner_ndims, ie_order.end(), dnn_blk_desc.inner_idxs); - for (size_t i = 0; i < outer_ndims; i++) { - dnn_blk_desc.strides[i] = ie_strides[outer_order[i]]; - } - - return MKLDNNMemoryDesc(mkldnnDesc); + return MKLDNNMemoryDesc(desc.getPrecision(), desc.getShape(), desc.getBlockDims(), + desc.getOrder(), desc.getOffsetPadding(), desc.getOffsetPaddingToData(), desc.getStrides()); } @@ -351,9 +225,15 @@ MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const MKLDNNMemoryDesc& desc if (desc.getFormatKind() != dnnl_format_kind_t::dnnl_blocked) IE_THROW() << "applyUndefinedOffset doesn't support not dnnl_blocked MKLDNNMemoryDesc"; - mkldnn::memory::desc retDesc = desc; - retDesc.data.offset0 = Shape::UNDEFINED_DIM; - return MKLDNNPlugin::make_unique(retDesc); + std::vector strides; + std::vector offsetPaddingToData; + + strides.resize(desc.getBlockDims().size(), Shape::UNDEFINED_DIM); + offsetPaddingToData.resize(desc.getBlockDims().size(), 0); + size_t offsetPadding = Shape::UNDEFINED_DIM; + MKLDNNMemoryDesc retDesc(desc.getPrecision(), desc.getShape(), desc.getBlockDims(), + desc.getOrder(), offsetPadding, offsetPaddingToData, strides); + return MKLDNNPlugin::make_unique(std::move(retDesc)); } MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const BlockedMemoryDesc &desc) { @@ -372,14 +252,14 @@ MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc) { if (MemoryDescType::Blocked 
== desc->getType()) {
         auto blockedDesc = desc->as<BlockedMemoryDesc>();
         return MKLDNNPlugin::make_unique<BlockedMemoryDesc>(blockedDesc->getPrecision(), blockedDesc->getShape(),
-                                                            blockedDesc->getBlockDims(), blockedDesc->getOrder());
+                                                            blockedDesc->getBlockDims(), blockedDesc->getOrder());
     } else if (MemoryDescType::Mkldnn == desc->getType()) {
         auto mkldnnDesc = desc->as<MKLDNNMemoryDesc>();
-        mkldnn::memory::desc retDesc = *mkldnnDesc;
-        retDesc.data.offset0 = 0;
-        return MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(retDesc);
+        MKLDNNMemoryDesc retDesc(desc->getPrecision(), desc->getShape(),
+                                 mkldnnDesc->getBlockDims(), mkldnnDesc->getOrder());
+        return MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(std::move(retDesc));
     } else {
-        IE_THROW() << "resetOffset support Blocked and Mkldnn descpriptors only";
+        IE_THROW() << "resetOffset supports Blocked and Mkldnn descriptors only";
     }
 }
diff --git a/inference-engine/src/mkldnn_plugin/cpu_shape.cpp b/inference-engine/src/mkldnn_plugin/cpu_shape.cpp
new file mode 100644
index 00000000000000..bc31d90773c2e8
--- /dev/null
+++ b/inference-engine/src/mkldnn_plugin/cpu_shape.cpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "cpu_shape.h"
+#include "utils/general_utils.h"
+
+using namespace MKLDNNPlugin;
+
+bool Shape::isCompatible(const std::vector<size_t> &vecDims) const {
+    if (getRank() != vecDims.size()) {
+        return false;
+    }
+
+    // a defined dim must match exactly, while UNDEFINED_DIM acts as a wildcard
+    auto comparator = [](size_t lhs, size_t rhs) {
+        return (lhs == rhs) || (lhs == Shape::UNDEFINED_DIM);
+    };
+
+    if (!std::equal(getDims().begin(), getDims().end(), vecDims.begin(), comparator)) {
+        return false;
+    }
+
+    if (!std::equal(getMaxDims().begin(), getMaxDims().end(), vecDims.begin(), [](size_t lhs, size_t rhs) { return lhs >= rhs; })) {
+        return false;
+    }
+
+    if (!std::equal(getMinDims().begin(), getMinDims().end(), vecDims.begin(), [](size_t lhs, size_t rhs) { return lhs <= rhs; })) {
+        return false;
+    }
+    return true;
+}
+
+std::string Shape::toString() const {
+    std::stringstream output;
+    output << "{";
+
+    if (!dims.empty()) {  // guard the do/while below, a rank-0 (scalar) shape has nothing to print
+        size_t i = 0;
+        do {
+            if (dims[i] == Shape::UNDEFINED_DIM) {
+                output << dim2str(minDims[i]) << " - " << dim2str(maxDims[i]);
+            } else {
+                output << dims[i];
+            }
+        } while (++i < dims.size() && output << ", ");
+    }
+
+    output << "}";
+    return output.str();
+}
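+
+// Editor's note, illustration only (not part of the original change): for a shape built from
+// ngraph::PartialShape{{1, 8}, 3, Dimension::dynamic(), Dimension::dynamic()}, i.e. {1..8, 3, ?, ?}:
+//     shape.isCompatible({4, 3, 224, 224});   // true:  4 lies within [1, 8], 3 matches, dynamic dims accept anything
+//     shape.isCompatible({16, 3, 224, 224});  // false: 16 exceeds the upper bound 8
+//     shape.isCompatible({4, 5, 224, 224});   // false: the defined dim 3 does not match 5
+// toString() on the same shape would print something like {1 - 8, 3, 0 - ?, 0 - ?}.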
diff --git a/inference-engine/src/mkldnn_plugin/cpu_shape.h b/inference-engine/src/mkldnn_plugin/cpu_shape.h
index 9eafff4e5d1690..2f4018dbc98fdf 100644
--- a/inference-engine/src/mkldnn_plugin/cpu_shape.h
+++ b/inference-engine/src/mkldnn_plugin/cpu_shape.h
@@ -19,7 +19,9 @@ class Shape {
 
     explicit Shape(const ngraph::PartialShape& shape) {
         minDims = shape.get_min_shape();
+        std::transform(minDims.begin(), minDims.end(), minDims.begin(), [](size_t x){ return ngraph::Interval::s_max == x ? UNDEFINED_DIM : x;});
         maxDims = shape.get_max_shape();
+        std::transform(maxDims.begin(), maxDims.end(), maxDims.begin(), [](size_t x){ return ngraph::Interval::s_max == x ? UNDEFINED_DIM : x;});
         type = shape.is_static() ? ShapeType::Static : ShapeType::Dynamic;
 
         initDims();
@@ -118,14 +120,21 @@ class Shape {
     }
 
     ngraph::PartialShape toPartialShape() const {
-        std::vector<ngraph::Dimension> nGraphDims;
+        using ngraph::Dimension;
+        std::vector<Dimension> nGraphDims;
         nGraphDims.reserve(minDims.size());
         for (int i = 0; i < minDims.size(); i++) {
-            nGraphDims.emplace_back(minDims[i], maxDims[i]);
+            Dimension::value_type minDim = Shape::UNDEFINED_DIM == minDims[i] ? -1 : minDims[i];
+            Dimension::value_type maxDim = Shape::UNDEFINED_DIM == maxDims[i] ? -1 : maxDims[i];
+            nGraphDims.emplace_back(minDim, maxDim);
         }
         return ngraph::PartialShape(nGraphDims);
     }
 
+    bool isCompatible(const std::vector<size_t>& vecDims) const;
+
+    std::string toString() const;
+
     bool operator == (const Shape& rhs) const {
         return minDims == rhs.minDims && maxDims == rhs.maxDims;
     }
@@ -155,21 +164,4 @@ class Shape {
     std::vector<size_t> maxDims;
     std::vector<size_t> dims;
 };
-
-inline std::string dims2str(const std::vector<size_t>& dims) {
-    std::stringstream output;
-    output << "{";
-
-    auto itr = dims.begin();
-    do {
-        if (*itr == Shape::UNDEFINED_DIM) {
-            output << "?";
-        } else {
-            output << *itr;
-        }
-    } while (++itr != dims.end() && output << ", ");
-
-    output << "}";
-    return output.str();
-}
 }  // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
index d1c851645b1d78..008b4edff9d729 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_extension_utils.cpp
@@ -4,9 +4,7 @@
 
 #include "mkldnn_extension_utils.h"
 #include "utils/general_utils.h"
-#include
 #include
-#include
 
 using namespace mkldnn;
 using namespace MKLDNNPlugin;
@@ -77,10 +75,22 @@ InferenceEngine::Precision MKLDNNExtensionUtils::DataTypeToIEPrecision(memory::d
     }
 }
 
-InferenceEngine::SizeVector MKLDNNExtensionUtils::convertToSizeVector(const mkldnn::memory::dims& dims) {
-    return InferenceEngine::SizeVector(dims.begin(), dims.end());
+InferenceEngine::SizeVector MKLDNNExtensionUtils::convertToSizeVector(const memory::dims& dims) {
+    std::vector<size_t> vecResult;
+    vecResult.reserve(dims.size());
+    std::back_insert_iterator<std::vector<size_t>> itr(vecResult);
+    std::transform(dims.begin(), dims.end(), itr, [](memory::dim x) {
+        return x == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : static_cast<size_t>(x);
+    });
+    return vecResult;
 }
 
-std::vector MKLDNNExtensionUtils::convertToDnnlDims(const InferenceEngine::SizeVector& dims) {
-    return std::vector(dims.begin(), dims.end());;
+memory::dims MKLDNNExtensionUtils::convertToDnnlDims(const InferenceEngine::SizeVector& dims) {
+    memory::dims vecResult;
+    vecResult.reserve(dims.size());
+    std::back_insert_iterator<memory::dims> itr(vecResult);
+    std::transform(dims.begin(), dims.end(), itr, [](size_t x) {
+        return x == Shape::UNDEFINED_DIM ?
DNNL_RUNTIME_DIM_VAL : static_cast(x); + }); + return vecResult; } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp index 3f0b636bab15fb..157643717c1b01 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.cpp @@ -18,8 +18,14 @@ #include "nodes/common/cpu_convert.h" #include "mkldnn/ie_mkldnn.h" #include "cpu_shape.h" -#include "cpu_memory_desc_utils.h" -#include "mkldnn_extension_utils.h" + +namespace dnnl { +namespace impl { +extern status_t fill_blocked(memory_desc_t &md, std::vector &perm, + std::vector &inner_blks, + std::vector &inner_idxs); +} // namespace impl +} // namespace dnnl using namespace InferenceEngine; using namespace mkldnn; @@ -91,19 +97,23 @@ void MKLDNNMemory::Create(const mkldnn::memory::desc& desc, const void *data, bo } void MKLDNNMemory::Create(const MemoryDesc &desc, const void *data, bool pads_zeroing) { - pMemDesc = desc.clone(); + Create(desc.clone(), data, pads_zeroing); +} + +void MKLDNNMemory::Create(MemoryDescPtr desc, const void* data, bool pads_zeroing) { + pMemDesc = std::move(desc); if (nullptr != data) { useExternalStorage = true; } else { useExternalStorage = false; } - if (desc.isDefined()) { - Create(mkldnn::memory::desc(MemoryDescUtils::convertToMKLDNNMemoryDesc(desc)), data, pads_zeroing); + if (pMemDesc->isDefined()) { + Create(mkldnn::memory::desc(MemoryDescUtils::convertToMKLDNNMemoryDesc(*pMemDesc)), data, pads_zeroing); } else { //delayed dynamic allocation - size_t maxMemSize = desc.getMaxMemSize(); - int64_t dummySize = MemoryDesc::UNDEFINED_SIZE == maxMemSize ? 1 : maxMemSize; + size_t maxMemSize = pMemDesc->getMaxMemSize(); + size_t dummySize = MemoryDesc::UNDEFINED_SIZE == maxMemSize ? 
1 : maxMemSize; MKLDNNMemoryDesc dummyDesc({dummySize}, mkldnn::memory::data_type::u8); Create(mkldnn::memory::desc(dummyDesc), data, false); // no pads zeroing } @@ -304,20 +314,23 @@ MKLDNNMemoryDesc MKLDNNMemory::GetDescWithType() const { } } -void MKLDNNMemory::redefineDims(const std::vector& dims) { - auto desc = pMemDesc->cloneWithNewDims(dims); +void MKLDNNMemory::redefineDesc(const MemoryDesc& desc) { + redefineDesc(desc.clone()); +} + +void MKLDNNMemory::redefineDesc(MemoryDescPtr desc) { if (useExternalStorage) { size_t descMaxSize = desc->getMaxMemSize(); if (MemoryDesc::UNDEFINED_SIZE == descMaxSize) { IE_THROW() << "Can not reset descriptor, memory upper bound is unknown."; } if (descMaxSize <= memUpperBound) { - this->Create(*desc, prim->get_data_handle(), false); + this->Create(std::move(desc), prim->get_data_handle(), false); } else { - this->Create(*desc, nullptr, false); + this->Create(std::move(desc), nullptr, false); } } else { - this->Create(*desc, nullptr, false); + this->Create(std::move(desc), nullptr, false); } } @@ -336,7 +349,7 @@ BlockedMemoryDesc MKLDNNMemory::GetDescWithType() const } bool MKLDNNMemoryDesc::operator==(const MKLDNNMemoryDesc &rhs) const { - return this->desc == rhs.desc; + return this->desc == rhs.desc && order == rhs.order; } bool MKLDNNMemoryDesc::operator!=(const MKLDNNMemoryDesc &rhs) const { @@ -351,37 +364,80 @@ MKLDNNMemoryDesc::MKLDNNMemoryDesc(const mkldnn::memory::desc& desc) : MemoryDesc(Shape(MKLDNNExtensionUtils::convertToSizeVector(desc.dims())), Mkldnn), desc(desc) { if (desc.data.format_kind == dnnl::impl::format_kind::any) IE_THROW(Unexpected) << "Memory format any is prohibited!"; -} -MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) - : MemoryDesc(Shape(_dims), Mkldnn) { - if (format == memory::format_tag::any) - IE_THROW(Unexpected) << "Memory format any is prohibited!"; - if (format != memory::format_tag::undef) { - if (format == memory::format_tag::x && _dims.size() == 0) { - desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format); - } else { - desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, format); + mkldnn::impl::memory_desc_wrapper descWrapped(desc.data); + + if (descWrapped.is_blocking_desc()) { + if (descWrapped.has_runtime_dims_or_strides()) { + IE_THROW(Unexpected) << "Cannot calculate order from undefined dims or strides"; } - } else { - // Trying to create plain descriptor - // This WA is needed since memory::format_tag doesn't contain plain tag for tensors with rank > 6D - mkldnn::memory::dims strides(_dims.size(), 1); - for (int d = _dims.size() - 2; d >= 0; d--) { - strides[d] = strides[d + 1] * _dims[d + 1]; + + const auto dims = desc.dims(); + + const auto &blk_desc = descWrapped.blocking_desc(); + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} + std::vector inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; } - desc = mkldnn::memory::desc(MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, strides); + // total inner block size. 
in case of 4i16o4i will be {16, 16, 1, 1} + std::vector total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::iota(outer_order.begin(), outer_order.end(), 0); + std::sort(outer_order.begin(), outer_order.end(), + [&blk_desc, &outer_block_dims](size_t ind_l, size_t ind_r) { + return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || + (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); + }); + + + // blocked order + // [new_outer_order] U [inner_idxs] + SizeVector blk_order(total_ndims, 0); + std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); + std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); + order.swap(blk_order); } } +MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format) : + MKLDNNMemoryDesc(Shape(_dims), dataType, format) {} + MKLDNNMemoryDesc::MKLDNNMemoryDesc(const std::vector& _dims, mkldnn::memory::data_type dataType) : MemoryDesc(Shape(_dims), Mkldnn), desc() { + InitializePlain(_dims, dataType); +} + +void MKLDNNMemoryDesc::InitializePlain(const std::vector& _dims, mkldnn::memory::data_type dataType) { const auto ndims = _dims.size(); - mkldnn::memory::dims plain_strides(ndims, 1); - for (size_t i = 1; i < ndims; i++) { - plain_strides[ndims - i -1] = plain_strides[ndims - i] * _dims[ndims - i]; + mkldnn::memory::dims plain_strides; + if (std::any_of(_dims.begin(), _dims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) { + plain_strides.resize(ndims, DNNL_RUNTIME_DIM_VAL); + } else { + plain_strides.resize(ndims, 1); + for (size_t i = 1; i < ndims; i++) { + plain_strides[ndims - i -1] = plain_strides[ndims - i] * _dims[ndims - i]; + } } + + order.resize(ndims); + std::iota(order.begin(), order.end(), 0); + desc = {MKLDNNExtensionUtils::convertToDnnlDims(_dims), dataType, plain_strides}; } @@ -678,19 +734,18 @@ bool MKLDNNMemoryDesc::isSame(mkldnn::memory::format_tag fmt) const { } bool MKLDNNMemoryDesc::isPlainFormat() const { - if (desc.data.format_kind != dnnl_blocked || - desc.data.format_desc.blocking.inner_nblks != 0) + if (desc.data.format_kind != dnnl_blocked) return false; - const auto ndims = desc.data.ndims; - const auto dims = desc.data.dims; - const auto &strides = desc.data.format_desc.blocking.strides; - bool is_plain_strides = (strides[ndims-1] == 1); - for (int i = 0; i < ndims - 1; i++) { - is_plain_strides &= (strides[i] == strides[i+1] * dims[i+1]); + if (shape.getRank() != order.size()) { + return false; } - - return is_plain_strides; + for (size_t i = 0; i < order.size(); ++i) { + if (order[i] != i) { + return false; + } + } + return true; } bool MKLDNNMemoryDesc::isBlockedCFormat(size_t blk_size) const { @@ -701,44 +756,38 @@ bool MKLDNNMemoryDesc::isBlockedCFormat(size_t blk_size) const { blocking.inner_idxs[0] != 1) return false; - const auto &ndims = desc.data.ndims; - const auto &strides = desc.data.format_desc.blocking.strides; - const auto &dims = desc.data.padded_dims; - - if (blk_size == 
UNREACHABLE_DIM) { - blk_size = blocking.inner_blks[0]; - } else { - if (blk_size != blocking.inner_blks[0]) + if ((order.size() - shape.getRank()) != 1) { + return false; + } + for (size_t i = 0; i < order.size() - 1; ++i) { + if (order[i] != i) { return false; + } } - - bool is_direct_order = (strides[ndims-1] == blocking.inner_blks[0]); - for (int i = 0; i < ndims - 1; i++) { - auto dim = (i == 0) ? div_up(dims[i+1], blk_size) : dims[i+1]; - is_direct_order &= (strides[i] >= strides[i+1] * dim); + if (blk_size != UNREACHABLE_DIM && blk_size != blocking.inner_blks[0]) { + return false; } - return is_direct_order; + return true; } bool MKLDNNMemoryDesc::isTailCFormat() const { - const auto &blocking = desc.data.format_desc.blocking; - - if (desc.data.format_kind != dnnl_blocked || - blocking.inner_nblks != 0) + if (desc.data.format_kind != dnnl_blocked) return false; - const auto &ndims = desc.data.ndims; - const auto &strides = desc.data.format_desc.blocking.strides; - const auto &dims = desc.data.padded_dims; - - // dense permutation of acd..b - bool is_tailc_strides = (strides[1] == 1 && strides[ndims-1] == dims[1] && strides[0] == dims[2] * strides[2]); - for (int i = 2; i < ndims - 1; i++) { - is_tailc_strides &= (strides[i] == strides[i+1] * dims[i+1]); + if (shape.getRank() < 3) { + return false; } - - return is_tailc_strides; + if (shape.getRank() != order.size()) { + return false; + } + if (!std::is_sorted(order.begin(), --order.end())) { + return false; + } + if (order.back() != 1) { + return false; + } + return true; } bool MKLDNNMemoryDesc::blocksExtended() const { @@ -768,15 +817,25 @@ bool MKLDNNMemoryDesc::isCompatible(const MemoryDesc &rhs) const { } } +static bool array_cmp_weak(const dnnl_dim_t *a1, const dnnl_dim_t *a2, size_t size) { + for (size_t i = 0; i < size; ++i) + if (a1[i] != a2[i] && a1[i] != DNNL_RUNTIME_DIM_VAL && a2[i] != DNNL_RUNTIME_DIM_VAL) return false; + return true; +} + bool MKLDNNMemoryDesc::isCompatible(const MKLDNNMemoryDesc &rhs) const { using namespace dnnl; using namespace impl; - using namespace dnnl::impl::utils; + using namespace impl::utils; + if (this->getShape() != rhs.getShape() || this->getPrecision() != rhs.getPrecision()) { + return false; + } + if (this->desc == rhs.desc) { return true; } - mkldnn::impl::memory_desc_wrapper wrappedThis(this->desc.data); - mkldnn::impl::memory_desc_wrapper wrappedRhs(rhs.desc.data); + memory_desc_wrapper wrappedThis(this->desc.data); + memory_desc_wrapper wrappedRhs(rhs.desc.data); if (one_of(wrappedThis.format_kind(), format_kind::undef, format_kind::any)) return false; if (wrappedThis.is_wino_desc() || wrappedThis.is_rnn_packed_desc()) return false; @@ -786,17 +845,18 @@ bool MKLDNNMemoryDesc::isCompatible(const MKLDNNMemoryDesc &rhs) const { int stride_start = wrappedThis.ndims() >0 && wrappedThis.dims()[0] == 1 ? 1 : 0; //ignore batch axis stride if batch size == 1 - // Here is a slightly modified version of mkldnn::impl::memory_desc_wrapper::similar_to() call able to skip specific strides check. 
+ // Here is a slightly modified version of mkldnn::impl::memory_desc_wrapper::similar_to() call able to skip specific strides check + // and use weak comparison return wrappedThis.ndims() == wrappedRhs.ndims() && wrappedThis.format_kind() == wrappedRhs.format_kind() && wrappedThis.data_type() == wrappedRhs.data_type() - && array_cmp(wrappedThis.dims(), wrappedRhs.dims(), wrappedThis.ndims()) - && array_cmp(blk.strides + stride_start, r_blk.strides + stride_start, wrappedThis.ndims() - stride_start) + && array_cmp_weak(wrappedThis.dims(), wrappedRhs.dims(), wrappedThis.ndims()) + && array_cmp_weak(blk.strides + stride_start, r_blk.strides + stride_start, wrappedThis.ndims() - stride_start) && blk.inner_nblks == r_blk.inner_nblks && array_cmp(blk.inner_blks, r_blk.inner_blks, blk.inner_nblks) && array_cmp(blk.inner_idxs, r_blk.inner_idxs, blk.inner_nblks) - && array_cmp(wrappedThis.padded_dims(), wrappedRhs.padded_dims(), wrappedRhs.ndims()) - && array_cmp(wrappedThis.padded_offsets(), wrappedRhs.padded_offsets(), wrappedThis.ndims()) + && array_cmp_weak(wrappedThis.padded_dims(), wrappedRhs.padded_dims(), wrappedRhs.ndims()) + && array_cmp_weak(wrappedThis.padded_offsets(), wrappedRhs.padded_offsets(), wrappedThis.ndims()) && dimsEqualWeak(wrappedThis.offset0(), wrappedRhs.offset0()); } @@ -822,78 +882,30 @@ bool MKLDNNMemoryDesc::isCompatible(const BlockedMemoryDesc &rhs) const { return false; } - const auto dims = desc.dims(); - if (desc.data.format_kind != dnnl_blocked) { return false; } + if (desc.data.extra.flags != dnnl_memory_extra_flag_none) { + return false; + } + + const auto dims = desc.dims(); const auto &blk_desc = desc.data.format_desc.blocking; - const size_t outer_ndims = dims.size(); const size_t inner_ndims = blk_desc.inner_nblks; - const size_t total_ndims = outer_ndims + inner_ndims; - // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} - std::vector inner_strides(inner_ndims, 1); - for (size_t i = 1; i < blk_desc.inner_nblks; i++) { - inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; - } - - // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} - std::vector total_block_per_dim(outer_ndims, 1); - for (int i = 0; i < inner_ndims; i++) { - total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; - } - std::vector outer_block_dims(std::begin(dims), std::begin(dims) + outer_ndims); - for (size_t i = 0; i < outer_block_dims.size(); i++) { - outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); - } - - // order of outer dims. 
In case of IOhw_ will be {1, 0, 2, 3} - std::vector outer_order(outer_ndims); - std::iota(outer_order.begin(), outer_order.end(), 0); - std::sort(outer_order.begin(), outer_order.end(), - [&blk_desc, &outer_block_dims] (size_t ind_l, size_t ind_r) { - return (blk_desc.strides[ind_l] > blk_desc.strides[ind_r]) || - (blk_desc.strides[ind_l] == blk_desc.strides[ind_r] && outer_block_dims[ind_l] > outer_block_dims[ind_r]); - }); - - // blocked order - // [new_outer_order] U [inner_idxs] - SizeVector blk_order(total_ndims, 0); - std::copy(outer_order.begin(), outer_order.end(), blk_order.begin()); - std::copy(blk_desc.inner_idxs, blk_desc.inner_idxs + blk_desc.inner_nblks, blk_order.begin() + dims.size()); - - if (!dimsEqualWeak(blk_order, rhs.getOrder())) { + if (!dimsEqualWeak(order, rhs.getOrder())) { return false; } - //TODO [DS]: undefined offset is also used now as an indicator of undefined strides - if (desc.data.offset0 != Shape::UNDEFINED_DIM) { - // blocked strides - // [outer_strides via new_outer_order] U [inner_strides] - SizeVector blk_strides(total_ndims, 0); - std::copy(inner_strides.rbegin(), inner_strides.rend(), blk_strides.rbegin()); - std::transform(outer_order.begin(), outer_order.end(), blk_strides.begin(), - [&](size_t i) { return blk_desc.strides[i]; }); - - size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : - Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 - if (!dimsEqualWeak(blk_strides, rhs.getStrides(), skipAxis)) { - return false; - } + size_t skipAxis = this->getShape().getRank() > 0 && this->getShape().getDims().front() == 1 ? 0 : + Shape::UNDEFINED_DIM; //ignore batch axis if batch size == 1 + if (!dimsEqualWeak(getStrides(), rhs.getStrides(), skipAxis)) { + return false; } - // blocked dims - // [dims via new_outer_order with auto pad] U [inner_blk_dims] - SizeVector blk_dims(total_ndims, 0); - std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks, - blk_dims.end() - blk_desc.inner_nblks); - std::transform(outer_order.begin(), outer_order.end(), blk_dims.begin(), - [&] (size_t i) { return outer_block_dims[i]; }); - - if (!dimsEqualWeak(blk_dims, rhs.getBlockDims())) { + if (!dimsEqualWeak(getBlockDims(), rhs.getBlockDims())) { return false; } @@ -939,8 +951,17 @@ std::string MKLDNNMemoryDesc::serializeFormat() const { return mkldnn::utils::fmt2str(fmt); } -bool MKLDNNMemoryDesc::isDefined() const { - return desc.data.offset0 != Shape::UNDEFINED_DIM; +bool MKLDNNMemoryDesc::isDefinedImp() const { + mkldnn::impl::memory_desc_wrapper wrappedThis(desc.data); + if (!wrappedThis.is_blocking_desc()) { + return true; + } + + if (wrappedThis.has_runtime_dims_or_strides()) { + return false; + } + + return wrappedThis.offset0() != Shape::UNDEFINED_DIM; } InferenceEngine::Precision MKLDNNMemoryDesc::getPrecision() const { @@ -951,12 +972,283 @@ void MKLDNNMemoryDesc::setPrecision(InferenceEngine::Precision prc) { desc.data.data_type = static_cast(MKLDNNExtensionUtils::IEPrecisionToDataType(prc)); } -std::unique_ptr MKLDNNMemoryDesc::cloneWithNewDims(const std::vector &dims) const { - IE_THROW(NotImplemented) << "[DS]: MKLDNNMemoryDesc::cloneWithNewDims is not implemented."; +std::unique_ptr MKLDNNMemoryDesc::cloneWithNewDimsImp(const std::vector &dims) const { + using namespace dnnl::impl::utils; + if (desc.data.format_kind != dnnl_blocked) { + IE_THROW(Unexpected) << "Cannot clone non blocked oneDNN desc with new dims"; + } + + auto mklDims = MKLDNNExtensionUtils::convertToDnnlDims(dims); + 
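+    // Editor's note: the code below copies the oneDNN descriptor, substitutes the new dims and
+    // lets dnnl::impl::fill_blocked() recompute the padded dims and strides from the preserved
+    // order and inner blocks, since the strides of the source descriptor are not valid for the
+    // new dims.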
mkldnn::memory::desc newMklDesc = desc; + array_copy(newMklDesc.data.dims, mklDims.data(), mklDims.size()); + std::vector perm(order.begin(), order.begin() + mklDims.size()); + auto& blockingDesc = newMklDesc.data.format_desc.blocking; + auto numInnerBlks = blockingDesc.inner_nblks; + std::vector innerBlks(std::begin(blockingDesc.inner_blks), std::begin(blockingDesc.inner_blks) + numInnerBlks); + std::vector innerIdxs(std::begin(blockingDesc.inner_idxs), std::begin(blockingDesc.inner_idxs) + numInnerBlks); + auto retCode = dnnl::impl::fill_blocked(newMklDesc.data, perm, innerBlks, innerIdxs); + if (retCode != dnnl::impl::status::success) { + IE_THROW() << "Can not clone MKLDNNMemoryDesc with dims: " << dims2str(dims); + } + return MKLDNNPlugin::make_unique(newMklDesc); } size_t MKLDNNMemoryDesc::getMaxMemSize() const { - // TODO [DS]: write the correct implementation - return getMemSize(); + if (desc.data.format_kind != dnnl_blocked || shape.isStatic()) { + return getCurrentSize(); + } + + auto& maxDims = shape.getMaxDims(); + if (std::any_of(maxDims.begin(), maxDims.end(), [](size_t x){ return Shape::UNDEFINED_DIM == x; })) { + return UNDEFINED_SIZE; + } + + auto maxDimsDesc = cloneWithNewDims(maxDims); + return maxDimsDesc->getCurrentSize(); +} + +std::vector MKLDNNMemoryDesc::getStrides() const { + const auto dims = desc.dims(); + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // strides of inner dims. In case of 4i16o4i will be {64, 4, 1} + std::vector inner_strides(inner_ndims, 1); + for (size_t i = 1; i < blk_desc.inner_nblks; i++) { + inner_strides[blk_desc.inner_nblks - 1 - i] = inner_strides[blk_desc.inner_nblks - i] * blk_desc.inner_blks[blk_desc.inner_nblks - i]; + } + + // order of outer dims. In case of IOhw_ will be {1, 0, 2, 3} + std::vector outer_order(outer_ndims); + std::copy(order.begin(), order.begin() + outer_ndims, outer_order.begin()); + + // blocked strides + // [outer_strides via new_outer_order] U [inner_strides] + SizeVector blk_strides(total_ndims, 0); + std::copy(inner_strides.rbegin(), inner_strides.rend(), blk_strides.rbegin()); + std::transform(outer_order.begin(), outer_order.end(), blk_strides.begin(), + [&](size_t i) { return blk_desc.strides[i] == DNNL_RUNTIME_DIM_VAL ? Shape::UNDEFINED_DIM : blk_desc.strides[i]; }); + return blk_strides; +} + +std::vector MKLDNNMemoryDesc::getBlockDims() const { + const auto dims = desc.dims(); + + const auto &blk_desc = desc.data.format_desc.blocking; + + const size_t outer_ndims = dims.size(); + const size_t inner_ndims = blk_desc.inner_nblks; + const size_t total_ndims = outer_ndims + inner_ndims; + + // total inner block size. in case of 4i16o4i will be {16, 16, 1, 1} + std::vector total_block_per_dim(outer_ndims, 1); + for (int i = 0; i < inner_ndims; i++) { + total_block_per_dim[blk_desc.inner_idxs[i]] *= blk_desc.inner_blks[i]; + } + // blocked dims + // [dims via new_outer_order with auto pad] U [inner_blk_dims] + std::vector outer_block_dims = MKLDNNExtensionUtils::convertToSizeVector(dims); + for (size_t i = 0; i < outer_block_dims.size(); i++) { + if (outer_block_dims[i] != Shape::UNDEFINED_DIM) { + outer_block_dims[i] = div_up(outer_block_dims[i], total_block_per_dim[i]); + } + } + + // order of outer dims. 
In case of IOhw_ will be {1, 0, 2, 3}
+    std::vector<size_t> outer_order(outer_ndims);
+    std::copy(order.begin(), order.begin() + outer_ndims, outer_order.begin());
+
+    SizeVector blk_dims(total_ndims, 0);
+    std::copy(blk_desc.inner_blks, blk_desc.inner_blks + blk_desc.inner_nblks,
+              blk_dims.end() - blk_desc.inner_nblks);
+    std::transform(outer_order.begin(), outer_order.end(), blk_dims.begin(),
+                   [&] (size_t i) { return outer_block_dims[i]; });
+    return blk_dims;
+}
+
+MKLDNNMemoryDesc::MKLDNNMemoryDesc(const Shape &shape, dnnl::memory::data_type dataType, dnnl::memory::format_tag format) : MemoryDesc(shape, Mkldnn) {
+    auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims());
+    if (format == memory::format_tag::any)
+        IE_THROW(Unexpected) << "Memory format any is prohibited!";
+    if (format != memory::format_tag::undef) {
+        if (format == memory::format_tag::x && dims.size() == 0) {
+            desc = mkldnn::memory::desc(mkldnn::memory::dims(1, 1), dataType, format);
+        } else {
+            desc = mkldnn::memory::desc(dims, dataType, format);
+        }
+
+        std::vector<size_t> perm;
+        std::vector<size_t> inner_blks;
+        std::vector<size_t> inner_idxs;
+
+        mkldnn::impl::memory_desc_wrapper::compute_blocking(mkldnn::memory::convert_to_c(format), perm, inner_blks, inner_idxs);
+
+        order.swap(perm);
+        order.insert(order.end(), inner_idxs.begin(), inner_idxs.end());
+    } else {
+        // Trying to create plain descriptor
+        // This WA is needed since memory::format_tag doesn't contain plain tag for tensors with rank > 6D
+        InitializePlain(shape.getDims(), dataType);
+    }
+}
+
+
+/**
+ * Construct from blocked parameters
+ *
+ * IE  IOhw_4i16o4i   dims(N) = {32, 64, 128, 128}
+ *   blockedDims  {4, 2, 128, 128, 4, 16, 4}                  // total dims (inner, outermost, auto blocked/padded). Generally sorted by strides.
+ *   strides      {8388608, 4194304, 32768, 256, 64, 4, 1}    // strides for blockedDims, a non-increasing sequence
+ *   order        {1, 0, 2, 3, 1, 0, 1}                       // matching to original dims
+ *
+ * All vectors blockedDims/strides/order have the same size, which equals the total number of internal blocked dims (inner_dims + outer_dims)
+ *
+ * Tensor descriptor filling is not deterministic. It allows any permutation of indices which keeps the order of
+ * the real dims splitting.
+ *     for {1, 0, 2, 3, 1, 0, 1} we can swap elements [1] <=> [4]
+ *     but not [0] <=> [4], because that breaks the splitting of the original dims into the internal blocked dims
+ * Normalization of representation: make the strides ordering monotonic while keeping the layout the same as the original. Not all
+ * layouts allow us to reach the normalized form of the tensor desc.
+ *
+ * Limitation of conversion: the first N elements of order should be a permutation of [0, 1, 2, ..., N-1]
+ */
+MKLDNNMemoryDesc::MKLDNNMemoryDesc(InferenceEngine::Precision prc, const Shape &shape, const std::vector<size_t> &blockedDims,
+                                   const std::vector<size_t> &order, size_t offsetPadding, const std::vector<size_t> &offsetPaddingToData,
+                                   const std::vector<size_t> &strides) : MemoryDesc(shape, Mkldnn) {
+    // scalar case
+    if (shape.getRank() == 0) {
+        desc.data.format_kind = dnnl_blocked;
+        desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(prc));
+        desc.data.ndims = 1;
+        desc.data.dims[0] = 1;
+        desc.data.padded_dims[0] = 1;
+        desc.data.format_desc.blocking.strides[0] = 1;
+        desc.data.padded_offsets[0] = 0;
+        desc.data.offset0 = offsetPadding;
+        return;
+    }
+
+    if (order.size() != blockedDims.size()) {
+        IE_THROW() << "Can not construct MKLDNNMemoryDesc, order and blockedDims must have equal sizes";
+    }
+
+    if (!offsetPaddingToData.empty() && offsetPaddingToData.size() != order.size()) {
+        IE_THROW() << "Can not construct MKLDNNMemoryDesc, offsetPaddingToData must have the same size as order and blockedDims";
+    }
+
+    if (!strides.empty() && strides.size() != order.size()) {
+        IE_THROW() << "Can not construct MKLDNNMemoryDesc, strides must have the same size as order and blockedDims";
+    }
+
+    if (std::any_of(order.begin(), order.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
+        IE_THROW() << "MKLDNNMemoryDesc doesn't support undefined order.";
+    }
+
+    if (std::any_of(blockedDims.begin() + shape.getRank(), blockedDims.end(), [](size_t val) { return val == Shape::UNDEFINED_DIM; })) {
+        IE_THROW() << "MKLDNNMemoryDesc doesn't support undefined blockedDims.";
+    }
+
+    auto dims = MKLDNNExtensionUtils::convertToDnnlDims(shape.getDims());
+
+    size_t outer_ndims = dims.size();
+    size_t inner_ndims = order.size() - dims.size();
+
+    if (!strides.empty()) {
+        bool is_descending_strides = true;
+        for (int i = 1; i < strides.size(); i++) {
+            is_descending_strides &= (strides[i - 1] >= strides[i]);
+        }
+
+        // TODO: Those are strong constraints that could be relaxed. IE::TensorDesc allows transposing blocked dims,
+        // and maybe we can achieve a correct "descending strides" form which allows the conversion.
+        if (!is_descending_strides)
+            IE_THROW() << "Can not construct MKLDNNMemoryDesc from strides: " << vec2str(strides);
+    }
+
+    std::vector<size_t> outer_order(outer_ndims, outer_ndims + 1); // outer_order[i] is index of stride for i-th dimension
+    for (size_t i = 0; i < outer_ndims; i++) {
+        outer_order[order[i]] = i;
+    }
+    bool outer_is_correct_permutation_of_n =
+        std::find(outer_order.begin(), outer_order.end(), outer_ndims + 1) == outer_order.end();
+
+    if (!outer_is_correct_permutation_of_n)
+        IE_THROW() << "Can not construct MKLDNNMemoryDesc because of incorrect order: " << vec2str(order);
+
+    if (!strides.empty() && std::none_of(strides.begin(), strides.end(), [](size_t x) { return Shape::UNDEFINED_DIM == x; })) {
+        bool inner_block_are_dense = one_of(strides.back(), 0, 1); // stride 1 - is dense case, 0 - broadcasted
+        for (int i = outer_ndims; i < strides.size() - 1; i++) {
+            inner_block_are_dense &= (strides[i] == strides[i + 1] * blockedDims[i + 1]);
+        }
+
+        if (!inner_block_are_dense)
+            IE_THROW() << "Can not construct MKLDNNMemoryDesc from strides: " << vec2str(strides) << " inner blocks are not dense.";
+    }
+
+    // Fill general memory desc fields
+    desc.data.format_kind = dnnl_blocked;
+    desc.data.extra.flags = 0;
+    desc.data.data_type = memory::convert_to_c(MKLDNNMemory::convertToDataType(prc));
+    desc.data.ndims = dims.size();
+    desc.data.offset0 = offsetPadding;
+    std::copy(dims.begin(), dims.end(), desc.data.dims);
+
+    if (!offsetPaddingToData.empty()) {
+        bool inner_pad_offsets_is_zero = std::all_of(offsetPaddingToData.begin() + outer_ndims, offsetPaddingToData.end(),
+                                                     [](size_t pad) { return pad == 0; });
+
+        if (!inner_pad_offsets_is_zero)
+            IE_THROW() << "Can not construct MKLDNNMemoryDesc, the inner pad offsets are not zero: " << vec2str(offsetPaddingToData);
+        auto dnnlPaddedOffsets = MKLDNNExtensionUtils::convertToDnnlDims(offsetPaddingToData);
+        std::copy(dnnlPaddedOffsets.begin(), dnnlPaddedOffsets.begin() + outer_ndims, desc.data.padded_offsets);
+    } else {
+        std::fill(std::begin(desc.data.padded_offsets), std::begin(desc.data.padded_offsets) + outer_ndims, 0);
+    }
+
+    std::fill(desc.data.padded_dims, desc.data.padded_dims + outer_ndims, 1);
+    auto dnnlBlkDims = MKLDNNExtensionUtils::convertToDnnlDims(blockedDims);
+
+    for (size_t i = 0; i < order.size(); i++) {
+        auto idx = order[i];
+        if (desc.data.padded_dims[idx] != DNNL_RUNTIME_DIM_VAL && dnnlBlkDims[i] != DNNL_RUNTIME_DIM_VAL) {
+            desc.data.padded_dims[idx] *= dnnlBlkDims[i];
+        } else {
+            desc.data.padded_dims[idx] = DNNL_RUNTIME_DIM_VAL;
+        }
+    }
+
+    // Fill blocking desc
+    auto &dnn_blk_desc = desc.data.format_desc.blocking;
+    dnn_blk_desc.inner_nblks = inner_ndims;
+    std::copy(dnnlBlkDims.end() - inner_ndims, dnnlBlkDims.end(), dnn_blk_desc.inner_blks);
+    std::copy(order.end() - inner_ndims, order.end(), dnn_blk_desc.inner_idxs);
+
+    if (strides.empty()) {
+        if (std::any_of(dnnlBlkDims.begin(), dnnlBlkDims.end(), [](memory::dim val) { return val == DNNL_RUNTIME_DIM_VAL; })) {
+            std::fill(std::begin(dnn_blk_desc.strides), std::begin(dnn_blk_desc.strides) + outer_ndims, DNNL_RUNTIME_DIM_VAL);
+        } else {
+            //TODO [DS]: phase 2: refactor
+            std::vector<memory::dim> tmpStrides(order.size());
+            tmpStrides[order.size() - 1] = 1;
+            for (size_t i = 2; i <= order.size(); i++) {
+                tmpStrides[order.size() - i] = tmpStrides[order.size() - (i - 1)] * dnnlBlkDims[blockedDims.size() - (i - 1)];
+            }
+            for (size_t i = 0; i < outer_ndims; i++) {
+                dnn_blk_desc.strides[i] = tmpStrides[outer_order[i]];
+            }
+        }
+    } else {
+        // convert the strides once, before the loop, instead of on every iteration
+        auto dnnlStrides = MKLDNNExtensionUtils::convertToDnnlDims(strides);
+        for (size_t i = 0; i < outer_ndims; i++) {
+            dnn_blk_desc.strides[i] = dnnlStrides[outer_order[i]];
+        }
+    }
+
+    this->order = order;
+}
 } // namespace MKLDNNPlugin
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
index 665a7f34a27a6a..870a044d846adb 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_memory.h
@@ -8,6 +8,7 @@
 #include "mkldnn_dims.h"
 #include "cpu_memory_desc.h"
 #include "mkldnn_extension_utils.h"
+#include "cpu_memory_desc_utils.h"
 #include
 #include
 #include
@@ -47,6 +48,8 @@ class MKLDNNMemoryDesc : public MemoryDesc {
     /** Construct a tensor desc with the specified layout format tag. Any and Undef are not supported */
     MKLDNNMemoryDesc(const std::vector<size_t>& _dims, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format);
 
+    MKLDNNMemoryDesc(const Shape& shape, mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format);
+
     explicit MKLDNNMemoryDesc(const mkldnn::memory::desc& desc);
 
     /**
@@ -54,17 +57,22 @@
      * Try to define the original format tag used on creation
      *
      * @return the format tag if it could be defined
      */
-    mkldnn::memory::format_tag getFormat() const;
+    mkldnn::memory::format_tag getFormat() const; // move to the private section
 
     mkldnn::memory::data_type getDataType() const {
         return static_cast<mkldnn::memory::data_type>(desc.data.data_type);
     }
 
+    // TODO [DS]: phase 2: remove!!!
     MKLDNNDims getDims() const {
         return MKLDNNDims(desc.data.dims, desc.data.ndims);
     }
+
+    // TODO [DS]: phase 2: move to the blocked desc interface
     bool blocksExtended() const;
+
+    // TODO [DS]: phase 2: remove
     operator bool() const {
         return getFormat() != mkldnn::memory::format_tag::any && getFormat() != mkldnn::memory::format_tag::undef;
     }
@@ -84,14 +92,10 @@ class MKLDNNMemoryDesc : public MemoryDesc {
         return MKLDNNPlugin::make_unique<MKLDNNMemoryDesc>(*this);
     }
 
-    std::unique_ptr<MemoryDesc> cloneWithNewDims(const std::vector<size_t>& dims) const override;
-
     bool hasLayoutType(LayoutType layoutType) const override;
 
     std::string serializeFormat() const override;
 
-    bool isDefined() const override;
-
     InferenceEngine::Precision getPrecision() const override;
 
     void setPrecision(InferenceEngine::Precision prc) override;
@@ -100,18 +104,39 @@ class MKLDNNMemoryDesc : public MemoryDesc {
     bool isCompatible(const BlockedMemoryDesc& rhs) const;
     bool isCompatible(const MKLDNNMemoryDesc& rhs) const;
 
+    const std::vector<size_t>& getOrder() const {
+        return order;
+    }
+
     size_t getMaxMemSize() const override;
 
 private:
     size_t getElementOffset(size_t elemNumber) const override;
+
+    void InitializePlain(const std::vector<size_t>& _dims, mkldnn::memory::data_type dataType);
+
     size_t getMemSizeImp() const override;
     bool isPlainFormat() const;
     bool isBlockedCFormat(size_t blk_size = UNREACHABLE_DIM) const;
     bool isTailCFormat() const;
+    bool isDefinedImp() const override;
+    std::unique_ptr<MemoryDesc> cloneWithNewDimsImp(const std::vector<size_t>& dims) const override;
+
+    std::vector<size_t> getStrides() const;
+    std::vector<size_t> getBlockDims() const;
 
 private:
+    MKLDNNMemoryDesc(InferenceEngine::Precision prc, const Shape& shape, const std::vector<size_t>& blockedDims,
+                     const std::vector<size_t>& order, size_t offsetPadding = 0, const std::vector<size_t>& offsetPaddingToData = {},
+                     const std::vector<size_t>& strides = {});
+
     static constexpr size_t UNREACHABLE_DIM = std::numeric_limits<size_t>::max();
+
     mkldnn::memory::desc desc;
+    std::vector<size_t> order;
+
+    friend BlockedMemoryDesc MemoryDescUtils::convertToBlockedDescriptor(const MKLDNNMemoryDesc& inpDesc);
+    friend MKLDNNMemoryDesc
MemoryDescUtils::convertToMKLDNNMemoryDesc(const BlockedMemoryDesc& desc); + friend MemoryDescPtr MemoryDescUtils::applyUndefinedOffset(const MKLDNNMemoryDesc& desc); + friend MemoryDescPtr MemoryDescUtils::resetOffset(const MemoryDesc* desc); }; @@ -133,6 +158,7 @@ class MKLDNNMemory { return prim; } + // TODO [DS]: phase 2: remove mkldnn::memory::desc GetDescriptor() const { return prim->get_desc(); } @@ -164,26 +190,35 @@ class MKLDNNMemory { */ void* GetPtr() const; + //TODO [DS]: phase 2: change to get precision mkldnn::memory::data_type GetDataType() const { return static_cast(GetDescriptor().data.data_type); } + //TODO [DS]: phase 2: align with descriptors size methods (reuse them under the hood) size_t GetSize() const; + + //TODO [DS]: phase 2: remove size_t GetElementsCount() const; + + //TODO [DS]: phase 2: change to getShape mkldnn::memory::dims GetDims() const { auto data = GetDescriptor().data; return {std::begin(data.dims), std::begin(data.dims) + data.ndims}; } void Create(const MemoryDesc& desc, const void* data = nullptr, bool pads_zeroing = true); + void Create(MemoryDescPtr desc, const void* data = nullptr, bool pads_zeroing = true); - // Redefines dimensions. The memory descriptor will also be redefined with the new dims. + // Redefines descriptor. The memory descriptor will be replaced with the new one. // Memory will not be reallocated if the new tensor size is less or equal the upper bound. // Caution!!! This action invalidates the previous data layout. The old data may become unreachable. - void redefineDims(const std::vector& desc); + void redefineDesc(const MemoryDesc& desc); + void redefineDesc(MemoryDescPtr desc); // Like a plain format + //TODO [DS]: phase 2: remove void SetData(mkldnn::memory::data_type dataType, mkldnn::memory::format_tag format, const void* data, size_t size, bool ftz = true) const; void SetData(const MKLDNNMemory& memory, size_t size = 0, bool ftz = true) const; void FillZero(); @@ -192,14 +227,18 @@ class MKLDNNMemory { return useExternalStorage; } + //TODO [DS]: phase 2: move to oneDNN utils static mkldnn::memory::format_tag GetPlainFormatByRank(size_t rank); + //TODO [DS]: phase 2: remove static InferenceEngine::Layout GetPlainLayout(const mkldnn::memory::dims& dims); static mkldnn::memory::format_tag Convert(const InferenceEngine::Layout layout); static InferenceEngine::Precision convertToIePrec(mkldnn::memory::data_type dataType); static mkldnn::memory::data_type convertToDataType(const InferenceEngine::Precision &precision); static std::string formatToString(mkldnn::memory::format_tag fmt); + //TODO [DS]: end remove section + //TODO [DS]: phase 2: move to reorder static void reorderData(const MKLDNNMemory& input, const MKLDNNMemory& output, size_t size = 0); private: @@ -208,6 +247,7 @@ class MKLDNNMemory { void Create(const mkldnn::memory::desc& desc, const void* data = nullptr, bool pads_zeroing = true); + //TODO [DS]: phase 2: remove const MKLDNNMemoryDesc GetMKLDNNDesc() const { return MKLDNNMemoryDesc(prim->get_desc()); } diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_node.h b/inference-engine/src/mkldnn_plugin/mkldnn_node.h index e8df5a686927bf..fb3ee702e2e471 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_node.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_node.h @@ -454,6 +454,28 @@ class MKLDNNNode { return &supportedPrimitiveDescriptors[selectedPrimitiveDescriptorIndex]; } + const MemoryDesc* getOutputMemDescAtPort(size_t portNum) const { + if (auto primDesc = getSelectedPrimitiveDescriptor()) { + const 
diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp
index f80a20cb48ebe7..b89dd896d075ff 100644
--- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp
+++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_non_max_suppression_node.cpp
@@ -195,20 +195,21 @@ void MKLDNNNonMaxSuppressionNode::execute(mkldnn::stream strm) {
         });
     }
 
-    auto outputMemPtr = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr();
-    auto& maxOutputDims = outputMemPtr->GetDesc().getShape().getMaxDims();
+    auto indicesMemPtr = getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr();
+    auto scoresMemPtr = getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr();
+    auto& maxOutputDims = indicesMemPtr->GetDesc().getShape().getMaxDims();
     const size_t selectedBoxesNum = maxOutputDims[0];
     const size_t validOutputs = std::min(filtBoxes.size(), selectedBoxesNum);
 
     SizeVector newDims = {validOutputs, maxOutputDims[1]};
-    outputMemPtr->redefineDims(newDims);
-    getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr()->redefineDims(newDims);
+    indicesMemPtr->redefineDesc(getOutputMemDescAtPort(NMS_SELECTEDINDICES)->cloneWithNewDims(newDims));
+    scoresMemPtr->redefineDesc(getOutputMemDescAtPort(NMS_SELECTEDSCORES)->cloneWithNewDims(newDims));
 
-    int selectedIndicesStride = outputMemPtr->GetDescWithType<BlockedMemoryDesc>().getStrides()[0];
+    int selectedIndicesStride = indicesMemPtr->GetDescWithType<BlockedMemoryDesc>().getStrides()[0];
 
-    int *selectedIndicesPtr = reinterpret_cast<int *>(getChildEdgesAtPort(NMS_SELECTEDINDICES)[0]->getMemoryPtr()->GetPtr());
-    float *selectedScoresPtr = reinterpret_cast<float *>(getChildEdgesAtPort(NMS_SELECTEDSCORES)[0]->getMemoryPtr()->GetPtr());
+    int *selectedIndicesPtr = reinterpret_cast<int *>(indicesMemPtr->GetPtr());
+    float *selectedScoresPtr = reinterpret_cast<float *>(scoresMemPtr->GetPtr());
 
     size_t idx = 0lu;
     for (; idx < validOutputs; idx++) {
diff --git a/inference-engine/src/mkldnn_plugin/utils/general_utils.h b/inference-engine/src/mkldnn_plugin/utils/general_utils.h
index 35640212a5555b..eb50d5ac734881 100644
--- a/inference-engine/src/mkldnn_plugin/utils/general_utils.h
+++ b/inference-engine/src/mkldnn_plugin/utils/general_utils.h
@@ -134,4 +134,21 @@ inline InferenceEngine::Precision getMaxPrecision(std::vector<InferenceEngine::Precision> precisions) {
+inline std::string dim2str(size_t dim) {
+    return dim == Shape::UNDEFINED_DIM ? "?" : std::to_string(dim);
+}
+
+inline std::string dims2str(const std::vector<size_t>& dims) {
+    std::stringstream output;
+    output << "{";
+
+    for (auto itr = dims.begin(); itr != dims.end(); ++itr) {
+        output << (itr == dims.begin() ? "" : ", ");
+        output << dim2str(*itr);
+    }
+
+    output << "}";
+    return output.str();
+}
+
 } // namespace MKLDNNPlugin
\ No newline at end of file
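A quick illustration of the rendering these helpers produce for a partially undefined shape (assuming, as sketched above, that dim2str() maps Shape::UNDEFINED_DIM to "?"):

    // Illustrative only: dims2str() output for a partially undefined shape.
    std::vector<size_t> dims{16, Shape::UNDEFINED_DIM, 7};
    std::string printed = dims2str(dims);  // -> "{16, ?, 7}"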
diff --git a/inference-engine/tests/unit/cpu/CMakeLists.txt b/inference-engine/tests/unit/cpu/CMakeLists.txt
index bea484969fc66c..c5180204454c6b 100644
--- a/inference-engine/tests/unit/cpu/CMakeLists.txt
+++ b/inference-engine/tests/unit/cpu/CMakeLists.txt
@@ -14,6 +14,7 @@ addIeTargetTest(
         LINK_LIBRARIES
             gtest
             gtest_main
+            gmock
             mkldnn
             inference_engine_transformations
             inference_engine_lp_transformations
diff --git a/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp b/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp
index 66eecf43cf31cf..42325b0c88dd0f 100644
--- a/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp
+++ b/inference-engine/tests/unit/cpu/mkldnn_memory_desc_test.cpp
@@ -4,12 +4,15 @@
 #include
 #include
+#include <gmock/gmock-matchers.h>
 
 #include "mkldnn_memory.h"
 #include "cpu_memory_desc_utils.h"
+#include "nodes/common/blocked_desc_creator.h"
 
 using namespace MKLDNNPlugin;
 using namespace InferenceEngine;
+using namespace testing;
 
 TEST(MemDescTest, Conversion) {
     // Check if conversion keep desc structure
@@ -37,13 +40,83 @@
         ASSERT_TRUE(converted_correctly(p.first, p.second));
 }
 
+TEST(MemDescTest, UndefinedStateConversion) {
+    ngraph::PartialShape ngraphUndefinedShape({{16}, {7, 15}, {-1, -1}, {3}});
+    Shape cpuShape(ngraphUndefinedShape);
+
+    const std::vector<mkldnn::memory::format_tag> vecTags = {
+        mkldnn::memory::format_tag::nChw8c,
+        mkldnn::memory::format_tag::nhwc,
+        mkldnn::memory::format_tag::nChw16c,
+        mkldnn::memory::format_tag::ABcd16a16b,
+        mkldnn::memory::format_tag::OIhw4i16o4i
+    };
+
+    for (auto tag : vecTags) {
+        MKLDNNMemoryDesc mkldnnDesc(cpuShape, mkldnn::memory::data_type::f32, tag);
+
+        ASSERT_FALSE(mkldnnDesc.isDefined());
+
+        auto blockedDesc = MemoryDescUtils::convertToBlockedDescriptor(mkldnnDesc);
+
+        ASSERT_TRUE(mkldnnDesc.isCompatible(blockedDesc));
+        ASSERT_TRUE(blockedDesc.isCompatible(mkldnnDesc));
+
+        auto reconstructedDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(blockedDesc);
+
+        ASSERT_TRUE(mkldnnDesc.isCompatible(reconstructedDesc));
+        ASSERT_TRUE(blockedDesc.isCompatible(reconstructedDesc));
+
+        mkldnn::memory::desc dnnlDesc = mkldnnDesc;
+        mkldnn::memory::desc reconstDnnlDesc = reconstructedDesc;
+
+        ASSERT_EQ(dnnlDesc, reconstDnnlDesc);
+
+        auto definedMemDesc = mkldnnDesc.cloneWithNewDims({16, 10, 15, 3});
+        auto definedReconstructedMkldnnDesc = reconstructedDesc.cloneWithNewDims({16, 10, 15, 3});
+
+        ASSERT_TRUE(definedMemDesc->isCompatible(*definedReconstructedMkldnnDesc));
+    }
+}
+
+TEST(MemDescTest, TurnToUninit) {
+    Shape cpuShape(SizeVector{7, 19, 43, 20});
+
+    const auto& blockedDescCreators = BlockedDescCreator::getCommonCreators();
+
+    for (const auto& item : blockedDescCreators) {
+        const auto& creator = item.second;
+
+        auto blockedDesc = creator->createDesc(Precision::FP32, cpuShape);
+        auto mkldnnDesc = MemoryDescUtils::convertToMKLDNNMemoryDesc(blockedDesc);
+
+        auto uninitMkldnnDesc = MemoryDescUtils::applyUndefinedOffset(mkldnnDesc);
+
+        ASSERT_TRUE(uninitMkldnnDesc->isCompatible(mkldnnDesc));
+
+        auto strides = blockedDesc.getStrides();
+        std::transform(strides.begin(), strides.begin() + cpuShape.getRank(), strides.begin(), [](size_t x) { return x * 3; });
+
+        auto stridedBlockedDesc = BlockedMemoryDesc(blockedDesc.getPrecision(), blockedDesc.getShape(), blockedDesc.getBlockDims(), blockedDesc.getOrder(),
+                                                    100500, blockedDesc.getOffsetPaddingToData(), strides);
+
+        ASSERT_FALSE(blockedDesc.isCompatible(stridedBlockedDesc));
+        ASSERT_TRUE(uninitMkldnnDesc->isCompatible(stridedBlockedDesc));
+
+        auto initMkldnnDesc = MemoryDescUtils::resetOffset(uninitMkldnnDesc.get());
+
+        ASSERT_TRUE(initMkldnnDesc->isCompatible(blockedDesc));
+        ASSERT_FALSE(initMkldnnDesc->isCompatible(stridedBlockedDesc));
+    }
+}
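+
+// Informal summary of the invariant exercised above (not an API contract):
+// applyUndefinedOffset() keeps the precision, blocking and dim order of a
+// descriptor but turns its offset and strides into wildcards, so the result
+// matches any strided variant of the same layout; resetOffset() pins the
+// offset and strides back to the dense defaults, restoring the strict
+// comparison.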
+
 TEST(MemDescTest, CompareWithTensorDescRecomputedStrides) {
     auto converted_correctly = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) {
         dnnl::memory::desc orig_tdesc {dims, dnnl::memory::data_type::u8, fmt};
         MKLDNNMemoryDesc plg_tdesc {orig_tdesc};
         BlockedMemoryDesc blk_tdesc = MemoryDescUtils::convertToBlockedDescriptor(plg_tdesc);
-        BlockedMemoryDesc recomputed_blk_tdesc(blk_tdesc.getPrecision(), blk_tdesc.getShape().getStaticDims(), blk_tdesc.getBlockDims(), blk_tdesc.getOrder());
+        BlockedMemoryDesc recomputed_blk_tdesc(blk_tdesc.getPrecision(), blk_tdesc.getShape(), blk_tdesc.getBlockDims(), blk_tdesc.getOrder());
 
         return blk_tdesc.isCompatible(recomputed_blk_tdesc);
     };
 
@@ -138,6 +211,127 @@ TEST(MemDescTest, ComaptibleWithFormat) {
     GTEST_SKIP();
 }
 
+TEST(MKLDNNMemDescTest, KeepOrder) {
+    using mkldnn::memory;
+    std::vector<size_t> dims = {7, 3, 1, 5};
+    memory::data_type dataType = memory::data_type::u8;
+    MKLDNNMemoryDesc descPlanar(dims, dataType);
+    ASSERT_THAT(descPlanar.getOrder(), ElementsAre(0, 1, 2, 3));
+
+    MKLDNNMemoryDesc descTailC(dims, dataType, memory::format_tag::acdb);
+    ASSERT_THAT(descTailC.getOrder(), ElementsAre(0, 2, 3, 1));
+
+    MKLDNNMemoryDesc descBlockedC(dims, dataType, memory::format_tag::aBcd16b);
+    ASSERT_THAT(descBlockedC.getOrder(), ElementsAre(0, 1, 2, 3, 1));
+
+    MKLDNNMemoryDesc descWeightBlocked(dims, dataType, memory::format_tag::ABcd16b16a2b);
+    ASSERT_THAT(descWeightBlocked.getOrder(), ElementsAre(0, 1, 2, 3, 1, 0, 1));
+
+    auto dnnDims = MKLDNNExtensionUtils::convertToDnnlDims(dims);
+
+    memory::desc mkldnnDescPlanar(dnnDims, dataType, memory::format_tag::abcd);
+    ASSERT_THAT(MKLDNNMemoryDesc(mkldnnDescPlanar).getOrder(), ElementsAre(0, 1, 2, 3));
+
+    memory::desc mkldnnDescTailC(dnnDims, dataType, memory::format_tag::acdb);
+    ASSERT_THAT(MKLDNNMemoryDesc(mkldnnDescTailC).getOrder(), ElementsAre(0, 2, 3, 1));
+
+    memory::desc mkldnnDescBlockedC(dnnDims, dataType, memory::format_tag::aBcd16b);
+    ASSERT_THAT(MKLDNNMemoryDesc(mkldnnDescBlockedC).getOrder(), ElementsAre(0, 1, 2, 3, 1));
+
+    memory::desc mkldnnDescWeightBlocked(dnnDims, dataType, memory::format_tag::ABcd16b16a2b);
+    ASSERT_THAT(MKLDNNMemoryDesc(mkldnnDescWeightBlocked).getOrder(), ElementsAre(0, 1, 2, 3, 1, 0, 1));
+}
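+
+// How to read the order vectors asserted above (informal): the entries name
+// source dims from outermost to innermost (a=0, b=1, c=2, d=3), and an index
+// repeats once per inner block of that dim. For dims {7, 3, 1, 5}:
+//   abcd         -> {0, 1, 2, 3}           plain layout
+//   acdb         -> {0, 2, 3, 1}           channels-last, dim 1 innermost
+//   aBcd16b      -> {0, 1, 2, 3, 1}        dim 1 blocked by 16, so it repeats
+//   ABcd16b16a2b -> {0, 1, 2, 3, 1, 0, 1}  inner blocks 16b, 16a, 2b in order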
+
+TEST(MemDescTest, UndefinedState) {
+    ngraph::PartialShape ngraphShape({{16}, {-1, -1}, {20, 30}, {7}});
+    MKLDNNPlugin::Shape pluginShape(ngraphShape);
+    MKLDNNMemoryDesc memDesc(pluginShape, mkldnn::memory::data_type::f32, mkldnn::memory::format_tag::nChw8c);
+
+    ASSERT_FALSE(memDesc.isDefined());
+
+    ASSERT_THROW(memDesc.cloneWithNewDims({16, 7, 40, 7}), InferenceEngine::ParameterMismatch);
+    ASSERT_THROW(memDesc.cloneWithNewDims({16, 7, 25}), InferenceEngine::ParameterMismatch);
+    ASSERT_THROW(memDesc.cloneWithNewDims({16, 7, 25, 5}), InferenceEngine::ParameterMismatch);
+
+    auto definedDesc = memDesc.cloneWithNewDims({16, 15, 25, 7});
+
+    ASSERT_TRUE(definedDesc->isDefined());
+
+    auto creator = BlockedDescCreator::getCommonCreators().at(LayoutType::nCsp8c);
+    auto blockedDesc = creator->createDesc(Precision::FP32, pluginShape);
+
+    ASSERT_FALSE(blockedDesc.isDefined());
+
+    ASSERT_TRUE(blockedDesc.isCompatible(memDesc));
+
+    ASSERT_THROW(blockedDesc.cloneWithNewDims({16, 7, 40, 7}), InferenceEngine::ParameterMismatch);
+    ASSERT_THROW(blockedDesc.cloneWithNewDims({16, 7, 25}), InferenceEngine::ParameterMismatch);
+    ASSERT_THROW(blockedDesc.cloneWithNewDims({16, 7, 25, 5}), InferenceEngine::ParameterMismatch);
+
+    auto definedBlockedDesc = blockedDesc.cloneWithNewDims({16, 15, 25, 7});
+
+    ASSERT_TRUE(definedBlockedDesc->isDefined());
+
+    ASSERT_FALSE(memDesc.isCompatible(*definedDesc));
+    ASSERT_FALSE(memDesc.isCompatible(*definedBlockedDesc));
+
+    ASSERT_TRUE(definedBlockedDesc->isCompatible(*definedDesc));
+}
+
+TEST(MemDescTest, MemSize) {
+    constexpr size_t undefSize = MemoryDesc::UNDEFINED_SIZE;
+    static const auto dnnlDataType = mkldnn::memory::data_type::f32;
+    static const Precision iePrc = Precision::FP32;
+
+    ngraph::PartialShape ngraphShapeUndef({{16}, {-1, -1}, {20, 30}, {7}});
+    MKLDNNPlugin::Shape pluginShapeUndef(ngraphShapeUndef);
+
+    auto creator = BlockedDescCreator::getCommonCreators().at(LayoutType::nspc);
+    auto blockedDescUndef = creator->createDesc(iePrc, pluginShapeUndef);
+
+    ASSERT_EQ(blockedDescUndef.getCurrentSize(), undefSize);
+    ASSERT_EQ(blockedDescUndef.getMaxMemSize(), undefSize);
+
+    MKLDNNMemoryDesc memDescUndef(pluginShapeUndef, dnnlDataType, mkldnn::memory::format_tag::nhwc);
+
+    ASSERT_EQ(memDescUndef.getCurrentSize(), undefSize);
+    ASSERT_EQ(memDescUndef.getMaxMemSize(), undefSize);
+
+    ngraph::PartialShape ngraphShapeDefUpperBound({{16}, {7, 14}, {20, 30}, {7}});
+    MKLDNNPlugin::Shape pluginShapeDefUpperBound(ngraphShapeDefUpperBound);
+
+    auto blockedDescDefUpper = creator->createDesc(iePrc, pluginShapeDefUpperBound);
+
+    ASSERT_EQ(blockedDescDefUpper.getCurrentSize(), undefSize);
+    auto maxElementsCount = std::accumulate(pluginShapeDefUpperBound.getMaxDims().begin(),
+                                            pluginShapeDefUpperBound.getMaxDims().end(),
+                                            static_cast<size_t>(1), std::multiplies<size_t>());
+    ASSERT_EQ(blockedDescDefUpper.getMaxMemSize(), maxElementsCount * iePrc.size());
+
+    MKLDNNMemoryDesc memDescDefUpper(pluginShapeDefUpperBound, dnnlDataType, mkldnn::memory::format_tag::nhwc);
+
+    ASSERT_EQ(memDescDefUpper.getCurrentSize(), undefSize);
+    ASSERT_EQ(memDescDefUpper.getMaxMemSize(), maxElementsCount * MKLDNNExtensionUtils::sizeOfDataType(dnnlDataType));
+
+    ngraph::PartialShape ngraphShapeDefined({{16}, {16}, {10}, {7}});
+    MKLDNNPlugin::Shape pluginShapeDefined(ngraphShapeDefined);
+
+    auto blockedDescDefined = creator->createDesc(iePrc, pluginShapeDefined);
+
+    ASSERT_NE(blockedDescDefined.getCurrentSize(), undefSize);
+    ASSERT_NE(blockedDescDefined.getMaxMemSize(), undefSize);
+    ASSERT_EQ(blockedDescDefined.getCurrentSize(), blockedDescDefined.getMaxMemSize());
+
+    MKLDNNMemoryDesc memDescDefined(pluginShapeDefined, dnnlDataType, mkldnn::memory::format_tag::nhwc);
+
+    ASSERT_NE(memDescDefined.getCurrentSize(), undefSize);
+    ASSERT_NE(memDescDefined.getMaxMemSize(), undefSize);
+    ASSERT_EQ(memDescDefined.getCurrentSize(), memDescDefined.getMaxMemSize());
+    ASSERT_EQ(blockedDescDefined.getCurrentSize(), memDescDefined.getCurrentSize());
+}
+
 TEST(isSameMethodTest, CheckTensorWithSameStrides) {
     auto isSameDataFormat = [] (dnnl::memory::format_tag fmt, dnnl::memory::dims dims) {
         dnnl::memory::desc oneDnnDesc {dims, dnnl::memory::data_type::u8, fmt};