From 13b3c954c0144d6f08572f43a950e7dfd67e122a Mon Sep 17 00:00:00 2001 From: Anton Voronov Date: Fri, 21 May 2021 13:54:56 +0300 Subject: [PATCH] [CPU] Deconvolution int8 support (#5565) --- .../src/mkldnn_plugin/mkldnn_descriptor.cpp | 12 + .../src/mkldnn_plugin/mkldnn_descriptor.h | 4 + .../src/mkldnn_plugin/mkldnn_graph.cpp | 1 + .../src/mkldnn_plugin/mkldnn_plugin.cpp | 3 +- .../nodes/mkldnn_deconv_node.cpp | 243 ++++++++++++++---- .../mkldnn_plugin/nodes/mkldnn_deconv_node.h | 5 + .../skip_tests_config.cpp | 1 + inference-engine/thirdparty/mkl-dnn | 2 +- 8 files changed, 217 insertions(+), 54 deletions(-) diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp index 99002688e90db5..e674ab6cf7e32e 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.cpp @@ -35,6 +35,18 @@ MKLDNNDescriptor::operator std::shared_ptr() return typeDesc->getPtr(); } +MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc) { + this->desc.reset(new DescFwdImpl(desc)); +} + +MKLDNNDescriptor::operator std::shared_ptr() { + auto typeDesc = std::dynamic_pointer_cast>(desc); + if (typeDesc == nullptr) { + IE_THROW() << "Cannot cast descriptor!"; + } + return typeDesc->getPtr(); +} + MKLDNNDescriptor::MKLDNNDescriptor(std::shared_ptr desc, std::shared_ptr prim) { this->desc.reset( diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h index 5025e1a025d6a5..f64dfb082c40aa 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h +++ b/inference-engine/src/mkldnn_plugin/mkldnn_descriptor.h @@ -15,6 +15,10 @@ class MKLDNNDescriptor { MKLDNNDescriptor(std::shared_ptr desc, std::shared_ptr prim); + + explicit MKLDNNDescriptor(std::shared_ptr desc); + operator std::shared_ptr(); + operator std::shared_ptr(); operator std::shared_ptr(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp index 1caedcaba75054..773576304b729c 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp @@ -313,6 +313,7 @@ void MKLDNNGraph::InitGraph() { SortTopologically(); InitDescriptors(); + RemoveDroppedEdges(); InitOptimalPrimitiveDescriptors(); diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp index acc93f72ebd86b..f4a44eda86d392 100644 --- a/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp +++ b/inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp @@ -334,7 +334,8 @@ static void Transformation(CNNNetwork& clonedNetwork, const Config& conf) { LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 }).setSupportAsymmetricQuantization(true)) .addStandaloneCleanup( LayerTransformation::Params(params).setPrecisionsOnActivations({ ngraph::element::u8 })) - .remove()); + .add( + LayerTransformation::Params(params).setSupportAsymmetricQuantization(false))); transformer.transform(nGraphFunc); } diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp index b18763f0e5e6e3..fd8a140997d523 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.cpp @@ -4,6 +4,7 @@ #include "mkldnn_deconv_node.h" #include "mkldnn_eltwise_node.h" +#include "mkldnn_input_node.h" #include #include #include @@ -12,6 +13,8 @@ #include "ie_parallel.hpp" #include "utils/general_utils.h" #include +#include +#include using namespace mkldnn; using namespace MKLDNNPlugin; @@ -37,6 +40,9 @@ bool MKLDNNDeconvolutionNode::isSupportedOperation(const std::shared_ptr& op, const mkldnn::engine& eng, MKLDNNWeightsSharing::Ptr &cache) : MKLDNNNode(op, eng, cache) { + internalBlobDesc.emplace_back([&](primitive_desc_iterator &primitive_desc_it, size_t idx) -> MKLDNNMemoryDesc { + return MKLDNNMemoryDesc(primitive_desc_it.weights_desc(0)); + }); std::string errorMessage; if (isSupportedOperation(op, errorMessage)) { errorPrefix = "Deconvolution node with name '" + getName() + "'"; @@ -79,23 +85,113 @@ MKLDNNDeconvolutionNode::MKLDNNDeconvolutionNode(const std::shared_ptrget_pads_begin(); paddingR = groupConvBackprop->get_pads_end(); } + for (int i = 0; i < dilation.size(); i++) { + kernel.push_back(weightDims[withGroups + 2 + i]); + } } else { IE_THROW(NotImplemented) << errorMessage; } } +InferenceEngine::Blob::Ptr MKLDNNDeconvolutionNode::createWeiBlobAsIO(InferenceEngine::SizeVector dims) { + auto constNode = std::dynamic_pointer_cast(getParentEdgeAt(1)->getParent()); + if (!constNode) + IE_THROW() << "Cannot cast const input node for node " << getName() << "."; + auto blb = constNode->getConstBlob(); + if (!blb) + IE_THROW() << "Cannot get const weights blob for node " << getName() << "."; + + // WA: In int8 case, we are processing weights using internal blob. + // So we disconnect constant node containing weights from the graph and then don't use it. + if (getParentEdges().size() == 3) { + removeEdge(getParentEdgeAt(2)); + inDims.erase(inDims.begin() + 2); + } + removeEdge(getParentEdgeAt(1)); + inDims.erase(inDims.begin() + 1); + + InferenceEngine::SizeVector dimsForBlockedDesc{dims}; + std::swap(dimsForBlockedDesc[withGroups + 0], dimsForBlockedDesc[withGroups + 1]); + + InferenceEngine::SizeVector orderForBlockedDesc; + if (withGroups) { + orderForBlockedDesc = {0, 2, 1}; + } else { + orderForBlockedDesc = {1, 0}; + } + for (int i = 2 + withGroups; i < dimsForBlockedDesc.size(); i++) + orderForBlockedDesc.push_back(i); + + BlockingDesc blkDesc(dimsForBlockedDesc, orderForBlockedDesc); + InferenceEngine::TensorDesc tensorDesc(blb->getTensorDesc().getPrecision(), dims, blkDesc); + + Blob::Ptr internalBlob = InferenceEngine::make_shared_blob(tensorDesc); + internalBlob->allocate(); + char *data = internalBlob->buffer(); + if (data == nullptr) + IE_THROW(NotAllocated) << "Internal blob was not allocated for node " << getName() << "."; + size_t intBuffSize = internalBlob->byteSize(); + + size_t offset = blb->byteSize(); + if (intBuffSize < offset) { + IE_THROW() << "Cannot create internal buffer. Buffer can be overrun."; + } + cpu_memcpy_s(data, intBuffSize, blb->cbuffer(), blb->byteSize()); + + return internalBlob; +} + +bool MKLDNNDeconvolutionNode::canBeExecutedInInt8() { + if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_common)) + return false; + + // todo: [antonvor] added these checks to fix performance problems + if (kernel.size() == 3) + return false; + if (!withGroups && IC % 4 != 0 && OC % 4 != 0) + return false; + + // todo: [antonvor] fusing is not supported yet for int8 + if (!fusedWith.empty()) + return false; + + for (int i = 0; i < kernel.size(); i++) { + if (kernel[i] < stride[i]) + return false; + } + + // not supported in oneDNN + if (withGroups && !isDW && (IC % 16 != 0 || OC % 16 != 0)) + return false; + + InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0); + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrecision); + + InferenceEngine::Precision weiPrecision = getOriginalInputPrecisionAtPort(1); + auto weightsDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(weiPrecision); + + if (isDW && (inputDataType == dnnl_s8 || dilation.size() == 3)) + return false; + + return (inputDataType == dnnl_s8 || inputDataType == dnnl_u8) && weightsDataType == dnnl_s8; +} + void MKLDNNDeconvolutionNode::getSupportedDescriptors() { if (!descs_fwd.empty() && !descs_bwd.empty()) return; - InferenceEngine::Precision precision = getOriginalInputPrecisionAtPort(0); - if (!one_of(precision, InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16)) - precision = InferenceEngine::Precision::FP32; - auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); - precision = getOriginalOutputPrecisionAtPort(0); - if (!one_of(precision, InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16)) - precision = InferenceEngine::Precision::FP32; - auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision); + isInt8 = canBeExecutedInInt8(); + + InferenceEngine::Precision inPrecision = getOriginalInputPrecisionAtPort(0); + InferenceEngine::Precision outPrecision = getOriginalOutputPrecisionAtPort(0); + if (!isInt8) { + if (!one_of(inPrecision, InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16)) + inPrecision = InferenceEngine::Precision::FP32; + if (!one_of(outPrecision, InferenceEngine::Precision::FP32, InferenceEngine::Precision::BF16)) + outPrecision = InferenceEngine::Precision::FP32; + } + auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(inPrecision); + auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(outPrecision); if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16) inputDataType = outputDataType = memory::data_type::bf16; @@ -115,10 +211,20 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() { paddingR[i] = (dst - calc_dst) * stride[i]; } - for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { + if (isInt8) { + // WA: if int8 deconvolution is supported, we create internal weights blob in IO format + std::swap(weightDims[withGroups + 0], weightDims[withGroups + 1]); + internalBlobs.push_back(createWeiBlobAsIO(weightDims)); + auto format = getParentEdgeAt(0)->getDims().ndims() == 5 ? dnnl::memory::format_tag::ndhwc : dnnl::memory::format_tag::nhwc; MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); createDescriptor({in_candidate}, {out_candidate}); + } else { + for (auto format : getAvailableFormatsForDims(getParentEdgeAt(0)->getDims())) { + MKLDNNMemoryDesc in_candidate(getParentEdgeAt(0)->getDims(), inputDataType, format); + MKLDNNMemoryDesc out_candidate(getChildEdgeAt(0)->getDims(), outputDataType, format); + createDescriptor({in_candidate}, {out_candidate}); + } } setPostOps(attr); } @@ -152,12 +258,22 @@ void MKLDNNDeconvolutionNode::filterSupportedDescriptors() { while (itd != descs.end()) { bool isSuitableDesc = true; if (!inputMemoryFormatsFilter.empty()) { - auto src_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.diff_src_desc); - isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + if (isInt8) { + auto src_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.src_desc); + isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + } else { + auto src_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.diff_src_desc); + isSuitableDesc &= src_tdesc.isSame(inputMemoryFormatsFilter[0]); + } } if (!outputMemoryFormatsFilter.empty()) { - auto dst_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.diff_dst_desc); - isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + if (isInt8) { + auto dst_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.dst_desc); + isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + } else { + auto dst_tdesc = MKLDNNMemoryDesc(std::shared_ptr(*itd)->data.diff_dst_desc); + isSuitableDesc &= dst_tdesc.isSame(outputMemoryFormatsFilter[0]); + } } if (!isSuitableDesc) { itd = descs.erase(itd); @@ -176,15 +292,26 @@ void MKLDNNDeconvolutionNode::createPrimitive() { if (prim) return; - auto prim_desc = createPrimitiveDescriptor(attr); + if (isInt8) { + auto prim_desc = createPrimitiveDescriptor(attr); + + prim.reset(new deconvolution_forward(prim_desc)); + + auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + primArgs = {{DNNL_ARG_SRC, src}, {DNNL_ARG_WEIGHTS, internalBlobMemory[0]->GetPrimitive()}, {DNNL_ARG_DST, dst}}; + } else { + auto prim_desc = createPrimitiveDescriptor(attr); - prim.reset(new convolution_backward_data(prim_desc)); + prim.reset(new convolution_backward_data(prim_desc)); - auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive(); - auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); - primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}}; + auto src = getParentEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + auto weights = getParentEdgeAt(1)->getMemory().GetPrimitive(); + auto dst = getChildEdgesAtPort(0)[0]->getMemoryPtr()->GetPrimitive(); + primArgs = {{DNNL_ARG_DIFF_DST, src}, {DNNL_ARG_WEIGHTS, weights}, {DNNL_ARG_DIFF_SRC, dst}}; + } } void MKLDNNDeconvolutionNode::createDescriptor(const std::vector &inputDesc, @@ -196,36 +323,47 @@ void MKLDNNDeconvolutionNode::createDescriptor(const std::vector& orig_dims) { - return memory::dims(orig_dims.begin(), orig_dims.end()); - }; - - std::shared_ptr conv_desc; - conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg, - out_candidate, wgh_candidate, in_candidate, - convert(stride), - convert(dilation), - convert(paddingL), - convert(paddingR))); - - std::shared_ptr deconv_desc; - deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate, wgh_candidate, - in_candidate, - convert(stride), - convert(dilation), - convert(paddingL), - convert(paddingR))); - descs_fwd.push_back(conv_desc); - descs_bwd.push_back(deconv_desc); - - auto fwd_conv_pd = std::make_shared(*conv_desc, getEngine(), true); - if (fwd_conv_pd->get(true) == nullptr) - continue; - - descs.emplace_back(deconv_desc, fwd_conv_pd); + auto convertDims = [] (const std::vector& orig_dims) { + return memory::dims(orig_dims.begin(), orig_dims.end()); + }; + + if (isInt8) { + MKLDNNDims weightsDims = MKLDNNDims(weightDims); + MKLDNNMemoryDesc wgh_candidate{weightsDims, memory::data_type::s8, memory::format_tag::any}; + std::shared_ptr deconv_desc; + deconv_desc.reset(new deconvolution_forward::desc(prop_kind::forward_inference, mkldnn::algorithm::deconvolution_direct, + in_candidate, wgh_candidate, out_candidate, + convertDims(stride), convertDims(dilation), + convertDims(paddingL), convertDims(paddingR))); + descs.emplace_back(deconv_desc); + } else { + MKLDNNDims weightsDims = MKLDNNDims(weightDims); + MKLDNNMemoryDesc wgh_candidate{weightsDims, in_candidate.getDataType(), memory::format_tag::any}; + for (auto alg : {mkldnn::algorithm::convolution_winograd, mkldnn::algorithm::convolution_direct}) { + std::shared_ptr conv_desc; + conv_desc.reset(new convolution_forward::desc(prop_kind::forward_inference, alg, + out_candidate, wgh_candidate, in_candidate, + convertDims(stride), + convertDims(dilation), + convertDims(paddingL), + convertDims(paddingR))); + + std::shared_ptr deconv_desc; + deconv_desc.reset(new convolution_backward_data::desc(alg, out_candidate, wgh_candidate, + in_candidate, + convertDims(stride), + convertDims(dilation), + convertDims(paddingL), + convertDims(paddingR))); + descs_fwd.push_back(conv_desc); + descs_bwd.push_back(deconv_desc); + + auto fwd_conv_pd = std::make_shared(*conv_desc, getEngine(), true); + if (fwd_conv_pd->get(true) == nullptr) + continue; + + descs.emplace_back(deconv_desc, fwd_conv_pd); + } } } @@ -237,7 +375,7 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_i } InferenceEngine::TensorDesc desc = idx > 0 ? MKLDNNMemoryDesc(primitive_desc_it.weights_desc(idx - 1)) - : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_desc(idx)); + : isInt8 ? MKLDNNMemoryDesc(primitive_desc_it.src_desc(idx)) : MKLDNNMemoryDesc(primitive_desc_it.diff_dst_desc(idx)); if (desc.getLayout() == InferenceEngine::Layout::ANY) { return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), @@ -265,7 +403,8 @@ MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getSrcMemDesc(mkldnn::primitive_desc_i } MKLDNNMemoryDesc MKLDNNDeconvolutionNode::getDstMemDesc(mkldnn::primitive_desc_iterator &primitive_desc_it, size_t idx) { - InferenceEngine::TensorDesc desc = MKLDNNMemoryDesc(primitive_desc_it.diff_src_desc(idx)); + InferenceEngine::TensorDesc desc = isInt8 ? MKLDNNMemoryDesc(primitive_desc_it.dst_desc(idx)) + : MKLDNNMemoryDesc(primitive_desc_it.diff_src_desc(idx)); if (desc.getLayout() == InferenceEngine::Layout::ANY) return MKLDNNMemoryDesc(InferenceEngine::TensorDesc(desc.getPrecision(), getChildEdgeAt(idx)->getDims().ToSizeVector(), diff --git a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h index a9c17ee4c25635..aeb3ac0f2694a1 100644 --- a/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h +++ b/inference-engine/src/mkldnn_plugin/nodes/mkldnn_deconv_node.h @@ -41,10 +41,12 @@ class MKLDNNDeconvolutionNode : public MKLDNNNode { private: bool withGroups = false; bool isDW = false; + bool isInt8 = false; size_t groupNum = 1; size_t outDepth; size_t IC; size_t OC; + std::vector kernel; std::vector stride; std::vector dilation; std::vector paddingL; @@ -57,6 +59,9 @@ class MKLDNNDeconvolutionNode : public MKLDNNNode { void setPostOps(mkldnn::primitive_attr &attr); std::string errorPrefix; + + bool canBeExecutedInInt8(); + InferenceEngine::Blob::Ptr createWeiBlobAsIO(InferenceEngine::SizeVector dims); }; } // namespace MKLDNNPlugin diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp index 974fe523c1a95a..07a458d460e149 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/skip_tests_config.cpp @@ -52,6 +52,7 @@ std::vector disabledTestPatterns() { R"(.*ClampLayerTest.*netPrc=U64.*)", // TODO: 42538. Unexpected application crush R"(.*CoreThreadingTestsWithIterations\.smoke_LoadNetwork.t.*)", + R"(.*CoreThreadingTestsWithIterations\.smoke_LoadNetworkAccuracy.*AUTO.*)", // incorrect reference implementation R"(.*NormalizeL2LayerTest.*axes=\(\).*)", diff --git a/inference-engine/thirdparty/mkl-dnn b/inference-engine/thirdparty/mkl-dnn index a5fffb52b012b3..a81b4753105bb0 160000 --- a/inference-engine/thirdparty/mkl-dnn +++ b/inference-engine/thirdparty/mkl-dnn @@ -1 +1 @@ -Subproject commit a5fffb52b012b31c65ace894638ecfb8948de9ec +Subproject commit a81b4753105bb0a1622790256b02f19916cce77c