Skip to content

Commit

Permalink
[CPU] BF16 Deconvolution support (#4048)
Browse files Browse the repository at this point in the history
* Disable and clean up interp and resample, which are covered by interpolate

* [BF16] Deconvolution was enabled

* Deconvolution tests were added

* Copyright year was fixed

* Friendly name usage for tests

* Test inherited from CPUTestsBase

* Fusing tests were added

* oneDNN was updated

* Gemm and 1x1 were added with tests

* Support for channel counts not aligned to the vector length was added for blocked cases

* DW deconvolution case was added with tests

* oneDNN changes for BF16 deconvolutions were squashed

Co-authored-by: chenhuwa <[email protected]>
  • Loading branch information
alexey-varyzgin and chenhu-wang authored Feb 4, 2021
1 parent 937c332 commit 1bd7b37
Show file tree
Hide file tree
Showing 8 changed files with 761 additions and 9 deletions.
3 changes: 2 additions & 1 deletion inference-engine/src/mkldnn_plugin/bf16transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ namespace MKLDNNPlugin {

class BF16Transformer {
const InferenceEngine::details::caseless_set<std::string> _initbf16 =
{ "convolution", "fullyconnected", "innerproduct", "gemm", "RegionYolo", "Interpolate", "PSROIPooling" };
{ "convolution", "fullyconnected", "innerproduct", "gemm", "RegionYolo", "Interpolate", "PSROIPooling", "Deconvolution" };

const InferenceEngine::details::caseless_set<std::string> _complementbf16 =
{ "relu", "tanh", "elu", "square", "abs", "sqrt", "linear", "bounded_relu", "soft_relu", "normalize",
"sigmoid", "ReLU6", "not", "activation", "HSwish", "mish", "logistic", "mod", "resample",
Expand Down
3 changes: 0 additions & 3 deletions inference-engine/src/mkldnn_plugin/nodes/log_softmax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ class LogSoftmaxImpl: public ExtLayerBase {
if (layer->insData.size() != 1)
THROW_IE_EXCEPTION << layer->name << " Incorrect number of input edges!";

if (layer->insData[0].lock()->getTensorDesc().getPrecision() != Precision::FP32)
THROW_IE_EXCEPTION << layer->name << " Incorrect input data tensor precision. Only FP32 is supported!";

SizeVector dims = layer->insData[0].lock()->getTensorDesc().getDims();
if (!dims.size())
dims = SizeVector(1, 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@ void MKLDNNDeconvolutionNode::getSupportedDescriptors() {
return;

InferenceEngine::Precision precision = getCnnLayer()->insData[0].lock()->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16)
precision = InferenceEngine::Precision::FP32;
auto inputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
precision = getCnnLayer()->outData[0]->getPrecision();
if (precision != InferenceEngine::Precision::FP32)
if (precision != InferenceEngine::Precision::FP32 && precision != InferenceEngine::Precision::BF16)
precision = InferenceEngine::Precision::FP32;
auto outputDataType = MKLDNNExtensionUtils::IEPrecisionToDataType(precision);
if (inputDataType == memory::data_type::bf16 || outputDataType == memory::data_type::bf16)
inputDataType = outputDataType = memory::data_type::bf16;

if (getParentEdges().empty() || getParentEdges().size() > 3)
THROW_IE_EXCEPTION << "Incorrect number of input edges for layer " << getName();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,352 @@
// Copyright (C) 2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "test_utils/cpu_test_utils.hpp"
#include "test_utils/fusing_test_utils.hpp"
#include "shared_test_classes/base/layer_test_utils.hpp"
#include "ngraph_functions/utils/ngraph_helpers.hpp"
#include "ngraph_functions/builders.hpp"
#include <shared_test_classes/single_layer/convolution_backprop_data.hpp>


using namespace InferenceEngine;
using namespace CPUTestUtils;

namespace CPULayerTestsDefinitions {
using LayerTestsDefinitions::convBackpropDataSpecificParams;
using LayerTestsDefinitions::convBackpropDataLayerTestParamsSet;

// Full parameter bundle of a CPU deconvolution test case, in order:
//   0) shared ConvolutionBackpropData layer-test parameters,
//   1) CPU-specific parameters,
//   2) fusing parameters,
//   3) additional plugin configuration (key -> value).
using deconvLayerCPUTestParamsSet = std::tuple<convBackpropDataLayerTestParamsSet,
                                               CPUSpecificParams,
                                               fusingSpecificParams,
                                               std::map<std::string, std::string>>;

/**
 * CPU-plugin test fixture for a single ConvolutionBackpropData (deconvolution) layer.
 *
 * The test parameter is a deconvLayerCPUTestParamsSet: shared layer-test parameters,
 * CPU-specific parameters, fusing parameters and an additional plugin configuration map.
 */
class DeconvolutionLayerCPUTest : public testing::WithParamInterface<deconvLayerCPUTestParamsSet>,
                                  virtual public LayerTestsUtils::LayerTestsCommon,
                                  public CpuTestWithFusing {
public:
    /**
     * Composes the test-case name from the shared layer-test name, the CPU-specific
     * suffix and the fusing suffix. When the plugin configuration is not empty,
     * "_PluginConf" and one "_<key>=<value>" item per entry are appended.
     */
    static std::string getTestCaseName(testing::TestParamInfo<deconvLayerCPUTestParamsSet> obj) {
        convBackpropDataLayerTestParamsSet layerParams;
        CPUSpecificParams cpuSpecific;
        fusingSpecificParams fusingSpecific;
        std::map<std::string, std::string> pluginConfig;
        std::tie(layerParams, cpuSpecific, fusingSpecific, pluginConfig) = obj.param;

        std::ostringstream name;
        name << LayerTestsDefinitions::ConvolutionBackpropDataLayerTest::getTestCaseName(
            testing::TestParamInfo<convBackpropDataLayerTestParamsSet>(layerParams, 0));

        name << CPUTestsBase::getTestCaseName(cpuSpecific);
        name << CpuTestWithFusing::getTestCaseName(fusingSpecific);

        // Nothing more to append when no plugin options were supplied.
        if (pluginConfig.empty()) {
            return name.str();
        }

        name << "_PluginConf";
        for (auto& entry : pluginConfig) {
            name << "_" << entry.first << "=" << entry.second;
        }
        return name.str();
    }

protected:
    /**
     * Unpacks the test parameter into the fixture state and builds the nGraph function
     * that contains the deconvolution node under test.
     */
    void SetUp() override {
        convBackpropDataLayerTestParamsSet layerParams;
        CPUSpecificParams cpuSpecific;
        fusingSpecificParams fusingSpecific;
        std::map<std::string, std::string> pluginConfig;
        std::tie(layerParams, cpuSpecific, fusingSpecific, pluginConfig) = this->GetParam();

        // Plugin options (e.g. BF16 enforcement) are merged into the configuration of the run.
        configuration.insert(pluginConfig.begin(), pluginConfig.end());

        std::tie(inFmts, outFmts, priority, selectedType) = cpuSpecific;
        std::tie(postOpMgrPtr, fusedOps) = fusingSpecific;

        convBackpropDataSpecificParams deconvAttrs;
        std::vector<size_t> inShape;
        auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
        std::tie(deconvAttrs, netPrecision, inPrc, outPrc, inLayout, outLayout, inShape, targetDevice) = layerParams;

        // The expected primitive type gets the input precision name as a suffix;
        // an unspecified input precision is reported as FP32.
        Precision suffixPrc = (inPrc == Precision::UNSPECIFIED) ? Precision(Precision::FP32) : inPrc;
        selectedType += std::string("_") + suffixPrc.name();

        InferenceEngine::SizeVector kernel, stride, dilation;
        std::vector<ptrdiff_t> padBegin, padEnd;
        size_t convOutChannels;
        ngraph::op::PadType padType;
        std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = deconvAttrs;
        auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);

        // The input Parameter is always created as f32, independently of ngPrc.
        auto params = ngraph::builder::makeParams(ngraph::element::f32, { inShape });
        auto paramOutputs = ngraph::helpers::convert2OutputVector(
            ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(params));

        auto deconv = ngraph::builder::makeConvolutionBackpropData(paramOutputs.front(), ngPrc, kernel, stride,
                                                                   padBegin, padEnd, dilation, padType,
                                                                   convOutChannels);

        function = makeNgraphFunction(ngPrc, params, deconv, "convolutionBackpropData");
    }
};

// Runs the parameterized deconvolution case and then checks the plugin-related results
// of the executable network for the "Deconvolution" node type.
TEST_P(DeconvolutionLayerCPUTest, CompareWithRefs) {
// NOTE(review): presumably skips the case when it is listed as disabled — confirm against the macro definition.
SKIP_IF_CURRENT_TEST_IS_DISABLED()

Run();
CheckPluginRelatedResults(executableNetwork, "Deconvolution");
}

namespace {

/* COMMON PARAMS */
// Fusing variants shared by every instantiation below: the empty fusing spec and
// the ScaleShift fusing spec.
const std::vector<fusingSpecificParams> fusingParamsSet{
emptyFusingSpec,
fusingScaleShift
};

// Plugin configurations passed as the last test parameter:
// an empty one (used by the FP32 instances) and one that sets KEY_ENFORCE_BF16 to YES
// (used by the BF16 instances).
const std::map<std::string, std::string> cpuEmptyPluginConfig;
const std::map<std::string, std::string> cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } };

/* ============= Deconvolution params (planar layout) ============= */
// Output channel counts used by the "Planar" instances.
const SizeVector numOutChannels_Planar = { 6 };

/* ============= Deconvolution params (blocked layout) ============= */
// Output channel counts used by the "Blocked" and "1x1" instances.
const SizeVector numOutChannels_Blocked = { 64 };

/* ============= Deconvolution params (2D) ============= */
// Value sets for 2D cases: kernels 3x3 and 1x1, strides 1 and 2, zero padding, unit dilation.
const std::vector<SizeVector> kernels2d = { {3, 3}, {1, 1} };
const std::vector<SizeVector> strides2d = { {1, 1}, {2, 2} };
const std::vector<std::vector<ptrdiff_t>> padBegins2d = { {0, 0} };
const std::vector<std::vector<ptrdiff_t>> padEnds2d = { {0, 0} };
const std::vector<SizeVector> dilations2d = { {1, 1} };

/* ============= Deconvolution params (3D) ============= */
// Value sets for 3D cases: kernels 3x3x3 and 1x1x1, strides 1 and 2, zero padding, unit dilation.
const std::vector<SizeVector> kernels3d = { {3, 3, 3}, {1, 1, 1} };
const std::vector<SizeVector> strides3d = { {1, 1, 1}, {2, 2, 2} };
const std::vector<std::vector<ptrdiff_t>> padBegins3d = { {0, 0, 0} };
const std::vector<std::vector<ptrdiff_t>> padEnds3d = { {0, 0, 0} };
const std::vector<SizeVector> dilations3d = { {1, 1, 1} };
/* ============= */

/* INSTANCES */
/* ============= Deconvolution (Planar 2D) ============= */
// All combinations of the 2D attribute sets with the planar output channel count and
// explicit padding. Element order matches the tuple unpacked in SetUp:
// kernel, stride, padBegin, padEnd, dilation, output channels, pad type.
const auto convParams_ExplicitPadding_Planar_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d),
::testing::ValuesIn(numOutChannels_Planar),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);

// FP32 planar 2D instances: network precision FP32, input/output precisions UNSPECIFIED,
// any layout, input shape {2, 12, 7, 7}, CPU device, empty plugin config.
// CPU-specific parameters are the conv_gemm_2D entry filtered by filterCPUInfoForDevice.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Planar_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

// BF16 planar 2D instances: same as the FP32 variant except that input/output precisions
// are BF16 and the plugin config sets KEY_ENFORCE_BF16 to YES. Network precision stays FP32.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Planar_BF16, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_2D})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Deconvolution (Planar 3D) ============= */
// All combinations of the 3D attribute sets with the planar output channel count and
// explicit padding. Element order matches the tuple unpacked in SetUp:
// kernel, stride, padBegin, padEnd, dilation, output channels, pad type.
const auto convParams_ExplicitPadding_Planar_3D = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d),
::testing::ValuesIn(numOutChannels_Planar),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);

// FP32 planar 3D instances: network precision FP32, input/output precisions UNSPECIFIED,
// any layout, input shape {2, 12, 7, 7, 7}, CPU device, empty plugin config.
// CPU-specific parameters are the conv_gemm_3D entry filtered by filterCPUInfoForDevice.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Planar_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

// BF16 planar 3D instances: same as the FP32 variant except that input/output precisions
// are BF16 and the plugin config sets KEY_ENFORCE_BF16 to YES. Network precision stays FP32.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Planar_BF16, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Planar_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_gemm_3D})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Deconvolution (Blocked 2D) ============= */
// All combinations of the 2D attribute sets with the blocked output channel count and
// explicit padding. Element order matches the tuple unpacked in SetUp:
// kernel, stride, padBegin, padEnd, dilation, output channels, pad type.
const auto convParams_ExplicitPadding_Blocked_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);

// FP32 blocked 2D instances: network precision FP32, input/output precisions UNSPECIFIED,
// any layout, input shape {2, 67, 7, 7}, CPU device, empty plugin config.
// CPU-specific parameters are the conv_avx512_2D entry filtered by filterCPUInfoForDevice.
// NOTE(review): 67 is presumably a channel count deliberately not aligned to the vector length — confirm.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Blocked_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 67, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

// BF16 blocked 2D instances: same as the FP32 variant except that input/output precisions
// are BF16 and the plugin config sets KEY_ENFORCE_BF16 to YES. Network precision stays FP32.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_Blocked_BF16, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 67, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Deconvolution (Blocked 3D) ============= */
// All combinations of the 3D attribute sets with the blocked output channel count and
// explicit padding. Element order matches the tuple unpacked in SetUp:
// kernel, stride, padBegin, padEnd, dilation, output channels, pad type.
const auto convParams_ExplicitPadding_Blocked_3D = ::testing::Combine(
::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);

// FP32 blocked 3D instances: network precision FP32, input/output precisions UNSPECIFIED,
// any layout, input shape {2, 67, 7, 7, 7}, CPU device, empty plugin config.
// CPU-specific parameters are the conv_avx512_3D entry filtered by filterCPUInfoForDevice.
// NOTE(review): 67 is presumably a channel count deliberately not aligned to the vector length — confirm.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Blocked_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 67, 7, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

// BF16 blocked 3D instances: same as the FP32 variant except that input/output precisions
// are BF16 and the plugin config sets KEY_ENFORCE_BF16 to YES. Network precision stays FP32.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_3D_Blocked_BF16, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_Blocked_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 67, 7, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_3D})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

/* ============= Kernel_1x1 (2D) ============= */

// Single 2D attribute set for the 1x1 case: kernel {1, 1}, stride {1, 1}, zero padding,
// dilation {1, 1}, the blocked output channel count and explicit padding.
// Element order matches the tuple unpacked in SetUp.
const auto convParams_ExplicitPadding_1x1_2D = ::testing::Combine(
::testing::Values(SizeVector({1, 1})),
::testing::Values(SizeVector({1, 1})),
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
::testing::Values(std::vector<ptrdiff_t>({0, 0})),
::testing::Values(SizeVector({1, 1})),
::testing::ValuesIn(numOutChannels_Blocked),
::testing::Values(ngraph::op::PadType::EXPLICIT)
);

// FP32 1x1 2D instances: network precision FP32, input/output precisions UNSPECIFIED,
// any layout, input shape {2, 67, 7, 7}, CPU device, empty plugin config.
// CPU-specific parameters are the conv_avx512_2D_1x1 entry filtered by filterCPUInfoForDevice.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_1x1_FP32, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1x1_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 67, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuEmptyPluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

// BF16 1x1 2D instances: same as the FP32 variant except that input/output precisions
// are BF16 and the plugin config sets KEY_ENFORCE_BF16 to YES. Network precision stays FP32.
INSTANTIATE_TEST_CASE_P(smoke_Deconv_2D_1x1_BF16, DeconvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1x1_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::BF16),
::testing::Values(Precision::BF16),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 67, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice({conv_avx512_2D_1x1})),
::testing::ValuesIn(fusingParamsSet),
::testing::Values(cpuBF16PluginConfig)),
DeconvolutionLayerCPUTest::getTestCaseName);

/* ========= */

} // namespace
} // namespace CPULayerTestsDefinitions
Loading

0 comments on commit 1bd7b37

Please sign in to comment.