[CPU] enable brdgmm kernel in CPU plugin (#27589)
### Details:
 - *replace the impl string brdgmm with brgconv*
 - *add test cases for the brdgmm kernel*
 - *remove the skip config for CVS-56143, which is already closed*
 - *remove the skip config for CVS-53578, which is already closed*
 - *use the new ticket CVS-157596 to track the leftover test case*

### Tickets:
 - *CVS-156792*

---------

Signed-off-by: HU Yuan2 <[email protected]>
tiger100256-hu authored Dec 9, 2024
1 parent 408a5e0 commit de949b4
Showing 5 changed files with 140 additions and 15 deletions.
13 changes: 9 additions & 4 deletions src/plugins/intel_cpu/src/nodes/conv.cpp
@@ -343,6 +343,7 @@ const std::vector<impl_desc_type>& Convolution::getDefaultImplPriority() {
impl_desc_type::winograd_acl,
impl_desc_type::gemm_acl,
impl_desc_type::acl,
impl_desc_type::brgconv_avx512_dw,
impl_desc_type::brgconv_avx512_amx_1x1,
impl_desc_type::brgconv_avx512_amx,
impl_desc_type::jit_avx512_amx_dw,
@@ -353,6 +354,7 @@ const std::vector<impl_desc_type>& Convolution::getDefaultImplPriority() {
impl_desc_type::jit_avx512_dw,
impl_desc_type::jit_avx512_1x1,
impl_desc_type::jit_avx512,
impl_desc_type::brgconv_avx2_dw,
impl_desc_type::brgconv_avx2_1x1,
impl_desc_type::brgconv_avx2,
impl_desc_type::jit_uni_dw,
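
The two new brgconv_*_dw entries sit above their jit_* depthwise counterparts, so the brdgmm-based kernel is preferred whenever oneDNN can build it. A minimal sketch of first-match selection over a priority vector like the one above (the enum, helper name, and fallback are illustrative, not the plugin's actual code):

#include <algorithm>
#include <vector>

enum class impl { brgconv_avx512_dw, jit_avx512_dw, jit_uni_dw, ref };

// return the highest-priority implementation that is actually available
impl pick_first_match(const std::vector<impl>& priority,
                      const std::vector<impl>& available) {
    for (impl candidate : priority) {
        if (std::find(available.begin(), available.end(), candidate) != available.end())
            return candidate;
    }
    return impl::ref;  // fall back to the reference kernel
}
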
@@ -815,15 +817,19 @@ void Convolution::initSupportedPrimitiveDescriptors() {
#endif
for (size_t dIdx = 0; dIdx < descs.size(); dIdx++) {
auto& desc = descs[dIdx];
-auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(desc.get()));
+auto primitive_desc = desc.get(true);  // true means an empty primitive_desc is allowed
+if (primitive_desc == nullptr) {
+    continue;
+}
+auto first_desc = dnnl::primitive_desc(DnnlExtensionUtils::clone_primitive_desc(primitive_desc));

auto add_supported_desc = [&](dnnl::primitive_desc& desc) {
addSupportedPrimitiveDescriptor(desc);
descIdx.push_back(dIdx);
};

const bool first_match = customImplPriorities.empty();
-DEBUG_LOG("#", getName(),
+DEBUG_LOG("#", getName(), ",descIndex:", dIdx + 1, "/", descs.size(),
", itpd.impl_info_str(): ", desc.impl_info_str(),
", parsed imp_type: ", impl_type_to_string(parse_impl_name(desc.impl_info_str())),
", first_match: ", first_match ? "true" : "false");
@@ -944,8 +950,7 @@ void Convolution::createDescriptor(const std::vector<MemoryDescPtr>& inputDesc,
const auto desc = createDescriptorInternal(getEngine(),
inDnnlDesc, weightDnnlDesc, biasDnnlDesc, outDnnlDesc, withBiases,
stride, dilation, paddingL, paddingR, alg, attr);
-if (desc)
-    descs.emplace_back(desc);
+descs.emplace_back(desc);
}
}
}
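
Taken together, the two hunks above move the validity check from creation time to query time: createDescriptor now keeps every candidate in descs, and initSupportedPrimitiveDescriptors skips the candidates the engine cannot build, which desc.get(true) reports as null instead of an error. A minimal sketch of that pattern with simplified stand-in types (not the plugin's real classes):

#include <cstddef>
#include <vector>

struct prim_desc { bool has_impl; };

// stand-in for desc.get(true): with allow-empty semantics, a candidate with
// no implementation comes back as nullptr rather than failing
const prim_desc* get_allow_empty(const prim_desc& d) {
    return d.has_impl ? &d : nullptr;
}

void enumerate_supported(const std::vector<prim_desc>& descs) {
    for (std::size_t dIdx = 0; dIdx < descs.size(); ++dIdx) {
        const prim_desc* pd = get_allow_empty(descs[dIdx]);
        if (pd == nullptr)
            continue;  // no kernel for this candidate on this machine
        // ... clone *pd and register it as a supported primitive descriptor ...
    }
}
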
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/onednn/iml_type_mapper.cpp
@@ -17,6 +17,7 @@ impl_desc_type parse_impl_name(std::string impl_desc_name) {
if (pos != std::string::npos) impl_desc_name.replace(pos, std::string(#_wrd).length(), #_sub); }
// Replace the ONEDNN pd name with OV definition.
REPLACE_WORD(brg_conv, brgconv);
REPLACE_WORD(brdgmm, brgconv);
REPLACE_WORD(avx10_1_512, avx512);
REPLACE_WORD(brg_matmul, brgemm);

@@ -119,6 +120,8 @@ const char* impl_type_to_string(impl_desc_type type) {
CASE(brgconv_sse42_1x1);
CASE(brgconv_uni_1x1);
CASE(brgconv_avx512_amx_1x1);
CASE(brgconv_avx512_dw);
CASE(brgconv_avx2_dw);
CASE(brgemm_avx512);
CASE(brgemm_avx2);
CASE(brgemm_avx);
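
A standalone equivalent of the REPLACE_WORD(brdgmm, brgconv) line above: oneDNN reports the new depthwise kernel under a brdgmm-based name, and the plugin folds that spelling into its existing brgconv_* naming before parsing the impl type. A minimal sketch (the sample impl string is illustrative; exact oneDNN names may differ):

#include <iostream>
#include <string>

std::string normalize_impl_name(std::string name) {
    const std::string from = "brdgmm";
    const std::string to = "brgconv";
    const auto pos = name.find(from);
    if (pos != std::string::npos)
        name.replace(pos, from.length(), to);  // same find-and-replace as REPLACE_WORD
    return name;
}

int main() {
    std::cout << normalize_impl_name("brdgmm_avx512") << '\n';  // prints brgconv_avx512
}
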
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/onednn/iml_type_mapper.h
@@ -98,6 +98,9 @@ enum impl_desc_type : int64_t {
brgconv_uni_1x1 = brgconv | uni | _1x1,
brgconv_avx512_amx_1x1 = brgconv | avx512 | amx | _1x1,

brgconv_avx2_dw = brgconv_avx2 | _dw,
brgconv_avx512_dw = brgconv_avx512 | _dw,

brgemm_avx512 = brgemm | avx512,
brgemm_avx2 = brgemm | avx2,
brgemm_avx = brgemm | avx,
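
The new enum values are plain bit-flag compositions: brgconv_avx2 | _dw just sets the depthwise bit on top of the existing brgconv and ISA bits, so generic queries still match. A self-contained illustration (the bit positions are made up; the real header defines its own):

#include <cassert>
#include <cstdint>

enum impl_desc : int64_t {
    brgconv = 1 << 0,
    avx2    = 1 << 1,
    avx512  = 1 << 2,
    _dw     = 1 << 3,

    brgconv_avx2      = brgconv | avx2,
    brgconv_avx512    = brgconv | avx512,
    brgconv_avx2_dw   = brgconv_avx2 | _dw,
    brgconv_avx512_dw = brgconv_avx512 | _dw,
};

int main() {
    // a depthwise variant still carries the base brgconv bit...
    assert((brgconv_avx512_dw & brgconv) != 0);
    // ...and the _dw bit is what separates it from the plain variant
    assert((brgconv_avx512_dw & _dw) != 0 && (brgconv_avx512 & _dw) == 0);
}
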
@@ -5,6 +5,7 @@
#include "shared_test_classes/single_op/group_convolution.hpp"

#include "common_test_utils/node_builders/group_convolution.hpp"
#include "openvino/runtime/system_conf.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "utils/convolution_params.hpp"
#include "utils/cpu_test_utils.hpp"
@@ -176,14 +177,15 @@ class GroupConvolutionLayerCPUTest : public testing::WithParamInterface<groupCon
std::tie(groupConvParams, netType, inType, outType, inputShape, targetDevice) = basicParamsSet;

init_input_shapes({inputShape});

-if (configuration.count(ov::hint::inference_precision.name()) &&
-    ov::element::bf16 == configuration[ov::hint::inference_precision.name()].as<ov::element::Type>()) {
-    selectedType += "_bf16";
-    rel_threshold = 1e-2f;
-} else {
-    selectedType = makeSelectedTypeStr(selectedType, netType);
-}
+const auto& it = configuration.find(ov::hint::inference_precision.name());
+if (it != configuration.end()) {
+    if (ov::element::bf16 == it->second.as<ov::element::Type>()) {
+        rel_threshold = 1e-2f;
+    } else if (ov::element::f16 == it->second.as<ov::element::Type>()) {
+        rel_threshold = 0.00125f;
+    }
+}
+selectedType = makeSelectedTypeStr(selectedType, deduce_expected_precision(netType, configuration));

// according to the range propagation feature, the resolution of generated input data for parameters moved from 32 to 32768
// the 'real' part of the input data changed, and some failures became visible for cases with Elu and FakeQuantize, so let's set up abs_threshold
@@ -289,6 +291,7 @@ std::vector<CPUSpecificParams> filterCPUInfoForDeviceSupportBF16(std::vector<CPU
}
return resParamsSet;
}

/* ===================== */

/* COMMON PARAMS */
@@ -313,6 +316,33 @@ const std::vector<fusingSpecificParams> fusingParamsSetBF16{emptyFusingSpec,
// sum
fusingSum};

const std::vector<fusingSpecificParams> fusingParamsSet_Brdgmm{emptyFusingSpec,
// eltwise
fusingRelu,
fusingPRelu1D,
// depthwise
fusingReluScaleShift,
// fake quantize
fusingFakeQuantizePerTensorRelu,
fusingFakeQuantizePerChannelRelu
// sum
// comment out sum due to MFDNN-12841
//fusingSumEluFQ,
//fusingSum
};

const std::vector<fusingSpecificParams> fusingParamsSetBF16_Brdgmm{emptyFusingSpec,
// eltwise
fusingRelu,
// depthwise
fusingReluScaleShift
// sum
// comment out sum due to MFDNN-12841
//fusingSum
};

const std::vector<fusingSpecificParams> fusingParamsSetFP16_Brdgmm = fusingParamsSetBF16_Brdgmm;

/* ============= GroupConvolution params (planar layout) ============= */
const std::vector<size_t> numOutChannels_Gemm = {6};
const std::vector<size_t> numGroups_Gemm = {2, 3};
@@ -1299,6 +1329,38 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP32,
::testing::Values(empty_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

const std::vector<std::vector<size_t>> dilations2d_Brdgmm = {{1, 1}};
const auto groupConvParams_ExplicitPadding_DW_2D_Brdgmm = ::testing::Combine(::testing::ValuesIn(kernels2d),
::testing::ValuesIn(strides2d),
::testing::ValuesIn(padBegins2d),
::testing::ValuesIn(padEnds2d),
::testing::ValuesIn(dilations2d_Brdgmm),
::testing::ValuesIn(numOutChannels_DW),
::testing::ValuesIn(numGroups_DW),
::testing::Values(ov::op::PadType::EXPLICIT));
const auto BrdgmmCPUSpec = []() -> std::vector<CPUSpecificParams> {
std::string isaStr;
if (ov::with_cpu_x86_avx512f()) {
isaStr = "avx512";
} else {
isaStr = "avx2";
}
return {CPUSpecificParams{{}, {}, {}, "brgconv_" + isaStr + "_dw"}};
};
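
BrdgmmCPUSpec derives the expected primitive name from the best ISA the host offers, using with_cpu_x86_avx512f() from openvino/runtime/system_conf.hpp (included at the top of this file). Roughly, as a hypothetical standalone helper:

#include <string>

// hypothetical distillation of the lambda's naming logic
std::string expected_brdgmm_primitive(bool has_avx512f) {
    return std::string("brgconv_") + (has_avx512f ? "avx512" : "avx2") + "_dw";
}
// expected_brdgmm_primitive(true)  == "brgconv_avx512_dw"
// expected_brdgmm_primitive(false) == "brgconv_avx2_dw"
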

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP32_Brdgmm,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes2dDW),
::testing::Values(ov::test::utils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())),
::testing::ValuesIn(fusingParamsSet_Brdgmm),
::testing::Values(empty_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D,
@@ -1313,6 +1375,32 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16,
::testing::Values(cpu_bf16_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_BF16_Brdgmm,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes2dDW),
::testing::Values(ov::test::utils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDeviceSupportBF16(BrdgmmCPUSpec())),
::testing::ValuesIn(fusingParamsSetBF16_Brdgmm),
::testing::Values(cpu_bf16_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_2D_DW_FP16_Brdgmm,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_2D_Brdgmm,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes2dDW),
::testing::Values(ov::test::utils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())),
::testing::ValuesIn(fusingParamsSetFP16_Brdgmm),
::testing::Values(cpu_f16_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

/* ============= GroupConvolution (DW 3D) ============= */
const auto groupConvParams_ExplicitPadding_DW_3D = ::testing::Combine(::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
@@ -1349,6 +1437,30 @@ INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_DW_FP32,
::testing::ValuesIn(fusingParamsSet),
::testing::Values(empty_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

const std::vector<std::vector<size_t>> dilations3d_Brdgmm = {{1, 1, 1}};
const auto groupConvParams_ExplicitPadding_DW_3D_Brdgmm = ::testing::Combine(::testing::ValuesIn(kernels3d),
::testing::ValuesIn(strides3d),
::testing::ValuesIn(padBegins3d),
::testing::ValuesIn(padEnds3d),
::testing::ValuesIn(dilations3d_Brdgmm),
::testing::ValuesIn(numOutChannels_DW),
::testing::ValuesIn(numGroups_DW),
::testing::Values(ov::op::PadType::EXPLICIT));

INSTANTIATE_TEST_SUITE_P(smoke_GroupConv_3D_DW_FP32_Brdgmm,
GroupConvolutionLayerCPUTest,
::testing::Combine(::testing::Combine(groupConvParams_ExplicitPadding_DW_3D_Brdgmm,
::testing::Values(ElementType::f32),
::testing::Values(ElementType::undefined),
::testing::Values(ElementType::undefined),
::testing::ValuesIn(inputShapes3dDW),
::testing::Values(ov::test::utils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(BrdgmmCPUSpec())),
::testing::ValuesIn(fusingParamsSet_Brdgmm),
::testing::Values(empty_plugin_config)),
GroupConvolutionLayerCPUTest::getTestCaseName);

/* ========= */

/* ============= SINGLE TEST CASES ============= */
@@ -40,10 +40,12 @@ std::vector<std::string> disabledTestPatterns() {
R"(.*BinaryConvolutionLayerTest.*)",
// TODO: 53618. BF16 gemm ncsp convolution crash
R"(.*_GroupConv.*_inFmts=nc.*_primitive=jit_gemm.*ENFORCE_BF16=YES.*)",
-// TODO: 53578. fork DW bf16 convolution does not support 3d cases yet
-R"(.*_DW_GroupConv.*_inFmts=(ndhwc|nCdhw16c).*ENFORCE_BF16=YES.*)",
-// TODO: 56143. Enable nspc convolutions for bf16 precision
-R"(.*ConvolutionLayerCPUTest.*_inFmts=(ndhwc|nhwc).*INFERENCE_PRECISION_HINT=bf16.*)",
+// TODO: 157596 convolution bf16 leftover test case
+R"(smoke_JIT_AVX512_DW_GroupConv/GroupConvolutionLayerCPUTest.*ndhwc.*jit_avx512_dw.*INFERENCE_PRECISION_HINT=bf16.*)",
+R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\]_TS=\(\((1|2)\.6(4|7)\.7\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)",
+R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[1\.\.200\.64\.\?\]_TS=\(\(2\.64\.7\)_\(1\.64\.5\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)",
+R"(smoke_Conv_1D_1x1_BF16/ConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\?\.6(4|7)\.1\.\.200\]_TS=\(\(2\.6(4|7)\.7\)_\(1\.6(4|7)\.9\)_\)_K\(1\)_S\(1\)_PB\(0\)_PE\(0\)_D=\(1\)_O=63_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=jit_avx512_1x1_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)",
+R"(smoke_GroupConv_brgemm_2D_BF16/GroupConvolutionLayerCPUTest\.CompareWithRefs/IS=\[\]_TS=\(\(1\.64\.7\.7\)_\)_K\(3\.3\)_S\(2\.2\)_PB\((0|1)\.(0|1)\)_PE\(0\.0\)_D=\(2\.2\)_O=64_G=2_AP=explicit_netPRC=f32_inPRC=undefined_outPRC=undefined_trgDev=CPU_inFmts=nhwc_outFmts=nhwc_primitive=brgconv_avx512_amx_.*PluginConf_INFERENCE_PRECISION_HINT=bf16)",
// TODO: 56827. Sporadic test failures
R"(.*smoke_Conv.+_FP32.ConvolutionLayerCPUTest\.CompareWithRefs.*TS=\(\(.\.67.+\).*inFmts=n.+c.*_primitive=jit_avx2.*)",
// incorrect jit_uni_planar_convolution with dilation = {1, 2, 1} and output channel 1
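
The retired CVS-53578/CVS-56143 entries above are replaced by narrower patterns tracked under CVS-157596. A minimal sketch of how such skip patterns are conventionally applied (an assumption about the runner, not code from this PR): each entry is treated as a regex and matched against the full test name.

#include <regex>
#include <string>
#include <vector>

bool is_disabled(const std::string& full_test_name,
                 const std::vector<std::string>& disabled_patterns) {
    for (const auto& pattern : disabled_patterns) {
        if (std::regex_search(full_test_name, std::regex(pattern)))
            return true;  // any match excludes the test from the run
    }
    return false;
}
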
