[IE TESTS] [CPU] extended cpu specific tests to support int8 precision #6546

Closed
@@ -77,7 +77,17 @@ class ConvolutionLayerCPUTest : public testing::WithParamInterface<convLayerCPUT
ASSERT_TRUE(foundConv) << "Can't find Convolution node";
}

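// Upper bound for the quantization "input high" value: the worst-case magnitude of the
// convolution accumulator when inputs and weights are bounded by maxIn0 and maxIn1,
// i.e. maxIn0 * maxIn1 * IC * prod(kernel). For example, with the defaults
// maxIn0 = maxIn1 = 10, IC = 12 and a 3x3 kernel: 10 * 10 * 12 * 3 * 3 = 10800.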
int calculateQuantizeInHigh(const InferenceEngine::SizeVector& kernel, const int ic, const int maxIn0 = 10, const int maxIn1 = 10) const {
auto quantizeInHigh = maxIn0 * maxIn1;
quantizeInHigh *= ic;
for (int i = 0; i < kernel.size(); i++) {
quantizeInHigh *= kernel[i];
}
return quantizeInHigh;
}

void SetUp() override {
using namespace ngraph;
convLayerTestParamsSet basicParamsSet;
CPUSpecificParams cpuParams;
fusingSpecificParams fusingParams;
@@ -97,28 +107,43 @@ class ConvolutionLayerCPUTest : public testing::WithParamInterface<convLayerCPUT
auto netPrecision = InferenceEngine::Precision::UNSPECIFIED;
std::tie(convParams, netPrecision, inPrc, outPrc, inLayout, outLayout, inputShape, targetDevice) = basicParamsSet;

if (inPrc == Precision::UNSPECIFIED) {
selectedType += std::string("_") + Precision(Precision::FP32).name();
} else if (inPrc == Precision::BF16) {
selectedType += std::string("_") + inPrc.name();
if (inPrc == Precision::UNSPECIFIED)
inPrc = Precision::FP32;

if (inPrc == Precision::U8) {
selectedType += std::string("_") + Precision(Precision::I8).name();
} else {
selectedType += std::string("_") + Precision(netPrecision).name();
selectedType += std::string("_") + inPrc.name();
}

ngraph::op::PadType padType;
op::PadType padType;
InferenceEngine::SizeVector kernel, stride, dilation;
std::vector<ptrdiff_t> padBegin, padEnd;
size_t convOutChannels;
std::tie(kernel, stride, padBegin, padEnd, dilation, convOutChannels, padType) = convParams;
auto ngPrc = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(netPrecision);
auto ngPrc = (inPrc == Precision::BF16)
? FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(Precision::FP32)
: FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(inPrc);

auto inputParams = ngraph::builder::makeParams(ngraph::element::f32, { inputShape });
auto paramOuts = ngraph::helpers::convert2OutputVector(ngraph::helpers::castOps2Nodes<ngraph::op::Parameter>(inputParams));
auto inputParams = builder::makeParams(ngPrc, { inputShape });
auto paramOuts = helpers::convert2OutputVector(helpers::castOps2Nodes<op::Parameter>(inputParams));

auto convolutionNode = ngraph::builder::makeConvolution(paramOuts.front(), ngPrc, kernel, stride, padBegin,
auto weiPrc = (ngPrc == element::u8) ? element::i8 : ngPrc;
auto convolutionNode = builder::makeConvolutionRelaxed(paramOuts.front(), weiPrc, kernel, stride, padBegin,
padEnd, dilation, padType, convOutChannels);

function = makeNgraphFunction(ngPrc, inputParams, convolutionNode, "Convolution");
if (outPrc == Precision::U8 || outPrc == Precision::I8) {
threshold = 1.001f;
outElemType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
quantizeInHigh = calculateQuantizeInHigh(kernel, inputShape[1]);
}
Comment on lines +135 to +139
Contributor

Looks like a code block that could be extracted to a base class method, since it is repeated in every test SetUp() function that tests a quantized layer. Since we do not have time to properly refactor the CPU single-layer tests, we can solve it with some functional-style machinery. For example, we can define a functor pointer in CPUTestsBase that is called from this extracted code but is initialized in the implementations; if it is not initialized, the method throws. That way we define at least some int8-related stages (changing the threshold, outElemType, and the quantizeInHigh calculation) in one common place. Unfortunately, this method itself still has to be called from each SetUp function, but it isolates some amount of common code.

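A minimal sketch of the suggested functor-based extraction; the class and member names below are hypothetical and do not correspond to the actual CPUTestsBase API:

```cpp
#include <functional>
#include <stdexcept>

// Base class owns the functor and the common call site.
class CPUTestsBaseSketch {
protected:
    // Derived fixtures initialize this with their int8-specific configuration.
    std::function<void()> configureInt8;

    // Called from the extracted common code; throws if a quantized test
    // forgot to provide its int8 configuration.
    void applyInt8Config() {
        if (!configureInt8)
            throw std::runtime_error("int8 configuration is not initialized");
        configureInt8();
    }
};

class ConvolutionInt8TestSketch : public CPUTestsBaseSketch {
public:
    void SetUp() {
        // The repeated block (threshold, outElemType, quantizeInHigh) becomes
        // the functor body, so the common stages live in one place.
        configureInt8 = [this]() {
            // threshold = 1.001f;
            // outElemType = FuncTestUtils::PrecisionUtils::convertIE2nGraphPrc(outPrc);
            // quantizeInHigh = calculateQuantizeInHigh(kernel, inputShape[1]);
        };
        applyInt8Config();
    }
};
```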

if (inPrc == Precision::U8 || inPrc == Precision::I8) {
additionalPasses.push_back(std::make_shared<pass::ConvertPrecision<element::i8, element::f32>>());
additionalPasses.push_back(std::make_shared<pass::ConvertPrecision<element::u8, element::f32>>());
}
Comment on lines +141 to +144
Contributor

This is also a good candidate for method extraction. If we had time for refactoring, we would inherit CPUTestsBase from LayerTestCommon and then, using the template method pattern, define some mandatory SetUp stages and extract such repeated code blocks into base class methods.

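A rough sketch of that template method structure; the class and stage names are hypothetical:

```cpp
// Base class fixes the SetUp skeleton; derived CPU tests fill in the stages.
class LayerTestCommonSketch {
public:
    virtual ~LayerTestCommonSketch() = default;

    // Mandatory stages run in a defined order.
    void SetUp() {
        readTestParams();
        configurePrecisions();  // int8 threshold / outElemType / quantizeInHigh
        buildFunction();
    }

protected:
    virtual void readTestParams() = 0;
    virtual void configurePrecisions() {}  // optional stage with a default no-op
    virtual void buildFunction() = 0;
};

class CpuConvolutionTestSketch : public LayerTestCommonSketch {
protected:
    void readTestParams() override { /* unpack the GTest parameters */ }
    void configurePrecisions() override { /* int8-specific adjustments */ }
    void buildFunction() override { /* build the ngraph function */ }
};
```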

function = makeNgraphFunction(element::f32, inputParams, convolutionNode, "Convolution");
}
};

@@ -179,7 +204,7 @@ const std::map<std::string, std::string> cpuEmptyPluginConfig;
const std::map<std::string, std::string> cpuBF16PluginConfig = { { PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES } };

/* ============= Convolution params (GEMM layout) ============= */
const SizeVector numOutChannels_Gemm = {6 };
const SizeVector numOutChannels_Gemm = { 6 };

/* ============= Convolution params (blocked and nspc layout) ============= */
const SizeVector numOutChannels = { 64, 63 };
@@ -252,21 +277,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_BF16, ConvolutionLayerCPUTest,
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_GEMM_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_GEMM_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_2D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Convolution (GEMM 3D) ============= */
const auto convParams_ExplicitPadding_GEMM_3D = ::testing::Combine(
@@ -316,22 +326,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_GEMM_BF16, ConvolutionLayerCPUTest,
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_GEMM_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_GEMM_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_GEMM_3D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Convolution (2D) ============= */
const auto convParams_ExplicitPadding_2D = ::testing::Combine(
::testing::ValuesIn(kernels2d),
@@ -384,21 +378,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_BF16, ConvolutionLayerCPUTest,
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_2D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

const std::vector<CPUSpecificParams> CPUParams_2D_plain_to_blocked = {
conv_sse42_plain_to_blocked_2D,
@@ -489,22 +468,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_BF16, ConvolutionLayerCPUTest,
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapes3d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_3D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

const std::vector<CPUSpecificParams> CPUParams_3D_plain_to_blocked = {
conv_avx2_plain_to_blocked_3D,
conv_avx512_plain_to_blocked_3D,
@@ -595,22 +558,6 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_BF16, ConvolutionLayerCPUTest,
::testing::Values(cpuBF16PluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1x1_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::I8),
::testing::Values(Precision::UNSPECIFIED),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::ValuesIn(inputShapes2d),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1x1_2D)),
::testing::Values(fusingSum),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

/* ============= Convolution (1D) ============= */
/* ============= Convolution params (1D) ============= */
const std::vector<SizeVector> kernels1d = { {3} };
@@ -770,4 +717,91 @@ INSTANTIATE_TEST_SUITE_P(smoke_Conv_winograd, ConvolutionLayerCPUTest,

} // namespace winograd

/* ============= U8/I8 Convolution ============= */
namespace int8 {

const std::vector<fusingSpecificParams> fusingParamsSetI8{
emptyFusingSpec,
// activations
fusingElu,
fusingSigmoid,
fusingPReluPerChannel,
fusingSwish,
fusingMish,
// other patterns
fusingSumEluFQ,
fusingSum,
fusingAddPerChannel
};

const std::vector<CPUSpecificParams> CPUParams_2D_I8 = {
conv_gemm_2D_nspc,
conv_sse42_2D_nspc,
conv_avx2_2D_nspc,
conv_avx512_2D_nspc
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_GEMM_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::U8 /*, Precision::I8*/), // i8 primitives are disabled in oneDNN fork
::testing::Values(Precision::FP32, Precision::U8, Precision::I8),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_2D_I8)),
::testing::ValuesIn(fusingParamsSetI8),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

const std::vector<CPUSpecificParams> CPUParams_3D_I8 = {
conv_gemm_3D_nspc,
conv_sse42_3D_nspc,
conv_avx2_3D_nspc,
conv_avx512_3D_nspc
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_3D_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_GEMM_3D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::U8 /*, Precision::I8*/), // i8 primitives are disabled in oneDNN fork
::testing::Values(Precision::FP32, Precision::U8, Precision::I8),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7, 7 })),
Contributor

12 channels do not form a full SIMD vector on AVX-512 VNNI; maybe it makes sense to use a shape that contains at least one full SIMD vector plus some fractional part, to cover more possible code paths (see the sketch after this test instantiation)? The same applies here and in the other new test instances.

::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_3D_I8)),
::testing::ValuesIn(fusingParamsSetI8),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

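A small sketch of how such a shape could be picked; the 64-lane figure is only an assumed number of int8 lanes per AVX-512 register, used here for illustration:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical helper: choose an input-channel count that covers one full SIMD
// vector plus a small remainder, so both the vectorized body and the tail path
// of the int8 convolution kernels are exercised.
std::vector<size_t> makeInt8ConvInputShape2D(std::size_t simdLanes = 64) {
    const std::size_t channels = simdLanes + 4;  // one full vector + a 4-channel tail
    return { 2, channels, 7, 7 };                // N, C, H, W
}
```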
const std::vector<CPUSpecificParams> CPUParams_1x1_2D_I8 = {
conv_sse42_2D_1x1_nspc,
conv_avx2_2D_1x1_nspc,
conv_avx512_2D_1x1_nspc
};

INSTANTIATE_TEST_SUITE_P(smoke_Conv_2D_1x1_I8, ConvolutionLayerCPUTest,
::testing::Combine(
::testing::Combine(
convParams_ExplicitPadding_1x1_2D,
::testing::Values(Precision::FP32),
::testing::Values(Precision::U8 /*, Precision::I8*/), // i8 primitives are disabled in oneDNN fork
::testing::Values(Precision::FP32, Precision::U8, Precision::I8),
::testing::Values(Layout::ANY),
::testing::Values(Layout::ANY),
::testing::Values(std::vector<size_t >({ 2, 12, 7, 7 })),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUInfoForDevice(CPUParams_1x1_2D_I8)),
::testing::ValuesIn(fusingParamsSetI8),
::testing::Values(cpuEmptyPluginConfig)),
ConvolutionLayerCPUTest::getTestCaseName);

} // namespace int8

} // namespace CPULayerTestsDefinitions