MVN_accuracy_fix_on_avx512 #5787

Merged
@@ -105,8 +105,10 @@ void jit_load_emitter::emit_isa(const Xbyak::Reg64 &reg_src, int offset_byte, In
h->uni_vcvtdq2ps(Vmm(out_vec_idx), Vmm(out_vec_idx));
break;
case Precision::I32:
if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16))
if ((src_prc == Precision::FP32) || (src_prc == Precision::BF16)) {
h->uni_vroundps(Vmm(out_vec_idx), Vmm(out_vec_idx), 3); // rounding to zero
h->uni_vcvtps2dq(Vmm(out_vec_idx), Vmm(out_vec_idx));
}
break;
default:
break;
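Note on the rounding change above: `vcvtps2dq` converts using the current MXCSR rounding mode, which defaults to round-to-nearest-even, while the scalar reference path presumably truncates toward zero (as a C++ `static_cast<int>` does). Rounding toward zero first with `uni_vroundps(..., 3)` makes the vector conversion match. A minimal standalone sketch of the difference (illustration only, not plugin code):

```cpp
// Illustration only (not plugin code): difference between the default
// round-to-nearest conversion and the truncating conversion that
// uni_vroundps(..., 3) + uni_vcvtps2dq now reproduces.
#include <cmath>
#include <cstdio>

int main() {
    float v = 126.7f;
    int nearest   = static_cast<int>(std::nearbyint(v)); // 127: default MXCSR rounding (nearest-even)
    int truncated = static_cast<int>(v);                  // 126: round toward zero (imm = 3)
    std::printf("nearest=%d truncated=%d\n", nearest, truncated);
    return 0;
}
```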
@@ -549,8 +551,10 @@ template <mkldnn::impl::cpu::x64::cpu_isa_t isa>
if (src_prc != dst_prc) {
switch (src_prc) {
case Precision::FP32:
if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16))
if ((dst_prc != Precision::FP32) && (dst_prc != Precision::BF16)) {
h->uni_vroundps(Vmm(in_vec_idx), Vmm(in_vec_idx), 3); // rounding to zero
h->uni_vcvtps2dq(Vmm(in_vec_idx), Vmm(in_vec_idx));
}
break;
case Precision::I32:
if ((dst_prc == Precision::FP32) || (dst_prc == Precision::BF16))
@@ -636,7 +640,7 @@ template <typename Vmm>
mask = (mask << store_size) - mask;
h->mov(Reg64(aux_gpr_idxs[0]), mask);
h->kmovq(k_mask, Reg64(aux_gpr_idxs[0]));
h->vmovdqu8(addr(0) | k_mask, zmm);
h->vmovdqu8(addr(0), zmm | k_mask);
} else {
if (store_size == 64) {
h->uni_vmovdqu(addr(0), zmm);
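For the tail store above: assuming `mask` starts at 1 (its initialization is outside this hunk), `(mask << store_size) - mask` leaves the low `store_size` bits set; `kmovq` loads that into a k register so the masked `vmovdqu8` writes only those bytes and never touches memory past the tail. A quick sketch of the mask arithmetic, as an illustration only:

```cpp
// Illustration only: the tail-store mask arithmetic, assuming `mask` is
// initialized to 1 (its initialization is outside the hunk shown above).
#include <cstdint>
#include <cstdio>

int main() {
    int store_size = 13;                 // e.g. a 13-byte tail
    uint64_t mask = 1;
    mask = (mask << store_size) - mask;  // 13 low bits set: 0x1fff
    std::printf("mask = 0x%llx\n", static_cast<unsigned long long>(mask));
    return 0;
}
```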
@@ -768,10 +772,10 @@ template <typename Vmm>
h->mov(Reg32(aux_gpr_idxs[0]), mask);
h->kmovw(k_mask, Reg32(aux_gpr_idxs[0]));
if (is_signed) {
h->vpmovsdb(addr(0) | k_mask, vmm);
h->vpmovsdb(addr(0), vmm | k_mask);
} else {
h->vpmaxsd(vmm, vmm, Vmm(aux_vec_idxs[0]));
h->vpmovusdb(addr(0) | k_mask, vmm);
h->vpmovusdb(addr(0), vmm | k_mask);
}
}
} else {
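The masked I32-to-I8 stores rely on AVX-512 saturating down-converts: `vpmovsdb` narrows each signed dword to a signed byte with signed saturation, while `vpmovusdb` interprets its input as unsigned and saturates to an unsigned byte, so negative lanes are clamped to zero first with `vpmaxsd` (presumably against a zeroed auxiliary register). Scalar equivalents, as an illustration only:

```cpp
// Illustration only: scalar equivalents of the saturating narrows used above.
#include <algorithm>
#include <cstdint>

// vpmovsdb: signed 32-bit -> signed 8-bit with signed saturation.
int8_t narrow_signed(int32_t x) {
    return static_cast<int8_t>(std::min<int32_t>(127, std::max<int32_t>(-128, x)));
}

// vpmaxsd against zero, then vpmovusdb: clamp negatives to 0 first, because
// vpmovusdb treats its input as unsigned and would saturate them to 255.
uint8_t narrow_unsigned(int32_t x) {
    const int32_t clamped = std::max<int32_t>(0, x);
    return static_cast<uint8_t>(std::min<int32_t>(255, clamped));
}
```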
@@ -850,10 +854,10 @@ template <typename Vmm>
h->mov(Reg32(aux_gpr_idxs[0]), mask);
h->kmovw(k_mask, Reg32(aux_gpr_idxs[0]));
if (is_signed) {
h->vpmovsdw(ptr[reg + offset] | k_mask, vmm);
h->vpmovsdw(ptr[reg + offset], vmm | k_mask);
} else {
h->vmaxsd(vmm, Vmm(aux_vec_idxs[0]), vmm);
h->vpmovusdw(ptr[reg + offset] | k_mask, vmm);
h->vpmovusdw(ptr[reg + offset], vmm | k_mask);
}
}
} else {
12 changes: 1 addition & 11 deletions inference-engine/src/mkldnn_plugin/mkldnn_graph_optimizer.cpp
@@ -1122,17 +1122,7 @@ void MKLDNNGraphOptimizer::FuseMVNAndSimpleOperation(MKLDNNGraph &graph) {
auto& graphNodes = graph.GetNodes();

auto isSutableParentNode = [](MKLDNNNodePtr node) {
bool isSutableMVN = (node->getType() == MVN);

if (isSutableMVN) {
auto mvnNode = std::dynamic_pointer_cast<MKLDNNMVNNode>(node);
if (mvnNode == nullptr)
IE_THROW() << "CPU node with name '" << node->getName() << "' is not a MVN node.";

return mvnNode->getChildEdges().size() == 1 && !mvnNode->getAcrossChannels() && mvnNode->getNormalizeVariance();
} else {
return false;
}
return (node->getType() == MVN) && (node->getChildEdges().size() == 1);
};

auto parent = graphNodes.begin();
16 changes: 10 additions & 6 deletions inference-engine/src/mkldnn_plugin/nodes/mkldnn_mvn_node.cpp
@@ -703,12 +703,6 @@ void MKLDNNMVNNode::initSupportedPrimitiveDescriptors() {
setPostOps(attr, true);

Precision inputPrecision = getOriginalInputPrecisionAtPort(0);
if (getParentEdgeAt(0)->getDims().ndims() < 3 || getParentEdgeAt(0)->getDims().ndims() > 5
Contributor Author comment:

@dmitry-gorokhov I think these are legacy constraints, so I removed them; please correct me if I am wrong.

|| acrossChannels_ || !normalizeVariance_) {
if (!isFloatCompatible(inputPrecision)) {
inputPrecision = Precision::FP32;
}
}
Precision outputPrecision = getOriginalOutputPrecisionAtPort(0);
if (!mayiuse(avx512_core)) {
if (outputPrecision == Precision::BF16)
@@ -1409,6 +1403,16 @@ bool MKLDNNMVNNode::canFuse(const MKLDNNNodePtr& node) const {
if (!mayiuse(cpu::x64::sse41)) {
return false;
}
// Limit post-ops to unary eltwise when the shape is transformed on the channel axis:
// a 1D input (and a 2D input with acrossChannels) can only be fused with unary ops.
int inputRank = getParentEdgeAt(0)->getDims().ndims();
bool unaryEltwise = one_of(node->getAlgorithm(), EltwiseRelu, EltwiseGelu, EltwiseElu, EltwiseSigmoid, EltwiseClamp, EltwiseTanh,
EltwiseSwish, EltwiseHswish, EltwiseMish, EltwiseHsigmoid, EltwiseRoundHalfToEven,
EltwiseRoundHalfAwayFromZero, EltwiseAbs, EltwiseSqrt, EltwiseSoftRelu);
if ((inputRank == 1 && !unaryEltwise) ||
(inputRank == 2 && !unaryEltwise && acrossChannels_)) {
return false;
}

return canFuseSimpleOperation(node);
}
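As the comment in canFuse notes, a 1D input (or a 2D input with acrossChannels_) is executed through a transformed shape, so the original channel axis is no longer where a fused post-op would expect it. Unary eltwise ops are per-element and layout-agnostic, so they remain correct; per-channel post-ops (per-channel FakeQuantize, ScaleShift) would broadcast their parameters along the wrong axis. A minimal sketch of the distinction, with hypothetical helper names:

```cpp
// Illustration only (hypothetical helpers): unary post-ops are safe under any
// reshape, per-channel post-ops are not.
#include <cstddef>
#include <vector>

// Unary eltwise: each value is mapped independently of its position,
// so reshaping the tensor cannot change the result.
float relu(float x) { return x > 0.f ? x : 0.f; }

// Per-channel post-op: needs to know which channel every element belongs to,
// so it breaks once the channel axis has been moved or folded by a reshape.
void scale_per_channel(std::vector<float>& data, const std::vector<float>& scales,
                       std::size_t channels, std::size_t spatial) {
    for (std::size_t c = 0; c < channels; ++c)
        for (std::size_t s = 0; s < spatial; ++s)
            data[c * spatial + s] *= scales[c];
}
```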
@@ -91,6 +91,7 @@ const std::vector<std::vector<size_t>> inputShapes_2D = {
const std::vector<std::vector<size_t>> inputShapes_3D = {
{1, 32, 17},
{1, 37, 9},
{1, 16, 4},
};

const std::vector<std::vector<size_t>> inputShapes_4D = {
@@ -127,7 +128,8 @@ const std::vector<double> epsilon = {
0.000000001
};

std::vector<Precision> inpOutPrc = {Precision::BF16, Precision::FP32};
std::vector<Precision> inpPrc = {Precision::I8, Precision::BF16, Precision::FP32};
std::vector<Precision> outPrc = {Precision::BF16, Precision::FP32};

std::vector<CPUSpecificParams> cpuParams_4D = {
CPUSpecificParams({nhwc}, {nhwc}, {}, {}),
@@ -141,35 +143,20 @@ std::vector<CPUSpecificParams> cpuParams_5D = {
CPUSpecificParams({ncdhw}, {ncdhw}, {}, {})
};

const auto Mvn1D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_1D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(emptyCPUSpec),
::testing::Values(emptyFusingSpec),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_1D, MvnLayerCPUTest, Mvn1D, MvnLayerCPUTest::getTestCaseName);

const auto Mvn2D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_2D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(emptyCPUSpec),
::testing::Values(emptyFusingSpec),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_2D, MvnLayerCPUTest, Mvn2D, MvnLayerCPUTest::getTestCaseName);
std::vector<fusingSpecificParams> fusingParamsSet {
emptyFusingSpec,
/* activations */
fusingRelu,
fusingElu,
fusingTanh,
fusingSwish,
/* FQ */
fusingFakeQuantizePerChannel,
fusingFakeQuantizePerChannelRelu,
fusingFakeQuantizePerTensorRelu,
/* another patterns */
fusingScaleShift,
};

const auto Mvn3D = ::testing::Combine(
::testing::Combine(
@@ -180,11 +167,11 @@ const auto Mvn3D = ::testing::Combine(
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(emptyCPUSpec),
::testing::Values(emptyFusingSpec),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
::testing::ValuesIn(outPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_3D, MvnLayerCPUTest, Mvn3D, MvnLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_Mvn3D, MvnLayerCPUTest, Mvn3D, MvnLayerCPUTest::getTestCaseName);

const auto Mvn4D = ::testing::Combine(
::testing::Combine(
@@ -195,11 +182,11 @@ const auto Mvn4D = ::testing::Combine(
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
::testing::Values(emptyFusingSpec),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
::testing::ValuesIn(outPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D, MvnLayerCPUTest, Mvn4D, MvnLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_Mvn4D, MvnLayerCPUTest, Mvn4D, MvnLayerCPUTest::getTestCaseName);

const auto Mvn5D = ::testing::Combine(
::testing::Combine(
@@ -210,86 +197,67 @@ const auto Mvn5D = ::testing::Combine(
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
::testing::Values(emptyFusingSpec),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpPrc),
::testing::ValuesIn(outPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D, MvnLayerCPUTest, Mvn5D, MvnLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_Mvn5D, MvnLayerCPUTest, Mvn5D, MvnLayerCPUTest::getTestCaseName);

std::vector<fusingSpecificParams> fusingParamsSet {
// used for the 1D and 2D cases
std::vector<fusingSpecificParams> fusingUnaryEltwiseParamsSet {
/* activations */
fusingRelu,
fusingElu,
fusingTanh,
fusingSwish,
/* FQ */
fusingFakeQuantizePerChannel,
fusingFakeQuantizePerChannelRelu,
fusingFakeQuantizePerTensorRelu,
/* another patterns */
fusingScaleShift,
};

const auto Mvn2DFuse = ::testing::Combine(
const auto Mvn1D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_2D),
::testing::ValuesIn(inputShapes_1D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(false),
::testing::Values(true),
::testing::ValuesIn(acrossChannels),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_2D_Fuse, MvnLayerCPUTest, Mvn2DFuse, MvnLayerCPUTest::getTestCaseName);

const auto Mvn3DFuse = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_3D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(false),
::testing::Values(true),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));
::testing::ValuesIn(fusingUnaryEltwiseParamsSet),
::testing::ValuesIn(inpPrc),
::testing::ValuesIn(outPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_3D_Fuse, MvnLayerCPUTest, Mvn3DFuse, MvnLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_Mvn1D, MvnLayerCPUTest, Mvn1D, MvnLayerCPUTest::getTestCaseName);

const auto Mvn4DFuse = ::testing::Combine(
// 2D, not transformed
const auto Mvn2D = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_4D),
::testing::ValuesIn(inputShapes_2D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(false),
::testing::Values(true),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_4D)),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));
::testing::ValuesIn(inpPrc),
::testing::ValuesIn(outPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_4D_Fuse, MvnLayerCPUTest, Mvn4DFuse, MvnLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_Mvn2D, MvnLayerCPUTest, Mvn2D, MvnLayerCPUTest::getTestCaseName);

const auto Mvn5DFuse = ::testing::Combine(
// 2D, transformed
const auto Mvn2DTrans = ::testing::Combine(
::testing::Combine(
::testing::ValuesIn(inputShapes_5D),
::testing::ValuesIn(inputShapes_2D),
::testing::Values(InferenceEngine::Precision::FP32),
::testing::Values(false),
::testing::Values(true),
::testing::ValuesIn(normalizeVariance),
::testing::ValuesIn(epsilon),
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::ValuesIn(filterCPUSpecificParams(cpuParams_5D)),
::testing::ValuesIn(fusingParamsSet),
::testing::ValuesIn(inpOutPrc),
::testing::ValuesIn(inpOutPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_5D_Fuse, MvnLayerCPUTest, Mvn5DFuse, MvnLayerCPUTest::getTestCaseName);
::testing::Values(CommonTestUtils::DEVICE_CPU)),
::testing::Values(emptyCPUSpec),
::testing::ValuesIn(fusingUnaryEltwiseParamsSet),
::testing::ValuesIn(inpPrc),
::testing::ValuesIn(outPrc));

INSTANTIATE_TEST_CASE_P(smoke_CompareWithRefs_MVN2DTrans, MvnLayerCPUTest, Mvn2DTrans, MvnLayerCPUTest::getTestCaseName);

} // namespace
} // namespace CPULayerTestsDefinitions