From 38f476c15867bcfb816e96d7e2a7ed1ddeb8082e Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Thu, 27 Jun 2024 21:16:19 +0400 Subject: [PATCH] [CPU] Limit f16 in Accuracy mode by hardware support (#25243) ### Details: - *Restrict usage of f16 in transformations pipeline if it's not supported by hardware for ACCURACY MODE* ### Tickets: - *145051* --- src/plugins/intel_cpu/src/nodes/conv.cpp | 3 ++- .../src/transformations/transformation_pipeline.cpp | 3 ++- .../subgraph_tests/src/classes/undefined_et.cpp | 13 ++++++++++++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/nodes/conv.cpp b/src/plugins/intel_cpu/src/nodes/conv.cpp index 5fb5e6ddc6813e..e22a36af852a14 100644 --- a/src/plugins/intel_cpu/src/nodes/conv.cpp +++ b/src/plugins/intel_cpu/src/nodes/conv.cpp @@ -531,8 +531,9 @@ void Convolution::getSupportedDescriptors() { auto dt = memory::data_type::f32; // supported lower precisions: bf16, f16 - if (one_of(originalDT, memory::data_type::bf16, memory::data_type::f16)) + if (one_of(originalDT, memory::data_type::bf16, memory::data_type::f16) && hasHardwareSupport(originalPrec)) { dt = originalDT; + } // fallback to f32 on special case for performance reasons if (isDepthWise() && ndims == 5) diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 3a50f9260d83be..63a946cfa69955 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -356,7 +356,8 @@ void Transformations::PreLpt(const std::vector& defaultPrecis // @todo should we always convert to f32 regardless of hardware support, as it is done for f16? 
if (!hasHardwareSupport(ov::element::bf16)) map.insert({ov::element::bf16, ov::element::f32}); - if (!one_of(inferencePrecision, element::f16, element::undefined)) { + // TODO: Remove 'hasHardwareSupport' when all nodes are able to handle f16 properly. + if (!one_of(inferencePrecision, element::f16, element::undefined) || !hasHardwareSupport(element::f16)) { map.insert({ov::element::f16, ov::element::f32}); } return map; diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/undefined_et.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/undefined_et.cpp index 40b3277c8708b3..f232a2d641eb55 100644 --- a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/undefined_et.cpp +++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/classes/undefined_et.cpp @@ -26,6 +26,7 @@ // ----------- #include "custom/subgraph_tests/include/undefined_et.hpp" +#include "utils/precision_support.h" namespace ov { namespace test { @@ -74,6 +75,12 @@ void UndefinedEtSubgraphTest::SetUp() { auto logical_not = std::make_shared(cvt_f32); function = std::make_shared(OutputVector{logical_not->output(0)}, ParameterVector{param_0, param_1, param_2}, "UndefinedET"); + + // TODO: Remove these relaxed thresholds once the hardware check for f16 is eliminated from the Transformations pipeline. 
+ if (m_data_et == element::f16 && !ov::intel_cpu::hasHardwareSupport(m_data_et)) { + abs_threshold = 1.f; + rel_threshold = 0.1f; + } } template @@ -146,6 +153,10 @@ TEST_P(UndefinedEtSubgraphTest, CompareWithRefs) { size_t rnd_unfm_counter = 0lu; size_t logical_not_counter = 0lu; + auto expected_dt = m_data_et; + if (!ov::intel_cpu::hasHardwareSupport(expected_dt)) { + expected_dt = element::f32; + } for (const auto& node : compiledModel.get_runtime_model()->get_ops()) { auto rt_info = node->get_rt_info(); auto it = rt_info.find(exec_model_info::LAYER_TYPE); @@ -153,7 +164,7 @@ TEST_P(UndefinedEtSubgraphTest, CompareWithRefs) { auto op_name = it->second.as(); if (op_name == "RandomUniform") { - ASSERT_EQ(node->get_output_element_type(0), m_data_et); + ASSERT_EQ(node->get_output_element_type(0), expected_dt); rnd_unfm_counter++; } if (op_name == "Eltwise") {