From 1dfe0e85f7df56971fbee2471a5f154d2e45c59b Mon Sep 17 00:00:00 2001
From: Aleksandr Voron
Date: Tue, 19 Mar 2024 14:04:39 +0100
Subject: [PATCH] [CPU][ARM] Make f16 precision as default for CNN (#22839)

---
 src/plugins/intel_cpu/src/config.cpp                       | 5 +----
 .../include/behavior/ov_plugin/auto_batching_tests.hpp     | 1 +
 .../shared/include/behavior/ov_plugin/core_threading.hpp   | 6 ++++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
index 3746e7237eb0db..cb5d4139b14e98 100644
--- a/src/plugins/intel_cpu/src/config.cpp
+++ b/src/plugins/intel_cpu/src/config.cpp
@@ -369,10 +369,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
         if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
             inferencePrecision = ov::element::f32;
 #if defined(OV_CPU_ARM_ENABLE_FP16)
-            // fp16 precision is used as default precision on ARM for non-convolution networks
-            // fp16 ACL convolution is slower than fp32
-            if (modelType != ModelType::CNN)
-                inferencePrecision = ov::element::f16;
+            inferencePrecision = ov::element::f16;
 #else
             if (mayiuse(avx512_core_bf16))
                 inferencePrecision = ov::element::bf16;
diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/auto_batching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/auto_batching_tests.hpp
index 04d953a01e9d4c..1fb2e645e55ad9 100644
--- a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/auto_batching_tests.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/auto_batching_tests.hpp
@@ -71,6 +71,7 @@ class AutoBatching_Test : public OVPluginTestBase,
 
             if (target_device.find("CPU") != std::string::npos) {
                 config.insert(ov::num_streams(static_cast<int>(num_streams)));
+                config.insert(ov::hint::inference_precision(ov::element::f32));
             }
             // minimize timeout to reduce test time
             config.insert(ov::auto_batch_timeout(1));
diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_threading.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_threading.hpp
index 43fb63ccc5e6ad..2b37c4af16725a 100644
--- a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_threading.hpp
+++ b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_threading.hpp
@@ -487,7 +487,8 @@ TEST_P(CoreThreadingTestsWithIter, smoke_CompileModel_Accuracy_SingleCore) {
     }
 
     auto getOutputBlob = [&](ov::Core& core) {
-        auto compiled_model = core.compile_model(model, target_device);
+        ov::AnyMap f32_precision_property = {{ov::hint::inference_precision.name(), ov::element::f32.to_string()}};
+        auto compiled_model = core.compile_model(model, target_device, f32_precision_property);
         auto req = compiled_model.create_infer_request();
         for (const auto& input : inputs) {
             req.set_tensor(input.first, input.second);
@@ -530,7 +531,8 @@ TEST_P(CoreThreadingTestsWithIter, smoke_CompileModel_Accuracy_MultipleCores) {
     }
 
    auto getOutputBlob = [&](ov::Core& core) {
-        auto compiled_model = core.compile_model(model, target_device);
+        ov::AnyMap f32_precision_property = {{ov::hint::inference_precision.name(), ov::element::f32.to_string()}};
+        auto compiled_model = core.compile_model(model, target_device, f32_precision_property);
         auto req = compiled_model.create_infer_request();
         for (const auto& input : inputs) {
             req.set_tensor(input.first, input.second);
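
Note (not part of the patch): with this change, ARM builds compiled with OV_CPU_ARM_ENABLE_FP16 default to f16 inference precision in PERFORMANCE mode for convolutional models as well, which is why the accuracy-sensitive tests above now pin the precision to f32. The sketch below shows how an application could opt back into f32 explicitly, mirroring the two styles used in the updated tests; the model path "model.xml" and the "CPU" device string are placeholder assumptions, not taken from the patch.

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        // Placeholder model path; any OpenVINO IR model works here.
        auto model = core.read_model("model.xml");

        // Typed property, as inserted in auto_batching_tests.hpp.
        ov::AnyMap config;
        config.insert(ov::hint::inference_precision(ov::element::f32));

        // Equivalent string-keyed form, as used in core_threading.hpp:
        // ov::AnyMap config = {{ov::hint::inference_precision.name(), ov::element::f32.to_string()}};

        // An explicit f32 hint overrides the new ARM f16 default set in config.cpp.
        auto compiled_model = core.compile_model(model, "CPU", config);
        auto request = compiled_model.create_infer_request();
        return 0;
    }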