[CPU][ARM] Make f16 precision as default for CNN (openvinotoolkit#22839)

bbielawx committed on Apr 12, 2024 · 1dfe0e8
1 parent 1a95201
commit 1dfe0e8
Show file tree

Hide file tree

Showing 3 changed files with 6 additions and 6 deletions.
diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
@@ -369,10 +369,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
         if (executionMode == ov::hint::ExecutionMode::PERFORMANCE) {
             inferencePrecision = ov::element::f32;
 #if defined(OV_CPU_ARM_ENABLE_FP16)
-            // fp16 precision is used as default precision on ARM for non-convolution networks
-            // fp16 ACL convolution is slower than fp32
-            if (modelType != ModelType::CNN)
-                inferencePrecision = ov::element::f16;
+            inferencePrecision = ov::element::f16;
 #else
             if (mayiuse(avx512_core_bf16))
                 inferencePrecision = ov::element::bf16;

diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/auto_batching_tests.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/auto_batching_tests.hpp
@@ -71,6 +71,7 @@ class AutoBatching_Test : public OVPluginTestBase,
 
             if (target_device.find("CPU") != std::string::npos) {
                 config.insert(ov::num_streams(static_cast<int32_t>(num_streams)));
+                config.insert(ov::hint::inference_precision(ov::element::f32));
             }
             // minimize timeout to reduce test time
             config.insert(ov::auto_batch_timeout(1));

diff --git a/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_threading.hpp b/src/tests/functional/plugin/shared/include/behavior/ov_plugin/core_threading.hpp
@@ -487,7 +487,8 @@ TEST_P(CoreThreadingTestsWithIter, smoke_CompileModel_Accuracy_SingleCore) {
             }
 
             auto getOutputBlob = [&](ov::Core& core) {
-                auto compiled_model = core.compile_model(model, target_device);
+                ov::AnyMap f32_precision_property = {{ov::hint::inference_precision.name(), ov::element::f32.to_string()}};
+                auto compiled_model = core.compile_model(model, target_device, f32_precision_property);
                 auto req = compiled_model.create_infer_request();
                 for (const auto& input : inputs) {
                     req.set_tensor(input.first, input.second);
@@ -530,7 +531,8 @@ TEST_P(CoreThreadingTestsWithIter, smoke_CompileModel_Accuracy_MultipleCores) {
             }
 
             auto getOutputBlob = [&](ov::Core& core) {
-                auto compiled_model = core.compile_model(model, target_device);
+                ov::AnyMap f32_precision_property = {{ov::hint::inference_precision.name(), ov::element::f32.to_string()}};
+                auto compiled_model = core.compile_model(model, target_device, f32_precision_property);
                 auto req = compiled_model.create_infer_request();
                 for (const auto& input : inputs) {
                     req.set_tensor(input.first, input.second);