refactor cpu pinning (#23099)

### Details: - *Porting [PR 22489](#22489)* -- *add ov::affinity deprecation in CPU plugin introduction* -- *refactoring get_cpu_pinning()* -- *Default CPU pinning on Windows is false on all platforms* - *Enable CPU pinning in part of CPU functional test cases. Now Intel CPU plugin func tests on Windows finish at 0h:27m:32.21s in this PR and 0h:30m:50.99s in [PR 23129](#23129)* ### Tickets: - *CVS-129030*
openvinotoolkit · Feb 29, 2024 · 9e759e4 · 9e759e4
1 parent 7ceff6a
commit 9e759e4
Show file tree

Hide file tree

Showing 8 changed files with 252 additions and 46 deletions.
diff --git a/.../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst b/.../openvino-workflow/running-inference/inference-devices-and-modes/cpu-device.rst
@@ -357,6 +357,9 @@ Read-only properties
 - ``ov::device::full_name``
 - ``ov::device::capabilities``
 
+.. note::
+   ``ov::affinity`` is replaced by ``ov::hint::enable_cpu_pinning``. As such, it is deprecated in the 2024.0 release and will be removed in the 2025 release.
+
 External Dependencies
 ###########################################################
 
@@ -404,7 +407,16 @@ User can use the following properties to limit available CPU resource for model
 
    ``ov::hint::scheduling_core_type`` and ``ov::hint::enable_hyper_threading`` only support Intel® x86-64 CPU on Linux and Windows in current release.
 
-By default, OpenVINO Runtime will enable CPU threads pinning for better performance. User also can use property ``ov::hint::enable_cpu_pinning`` to switch it off. Disable threads pinning might be beneficial in complex applications with several workloads executed in parallel.
+In some use cases, OpenVINO Runtime will enable CPU thread pinning by default for better performance. Users can also turn it on or off using the property ``ov::hint::enable_cpu_pinning``. Disabling thread pinning might be beneficial in complex applications with several workloads executed in parallel. The following table describes the default setting for ``ov::hint::enable_cpu_pinning`` in different use cases.
+
+==================================================== ================================
+ Use Case                                             Default Setting of CPU Pinning 
+==================================================== ================================
+ All use cases with Windows OS                        False
+ Stream contains both Pcore and Ecore with Linux OS   False
+ Stream only contains Pcore or Ecore with Linux OS    True
+ All use cases with Mac OS                            False
+==================================================== ================================
 
 .. tab-set::
 
@@ -427,7 +439,7 @@ user can check the :doc:`optimization guide <openvino_docs_deployment_optimizati
 
 .. note::
 
-   ``ov::hint::enable_cpu_pinning`` only support Linux in current release.
+   ``ov::hint::enable_cpu_pinning`` is not supported on multi-socket platforms with Windows OS.
 
 Denormals Optimization
 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

diff --git a/src/plugins/intel_cpu/src/config.cpp b/src/plugins/intel_cpu/src/config.cpp
@@ -93,20 +93,21 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
             OPENVINO_SUPPRESS_DEPRECATED_START
         } else if (key == ov::affinity.name()) {
             try {
-                ov::Affinity affinity = val.as<ov::Affinity>();
                 changedCpuPinning = true;
+                ov::Affinity affinity = val.as<ov::Affinity>();
+#if defined(__APPLE__)
+                enableCpuPinning = false;
+                threadBindingType = affinity == ov::Affinity::NONE ? IStreamsExecutor::ThreadBindingType::NONE
+                                                                   : IStreamsExecutor::ThreadBindingType::NUMA;
+#else
                 enableCpuPinning =
                     (affinity == ov::Affinity::CORE || affinity == ov::Affinity::HYBRID_AWARE) ? true : false;
                 switch (affinity) {
                 case ov::Affinity::NONE:
                     threadBindingType = IStreamsExecutor::ThreadBindingType::NONE;
                     break;
                 case ov::Affinity::CORE: {
-#if (defined(__APPLE__) || defined(_WIN32))
-                    threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA;
-#else
                     threadBindingType = IStreamsExecutor::ThreadBindingType::CORES;
-#endif
                 } break;
                 case ov::Affinity::NUMA:
                     threadBindingType = IStreamsExecutor::ThreadBindingType::NUMA;
@@ -121,6 +122,7 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
                                    key,
                                    ". Expected only ov::Affinity::CORE/NUMA/HYBRID_AWARE.");
                 }
+#endif
             } catch (const ov::Exception&) {
                 OPENVINO_THROW("Wrong value ",
                                val.as<std::string>(),

diff --git a/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp b/src/plugins/intel_cpu/src/cpu_map_scheduling.cpp
@@ -7,6 +7,7 @@
 #include "cpu_streams_calculation.hpp"
 #include "openvino/core/parallel.hpp"
 #include "openvino/runtime/system_conf.hpp"
+#include "openvino/runtime/threading/cpu_streams_info.hpp"
 
 namespace ov {
 namespace intel_cpu {
@@ -71,32 +72,30 @@ std::vector<std::vector<int>> apply_hyper_threading(bool& input_ht_hint,
 
 bool get_cpu_pinning(bool& input_value,
                      const bool input_changed,
-                     const int num_streams,
-                     const Config::LatencyThreadingMode latency_threading_mode,
-                     const std::vector<std::vector<int>>& proc_type_table) {
-    int result_value;
-    int num_sockets = get_default_latency_streams(latency_threading_mode);
-    bool latency = num_streams <= num_sockets && num_streams > 0;
+                     const std::vector<std::vector<int>>& proc_type_table,
+                     const std::vector<std::vector<int>>& streams_info_table) {
+    bool result_value;
 
+#if defined(__APPLE__)
+    result_value = false;
+#elif defined(_WIN32)
+    result_value = ((input_changed) && (proc_type_table.size() == 1)) ? input_value : false;
+#else
     if (input_changed) {
         result_value = input_value;
     } else {
         result_value = true;
-        if (proc_type_table[0][EFFICIENT_CORE_PROC] > 0 &&
-            proc_type_table[0][EFFICIENT_CORE_PROC] < proc_type_table[0][ALL_PROC]) {
-            result_value = latency ? false : true;
+        // The following code disables pinning in case stream contains both Pcore and Ecore
+        if (streams_info_table.size() >= 3) {
+            if ((streams_info_table[0][PROC_TYPE] == ALL_PROC) &&
+                (streams_info_table[1][PROC_TYPE] != EFFICIENT_CORE_PROC) &&
+                (streams_info_table[2][PROC_TYPE] == EFFICIENT_CORE_PROC)) {
+                result_value = false;
+            }
         }
     }
-#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
-#    if defined(_WIN32)
-    if (proc_type_table.size() > 1) {
-        result_value = false;
-    }
-#    endif
-#    if defined(__APPLE__)
-    result_value = false;
-#    endif
 #endif
+
     input_value = result_value;
 
     return result_value;

diff --git a/src/plugins/intel_cpu/src/cpu_map_scheduling.hpp b/src/plugins/intel_cpu/src/cpu_map_scheduling.hpp
@@ -44,16 +44,14 @@ std::vector<std::vector<int>> apply_hyper_threading(bool& input_ht_hint,
  * @brief      whether pinning cpu cores according to enableCpuPinning property
  * @param[in]  input_value indicates value of property enableCpuPinning.
  * @param[in]  input_changed indicate if value is set by user.
- * @param[in]  num_streams number of streams
- * @param[in]  latency_threading_mode is the scope of candidate processors per stream for latency hint
- * @param[in]  proc_type_table candidate processors available at this time
+ * @param[in]  proc_type_table indicates processor information of this platform
+ * @param[in]  streams_info_table indicates stream details of this model
  * @return     whether pinning threads to cpu cores
  */
 bool get_cpu_pinning(bool& input_value,
                      const bool input_changed,
-                     const int num_streams,
-                     const Config::LatencyThreadingMode latency_threading_mode,
-                     const std::vector<std::vector<int>>& proc_type_table);
+                     const std::vector<std::vector<int>>& proc_type_table,
+                     const std::vector<std::vector<int>>& streams_info_table);
 
 }  // namespace intel_cpu
 }  // namespace ov
diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp
@@ -534,11 +534,7 @@ std::vector<std::vector<int>> generate_stream_info(const int streams,
                                             config.changedHyperThreading,
                                             ov::util::to_string(config.hintPerfMode),
                                             proc_type_table);
-    auto cpu_reservation = get_cpu_pinning(config.enableCpuPinning,
-                                            config.changedCpuPinning,
-                                            streams,
-                                            config.latencyThreadingMode,
-                                            proc_type_table);
+
     if (-1 == preferred_nthreads_per_stream) {
         model_prefer_threads = get_model_prefer_threads(streams, proc_type_table, model, config);
     }
@@ -553,6 +549,9 @@ std::vector<std::vector<int>> generate_stream_info(const int streams,
                                                      config.latencyThreadingMode,
                                                      proc_type_table);
 
+    auto cpu_reservation =
+        get_cpu_pinning(config.enableCpuPinning, config.changedCpuPinning, proc_type_table, streams_info_table);
+
     config.streamExecutorConfig = IStreamsExecutor::Config{"CPUStreamsExecutor",
                                                            config.streams,
                                                            config.threadsPerStream,

diff --git a/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp b/src/plugins/intel_cpu/tests/functional/custom/behavior/ov_plugin/properties.cpp
@@ -133,19 +133,17 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigStreamsNum) {
 
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinity) {
     ov::Core ie;
-    ov::Affinity value = ov::Affinity::NONE;
 
-#if (defined(__APPLE__) || defined(_WIN32))
-    auto numaNodes = ov::get_available_numa_nodes();
-    auto coreTypes = ov::get_available_cores_types();
+#if defined(__APPLE__)
+    ov::Affinity value = ov::Affinity::CORE;
     auto defaultBindThreadParameter = ov::Affinity::NONE;
-    if (coreTypes.size() > 1) {
-        defaultBindThreadParameter = ov::Affinity::HYBRID_AWARE;
-    } else if (numaNodes.size() > 1) {
-        defaultBindThreadParameter = ov::Affinity::NUMA;
-    }
 #else
+    ov::Affinity value = ov::Affinity::NUMA;
+#    if defined(_WIN32)
+    auto defaultBindThreadParameter = ov::Affinity::NONE;
+#    else
     auto defaultBindThreadParameter = ov::Affinity::CORE;
+#    endif
     auto coreTypes = ov::get_available_cores_types();
     if (coreTypes.size() > 1) {
         defaultBindThreadParameter = ov::Affinity::HYBRID_AWARE;
@@ -154,10 +152,15 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinity) {
     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::affinity));
     ASSERT_EQ(defaultBindThreadParameter, value);
 
-    const ov::Affinity affinity = defaultBindThreadParameter == ov::Affinity::HYBRID_AWARE ? ov::Affinity::NUMA : ov::Affinity::HYBRID_AWARE;
+    const ov::Affinity affinity =
+        defaultBindThreadParameter == ov::Affinity::HYBRID_AWARE ? ov::Affinity::NUMA : ov::Affinity::HYBRID_AWARE;
     ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity)));
     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::affinity));
+#if defined(__APPLE__)
+    ASSERT_EQ(ov::Affinity::NUMA, value);
+#else
     ASSERT_EQ(affinity, value);
+#endif
 }
 
 TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinityCore) {
@@ -167,12 +170,20 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginSetConfigAffinityCore) {
 
     ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity)));
     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::enable_cpu_pinning));
+#if defined(__APPLE__)
+    ASSERT_EQ(false, value);
+#else
     ASSERT_EQ(true, value);
+#endif
 
     affinity = ov::Affinity::HYBRID_AWARE;
     ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity)));
     ASSERT_NO_THROW(value = ie.get_property("CPU", ov::hint::enable_cpu_pinning));
+#if defined(__APPLE__)
+    ASSERT_EQ(false, value);
+#else
     ASSERT_EQ(true, value);
+#endif
 
     affinity = ov::Affinity::NUMA;
     ASSERT_NO_THROW(ie.set_property("CPU", ov::affinity(affinity)));

diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/core_config.cpp
@@ -17,6 +17,10 @@ void core_configuration(ov::test::SubgraphBaseTest* test) {
         // todo: issue: 123320
         test->convert_precisions.insert({ov::element::bf16, ov::element::f32});
         test->convert_precisions.insert({ov::element::f16, ov::element::f32});
+
+        // Enable CPU pinning in CPU functional tests to save validation time of Intel CPU plugin func tests (parallel)
+        // on Windows
+        test->configuration.insert({ov::hint::enable_cpu_pinning.name(), true});
 }
 
 } // namespace test