Skip to content

Commit

Permalink
rebasing the perf-modes-2021.3 to the 2021.4
Browse files Browse the repository at this point in the history
Caveats:
the (explicit) setting of #streams is not disabled (as it was before for experiments with DLBenchmark), and the logic slightly differs (streamsSet)
  • Loading branch information
myshevts committed Jul 1, 2021
1 parent 0361fc8 commit 1ae1edc
Show file tree
Hide file tree
Showing 9 changed files with 403 additions and 44 deletions.
7 changes: 7 additions & 0 deletions inference-engine/include/ie_plugin_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,13 @@ namespace PluginConfigParams {
#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name
#define DECLARE_CONFIG_VALUE(name) static constexpr auto name = #name

/**
 * @brief High-level OpenVINO Performance Modes/Presets.
 *        The accepted values for this key are the LATENCY and THROUGHPUT
 *        config values declared below.
 */
DECLARE_CONFIG_KEY(OV_PERFORMANCE_MODE);
DECLARE_CONFIG_VALUE(LATENCY);
DECLARE_CONFIG_VALUE(THROUGHPUT);

/**
* @brief generic boolean values
*/
Expand Down
8 changes: 7 additions & 1 deletion inference-engine/samples/benchmark_app/benchmark_app.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@ static const char input_message[] = "Optional. Path to a folder with images and/
static const char model_message[] = "Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with "
"a trained compiled model.";

/// @brief message for execution performance mode
static const char mode_message[] = "Optional. Selects OpenVINO Performance Mode/Preset. Default value is \"throughput (tput)\".";

/// @brief message for execution mode
static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\".";
static const char api_message[] = "Optional (deprecated). Enable Sync/Async API. Default value is \"async\".";

/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). "
Expand Down Expand Up @@ -157,6 +160,9 @@ DEFINE_string(i, "", input_message);
/// It is a required parameter
DEFINE_string(m, "", model_message);

/// @brief Define the high-level performance mode/preset (defaults to THROUGHPUT)
DEFINE_string(mode, CONFIG_VALUE(THROUGHPUT), mode_message);

/// @brief Define execution mode
DEFINE_string(api, "async", api_message);

Expand Down
42 changes: 27 additions & 15 deletions inference-engine/samples/benchmark_app/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,14 @@ int main(int argc, char* argv[]) {
// ----------------- 3. Setting device configuration
// -----------------------------------------------------------
next_step();
// Map the -mode flag onto the OV_PERFORMANCE_MODE config value.
// An empty flag leaves ov_perf_mode empty, i.e. no preset is applied.
std::string ov_perf_mode;
if (FLAGS_mode == "throughput" || FLAGS_mode == "THROUGHPUT" || FLAGS_mode == "tput")
    ov_perf_mode = CONFIG_VALUE(THROUGHPUT);
else if (FLAGS_mode == "latency" || FLAGS_mode == "LATENCY")
    ov_perf_mode = CONFIG_VALUE(LATENCY);
else if (!FLAGS_mode.empty())
    // Report the user-supplied value: ov_perf_mode is still empty on this
    // branch, so concatenating it (as before) produced a blank message.
    throw std::logic_error("Performance mode " + FLAGS_mode + " is not recognized!");


bool perf_counts = false;
// Update config per device according to command line parameters
Expand All @@ -206,6 +214,10 @@ int main(int argc, char* argv[]) {
config[device] = {};
std::map<std::string, std::string>& device_config = config.at(device);

// high-level performance modes
if (!ov_perf_mode.empty())
device_config[CONFIG_KEY(OV_PERFORMANCE_MODE)] = ov_perf_mode;

// Set performance counter
if (isFlagSetInCommandLine("pc")) {
// set to user defined value
Expand All @@ -224,6 +236,7 @@ int main(int argc, char* argv[]) {
}
perf_counts = (device_config.at(CONFIG_KEY(PERF_COUNT)) == CONFIG_VALUE(YES)) ? true : perf_counts;

// the rest are individual per-device settings (overriding the values set with perf modes)
auto setThroughputStreams = [&]() {
const std::string key = device + "_THROUGHPUT_STREAMS";
if (device_nstreams.count(device)) {
Expand All @@ -236,7 +249,7 @@ int main(int argc, char* argv[]) {
" or via configuration file.");
}
device_config[key] = device_nstreams.at(device);
} else if (!device_config.count(key) && (FLAGS_api == "async")) {
} else if (ov_perf_mode.empty() && !device_config.count(key) && (FLAGS_api == "async")) {
slog::warn << "-nstreams default value is determined automatically for " << device
<< " device. "
"Although the automatic selection usually provides a "
Expand Down Expand Up @@ -295,20 +308,6 @@ int main(int argc, char* argv[]) {

if (isFlagSetInCommandLine("nthreads"))
device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
} else {
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
auto supported = [&](const std::string& key) {
return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) != std::end(supported_config_keys);
};
if (supported(CONFIG_KEY(CPU_THREADS_NUM)) && isFlagSetInCommandLine("nthreads")) {
device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
}
if (supported(CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) && isFlagSetInCommandLine("nstreams")) {
device_config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = FLAGS_nstreams;
}
if (supported(CONFIG_KEY(CPU_BIND_THREAD)) && isFlagSetInCommandLine("pin")) {
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
}
}
}

Expand Down Expand Up @@ -422,6 +421,19 @@ int main(int argc, char* argv[]) {
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});

if (!ov_perf_mode.empty()) {
    std::cout << "OV_PERFORMANCE_MODE: " << ov_perf_mode << std::endl;
    // Dump the actual per-device settings the preset produced (debugging aid).
    for (auto& device : devices) {
        std::vector<std::string> supported_config_keys = ie.GetMetric(device,
                METRIC_KEY(SUPPORTED_CONFIG_KEYS));
        std::cout << "Device: " << device << std::endl;
        // const-ref avoids copying each std::string key per iteration
        for (const auto& cfg : supported_config_keys) {
            std::cout << " {" << cfg << " , " << exeNetwork.GetConfig(cfg).as<std::string>() << " }" << std::endl;
        }
    }
}
} else {
next_step();
slog::info << "Skipping the step for compiled network" << slog::endl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ std::vector<std::string> IStreamsExecutor::Config::SupportedKeys() {
CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM),
};
}
int IStreamsExecutor::Config::GetDefaultNumStreams() {
    // On a single-socket machine count all logical processors; on multi-socket
    // machines count only the physical CPU cores.
    const int sockets = static_cast<int>(getAvailableNUMANodes().size());
    const int num_cores = (sockets == 1) ? std::thread::hardware_concurrency() : getNumberOfCPUCores();
    // Bare minimum of streams: pick the first divisor (preferring 4, then 5,
    // then 3 cores per stream) that evenly splits the available cores.
    for (const int cores_per_stream : {4, 5, 3}) {
        if (num_cores % cores_per_stream == 0)
            return std::max(cores_per_stream, num_cores / cores_per_stream);
    }
    // Weird core counts (e.g. cores disabled in BIOS) that divide by none of
    // the candidates fall back to a single stream.
    return 1;
}

void IStreamsExecutor::Config::SetConfig(const std::string& key, const std::string& value) {
if (key == CONFIG_KEY(CPU_BIND_THREAD)) {
Expand All @@ -49,17 +62,8 @@ void IStreamsExecutor::Config::SetConfig(const std::string& key, const std::stri
if (value == CONFIG_VALUE(CPU_THROUGHPUT_NUMA)) {
_streams = static_cast<int>(getAvailableNUMANodes().size());
} else if (value == CONFIG_VALUE(CPU_THROUGHPUT_AUTO)) {
const int sockets = static_cast<int>(getAvailableNUMANodes().size());
// bare minimum of streams (that evenly divides available number of cores)
const int num_cores = sockets == 1 ? std::thread::hardware_concurrency() : getNumberOfCPUCores();
if (0 == num_cores % 4)
_streams = std::max(4, num_cores / 4);
else if (0 == num_cores % 5)
_streams = std::max(5, num_cores / 5);
else if (0 == num_cores % 3)
_streams = std::max(3, num_cores / 3);
else // if user disables some cores say in BIOS, so we got weird #cores which is not easy to divide
_streams = 1;
_streams = GetDefaultNumStreams();
} else {
int val_i;
try {
Expand Down
25 changes: 16 additions & 9 deletions inference-engine/src/mkldnn_plugin/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ Config::Config() {
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#if defined(__APPLE__) || defined(_WIN32)
// 'CORES' is not implemented for Win/MacOS; so the 'NUMA' is default
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA;
#endif
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA;
#endif

if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) {
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::HYBRID_AWARE;
}
#endif
#endif

if (!with_cpu_x86_bfloat16())
enforceBF16 = false;
Expand All @@ -43,11 +43,10 @@ Config::Config() {


void Config::readProperties(const std::map<std::string, std::string> &prop) {
auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
for (auto& kvp : prop) {
auto& key = kvp.first;
auto& val = kvp.second;

const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
for (const auto& kvp : prop) {
const auto& key = kvp.first;
const auto& val = kvp.second;
if (streamExecutorConfigKeys.end() !=
std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
streamExecutorConfig.SetConfig(key, val);
Expand Down Expand Up @@ -109,7 +108,13 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16
<< ". Expected only YES/NO";
}
} else {
} else if (key == PluginConfigParams::KEY_OV_PERFORMANCE_MODE) {
if (val == PluginConfigParams::LATENCY || val == PluginConfigParams::THROUGHPUT)
ovPerfMode = val;
else
IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_OV_PERFORMANCE_MODE
<< ". Expected only " << PluginConfigParams::LATENCY << "/" << PluginConfigParams::THROUGHPUT;
} else {
IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
}
_config.clear();
Expand Down Expand Up @@ -158,6 +163,8 @@ void Config::updateProperties() {
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES });
else
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO });
if (!ovPerfMode.empty())
_config.insert({ PluginConfigParams::KEY_OV_PERFORMANCE_MODE, ovPerfMode });
}
}

Expand Down
1 change: 1 addition & 0 deletions inference-engine/src/mkldnn_plugin/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct Config {
bool enableDynamicBatch = false;
std::string dumpToDot = "";
int batchLimit = 0;
std::string ovPerfMode = "";
InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;

#if defined(__arm__) || defined(__aarch64__)
Expand Down
Loading

0 comments on commit 1ae1edc

Please sign in to comment.