Skip to content

Commit

Permalink
rebasing the perf-modes-2021.3 to the 2021.4
Browse files Browse the repository at this point in the history
Caveats:
the (explicit) setting of #streams is not disabled (as it was before for experiments with DLBenchmark), and the logic slightly differs (streamsSet)
  • Loading branch information
myshevts committed Jul 1, 2021
1 parent 0361fc8 commit 1ae1edc
Show file tree
Hide file tree
Showing 9 changed files with 403 additions and 44 deletions.
7 changes: 7 additions & 0 deletions inference-engine/include/ie_plugin_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,13 @@ namespace PluginConfigParams {
#define CONFIG_VALUE(name) InferenceEngine::PluginConfigParams::name
#define DECLARE_CONFIG_VALUE(name) static constexpr auto name = #name

/**
 * @brief High-level OpenVINO Performance Modes/Presets.
 *        The accepted values for this key are the LATENCY and THROUGHPUT
 *        config values declared below.
 */
DECLARE_CONFIG_KEY(OV_PERFORMANCE_MODE);
DECLARE_CONFIG_VALUE(LATENCY);
DECLARE_CONFIG_VALUE(THROUGHPUT);

/**
* @brief generic boolean values
*/
Expand Down
8 changes: 7 additions & 1 deletion inference-engine/samples/benchmark_app/benchmark_app.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@ static const char input_message[] = "Optional. Path to a folder with images and/
static const char model_message[] = "Required. Path to an .xml/.onnx/.prototxt file with a trained model or to a .blob files with "
"a trained compiled model.";

/// @brief message for execution performance mode
static const char mode_message[] = "Optional. Selects OpenVINO Performance Mode/Preset. Default value is \"throughput (tput)\".";

/// @brief message for execution mode
static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\".";
static const char api_message[] = "Optional (deprecated). Enable Sync/Async API. Default value is \"async\".";

/// @brief message for assigning cnn calculation to device
static const char target_device_message[] = "Optional. Specify a target device to infer on (the list of available devices is shown below). "
Expand Down Expand Up @@ -157,6 +160,9 @@ DEFINE_string(i, "", input_message);
/// It is a required parameter
DEFINE_string(m, "", model_message);

/// @brief Define the high-level performance mode/preset (defaults to THROUGHPUT)
DEFINE_string(mode, CONFIG_VALUE(THROUGHPUT), mode_message);

/// @brief Define execution mode
DEFINE_string(api, "async", api_message);

Expand Down
42 changes: 27 additions & 15 deletions inference-engine/samples/benchmark_app/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,14 @@ int main(int argc, char* argv[]) {
// ----------------- 3. Setting device configuration
// -----------------------------------------------------------
next_step();
// Map the -mode flag onto the OV_PERFORMANCE_MODE config value.
// An empty flag leaves ov_perf_mode empty, i.e. no preset is applied.
std::string ov_perf_mode;
if (FLAGS_mode == "throughput" || FLAGS_mode == "THROUGHPUT" || FLAGS_mode == "tput")
    ov_perf_mode = CONFIG_VALUE(THROUGHPUT);
else if (FLAGS_mode == "latency" || FLAGS_mode == "LATENCY")
    ov_perf_mode = CONFIG_VALUE(LATENCY);
else if (!FLAGS_mode.empty())
    // Report the user-supplied value: ov_perf_mode is still empty on this
    // branch, so concatenating it (as before) produced a blank message.
    throw std::logic_error("Performance mode " + FLAGS_mode + " is not recognized!");


bool perf_counts = false;
// Update config per device according to command line parameters
Expand All @@ -206,6 +214,10 @@ int main(int argc, char* argv[]) {
config[device] = {};
std::map<std::string, std::string>& device_config = config.at(device);

// high-level performance modes
if (!ov_perf_mode.empty())
device_config[CONFIG_KEY(OV_PERFORMANCE_MODE)] = ov_perf_mode;

// Set performance counter
if (isFlagSetInCommandLine("pc")) {
// set to user defined value
Expand All @@ -224,6 +236,7 @@ int main(int argc, char* argv[]) {
}
perf_counts = (device_config.at(CONFIG_KEY(PERF_COUNT)) == CONFIG_VALUE(YES)) ? true : perf_counts;

// the rest are individual per-device settings (overriding the values set with perf modes)
auto setThroughputStreams = [&]() {
const std::string key = device + "_THROUGHPUT_STREAMS";
if (device_nstreams.count(device)) {
Expand All @@ -236,7 +249,7 @@ int main(int argc, char* argv[]) {
" or via configuration file.");
}
device_config[key] = device_nstreams.at(device);
} else if (!device_config.count(key) && (FLAGS_api == "async")) {
} else if (ov_perf_mode.empty() && !device_config.count(key) && (FLAGS_api == "async")) {
slog::warn << "-nstreams default value is determined automatically for " << device
<< " device. "
"Although the automatic selection usually provides a "
Expand Down Expand Up @@ -295,20 +308,6 @@ int main(int argc, char* argv[]) {

if (isFlagSetInCommandLine("nthreads"))
device_config[GNA_CONFIG_KEY(LIB_N_THREADS)] = std::to_string(FLAGS_nthreads);
} else {
std::vector<std::string> supported_config_keys = ie.GetMetric(device, METRIC_KEY(SUPPORTED_CONFIG_KEYS));
auto supported = [&](const std::string& key) {
return std::find(std::begin(supported_config_keys), std::end(supported_config_keys), key) != std::end(supported_config_keys);
};
if (supported(CONFIG_KEY(CPU_THREADS_NUM)) && isFlagSetInCommandLine("nthreads")) {
device_config[CONFIG_KEY(CPU_THREADS_NUM)] = std::to_string(FLAGS_nthreads);
}
if (supported(CONFIG_KEY(CPU_THROUGHPUT_STREAMS)) && isFlagSetInCommandLine("nstreams")) {
device_config[CONFIG_KEY(CPU_THROUGHPUT_STREAMS)] = FLAGS_nstreams;
}
if (supported(CONFIG_KEY(CPU_BIND_THREAD)) && isFlagSetInCommandLine("pin")) {
device_config[CONFIG_KEY(CPU_BIND_THREAD)] = FLAGS_pin;
}
}
}

Expand Down Expand Up @@ -422,6 +421,19 @@ int main(int argc, char* argv[]) {
slog::info << "Load network took " << duration_ms << " ms" << slog::endl;
if (statistics)
statistics->addParameters(StatisticsReport::Category::EXECUTION_RESULTS, {{"load network time (ms)", duration_ms}});

if (!ov_perf_mode.empty()) {
    std::cout << "OV_PERFORMANCE_MODE: " << ov_perf_mode << std::endl;
    // Dump the actual per-device settings the preset produced (debugging aid).
    for (auto& device : devices) {
        std::vector<std::string> supported_config_keys = ie.GetMetric(device,
                METRIC_KEY(SUPPORTED_CONFIG_KEYS));
        std::cout << "Device: " << device << std::endl;
        // const-ref avoids copying each std::string key per iteration
        for (const auto& cfg : supported_config_keys) {
            std::cout << " {" << cfg << " , " << exeNetwork.GetConfig(cfg).as<std::string>() << " }" << std::endl;
        }
    }
}
} else {
next_step();
slog::info << "Skipping the step for compiled network" << slog::endl;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,19 @@ std::vector<std::string> IStreamsExecutor::Config::SupportedKeys() {
CONFIG_KEY_INTERNAL(CPU_THREADS_PER_STREAM),
};
}
int IStreamsExecutor::Config::GetDefaultNumStreams() {
    // On a single-socket machine count all logical processors; on multi-socket
    // machines count only the physical CPU cores.
    const int sockets = static_cast<int>(getAvailableNUMANodes().size());
    const int num_cores = (sockets == 1) ? std::thread::hardware_concurrency() : getNumberOfCPUCores();
    // Bare minimum of streams: pick the first divisor (preferring 4, then 5,
    // then 3 cores per stream) that evenly splits the available cores.
    for (const int cores_per_stream : {4, 5, 3}) {
        if (num_cores % cores_per_stream == 0)
            return std::max(cores_per_stream, num_cores / cores_per_stream);
    }
    // Weird core counts (e.g. cores disabled in BIOS) that divide by none of
    // the candidates fall back to a single stream.
    return 1;
}

void IStreamsExecutor::Config::SetConfig(const std::string& key, const std::string& value) {
if (key == CONFIG_KEY(CPU_BIND_THREAD)) {
Expand All @@ -49,17 +62,8 @@ void IStreamsExecutor::Config::SetConfig(const std::string& key, const std::stri
if (value == CONFIG_VALUE(CPU_THROUGHPUT_NUMA)) {
_streams = static_cast<int>(getAvailableNUMANodes().size());
} else if (value == CONFIG_VALUE(CPU_THROUGHPUT_AUTO)) {
const int sockets = static_cast<int>(getAvailableNUMANodes().size());
// bare minimum of streams (that evenly divides available number of cores)
const int num_cores = sockets == 1 ? std::thread::hardware_concurrency() : getNumberOfCPUCores();
if (0 == num_cores % 4)
_streams = std::max(4, num_cores / 4);
else if (0 == num_cores % 5)
_streams = std::max(5, num_cores / 5);
else if (0 == num_cores % 3)
_streams = std::max(3, num_cores / 3);
else // if user disables some cores say in BIOS, so we got weird #cores which is not easy to divide
_streams = 1;
_streams = GetDefaultNumStreams();
} else {
int val_i;
try {
Expand Down
25 changes: 16 additions & 9 deletions inference-engine/src/mkldnn_plugin/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ Config::Config() {
#if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
#if defined(__APPLE__) || defined(_WIN32)
// 'CORES' is not implemented for Win/MacOS; so the 'NUMA' is default
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA;
#endif
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::NUMA;
#endif

if (getAvailableCoresTypes().size() > 1 /*Hybrid CPU*/) {
streamExecutorConfig._threadBindingType = InferenceEngine::IStreamsExecutor::HYBRID_AWARE;
}
#endif
#endif

if (!with_cpu_x86_bfloat16())
enforceBF16 = false;
Expand All @@ -43,11 +43,10 @@ Config::Config() {


void Config::readProperties(const std::map<std::string, std::string> &prop) {
auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
for (auto& kvp : prop) {
auto& key = kvp.first;
auto& val = kvp.second;

const auto streamExecutorConfigKeys = streamExecutorConfig.SupportedKeys();
for (const auto& kvp : prop) {
const auto& key = kvp.first;
const auto& val = kvp.second;
if (streamExecutorConfigKeys.end() !=
std::find(std::begin(streamExecutorConfigKeys), std::end(streamExecutorConfigKeys), key)) {
streamExecutorConfig.SetConfig(key, val);
Expand Down Expand Up @@ -109,7 +108,13 @@ void Config::readProperties(const std::map<std::string, std::string> &prop) {
IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_ENFORCE_BF16
<< ". Expected only YES/NO";
}
} else {
} else if (key == PluginConfigParams::KEY_OV_PERFORMANCE_MODE) {
if (val == PluginConfigParams::LATENCY || val == PluginConfigParams::THROUGHPUT)
ovPerfMode = val;
else
IE_THROW() << "Wrong value for property key " << PluginConfigParams::KEY_OV_PERFORMANCE_MODE
<< ". Expected only " << PluginConfigParams::LATENCY << "/" << PluginConfigParams::THROUGHPUT;
} else {
IE_THROW(NotFound) << "Unsupported property " << key << " by CPU plugin";
}
_config.clear();
Expand Down Expand Up @@ -158,6 +163,8 @@ void Config::updateProperties() {
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::YES });
else
_config.insert({ PluginConfigParams::KEY_ENFORCE_BF16, PluginConfigParams::NO });
if (!ovPerfMode.empty())
_config.insert({ PluginConfigParams::KEY_OV_PERFORMANCE_MODE, ovPerfMode });
}
}

Expand Down
1 change: 1 addition & 0 deletions inference-engine/src/mkldnn_plugin/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct Config {
bool enableDynamicBatch = false;
std::string dumpToDot = "";
int batchLimit = 0;
std::string ovPerfMode = "";
InferenceEngine::IStreamsExecutor::Config streamExecutorConfig;

#if defined(__arm__) || defined(__aarch64__)
Expand Down
Loading

0 comments on commit 1ae1edc

Please sign in to comment.