diff --git a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst
index 19c4a013c54aae..390fe00605f2c6 100644
--- a/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst
+++ b/docs/articles_en/learn-openvino/openvino-samples/benchmark-tool.rst
@@ -245,6 +245,13 @@ There are several options for setting the number of inference iterations:
    The more iterations a model runs, the better the statistics will be for determining
    average latency and throughput.
 
+Maximum inference rate
+++++++++++++++++++++++
+
+By default, the benchmarking app runs inference at the maximum rate that the device allows.
+The maximum inference rate can be configured with the ``-max_irate`` option.
+Tweaking this value allows better accuracy in power usage measurement by limiting the number of executions.
+
 Inputs
 ++++++++++++++++++++
 
@@ -337,7 +344,7 @@ following usage message:
    [Step 1/11] Parsing and validating input arguments
    [ INFO ] Parsing input parameters
    usage: benchmark_app.py [-h [HELP]] [-i PATHS_TO_INPUT [PATHS_TO_INPUT ...]] -m PATH_TO_MODEL [-d TARGET_DEVICE]
-                           [-hint {throughput,cumulative_throughput,latency,none}] [-niter NUMBER_ITERATIONS] [-t TIME] [-b BATCH_SIZE] [-shape SHAPE]
+                           [-hint {throughput,cumulative_throughput,latency,none}] [-niter NUMBER_ITERATIONS] [-max_irate MAXIMUM_INFERENCE_RATE] [-t TIME] [-b BATCH_SIZE] [-shape SHAPE]
                            [-data_shape DATA_SHAPE] [-layout LAYOUT] [-extensions EXTENSIONS] [-c PATH_TO_CLDNN_CONFIG] [-cdir CACHE_DIR] [-lfile [LOAD_FROM_FILE]]
                            [-api {sync,async}] [-nireq NUMBER_INFER_REQUESTS] [-nstreams NUMBER_STREAMS] [-inference_only [INFERENCE_ONLY]] [-infer_precision INFER_PRECISION]
                            [-ip {bool,f16,f32,f64,i8,i16,i32,i64,u8,u16,u32,u64}]
@@ -536,6 +543,9 @@ following usage message:
                              'none': no device performance mode will be set. Using explicit 'nstreams' or other device-specific options, please set hint to 'none'
    -niter                    Optional. Number of iterations. If not specified, the number of iterations is calculated depending on a device.
+   -max_irate                Optional. Maximum inference rate in frames per second.
+                             If not specified, the default value is 0 and inference runs at the maximum rate the device allows.
+                             Tweaking this value allows better accuracy in power usage measurement by limiting the number of executions.
    -t                        Optional. Time in seconds to execute topology.
 
 Input shapes
diff --git a/samples/cpp/benchmark_app/benchmark_app.hpp b/samples/cpp/benchmark_app/benchmark_app.hpp
index c8411855e38022..cf38ff6708ad29 100644
--- a/samples/cpp/benchmark_app/benchmark_app.hpp
+++ b/samples/cpp/benchmark_app/benchmark_app.hpp
@@ -65,10 +65,11 @@ static const char cache_dir_message[] = "Optional. Enables caching of loaded mod
 static const char load_from_file_message[] = "Optional. Loads model from file directly without read_model."
                                              " All CNNNetwork options (like re-shape) will be ignored";
 
-/// @brief message for run frequency
-static const char run_frequency_message[] =
-    "Execute at a fixed frequency. Note if the targeted rate per second cannot be reached, "
-    "the benchmark would start the next run immediately, trying its best to catch up.";
+/// @brief message for maximum inference rate
+static const char maximum_inference_rate_message[] =
+    "Optional. Maximum inference rate in frames per second. "
+    "If not specified, the default value is 0 and inference runs at the maximum rate the device allows. "
" + "Tweaking this value allow better accuracy in power usage measurement by limiting the execution."; /// @brief message for execution time static const char execution_time_message[] = "Optional. Time in seconds to execute topology."; @@ -313,7 +314,7 @@ DEFINE_string(api, "async", api_message); DEFINE_uint64(nireq, 0, infer_requests_count_message); /// @brief Execute infer requests at a fixed frequency -DEFINE_double(rfreq, 0, run_frequency_message); +DEFINE_double(max_irate, 0, maximum_inference_rate_message); /// @brief Number of streams to use for inference on the CPU (also affects Hetero cases) DEFINE_string(nstreams, "", infer_num_streams_message); @@ -396,7 +397,7 @@ static void show_usage() { std::cout << " -hint (latency or throughput or cumulative_throughput or none) " << hint_message << std::endl; std::cout << " -niter " << iterations_count_message << std::endl; - std::cout << " -rfreq \"\" " << run_frequency_message << std::endl; + std::cout << " -max_irate \"\" " << maximum_inference_rate_message << std::endl; std::cout << " -t " << execution_time_message << std::endl; std::cout << std::endl; std::cout << "Input shapes" << std::endl; diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp index 14091e50f1905e..253468fe2cc0ec 100644 --- a/samples/cpp/benchmark_app/main.cpp +++ b/samples/cpp/benchmark_app/main.cpp @@ -1155,8 +1155,8 @@ int main(int argc, char* argv[]) { execTime = std::chrono::duration_cast(Time::now() - startTime).count(); processedFramesN += batchSize; - if (FLAGS_rfreq > 0) { - int64_t nextRunFinishTime = 1 / FLAGS_rfreq * processedFramesN * 1.0e9; + if (FLAGS_max_irate > 0) { + int64_t nextRunFinishTime = 1 / FLAGS_max_irate * processedFramesN * 1.0e9; std::this_thread::sleep_for(std::chrono::nanoseconds(nextRunFinishTime - execTime)); } } diff --git a/tests/samples_tests/smoke_tests/test_benchmark_app.py b/tests/samples_tests/smoke_tests/test_benchmark_app.py old mode 100644 new mode 100755 index f9b37e87614d42..3be4f4b88eaab8 --- a/tests/samples_tests/smoke_tests/test_benchmark_app.py +++ b/tests/samples_tests/smoke_tests/test_benchmark_app.py @@ -38,13 +38,16 @@ def create_random_4bit_bin_file(tmp_path, shape, name): f.write(raw_data) -def verify(sample_language, device, api=None, nireq=None, shape=None, data_shape=None, nstreams=None, layout=None, pin=None, cache=None, tmp_path=None, model='bvlcalexnet-12.onnx', inp='dog-224x224.bmp', batch='1', niter='10', tm=None): +def verify(sample_language, device, api=None, nireq=None, shape=None, data_shape=None, nstreams=None, + layout=None, pin=None, cache=None, tmp_path=None, model='bvlcalexnet-12.onnx', + inp='dog-224x224.bmp', batch='1', niter='10', max_irate=None, tm=None): output = get_cmd_output( get_executable(sample_language), *prepend(cache, inp, model, tmp_path), *('-nstreams', nstreams) if nstreams else '', *('-layout', layout) if layout else '', *('-nireq', nireq) if nireq else '', + *('-max_irate', max_irate) if max_irate else '', *('-shape', shape) if shape else '', *('-data_shape', data_shape) if data_shape else '', *('-hint', 'none') if nstreams or pin else '', @@ -84,6 +87,13 @@ def test_nireq(sample_language, api, nireq, device, cache, tmp_path): verify(sample_language, device, api=api, nireq=nireq, cache=cache, tmp_path=tmp_path) +@pytest.mark.parametrize('sample_language', ['C++', 'Python']) +@pytest.mark.parametrize('max_irate', ['', '0', '10']) +@pytest.mark.parametrize('device', get_devices()) +def test_max_irate(sample_language, device, max_irate, cache, 
+    verify(sample_language, device, max_irate=max_irate, cache=cache, tmp_path=tmp_path)
+
+
 @pytest.mark.skipif('CPU' not in get_devices(), reason='affinity is a CPU property')
 @pytest.mark.parametrize('sample_language', ['C++', 'Python'])
 @pytest.mark.parametrize('pin', ['YES', 'NO', 'NUMA', 'HYBRID_AWARE'])
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py b/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py
index adba697b598b4a..fb6f5a8ecd7a6d 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import os
+import time
 from datetime import datetime
 from math import ceil
 from openvino.runtime import Core, get_version, AsyncInferQueue
@@ -15,7 +16,8 @@ def percentile(values, percent):
 
 class Benchmark:
     def __init__(self, device: str, number_infer_requests: int = 0, number_iterations: int = None,
-                 duration_seconds: int = None, api_type: str = 'async', inference_only = None):
+                 duration_seconds: int = None, api_type: str = 'async', inference_only = None,
+                 maximum_inference_rate: float = 0):
         self.device = device
         self.core = Core()
         self.nireq = number_infer_requests if api_type == 'async' else 1
@@ -24,6 +26,7 @@ def __init__(self, device: str, number_infer_requests: int = 0, number_iteration
         self.api_type = api_type
         self.inference_only = inference_only
         self.latency_groups = []
+        self.max_irate = maximum_inference_rate
 
     def __del__(self):
         del self.core
@@ -83,13 +86,21 @@ def first_infer(self, requests):
         requests.wait_all()
         return requests[id].latency
 
+    def inference_rate_delay(self, processed_frames, exec_time):
+        if self.max_irate > 0:
+            next_run_finish_time = 1 / self.max_irate * processed_frames
+            delay = next_run_finish_time - exec_time
+            time.sleep(delay if delay > 0 else 0)
+
     def sync_inference(self, request, data_queue):
+        processed_frames = 0
         exec_time = 0
         iteration = 0
         times = []
         start_time = datetime.utcnow()
         while (self.niter and iteration < self.niter) or \
                 (self.duration_seconds and exec_time < self.duration_seconds):
+            processed_frames += data_queue.get_next_batch_size()
             if self.inference_only == False:
                 request.set_input_tensors(data_queue.get_next_input())
             request.infer()
@@ -97,10 +108,12 @@ def sync_inference(self, request, data_queue):
             iteration += 1
             exec_time = (datetime.utcnow() - start_time).total_seconds()
+            self.inference_rate_delay(processed_frames, exec_time)
 
         total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
         return sorted(times), total_duration_sec, iteration
 
-    def async_inference_only(self, infer_queue):
+    def async_inference_only(self, infer_queue, data_queue):
+        processed_frames = 0
         exec_time = 0
         iteration = 0
         times = []
@@ -109,6 +122,7 @@ def async_inference_only(self, infer_queue):
         while (self.niter and iteration < self.niter) or \
                 (self.duration_seconds and exec_time < self.duration_seconds) or \
                 (iteration % self.nireq):
+            processed_frames += data_queue.get_next_batch_size()
             idle_id = infer_queue.get_idle_request_id()
             if idle_id in in_fly:
                 times.append(infer_queue[idle_id].latency)
@@ -118,6 +132,8 @@ def async_inference_only(self, infer_queue):
             iteration += 1
             exec_time = (datetime.utcnow() - start_time).total_seconds()
 
+            self.inference_rate_delay(processed_frames, exec_time)
+
         infer_queue.wait_all()
         total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
         for infer_request_id in in_fly:
@@ -149,6 +165,7 @@ def async_inference_full_mode(self, infer_queue, data_queue, pcseq):
             iteration += 1
             exec_time = (datetime.utcnow() - start_time).total_seconds()
 
+            self.inference_rate_delay(processed_frames, exec_time)
 
         infer_queue.wait_all()
         total_duration_sec = (datetime.utcnow() - start_time).total_seconds()
@@ -164,7 +181,7 @@ def main_loop(self, requests, data_queue, batch_size, latency_percentile, pcseq)
             times, total_duration_sec, iteration = self.sync_inference(requests[0], data_queue)
             fps = len(batch_size) * iteration / total_duration_sec
         elif self.inference_only:
-            times, total_duration_sec, iteration = self.async_inference_only(requests)
+            times, total_duration_sec, iteration = self.async_inference_only(requests, data_queue)
             fps = len(batch_size) * iteration / total_duration_sec
         else:
             times, total_duration_sec, processed_frames, iteration = self.async_inference_full_mode(requests, data_queue, pcseq)
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/main.py b/tools/benchmark_tool/openvino/tools/benchmark/main.py
old mode 100644
new mode 100755
index ad2839d217fcad..f1095fe514bd35
--- a/tools/benchmark_tool/openvino/tools/benchmark/main.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py
@@ -85,7 +85,8 @@ def is_flag_set_in_command_line(flag):
     next_step(step_id=2)
 
     benchmark = Benchmark(args.target_device, args.number_infer_requests,
-                          args.number_iterations, args.time, args.api_type, args.inference_only)
+                          args.number_iterations, args.time, args.api_type,
+                          args.inference_only, args.maximum_inference_rate)
 
     if args.extensions:
         benchmark.add_extension(path_to_extensions=args.extensions)
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
index aa79767cecc397..c358a26f736980 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
@@ -72,6 +72,9 @@ def parse_args():
     args.add_argument('-niter', '--number_iterations', type=check_positive, required=False, default=None,
                       help='Optional. Number of iterations. '
                            'If not specified, the number of iterations is calculated depending on a device.')
+    args.add_argument('-max_irate', '--maximum_inference_rate', type=float, required=False, default=0,
+                      help='Optional. Maximum inference rate in frames per second. '
+                           'If not specified, the inference will run at the maximum rate the device allows.')
    args.add_argument('-t', '--time', type=check_positive, required=False, default=None,
                       help='Optional. Time in seconds to execute topology.')
diff --git a/tools/benchmark_tool/setup.py b/tools/benchmark_tool/setup.py
old mode 100644
new mode 100755
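
Note on the throttling logic above: the new -max_irate option caps the benchmark loop at a target frame rate. After each iteration the loop computes the earliest wall-clock time at which the current number of processed frames is allowed to finish (processed_frames / max_irate seconds) and sleeps for the remainder if it is running ahead of that schedule; the default value of 0 disables the cap. A minimal standalone sketch of that calculation, with illustrative names that are not part of the patch:

    import time

    def inference_rate_delay(max_irate: float, processed_frames: int, exec_time: float) -> None:
        # Target: processed_frames frames should take at least processed_frames / max_irate seconds.
        if max_irate > 0:
            next_run_finish_time = processed_frames / max_irate
            delay = next_run_finish_time - exec_time
            time.sleep(max(delay, 0.0))

    # Toy loop: with max_irate=10, ten "inferences" take roughly one second in total.
    start = time.perf_counter()
    for frame in range(1, 11):
        pass  # stand-in for request.infer()
        inference_rate_delay(10.0, frame, time.perf_counter() - start)
    print(f"elapsed: {time.perf_counter() - start:.2f}s")

Because the delay is computed from the cumulative frame count rather than per iteration, a run that falls behind the target rate simply continues without sleeping, which matches the behavior of the C++ and Python changes in this patch.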