From f9bd7402ffb7e01fb6b6f0dc3cac4a022dc03d8e Mon Sep 17 00:00:00 2001
From: Alexey Lebedev
Date: Thu, 2 Dec 2021 16:18:18 +0300
Subject: [PATCH] [tools] New API and dynamic model support in the Python
 benchmark app (#8582)

* Preprocessing API - base classes

  Includes the API definition for trivial mean/scale operations (which don't
  require layout). Mean/scale with 'layout' support will be done under a
  separate task together with Layout. Current test code coverage: 100%

* Python bindings for base preprocessing API
* remove pre_post_process directory from ngraph/core
* remove files from ngraph/python dir
* move pyngraph pre_post_process files from ngraph/python to runtime
* remove pre_post_process test from CMakeList
* move include to the header
* update include path for pre_post_process
* style fix
* bind InputTensorInfo::set_layout
* cleaned test_preprocess
* fix test expected output
* remove duplicate test
* update description of set_element_type
* fix style
* move preprocess from pyngraph to pyopenvino/graph
* update test_preprocess imports and remove unnecessary test
* remove duplicate import
* update custom method
* update test
* update test
* create decorator that changes Node into Output
* create function that casts Node to Output
* update test_preprocess to use decorator for custom function
* change _cast_to_output -> _from_node
* style fix
* add tests for scale and mean with vector input
* style fix
* add docstring for custom_preprocess_function
* bind InputInfo network method
* style fix
* bind OutputInfo
* fix description of preprocess submodule
* fix style
* update copyright year
* bind OutputTensorInfo
* bind OutputNetworkInfo and InputNetworkInfo
* Bind exec core ov (#50)
* Output const node python tests (#52)
* add python bindings tests for Output
* add proper tests
* add new line
* rename ie_version to version
* Pszmel/bind infer request (#51)
* remove set_batch, get_blob and set_blob
* update InferRequest class
* change InferenceEngine::InferRequest to ov::runtime::InferRequest
* update set_callback body
* update bindings to reflect ov::runtime::InferRequest
* bind set_input_tensor and get_input_tensor
* style fix
* clean ie_infer_queue.cpp
* Bind exec core ov (#50)
* bind core, exec_net classes
* rm unused function
* add new line
* rename ie_infer_request -> infer_request
* update imports
* update __init__.py
* update ie_api.py
* Replace old containers with the new one
* create impl for create_infer_request
* comment out infer_queue to avoid errors with old infer_request
* update infer_request bind to reflect new infer_request api
* comment out input_info from ie_network to avoid errors with old containers
* Register new containers and comment out InferQueue
* update infer request tests
* style fix
* remove unused imports
* remove unused imports and 2 methods
* add tests to cover all new methods from infer_request
* style fix
* add test
* remove registration of InferResults
* update name of exception_ptr parameter
* update the loops that iterate through inputs and outputs
* clean setCustomCallbacks
* style fix
* add Tensor import
* style fix
* update infer and normalize_inputs
* style fix
* rename startTime and endTime
* Create test for mixed keys as infer arguments
* update infer function
* update return type of infer

Co-authored-by: Bartek Szmelczynski

* fix get_version
* fix opaque issue
* some cosmetic changes
* fix codestyle in tests
* make tests green
* Extend python InferRequest
* Extend python Function
* Change return value of infer call
* Fix missing precisions conversions in CPU plugin
* Rework of runtime for new tests
* Fixed onnx reading in python tests
* Edit compatibility tests
* Edit tests
* Add FLOAT_LIKE xfails
* bind ColorFormat and ResizeAlgorithm
* clean imports
* fix typo
* [Python API] bind ProfilingInfo (#55)
* bind ProfilingInfo
* Add tests
* Fix code style
* Add property
* fix codestyle
* Infer new request method (#56)
* fix conflicts, add infer_new_request function
* remove redundant functions, fix style
* revert the unwanted changes
* revert removal of the Blob
* revert removal of isTblob
* add add_extension from path
* codestyle
* add PostProcessSteps to init
* bind PreProcessSteps
* create additional tests
* fix win build
* add inputs-outputs to function
* update infer queue
* fix code style
* Hot-fix CPU plugin with precision
* fix start_async
* add performance hint to time infer (#8480)
* Updated common migration pipeline (#8176)
* Updated common migration pipeline
* Fixed merge issue
* Added new model and extended example
* Fixed typo
* Added v10-v11 comparison
* Avoid redundant graph nodes scans (#8415)
* Refactor work with env variables (#8208)
* del MO_ROOT
* del MO_ROOT from common_utils.py
* add MO_PATH to common_utils.py
* change mo_path
* [IE Sample Scripts] Use cmake to build samples (#8442)
* Use cmake to build samples
* Add the option to set custom build output folder
* Remove opset8 from compatibility ngraph python API (#8452)
* [GPU] OneDNN gpu submodule update to version 2.5 (#8449)
* [GPU] OneDNN gpu submodule update to version 2.5
* [GPU] Updated onednn submodule and added layout optimizer fix
* Install rules for static libraries case (#8384)
* Proper cmake install for static libraries case
* Added an ability to skip template plugin
* Added install rules for VPU / GPU
* Install more libraries
* Fixed absolute TBB include paths
* Disable GNA
* Fixed issue with linker
* Some fixes
* Fixed linkage issues in tests
* Disabled some tests
* Updated CI pipelines
* Fixed Windows linkage
* Fixed custom_opset test for static case
* Fixed CVS-70313
* Continue on error
* Fixed clang-format
* Try to fix Windows linker
* Fixed compilation
* Disable samples
* Fixed samples build with THREADING=SEQ
* Fixed link error on Windows
* Fixed ieFuncTests
* Added static Azure CI
* Revert "Fixed link error on Windows"

  This reverts commit 78cca36fd21cdbd639216df6cca10df7f88bce3e.

* Merge static and dynamic linux pipelines
* Fixed Azure
* fix codestyle
* rename all methods in this class to snake_case
* some updates
* code style
* fix code style in tests
* update statistics reporting
* update filling inputs
* change ngraph.Type to ov.Type
* fix typo
* save work
* save work
* save work
* compute latency in callback
* save work
* Fix get_idle_request
* save work
* fix latency
* Fix code style
* update AppInputInfo
* add iteration to PartialShape
* fix rebasing
* bind result::get_layout()
* correct mistakes
* fix setup
* use parameters/results instead of inputs/outputs
* move _from_node to node_output.hpp
* add read_model from buffer
* update imports
* revert package struct
* add new line
* remove bad quotes
* update imports
* style fix
* add new line
* Fix preprocessing
* rename function args
* set NCHW layout to image as default
* Fix input fillings
* remove Type import
* update tests
* style fix
* test clean
* remove blank line
* Add tensor_shape
* fix comments
* update PrePostProcessor init and build methods
* create test with model, update tests with new PrePostProcessor init and build
* Change filling inputs
* fix preprocessing
* basic support for dynamic shapes
* fix legacy mode
* rename ie to core
* fix cpp code style
* fix input files parsing
* fix binary filling
* support dynamic batch size
* process images with original shapes if no tensor shapes were given
* fix fps and number of iterations
* Add new metrics
* support passing a path to a folder in the input mapping
* add pcseq flag
* fix resolving conflicts
* dump statistics per group
* check for compatibility with partial shape
* revert statistic report names
* code refactoring
* update parameters
* enable legacy_mode if data size is less than nireq
* add serialize to offline_transformations
* Fix preprocessing import
* change log output due to ci parsing
* fix layout
* allow to pass batch size with undefined layout
* add serializer
* fix comments from jiwaszki
* Fix latency parsing for ci
* code style
* rename tensor_shape to data_shape
* add message if image is processed with original shape
* fix syntax warning
* remove default legacy_mode if requests cover all data
* rewrite all file parsing
* fix preprocessing
* Fix preprocessing #2
* Use Layout instead of str
* Fix file extensions
* Fix image sizes filling
* sort input files
* [Python API] quick fix of packaging
* update tests
* fix setup.py
* small fix
* small fixes according to comments
* skip mo frontend tests
* full mode is default for dynamic models only
* backward compatibility
* Fix package
* set layout in runtime
* static mode for dynamic models with all equal data shapes
* use get_tensor instead of set_tensor in legacy mode
* benchmarking a dynamic model is available in full mode only
* fix layout detection
* use batch_size * iteration instead of processed_frames in legacy mode
* fix tensor naming
* represent --inference_only
* refactoring main loop
* Fix number of iterations for full mode

Co-authored-by: Michael Nosov
Co-authored-by: pszmel
Co-authored-by: Bartek Szmelczynski
Co-authored-by: Anastasia Kuporosova
Co-authored-by: jiwaszki
Co-authored-by: Victor Kuznetsov
Co-authored-by: Ilya Churaev
Co-authored-by: Tomasz Jankowski
Co-authored-by: Dmitry Pigasin
Co-authored-by: Artur Kulikowski
Co-authored-by: Ilya Znamenskiy
Co-authored-by: Ilya Lavrenov
---
 .../src/pyopenvino/graph/partial_shape.cpp |  19 +
 .../python/src/pyopenvino/graph/shape.cpp  |   9 +-
 .../openvino/tools/benchmark/benchmark.py  | 237 ++++---
 .../openvino/tools/benchmark/main.py       | 215 ++++--
.../openvino/tools/benchmark/parameters.py | 19 +- .../tools/benchmark/utils/constants.py | 4 +- .../tools/benchmark/utils/inputs_filling.py | 619 ++++++++++-------- .../benchmark/utils/statistics_report.py | 50 +- .../openvino/tools/benchmark/utils/utils.py | 528 ++++++++++----- 9 files changed, 1076 insertions(+), 624 deletions(-) diff --git a/src/bindings/python/src/pyopenvino/graph/partial_shape.cpp b/src/bindings/python/src/pyopenvino/graph/partial_shape.cpp index 3eea18e334e28e..32ccbc29255587 100644 --- a/src/bindings/python/src/pyopenvino/graph/partial_shape.cpp +++ b/src/bindings/python/src/pyopenvino/graph/partial_shape.cpp @@ -192,6 +192,25 @@ void regclass_graph_PartialShape(py::module m) { }, py::is_operator()); + shape.def("__len__", [](const ov::PartialShape& self) { + return self.size(); + }); + + shape.def("__setitem__", [](ov::PartialShape& self, size_t key, ov::Dimension& d) { + self[key] = d; + }); + + shape.def("__getitem__", [](const ov::PartialShape& self, size_t key) { + return self[key]; + }); + + shape.def( + "__iter__", + [](ov::PartialShape& self) { + return py::make_iterator(self.begin(), self.end()); + }, + py::keep_alive<0, 1>()); /* Keep vector alive while iterator is used */ + shape.def("__str__", [](const ov::PartialShape& self) -> std::string { std::stringstream ss; ss << self; diff --git a/src/bindings/python/src/pyopenvino/graph/shape.cpp b/src/bindings/python/src/pyopenvino/graph/shape.cpp index 02b906e6c820dd..ffd7a9ee3ad349 100644 --- a/src/bindings/python/src/pyopenvino/graph/shape.cpp +++ b/src/bindings/python/src/pyopenvino/graph/shape.cpp @@ -11,6 +11,7 @@ #include #include +#include "openvino/core/dimension.hpp" // ov::Dimension #include "pyopenvino/graph/shape.hpp" namespace py = pybind11; @@ -24,7 +25,13 @@ void regclass_graph_Shape(py::module m) { shape.def("__len__", [](const ov::Shape& v) { return v.size(); }); - shape.def("__getitem__", [](const ov::Shape& v, int key) { + shape.def("__setitem__", [](ov::Shape& self, size_t key, size_t d) { + self[key] = d; + }); + shape.def("__setitem__", [](ov::Shape& self, size_t key, ov::Dimension d) { + self[key] = d.get_length(); + }); + shape.def("__getitem__", [](const ov::Shape& v, size_t key) { return v[key]; }); diff --git a/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py b/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py index 0f744cf49374d1..2b7544966bfacf 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py @@ -4,7 +4,8 @@ import os from datetime import datetime from math import ceil -from openvino.inference_engine import IENetwork, IECore, get_version, StatusCode +from typing import Union +from openvino.runtime import Core, get_version, AsyncInferQueue from .utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, GPU_DEVICE_NAME, XML_EXTENSION, BIN_EXTENSION from .utils.logging import logger @@ -15,31 +16,33 @@ def percentile(values, percent): return values[ceil(len(values) * percent / 100) - 1] class Benchmark: - def __init__(self, device: str, number_infer_requests: int = None, number_iterations: int = None, - duration_seconds: int = None, api_type: str = 'async'): + def __init__(self, device: str, number_infer_requests: int = 0, number_iterations: int = None, + duration_seconds: int = None, api_type: str = 'async', inference_only = None): self.device = device - self.ie = IECore() - self.nireq = number_infer_requests + self.core = Core() + self.nireq = 
number_infer_requests if api_type == 'async' else 1 self.niter = number_iterations self.duration_seconds = get_duration_seconds(duration_seconds, self.niter, self.device) self.api_type = api_type + self.inference_only = inference_only + self.latency_groups = [] def __del__(self): - del self.ie + del self.core def add_extension(self, path_to_extension: str=None, path_to_cldnn_config: str=None): if path_to_cldnn_config: - self.ie.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME) + self.core.set_config({'CONFIG_FILE': path_to_cldnn_config}, GPU_DEVICE_NAME) logger.info(f'GPU extensions is loaded {path_to_cldnn_config}') if path_to_extension: - self.ie.add_extension(extension_path=path_to_extension, device_name=CPU_DEVICE_NAME) + self.core.add_extension(extension_path=path_to_extension) logger.info(f'CPU extensions is loaded {path_to_extension}') def get_version_info(self) -> str: logger.info(f"InferenceEngine:\n{'': <9}{'API version':.<24} {get_version()}") version_string = 'Device info\n' - for device, version in self.ie.get_versions(self.device).items(): + for device, version in self.core.get_versions(self.device).items(): version_string += f"{'': <9}{device}\n" version_string += f"{'': <9}{version.description:.<24}{' version'} {version.major}.{version.minor}\n" version_string += f"{'': <9}{'Build':.<24} {version.build_number}\n" @@ -47,119 +50,161 @@ def get_version_info(self) -> str: def set_config(self, config = {}): for device in config.keys(): - self.ie.set_config(config[device], device) + self.core.set_config(config[device], device) def set_cache_dir(self, cache_dir: str): - self.ie.set_config({'CACHE_DIR': cache_dir}, '') + self.core.set_config({'CACHE_DIR': cache_dir}, '') - def read_network(self, path_to_model: str): + def read_model(self, path_to_model: str): model_filename = os.path.abspath(path_to_model) head, ext = os.path.splitext(model_filename) weights_filename = os.path.abspath(head + BIN_EXTENSION) if ext == XML_EXTENSION else "" - ie_network = self.ie.read_network(model_filename, weights_filename) - return ie_network - - def load_network(self, ie_network: IENetwork, config = {}): - exe_network = self.ie.load_network(ie_network, - self.device, - config=config, - num_requests=1 if self.api_type == 'sync' else self.nireq or 0) - # Number of requests - self.nireq = len(exe_network.requests) - - return exe_network - - def load_network_from_file(self, path_to_model: str, config = {}): - exe_network = self.ie.load_network(path_to_model, - self.device, - config=config, - num_requests=1 if self.api_type == 'sync' else self.nireq or 0) - # Number of requests - self.nireq = len(exe_network.requests) - - return exe_network - - def import_network(self, path_to_file : str, config = {}): - exe_network = self.ie.import_network(model_file=path_to_file, - device_name=self.device, - config=config, - num_requests=1 if self.api_type == 'sync' else self.nireq or 0) - # Number of requests - self.nireq = len(exe_network.requests) - return exe_network - - def first_infer(self, exe_network): - infer_request = exe_network.requests[0] - - # warming up - out of scope + return self.core.read_model(model_filename, weights_filename) + + def create_infer_requests(self, exe_network): if self.api_type == 'sync': - infer_request.infer() + requests = [exe_network.create_infer_request()] else: - infer_request.async_infer() - status = infer_request.wait() - if status != StatusCode.OK: - raise Exception(f"Wait for all requests is failed with status code {status}!") - return 
infer_request.latency + requests = AsyncInferQueue(exe_network, self.nireq) + self.nireq = len(requests) + return requests - def infer(self, exe_network, batch_size, latency_percentile, progress_bar=None): + def first_infer(self, requests): + if self.api_type == 'sync': + requests[0].infer() + return requests[0].latency + else: + id = requests.get_idle_request_id() + requests.start_async() + requests.wait_all() + return requests[id].latency + + def update_progress_bar(self, progress_bar, exec_time, progress_count): + if self.duration_seconds: + # calculate how many progress intervals are covered by current iteration. + # depends on the current iteration time and time of each progress interval. + # Previously covered progress intervals must be skipped. + progress_interval_time = self.duration_seconds / progress_bar.total_num + new_progress = int(exec_time / progress_interval_time - progress_count) + progress_bar.add_progress(new_progress) + progress_count += new_progress + elif self.niter: + progress_bar.add_progress(1) + return progress_count + + def sync_inference(self, request, data_queue, progress_bar): progress_count = 0 - infer_requests = exe_network.requests - + exec_time = 0 + iteration = 0 + times = [] start_time = datetime.utcnow() + while (self.niter and iteration < self.niter) or \ + (self.duration_seconds and exec_time < self.duration_seconds): + if self.inference_only == False: + request.set_input_tensors(data_queue.get_next_input()) + request.infer() + times.append(request.latency) + iteration += 1 + + exec_time = (datetime.utcnow() - start_time).total_seconds() + + if progress_bar: + progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count) + + total_duration_sec = (datetime.utcnow() - start_time).total_seconds() + return sorted(times), total_duration_sec, iteration + + def async_inference_only(self, infer_queue, progress_bar): + progress_count = 0 exec_time = 0 iteration = 0 + times = [] + in_fly = set() + start_time = datetime.utcnow() + while (self.niter and iteration < self.niter) or \ + (self.duration_seconds and exec_time < self.duration_seconds) or \ + (iteration % self.nireq): + idle_id = infer_queue.get_idle_request_id() + if idle_id in in_fly: + times.append(infer_queue[idle_id].latency) + else: + in_fly.add(idle_id) + infer_queue.start_async() + iteration += 1 + + exec_time = (datetime.utcnow() - start_time).total_seconds() + + if progress_bar: + progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count) + infer_queue.wait_all() + total_duration_sec = (datetime.utcnow() - start_time).total_seconds() + for infer_request_id in in_fly: + times.append(infer_queue[infer_request_id].latency) + return sorted(times), total_duration_sec, iteration + + def async_inference_full_mode(self, infer_queue, data_queue, progress_bar, pcseq): + progress_count = 0 + processed_frames = 0 + exec_time = 0 + iteration = 0 times = [] + num_groups = len(self.latency_groups) in_fly = set() - # Start inference & calculate performance - # to align number if iterations to guarantee that last infer requests are executed in the same conditions **/ + start_time = datetime.utcnow() while (self.niter and iteration < self.niter) or \ (self.duration_seconds and exec_time < self.duration_seconds) or \ - (self.api_type == 'async' and iteration % self.nireq): - if self.api_type == 'sync': - infer_requests[0].infer() - times.append(infer_requests[0].latency) + (iteration % num_groups): + processed_frames += data_queue.get_next_batch_size() + idle_id = 
infer_queue.get_idle_request_id() + if idle_id in in_fly: + times.append(infer_queue[idle_id].latency) + if pcseq: + self.latency_groups[infer_queue.userdata[idle_id]].times.append(infer_queue[idle_id].latency) else: - infer_request_id = exe_network.get_idle_request_id() - if infer_request_id < 0: - status = exe_network.wait(num_requests=1) - if status != StatusCode.OK: - raise Exception("Wait for idle request failed!") - infer_request_id = exe_network.get_idle_request_id() - if infer_request_id < 0: - raise Exception("Invalid request id!") - if infer_request_id in in_fly: - times.append(infer_requests[infer_request_id].latency) - else: - in_fly.add(infer_request_id) - infer_requests[infer_request_id].async_infer() + in_fly.add(idle_id) + group_id = data_queue.current_group_id + infer_queue[idle_id].set_input_tensors(data_queue.get_next_input()) + infer_queue.start_async(userdata=group_id) iteration += 1 exec_time = (datetime.utcnow() - start_time).total_seconds() if progress_bar: - if self.duration_seconds: - # calculate how many progress intervals are covered by current iteration. - # depends on the current iteration time and time of each progress interval. - # Previously covered progress intervals must be skipped. - progress_interval_time = self.duration_seconds / progress_bar.total_num - new_progress = int(exec_time / progress_interval_time - progress_count) - progress_bar.add_progress(new_progress) - progress_count += new_progress - elif self.niter: - progress_bar.add_progress(1) - - # wait the latest inference executions - status = exe_network.wait() - if status != StatusCode.OK: - raise Exception(f"Wait for all requests is failed with status code {status}!") + progress_count = self.update_progress_bar(progress_bar, exec_time, progress_count) + infer_queue.wait_all() total_duration_sec = (datetime.utcnow() - start_time).total_seconds() for infer_request_id in in_fly: - times.append(infer_requests[infer_request_id].latency) - times.sort() - latency_ms = percentile(times, latency_percentile) - fps = batch_size * 1000 / latency_ms if self.api_type == 'sync' else batch_size * iteration / total_duration_sec + times.append(infer_queue[infer_request_id].latency) + return sorted(times), total_duration_sec, processed_frames, iteration + + def main_loop(self, requests, data_queue, batch_size, latency_percentile, progress_bar, pcseq): + if self.api_type == 'sync': + times, total_duration_sec, iteration = self.sync_inference(requests[0], data_queue, progress_bar) + elif self.inference_only: + times, total_duration_sec, iteration = self.async_inference_only(requests, progress_bar) + fps = len(batch_size) * iteration / total_duration_sec + else: + times, total_duration_sec, processed_frames, iteration = self.async_inference_full_mode(requests, data_queue, progress_bar, pcseq) + fps = processed_frames / total_duration_sec + + median_latency_ms = percentile(times, latency_percentile) + avg_latency_ms = sum(times) / len(times) + min_latency_ms = times[0] + max_latency_ms = times[-1] + + if self.api_type == 'sync': + fps = len(batch_size) * 1000 / median_latency_ms + + if pcseq: + for group in self.latency_groups: + if group.times: + group.times.sort() + group.avg = sum(group.times) / len(group.times) + group.min = group.times[0] + group.max = group.times[-1] + if progress_bar: progress_bar.finish() - return fps, latency_ms, total_duration_sec, iteration + return fps, median_latency_ms, avg_latency_ms, min_latency_ms, max_latency_ms, total_duration_sec, iteration diff --git 
a/tools/benchmark_tool/openvino/tools/benchmark/main.py b/tools/benchmark_tool/openvino/tools/benchmark/main.py index 422dc0d3991dc7..235676ad39ee1d 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/main.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py @@ -9,13 +9,14 @@ from openvino.tools.benchmark.parameters import parse_args from openvino.tools.benchmark.utils.constants import MULTI_DEVICE_NAME, HETERO_DEVICE_NAME, CPU_DEVICE_NAME, \ GPU_DEVICE_NAME, MYRIAD_DEVICE_NAME, GNA_DEVICE_NAME, BLOB_EXTENSION -from openvino.tools.benchmark.utils.inputs_filling import set_inputs +from openvino.tools.benchmark.utils.inputs_filling import get_input_data from openvino.tools.benchmark.utils.logging import logger from openvino.tools.benchmark.utils.progress_bar import ProgressBar -from openvino.tools.benchmark.utils.utils import next_step, get_number_iterations, process_precision, \ +from openvino.tools.benchmark.utils.utils import next_step, get_number_iterations, pre_post_processing, \ process_help_inference_string, print_perf_counters, dump_exec_graph, get_duration_in_milliseconds, \ get_command_line_arguments, parse_nstreams_value_per_device, parse_devices, get_inputs_info, \ - print_inputs_and_outputs_info, get_batch_size, load_config, dump_config + print_inputs_and_outputs_info, get_batch_size, load_config, dump_config, get_latency_groups, \ + check_for_static from openvino.tools.benchmark.utils.statistics_report import StatisticsReport, averageCntReport, detailedCntReport @@ -55,13 +56,13 @@ def is_flag_set_in_command_line(flag): if ext == BLOB_EXTENSION: is_network_compiled = True - print("Network is compiled") + print("Model is compiled") # ------------------------------ 2. Loading Inference Engine --------------------------------------------------- next_step(step_id=2) benchmark = Benchmark(args.target_device, args.number_infer_requests, - args.number_iterations, args.time, args.api_type) + args.number_iterations, args.time, args.api_type, args.inference_only) ## CPU (MKLDNN) extensions if CPU_DEVICE_NAME in device_name and args.path_to_extension: @@ -134,7 +135,7 @@ def set_throughput_streams(): key = get_device_type_from_name(device) + "_THROUGHPUT_STREAMS" if device in device_number_streams.keys(): ## set to user defined value - supported_config_keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS') + supported_config_keys = benchmark.core.get_metric(device, 'SUPPORTED_CONFIG_KEYS') if key not in supported_config_keys: raise Exception(f"Device {device} doesn't support config key '{key}'! 
" + "Please specify -nstreams for correct devices in format :,:") @@ -188,7 +189,7 @@ def set_throughput_streams(): if args.number_threads and is_flag_set_in_command_line("nthreads"): config[device]['GNA_LIB_N_THREADS'] = str(args.number_threads) else: - supported_config_keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS') + supported_config_keys = benchmark.core.get_metric(device, 'SUPPORTED_CONFIG_KEYS') if 'CPU_THREADS_NUM' in supported_config_keys and args.number_threads and is_flag_set_in_command_line("nthreads"): config[device]['CPU_THREADS_NUM'] = str(args.number_threads) if 'CPU_THROUGHPUT_STREAMS' in supported_config_keys and args.number_streams and is_flag_set_in_command_line("streams"): @@ -198,7 +199,6 @@ def set_throughput_streams(): perf_counts = perf_counts benchmark.set_config(config) - batch_size = args.batch_size if args.cache_dir: benchmark.set_cache_dir(args.cache_dir) @@ -206,36 +206,37 @@ def set_throughput_streams(): load_from_file_enabled = is_flag_set_in_command_line('load_from_file') or is_flag_set_in_command_line('lfile') if load_from_file_enabled and not is_network_compiled: next_step() - print("Skipping the step for loading network from file") + print("Skipping the step for loading model from file") next_step() - print("Skipping the step for loading network from file") + print("Skipping the step for loading model from file") next_step() - print("Skipping the step for loading network from file") + print("Skipping the step for loading model from file") # --------------------- 7. Loading the model to the device ------------------------------------------------- next_step() start_time = datetime.utcnow() - exe_network = benchmark.load_network(args.path_to_model) + exe_network = benchmark.core.compile_model(args.path_to_model) duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}" - logger.info(f"Load network took {duration_ms} ms") + logger.info(f"Compile model took {duration_ms} ms") if statistics: statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, [ ('load network time (ms)', duration_ms) ]) - app_inputs_info, _ = get_inputs_info(args.shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.input_info) - if batch_size == 0: - batch_size = 1 + app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.get_runtime_function().get_parameters()) + batch_size = get_batch_size(app_inputs_info) + if batch_size.is_dynamic and benchmark.api_type == 'sync': + raise Exception("Dynamic batch size is supported only in async mode") elif not is_network_compiled: # --------------------- 4. Read the Intermediate Representation of the network ----------------------------- next_step() start_time = datetime.utcnow() - ie_network = benchmark.read_network(args.path_to_model) - topology_name = ie_network.name + function = benchmark.read_model(args.path_to_model) + topology_name = function.get_name() duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}" - logger.info(f"Read network took {duration_ms} ms") + logger.info(f"Read model took {duration_ms} ms") if statistics: statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, [ @@ -245,15 +246,15 @@ def set_throughput_streams(): # --------------------- 5. 
Resizing network to match image sizes and given batch --------------------------- next_step() - app_inputs_info, reshape = get_inputs_info(args.shape, args.layout, args.batch_size, args.input_scale, args.input_mean, ie_network.input_info) + app_inputs_info, reshape = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, function.get_parameters()) if reshape: start_time = datetime.utcnow() - shapes = { k : v.shape for k,v in app_inputs_info.items() } + shapes = { info.name : info.partial_shape for info in app_inputs_info } logger.info( - 'Reshaping network: {}'.format(', '.join("'{}': {}".format(k, v) for k, v in shapes.items()))) - ie_network.reshape(shapes) + 'Reshaping model: {}'.format(', '.join("'{}': {}".format(k, str(v)) for k, v in shapes.items()))) + function.reshape(shapes) duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}" - logger.info(f"Reshape network took {duration_ms} ms") + logger.info(f"Reshape model took {duration_ms} ms") if statistics: statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, [ @@ -261,23 +262,25 @@ def set_throughput_streams(): ]) # use batch size according to provided layout and shapes - batch_size = get_batch_size(app_inputs_info) if args.layout else ie_network.batch_size + batch_size = get_batch_size(app_inputs_info) + if batch_size.is_dynamic and benchmark.api_type == 'sync': + raise Exception("Dynamic batch size is supported only in async mode") logger.info(f'Network batch size: {batch_size}') # --------------------- 6. Configuring inputs and outputs of the model -------------------------------------------------- next_step() - process_precision(ie_network, app_inputs_info, args.input_precision, args.output_precision, args.input_output_precision) - print_inputs_and_outputs_info(ie_network) + pre_post_processing(function, app_inputs_info, args.input_precision, args.output_precision, args.input_output_precision) + print_inputs_and_outputs_info(function) # --------------------- 7. 
Loading the model to the device ------------------------------------------------- next_step() start_time = datetime.utcnow() - exe_network = benchmark.load_network(ie_network) + exe_network = benchmark.core.compile_model(function, benchmark.device) duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}" - logger.info(f"Load network took {duration_ms} ms") + logger.info(f"Compile model took {duration_ms} ms") if statistics: statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, [ @@ -295,24 +298,26 @@ def set_throughput_streams(): next_step() start_time = datetime.utcnow() - exe_network = benchmark.import_network(args.path_to_model) + exe_network = benchmark.core.import_model(args.path_to_model) duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}" - logger.info(f"Import network took {duration_ms} ms") + logger.info(f"Import model took {duration_ms} ms") if statistics: statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, [ ('import network time (ms)', duration_ms) ]) - app_inputs_info, _ = get_inputs_info(args.shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.input_info) - if batch_size == 0: - batch_size = 1 + app_inputs_info, _ = get_inputs_info(args.shape, args.data_shape, args.layout, args.batch_size, args.input_scale, args.input_mean, exe_network.get_runtime_function().get_parameters()) + batch_size = get_batch_size(app_inputs_info) + if batch_size.is_dynamic and benchmark.api_type == 'sync': + raise Exception("Dynamic batch size is supported only in async mode") + # --------------------- 8. Querying optimal runtime parameters -------------------------------------------------- next_step() if is_flag_set_in_command_line('hint'): ## actual device-deduced settings for the hint for device in devices: - keys = benchmark.ie.get_metric(device, 'SUPPORTED_CONFIG_KEYS') + keys = benchmark.core.get_metric(device, 'SUPPORTED_CONFIG_KEYS') logger.info(f'DEVICE: {device}') for k in keys: logger.info(f' {k} , {exe_network.get_config(k)}') @@ -320,17 +325,23 @@ def set_throughput_streams(): # Update number of streams for device in device_number_streams.keys(): key = get_device_type_from_name(device) + '_THROUGHPUT_STREAMS' - device_number_streams[device] = benchmark.ie.get_config(device, key) - - # Number of requests - infer_requests = exe_network.requests - - # Iteration limit - benchmark.niter = get_number_iterations(benchmark.niter, benchmark.nireq, args.api_type) + device_number_streams[device] = benchmark.core.get_config(device, key) - # ------------------------------------ 9. Creating infer requests and filling input blobs ---------------------- + # ------------------------------------ 9. 
Creating infer requests and preparing input data ---------------------- next_step() + # Create infer requests + start_time = datetime.utcnow() + requests = benchmark.create_infer_requests(exe_network) + duration_ms = f"{(datetime.utcnow() - start_time).total_seconds() * 1000:.2f}" + logger.info(f"Create {benchmark.nireq} infer requests took {duration_ms} ms") + if statistics: + statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, + [ + ('create infer requests time (ms)', duration_ms) + ]) + + # Prepare input data paths_to_input = list() if args.paths_to_input: for path in args.paths_to_input: @@ -338,7 +349,39 @@ def set_throughput_streams(): paths_to_input.extend(path) else: paths_to_input.append(os.path.abspath(*path)) - set_inputs(paths_to_input, batch_size, app_inputs_info, infer_requests) + + data_queue = get_input_data(paths_to_input, app_inputs_info) + + static_mode = check_for_static(app_inputs_info) + if not static_mode and benchmark.api_type == 'sync': + raise Exception("Benchmarking of the model with dynamic shapes is available for async API only." + "Please use -api async -nstreams 1 -nireq 1 to emulate sync behavior.") + + if benchmark.inference_only == None: + if static_mode: + benchmark.inference_only = True + else: + benchmark.inference_only = False + elif benchmark.inference_only and not static_mode: + raise Exception("Benchmarking dynamic model available with input filling in measurement loop only!") + + benchmark.latency_groups = get_latency_groups(app_inputs_info) + + if len(benchmark.latency_groups) > 1: + logger.info(f"Defined {len(benchmark.latency_groups)} tensor groups:") + for group in benchmark.latency_groups: + print(f"\t{str(group)}") + + # Iteration limit + benchmark.niter = get_number_iterations(benchmark.niter, benchmark.nireq, max(len(info.shapes) for info in app_inputs_info), benchmark.api_type) + + # Set input tensors before first inference + for request in requests: + data_tensors = data_queue.get_next_input() + for port, data_tensor in data_tensors.items(): + input_tensor = request.get_input_tensor(port) + input_tensor.shape = data_tensor.shape + input_tensor.data[:] = data_tensor.data if statistics: statistics.add_parameters(StatisticsReport.Category.RUNTIME_CONFIG, @@ -346,9 +389,10 @@ def set_throughput_streams(): ('topology', topology_name), ('target device', device_name), ('API', args.api_type), + ('inference_only', benchmark.inference_only), ('precision', "UNSPECIFIED"), ('batch size', str(batch_size)), - ('number of iterations', str(benchmark.niter) if benchmark.niter else "0"), + ('number of iterations', str(benchmark.niter)), ('number of parallel infer requests', str(benchmark.nireq)), ('duration (ms)', str(get_duration_in_milliseconds(benchmark.duration_seconds))), ]) @@ -370,14 +414,19 @@ def set_throughput_streams(): progress_bar = ProgressBar(progress_bar_total_count, args.stream_output, args.progress) if args.progress else None - duration_ms = f"{benchmark.first_infer(exe_network):.2f}" + duration_ms = f"{benchmark.first_infer(requests):.2f}" logger.info(f"First inference took {duration_ms} ms") if statistics: statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, [ ('first inference time (ms)', duration_ms) ]) - fps, latency_ms, total_duration_sec, iteration = benchmark.infer(exe_network, batch_size, args.latency_percentile, progress_bar) + + pcseq = args.pcseq + if static_mode or len(benchmark.latency_groups) == 1: + pcseq = False + + fps, median_latency_ms, avg_latency_ms, min_latency_ms, 
max_latency_ms, total_duration_sec, iteration = benchmark.main_loop(requests, data_queue, batch_size, args.latency_percentile, progress_bar, pcseq) # ------------------------------------ 11. Dumping statistics report ------------------------------------------- next_step() @@ -391,12 +440,12 @@ def set_throughput_streams(): if perf_counts: perfs_count_list = [] - for ni in range(int(benchmark.nireq)): - perfs_count_list.append(exe_network.requests[ni].get_perf_counts()) + for request in requests: + perfs_count_list.append(request.profiling_info) if args.perf_counts: print_perf_counters(perfs_count_list) if statistics: - statistics.dump_performance_counters(perfs_count_list) + statistics.dump_performance_counters(perfs_count_list) if statistics: statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, @@ -405,30 +454,74 @@ def set_throughput_streams(): ('total number of iterations', str(iteration)), ]) if MULTI_DEVICE_NAME not in device_name: - if args.latency_percentile == 50: + latency_prefix = None + if args.latency_percentile == 50 and static_mode: + #latency_prefix = 'median latency (ms)' latency_prefix = 'latency (ms)' - else: - latency_prefix = 'latency (' + args.latency_percentile + ' percentile) (ms)' + elif args.latency_percentile != 50: + latency_prefix = 'latency (' + str(args.latency_percentile) + ' percentile) (ms)' + if latency_prefix: + statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, + [ + (latency_prefix, f'{median_latency_ms:.2f}'), + ]) statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, [ - (latency_prefix, f'{latency_ms:.2f}'), + ("avg latency", f'{avg_latency_ms:.2f}'), + ]) + statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, + [ + ("min latency", f'{min_latency_ms:.2f}'), + ]) + statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, + [ + ("max latency", f'{max_latency_ms:.2f}'), + ]) + if pcseq: + for group in benchmark.latency_groups: + statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, + [ + ("group", str(group)), + ]) + statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, + [ + ("avg latency", f'{group.avg:.2f}'), + ]) + statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, + [ + ("min latency", f'{group.min:.2f}'), + ]) + statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, + [ + ("max latency", f'{group.max:.2f}'), ]) - statistics.add_parameters(StatisticsReport.Category.EXECUTION_RESULTS, [ ('throughput', f'{fps:.2f}'), ]) + statistics.dump() - if statistics: - statistics.dump() print(f'Count: {iteration} iterations') print(f'Duration: {get_duration_in_milliseconds(total_duration_sec):.2f} ms') if MULTI_DEVICE_NAME not in device_name: - if args.latency_percentile == 50: - print(f'Latency: {latency_ms:.2f} ms') - else: - print(f'Latency ({args.latency_percentile} percentile): {latency_ms:.2f} ms') + print('Latency:') + if args.latency_percentile == 50 and static_mode: + print(f'Median: {median_latency_ms:.2f} ms') + elif args.latency_percentile != 50: + print(f'({args.latency_percentile} percentile): {median_latency_ms:.2f} ms') + print(f'AVG: {avg_latency_ms:.2f} ms') + print(f'MIN: {min_latency_ms:.2f} ms') + print(f'MAX: {max_latency_ms:.2f} ms') + + if pcseq: + print("Latency for each data shape group: ") + for group in benchmark.latency_groups: + print(f"{str(group)}") + print(f'AVG: {group.avg:.2f} ms') + print(f'MIN: {group.min:.2f} ms') + print(f'MAX: {group.max:.2f} ms') + 
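        # Illustration (hypothetical latency values, not part of the patch):
        # the median/percentile figures printed above come from the
        # `percentile` helper defined in benchmark.py, which indexes the
        # sorted latency list at ceil(len(values) * percent / 100) - 1:
        #
        #   from math import ceil
        #   def percentile(values, percent):
        #       return values[ceil(len(values) * percent / 100) - 1]
        #   times = sorted([5.2, 6.3, 7.1, 8.0, 9.4])  # latencies, ms
        #   percentile(times, 50)  # -> 7.1 (median of 5 samples)
        #   percentile(times, 90)  # -> 9.4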
    print(f'Throughput: {fps:.2f} FPS')

    del exe_network

diff --git a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
index f7f42b1c16c4d3..f5de067e33f030 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
@@ -35,7 +35,7 @@ def parse_args():
                       help='Optional. '
                            'Path to a folder with images and/or binaries or to specific image or binary file.'
                            'It is also allowed to map files to network inputs: '
-                           'input_1:file_1,file_2,input_4:file_4 input_2:file_3')
+                           'input_1:file_1/dir1,file_2/dir2,input_4:file_4/dir4 input_2:file_3/dir3')
     args.add_argument('-m', '--path_to_model', type=str, required=True,
                       help='Required. Path to an .xml/.onnx file with a trained model or '
                            'to a .blob file with a trained compiled model.')
@@ -60,9 +60,9 @@ def parse_args():
     args.add_argument('-niter', '--number_iterations', type=check_positive, required=False, default=None,
                       help='Optional. Number of iterations. '
                            'If not specified, the number of iterations is calculated depending on a device.')
-    args.add_argument('-nireq', '--number_infer_requests', type=check_positive, required=False, default=None,
+    args.add_argument('-nireq', '--number_infer_requests', type=check_positive, required=False, default=0,
                       help='Optional. Number of infer requests. Default value is determined automatically for device.')
-    args.add_argument('-b', '--batch_size', type=int, required=False, default=0,
+    args.add_argument('-b', '--batch_size', type=str, required=False, default='',
                       help='Optional. ' +
                            'Batch size value. ' +
                            'If not specified, the batch size value is determined from Intermediate Representation')
@@ -77,7 +77,13 @@ def parse_args():
                            'Show progress bar (can affect performance measurement). Default value is \'False\'.')
     args.add_argument('-shape', type=str, required=False, default='',
                       help='Optional. '
                           'Set shape for input. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.'
                           'This parameter affects the model Parameter shape and can be dynamic. For dynamic dimensions use symbol `?`, `-1` or range `low..up`.')
     args.add_argument('-data_shape', type=str, required=False, default='',
                       help='Optional. '
                           'Optional if network shapes are all static (original ones or set by -shape).'
                           'Required if at least one input shape is dynamic and input images are not provided.'
                           'Set shape for input tensors. For example, "input1[1,3,224,224],input2[1,4]" or "[1,3,224,224]" in case of one input size.')
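    # Example invocation (hypothetical model and input name, not part of the
    # patch): -shape can make a Parameter dynamic, and -data_shape then
    # supplies the concrete tensor shape used at inference time:
    #
    #   benchmark_app.py -m model.xml -api async \
    #       -shape "input1[1,3,?,?]" -data_shape "input1[1,3,224,224]"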
    args.add_argument('-layout', type=str, required=False, default='',
                      help='Optional. '
                           'Prompts how network layouts should be treated by application. '
@@ -110,6 +116,11 @@ def parse_args():
                      help='Optional. Path to a file where to store executable graph information serialized.')
     args.add_argument('-pc', '--perf_counts', type=str2bool, required=False, default=False, nargs='?', const=True,
                       help='Optional. Report performance counters.', )
+    args.add_argument('-pcseq', '--pcseq', type=str2bool, required=False, default=False, nargs='?', const=True,
+                      help='Optional. Report latencies for each shape in -data_shape sequence.', )
+    args.add_argument('-inference_only', '--inference_only', type=str2bool, required=False, default=None, nargs='?', const=True,
+                      help='Optional. If true, inputs are filled only once before the measurements (default for static models); '
+                           'otherwise, input filling is included in the measured loop (default for dynamic models).', )
     args.add_argument('-report_type', '--report_type', type=str, required=False,
                       choices=['no_counters', 'average_counters', 'detailed_counters'],
                       help="Optional. Enable collecting statistics report. \"no_counters\" report contains "
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/constants.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/constants.py
index 762502f2c96f3d..545db17b459fed 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/utils/constants.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/constants.py
@@ -15,8 +15,8 @@
 BIN_EXTENSION = '.bin'
 BLOB_EXTENSION = '.blob'

-IMAGE_EXTENSIONS = ['JPEG', 'JPG', 'PNG', 'BMP']
-BINARY_EXTENSIONS = ['BIN']
+IMAGE_EXTENSIONS = ['.jpeg', '.jpg', '.png', '.bmp']
+BINARY_EXTENSIONS = ['.bin']

 DEVICE_DURATION_IN_SECS = {
     CPU_DEVICE_NAME: 60,
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/inputs_filling.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/inputs_filling.py
index a5377ba6b985c2..6838f583af2ac9 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/utils/inputs_filling.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/inputs_filling.py
@@ -5,333 +5,374 @@
 import cv2
 import re
 import numpy as np
-from glob import glob
 from collections import defaultdict
 from pathlib import Path
-from itertools import chain
+
+from openvino.runtime import Tensor, PartialShape

 from .constants import IMAGE_EXTENSIONS, BINARY_EXTENSIONS
 from .logging import logger

-def set_inputs(paths_to_input, batch_size, app_input_info, requests):
-    requests_input_data = get_inputs(paths_to_input, batch_size, app_input_info, requests)
-    for i in range(len(requests)):
-        inputs = requests[i].input_blobs
-        for k, v in requests_input_data[i].items():
-            if k not in inputs.keys():
-                raise Exception(f"No input with name {k} found!")
-            inputs[k].buffer[:] = v
-
-
-def get_inputs(paths_to_input, batch_size, app_input_info, requests):
-    input_file_mapping = parse_paths_to_input(paths_to_input)
-    check_input_file_mapping(input_file_mapping, app_input_info)
-
-    input_image_sizes = {}
-    for key in sorted(app_input_info.keys()):
-        info = app_input_info[key]
-        if info.is_image:
-            input_image_sizes[key] = (info.width, info.height)
+class DataQueue:
+    def __init__(self, input_data: dict, batch_sizes: list):
+        self.input_data = input_data
+        self.sizes_map = {}
+        for name, tensors in input_data.items():
+            self.sizes_map[name] = len(tensors)
+        self.index_map = defaultdict.fromkeys(input_data.keys(), 0)
+        self.batch_sizes = batch_sizes
+        self.size = len(batch_sizes)
+        self.current_group_id = 0
+
+    def get_next_input(self):
+        data = {}
+        for input_name, input_tensors in self.input_data.items():
+            data[input_name] = input_tensors[self.index_map[input_name]]
+            self.index_map[input_name] = (self.index_map[input_name] + 1) % self.sizes_map[input_name]
+        self.current_group_id = (self.current_group_id + 1) % self.size
+        return data
+
+    def get_next_batch_size(self):
+        return self.batch_sizes[self.current_group_id]
+
+
+def get_group_batch_sizes(app_input_info):
+    batch_sizes = []
+    niter = max(len(info.shapes) for info in app_input_info)
+    for i in range(niter):
+        batch_size = 0
+        for info in app_input_info:
+            batch_index = info.layout.get_index_by_name('N') if info.layout.has_name('N') else -1
+            if batch_index != -1:
+                shape = info.shapes[i % len(info.shapes)]
+                if batch_size == 0:
+                    batch_size = shape[batch_index]
+                elif batch_size != shape[batch_index]:
+                    raise Exception("Can't determine batch size: batch is different for different inputs!")
+        if batch_size == 0:
+            batch_size = 1
+        batch_sizes.append(batch_size)
+    return batch_sizes
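# Illustration (hypothetical tensors t0, t1, t2; not part of the patch).
# DataQueue above cycles each input's tensor list independently and advances
# the shape-group id on every get_next_input(); keys are input ports, as in
# get_input_data below:
#
#   queue = DataQueue({0: [t0, t1, t2]}, batch_sizes=[1, 1, 1])
#   queue.get_next_input()       # {0: t0}; current_group_id becomes 1
#   queue.get_next_input()       # {0: t1}; current_group_id becomes 2
#   queue.get_next_batch_size()  # batch size of group 2 -> 1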
+
+
+def get_batch_sizes_per_input_map(app_input_info):
+    batch_sizes_map = {}
+    for info in app_input_info:
+        if info.layout.has_name('N'):
+            if info.is_dynamic:
+                batch_sizes_map[info.name] = info.getDimentionsByLayout('N')
+            else:
+                batch_sizes_map[info.name] = [len(info.getDimentionByLayout('N'))]
+        else:
+            batch_sizes_map[info.name] = [1] * len(info.shapes)
+    return batch_sizes_map
+
+
+def get_input_data(paths_to_input, app_input_info):
+    image_mapping, binary_mapping = get_input_file_mappings(paths_to_input, app_input_info)
+
+    image_sizes = get_image_sizes(app_input_info)
+    batch_sizes_map = get_batch_sizes_per_input_map(app_input_info)
+
+    images_to_be_used_map = {input_name: len(images) for input_name, images in image_mapping.items()}
+    binaries_to_be_used_map = {input_name: len(binaries) for input_name, binaries in binary_mapping.items()}
+
+    for info in app_input_info:
+        if info.shapes:
+            total_frames = np.sum(batch_sizes_map[info.name])
+            if info.name in image_mapping:
+                if images_to_be_used_map[info.name] > total_frames and images_to_be_used_map[info.name] % total_frames != 0:
+                    images_to_be_used_map[info.name] = images_to_be_used_map[info.name] - images_to_be_used_map[info.name] % total_frames
+                    logger.warning(f"Number of provided images for input '{info.name}' is not a multiple of the number of "
+                                   f"provided data shapes. Only {images_to_be_used_map[info.name]} images will be processed for this input.")
+                elif images_to_be_used_map[info.name] < total_frames:
+                    logger.warning(f"Some images will be duplicated: {total_frames} is required, "
+                                   f"but only {images_to_be_used_map[info.name]} were provided.")
+            elif info.name in binary_mapping:
+                if binaries_to_be_used_map[info.name] > total_frames and binaries_to_be_used_map[info.name] % total_frames != 0:
+                    binaries_to_be_used_map[info.name] = binaries_to_be_used_map[info.name] - binaries_to_be_used_map[info.name] % total_frames
+                    logger.warning(f"Number of provided binaries for input '{info.name}' is not a multiple of the number of "
+                                   f"provided data shapes. Only {binaries_to_be_used_map[info.name]} binaries will be processed for this input.")
+                elif binaries_to_be_used_map[info.name] < total_frames:
+                    logger.warning(f"Some binaries will be duplicated: {total_frames} is required, "
+                                   f"but only {binaries_to_be_used_map[info.name]} were provided.")
+            else:
+                logger.warning(f"No input files were given for input '{info.name}'! This input will be filled with random values!")
+        else:
+            if info.name in image_mapping:
+                logger.info(f"Images given for input '{info.name}' will be processed with original shapes.")
+            else:
+                raise Exception(f"Input {info.name} is dynamic.
Provide data shapes!") - images_count = len(input_image_sizes.keys()) - binaries_count = len(app_input_info) - images_count + data = {} + for port, info in enumerate(app_input_info): + if info.name in image_mapping: + data[port] = get_image_tensors(image_mapping[info.name][:images_to_be_used_map[info.name]], info, batch_sizes_map[info.name]) - image_files = list() - binary_files = list() + elif info.name in binary_mapping: + data[port] = get_binary_tensors(binary_mapping[info.name][:binaries_to_be_used_map[info.name]], info, batch_sizes_map[info.name]) - if paths_to_input and not input_file_mapping: - image_files = get_files_by_extensions(paths_to_input, IMAGE_EXTENSIONS) - binary_files = get_files_by_extensions(paths_to_input, BINARY_EXTENSIONS) + elif info.is_image_info and len(image_sizes) == 1: + image_size = image_sizes[0] + logger.info(f"Create input tensors for input '{info.name}' with image sizes: {image_size}") + data[port] = get_image_info_tensors(image_size, info) - if input_file_mapping and len(input_file_mapping) < len(app_input_info): - not_provided_inputs = set(app_input_info) - set(input_file_mapping) - logger.warning("No input files were given for the inputs: " - f"{', '.join(not_provided_inputs)}. This inputs will be filled with random values!") - elif (len(image_files) == 0) and (len(binary_files) == 0): - logger.warning("No input files were given: all inputs will be filled with random values!") - else: - binary_to_be_used = binaries_count * batch_size * len(requests) - if binary_to_be_used > 0 and len(binary_files) == 0: - logger.warning(f"No supported binary inputs found! " - f"Please check your file extensions: {','.join(BINARY_EXTENSIONS)}") - elif binary_to_be_used > len(binary_files): - logger.warning( - f"Some binary input files will be duplicated: " - f"{binary_to_be_used} files are required, " - f"but only {len(binary_files)} were provided") - elif binary_to_be_used < len(binary_files): - logger.warning( - f"Some binary input files will be ignored: only {binary_to_be_used} " - f"files are required from {len(binary_files)}") - - images_to_be_used = images_count * batch_size * len(requests) - if images_to_be_used > 0 and len(image_files) == 0: - logger.warning(f"No supported image inputs found! 
Please check your " - f"file extensions: {','.join(IMAGE_EXTENSIONS)}") - elif images_to_be_used > len(image_files): - logger.warning( - f"Some image input files will be duplicated: {images_to_be_used} " - f"files are required, but only {len(image_files)} were provided") - elif images_to_be_used < len(image_files): - logger.warning( - f"Some image input files will be ignored: only {images_to_be_used} " - f"files are required from {len(image_files)}") - - requests_input_data = [] - for request_id in range(0, len(requests)): - logger.info(f"Infer Request {request_id} filling") - input_data = {} - keys = list(sorted(app_input_info.keys())) - for key in keys: - info = app_input_info[key] - if info.is_image: - # input is image - if key in input_file_mapping: - input_data[key] = fill_blob_with_image(input_file_mapping[key], request_id, batch_size, - keys.index(key), len(keys), info, from_map=True) - continue - - if len(image_files) > 0: - input_data[key] = fill_blob_with_image(image_files, request_id, batch_size, keys.index(key), - len(keys), info) - continue - - # input is binary - if len(binary_files) or key in input_file_mapping: - if key in input_file_mapping: - input_data[key] = fill_blob_with_binary(input_file_mapping[key], request_id, batch_size, - keys.index(key), len(keys), info, from_map=True) - continue - - input_data[key] = fill_blob_with_binary(binary_files, request_id, batch_size, keys.index(key), - len(keys), info) - continue - - # most likely input is image info - if info.is_image_info and len(input_image_sizes) == 1: - image_size = input_image_sizes[list(input_image_sizes.keys()).pop()] - logger.info("Fill input '" + key + "' with image size " + str(image_size[0]) + "x" + - str(image_size[1])) - input_data[key] = fill_blob_with_image_info(image_size, info) - continue - - # fill with random data - logger.info(f"Fill input '{key}' with random values " - f"({'image' if info.is_image else 'some binary data'} is expected)") - input_data[key] = fill_blob_with_random(info) - - requests_input_data.append(input_data) - - return requests_input_data - - -def get_files_by_extensions(paths_to_input, extensions): - if len(paths_to_input) == 1: - files = [file for file in paths_to_input[0].split(",") if file] - - if all(get_extension(file) in extensions for file in files): - check_files_exist(files) - return files - - return get_files_by_extensions_for_directory_or_list_of_files(paths_to_input, extensions) - - -def get_files_by_extensions_for_directory_or_list_of_files(paths_to_input, extensions): - input_files = list() - - for path_to_input in paths_to_input: - if os.path.isfile(path_to_input): - files = [os.path.normpath(path_to_input)] else: - path = os.path.join(path_to_input, '*') - files = glob(path, recursive=True) - for file in files: - file_extension = get_extension(file) - if file_extension in extensions: - input_files.append(file) - input_files.sort() - return input_files - - -def get_extension(file_path): - return file_path.split(".")[-1].upper() + logger.info(f"Fill input '{info.name}' with random values ") + data[port] = fill_tensors_with_random(info) + + return DataQueue(data, get_group_batch_sizes(app_input_info)) + + +def get_image_tensors(image_paths, info, batch_sizes): + processed_frames = 0 + widthes = info.widthes if info.is_dynamic else [info.width] + heights = info.heights if info.is_dynamic else [info.height] + tensors = [] + process_with_original_shapes = False + num_shapes = len(info.shapes) + if num_shapes == 0: + process_with_original_shapes = True + num_images = 
len(image_paths)
+    niter = max(num_shapes, num_images)
+    for i in range(niter):
+        shape = list(info.shapes[i % num_shapes]) if num_shapes else []
+        dtype = get_dtype(info.element_type.get_type_name())[0]
+        images = np.ndarray(shape=shape, dtype=dtype)
+        image_index = processed_frames
+        current_batch_size = 1 if process_with_original_shapes else batch_sizes[i % num_shapes]
+        for b in range(current_batch_size):
+            image_index %= num_images
+            image_filename = image_paths[image_index]
+            logger.info(f'Prepare image {image_filename}')
+            image = cv2.imread(image_filename)
+            if process_with_original_shapes:
+                logger.info(f'Image will be processed with original shape - {image.shape[:-1]}')
+            elif info.layout.has_name('H') and info.layout.has_name('W'):
+                new_im_size = (widthes[i % num_shapes], heights[i % num_shapes])
+                if image.shape[:-1] != new_im_size:
+                    logger.warning(f"Image is resized from ({image.shape[:-1]}) to ({new_im_size})")
+                    image = cv2.resize(image, new_im_size)
+
+            if info.scale or info.mean:
+                blue, green, red = cv2.split(image)
+                if info.mean:
+                    blue = np.subtract(blue, info.mean[0])
+                    green = np.subtract(green, info.mean[1])
+                    red = np.subtract(red, info.mean[2])
+                if info.scale:
+                    blue = np.divide(blue, info.scale[0])
+                    green = np.divide(green, info.scale[1])
+                    red = np.divide(red, info.scale[2])
+                image = cv2.merge([blue, green, red])
+
+            if str(info.layout) in ['[N,C,H,W]', '[C,H,W]']:
+                image = image.transpose((2, 0, 1))
+
+            if process_with_original_shapes:
+                if len(info.partial_shape) == 4:
+                    image = np.expand_dims(image, 0)
+                p_shape = PartialShape(image.shape)
+                if info.partial_shape.compatible(p_shape):
+                    info.data_shapes.append(p_shape.to_shape())
+                else:
+                    raise Exception(f"Data shape '{str(p_shape)}' provided for input '{info.name}' "
+                                    f"is not compatible with partial shape '{str(info.partial_shape)}' for this input.")
+                tensors.append(Tensor(image.astype(dtype)))
+            else:
+                try:
+                    images[b] = image
+                except ValueError:
+                    # raise Exception(f"Image shape {image.shape} is not compatible with input shape {shape}. "
+                    #                 f"Try to provide layout for input '{info.name}'.")
+                    # Backward compatibility; this fallback will be removed.
+                    logger.warning(f"Image shape {image.shape} is not compatible with input shape {shape}. "
+                                   f"Input '{info.name}' will be filled with random values!")
+                    return fill_tensors_with_random(info)
+
-def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_size, info, from_map=False):
-    shape = info.shape
-    images = np.ndarray(shape)
-    if from_map:
-        image_index = request_id * batch_size
-    else:
-        image_index = request_id * batch_size * input_size + input_id
-
-    scale_mean = (not np.array_equal(info.scale, (1.0, 1.0, 1.0)) or not np.array_equal(info.mean, (0.0, 0.0, 0.0)))
-
-    for b in range(batch_size):
-        image_index %= len(image_paths)
-        image_filename = image_paths[image_index]
-        logger.info(f'Prepare image {image_filename}')
-        image = cv2.imread(image_filename)
-        new_im_size = tuple((info.width, info.height))
-        if image.shape[:-1] != new_im_size:
-            logger.warning(f"Image is resized from ({image.shape[:-1]}) to ({new_im_size})")
-            image = cv2.resize(image, new_im_size)
-
-        if scale_mean:
-            blue, green, red = cv2.split(image)
-            blue = np.subtract(blue, info.mean[0])
-            blue = np.divide(blue, info.scale[0])
-            green = np.subtract(green, info.mean[1])
-            green = np.divide(green, info.scale[1])
-            red = np.subtract(red, info.mean[2])
-            red = np.divide(red, info.scale[2])
-            image = cv2.merge([blue, green, red])
-
-        if info.layout in ['NCHW', 'CHW']:
-            image = image.transpose((2, 0, 1))
-
-        images[b] = image
-
-        if from_map:
             image_index += 1
-        else:
-            image_index += input_size
-    return images
+        processed_frames += current_batch_size
+        if not process_with_original_shapes:
+            tensors.append(Tensor(images))
+    return tensors
+
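As an aside (not part of the patch): the per-channel mean/scale block above is equivalent to a vectorized form. A minimal sketch, assuming `image` is the HxWx3 BGR array returned by cv2.imread and `info.mean`/`info.scale` hold three values in B, G, R order:

    import numpy as np
    # subtract the per-channel mean, then divide by the per-channel scale;
    # broadcasting applies the three values along the channel axis
    image = (image - np.array(info.mean)) / np.array(info.scale)
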
" + f"Input '{info.name}' will be filled with random values!") + return fill_tensors_with_random(info) - -def fill_blob_with_image(image_paths, request_id, batch_size, input_id, input_size, info, from_map=False): - shape = info.shape - images = np.ndarray(shape) - if from_map: - image_index = request_id * batch_size - else: - image_index = request_id * batch_size * input_size + input_id - - scale_mean = (not np.array_equal(info.scale, (1.0, 1.0, 1.0)) or not np.array_equal(info.mean, (0.0, 0.0, 0.0))) - - for b in range(batch_size): - image_index %= len(image_paths) - image_filename = image_paths[image_index] - logger.info(f'Prepare image {image_filename}') - image = cv2.imread(image_filename) - new_im_size = tuple((info.width, info.height)) - if image.shape[:-1] != new_im_size: - logger.warning(f"Image is resized from ({image.shape[:-1]}) to ({new_im_size})") - image = cv2.resize(image, new_im_size) - - if scale_mean: - blue, green, red = cv2.split(image) - blue = np.subtract(blue, info.mean[0]) - blue = np.divide(blue, info.scale[0]) - green = np.subtract(green, info.mean[1]) - green = np.divide(green, info.scale[1]) - red = np.subtract(red, info.mean[2]) - red = np.divide(red, info.scale[2]) - image = cv2.merge([blue, green, red]) - - if info.layout in ['NCHW', 'CHW']: - image = image.transpose((2, 0, 1)) - - images[b] = image - - if from_map: image_index += 1 - else: - image_index += input_size - return images + processed_frames += current_batch_size + if not process_with_original_shapes: + tensors.append(Tensor(images)) + return tensors + def get_dtype(precision): format_map = { - 'FP32' : (np.float32, np.finfo(np.float32).min, np.finfo(np.float32).max), - 'I32' : (np.int32, np.iinfo(np.int32).min, np.iinfo(np.int32).max), - 'I64' : (np.int64, np.iinfo(np.int64).min, np.iinfo(np.int64).max), - 'FP16' : (np.float16, np.finfo(np.float16).min, np.finfo(np.float16).max), - 'I16' : (np.int16, np.iinfo(np.int16).min, np.iinfo(np.int16).max), - 'U16' : (np.uint16, np.iinfo(np.uint16).min, np.iinfo(np.uint16).max), - 'I8' : (np.int8, np.iinfo(np.int8).min, np.iinfo(np.int8).max), - 'U8' : (np.uint8, np.iinfo(np.uint8).min, np.iinfo(np.uint8).max), - 'BOOL' : (np.uint8, 0, 1), + 'f32' : (np.float32, np.finfo(np.float32).min, np.finfo(np.float32).max), + 'i32' : (np.int32, np.iinfo(np.int32).min, np.iinfo(np.int32).max), + 'i64' : (np.int64, np.iinfo(np.int64).min, np.iinfo(np.int64).max), + 'fp16' : (np.float16, np.finfo(np.float16).min, np.finfo(np.float16).max), + 'i16' : (np.int16, np.iinfo(np.int16).min, np.iinfo(np.int16).max), + 'u16' : (np.uint16, np.iinfo(np.uint16).min, np.iinfo(np.uint16).max), + 'i8' : (np.int8, np.iinfo(np.int8).min, np.iinfo(np.int8).max), + 'u8' : (np.uint8, np.iinfo(np.uint8).min, np.iinfo(np.uint8).max), + 'boolean' : (np.uint8, 0, 1), } if precision in format_map.keys(): return format_map[precision] raise Exception("Can't find data type for precision: " + precision) -def fill_blob_with_binary(binary_paths, request_id, batch_size, input_id, input_size, info, from_map=False): - binaries = np.ndarray(info.shape) - shape = info.shape.copy() - if 'N' in info.layout: - shape[info.layout.index('N')] = 1 - if from_map: - binary_index = request_id * batch_size - else: - binary_index = request_id * batch_size * input_size + input_id - dtype = get_dtype(info.precision)[0] - for b in range(batch_size): - binary_index %= len(binary_paths) - binary_filename = binary_paths[binary_index] - logger.info("Prepare binary file " + binary_filename) - - binary_file_size = 
os.path.getsize(binary_filename) - blob_size = dtype().nbytes * int(np.prod(shape)) - if blob_size != binary_file_size: - raise Exception( - f"File {binary_filename} contains {binary_file_size} bytes but network expects {blob_size}") - binaries[b] = np.reshape(np.fromfile(binary_filename, dtype), shape) - - if from_map: - binary_index += 1 - else: - binary_index += input_size - - return binaries +def get_binary_tensors(binary_paths, info, batch_sizes): + num_shapes = len(info.shapes) + num_binaries = len(binary_paths) + niter = max(num_shapes, num_binaries) + processed_frames = 0 + tensors = [] + for i in range(niter): + shape_id = i % num_shapes + dtype = get_dtype(info.element_type.get_type_name())[0] + shape = list(info.shapes[shape_id]) + binaries = np.ndarray(shape=shape, dtype=dtype) + if info.layout.has_name('N'): + shape[info.layout.get_index_by_name('N')] = 1 + binary_index = processed_frames + current_batch_size = batch_sizes[shape_id] + for b in range(current_batch_size): + binary_index %= num_binaries + binary_filename = binary_paths[binary_index] + logger.info("Prepare binary file " + binary_filename) + + binary_file_size = os.path.getsize(binary_filename) + blob_size = dtype().nbytes * int(np.prod(shape)) + if blob_size != binary_file_size: + raise Exception( + f"File {binary_filename} contains {binary_file_size} bytes but network expects {blob_size}") + binaries[b] = np.reshape(np.fromfile(binary_filename, dtype), shape) -def fill_blob_with_image_info(image_size, layer): - shape = layer.shape - im_info = np.ndarray(shape) - for b in range(shape[0]): - for i in range(shape[1]): - im_info[b][i] = image_size[i] if i in [0, 1] else 1 + binary_index += 1 + processed_frames += current_batch_size + tensors.append(Tensor(binaries)) + return tensors - return im_info -def fill_blob_with_random(layer): - dtype, rand_min, rand_max = get_dtype(layer.precision) +def get_image_sizes(app_input_info): + image_sizes = [] + for info in app_input_info: + if info.is_image: + if info.is_static: + image_sizes.append((info.width, info.height)) + else: + info_image_sizes = [] + for w, h in zip(info.widthes, info.heights): + info_image_sizes.append((w, h)) + image_sizes.append(info_image_sizes) + return image_sizes + + +def get_image_info_tensors(image_sizes, layer): + im_infos = [] + for shape, image_size in zip(layer.shapes, image_sizes): + im_info = np.ndarray(shape, dtype=get_dtype(layer.element_type.get_type_name())[0]) + for b in range(shape[0]): + for i in range(shape[1]): + im_info[b][i] = image_size if i in [0, 1] else 1 + im_infos.append(Tensor(im_info)) + return im_infos + + +def fill_tensors_with_random(layer): + dtype, rand_min, rand_max = get_dtype(layer.element_type.get_type_name()) # np.random.uniform excludes high: add 1 to have it generated if np.dtype(dtype).kind in ['i', 'u', 'b']: rand_max += 1 rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(0))) - if layer.shape: - return rs.uniform(rand_min, rand_max, layer.shape).astype(dtype) - return (dtype)(rs.uniform(rand_min, rand_max)) - + input_tensors = [] + for shape in layer.shapes: + if shape: + input_tensors.append(Tensor(rs.uniform(rand_min, rand_max, list(shape)).astype(dtype))) + else: + input_tensors.append(Tensor(rs.uniform(rand_min, rand_max))) + return input_tensors -def parse_paths_to_input(paths_to_inputs): - input_dicts_list = [parse_path(path) for path in paths_to_inputs] - inputs = defaultdict(list) - for input_dict in input_dicts_list: - for input_name, input_files in input_dict.items(): - 
inputs[input_name] += input_files
-    return {input_: files for input_, files in inputs.items() if files}
+def get_input_file_mappings(paths_to_inputs, app_input_info):
+    image_dicts_list = []
+    binary_dicts_list = []
+    for path in paths_to_inputs:
+        image_dict, binary_dict = parse_path(path, app_input_info)
+        image_dicts_list.append(image_dict)
+        binary_dicts_list.append(binary_dict)
 
-def parse_path(path):
-    """
-    Parse "input_1:file1,file2,input_2:file3" into a dict
-    """
-    inputs = re.findall(r"([^,]\w+):", path)
-    input_files = [file for file in re.split(r"[^,]\w+:", path) if file]
-    return {
-        input_: files.strip(",").split(",") for input_, files in zip(inputs, input_files)
-    }
+    def merge_dicts(dicts_list):
+        merged = defaultdict(list)
+        for d in dicts_list:
+            for k, v in d.items():
+                merged[k] += v
+        return merged
 
+    def remove_empty_items(d):
+        return {k: sorted(v) for k, v in d.items() if v}
 
-def check_input_file_mapping(input_file_mapping, app_input_info):
-    check_inputs(app_input_info, input_file_mapping)
-    check_input_file_mapping_files_exists(input_file_mapping)
-    check_files_extensions(app_input_info, input_file_mapping)
+    return remove_empty_items(merge_dicts(image_dicts_list)), remove_empty_items(merge_dicts(binary_dicts_list))
 
-def check_inputs(app_input_info, input_file_mapping):
-    wrong_inputs = [
-        input_ for input_ in input_file_mapping if input_ not in app_input_info
-    ]
-    if wrong_inputs:
+def parse_path(path, app_input_info):
+    """
+    Parse "input_1:file1/dir1,file2/dir2,input_2:file3/dir3" (per-input mapping) or a bare
+    "file1/dir1,file2/dir2" list into two dicts: one with image files and one with binary files.
+    """
+    input_names = sorted(list(info.name for info in app_input_info))
+    parsed_names = re.findall(r"([^,]\w+):", path)
+    wrong_names = list(name for name in parsed_names if name not in input_names)
+    if wrong_names:
         raise Exception(
-            f"Wrong input mapping! Cannot find inputs: {wrong_inputs}. "
-            f"Available inputs: {list(app_input_info)}. "
+            f"Wrong input mapping! Cannot find inputs: {wrong_names}. "
+            f"Available inputs: {input_names}. "
             "Please check `-i` input data"
         )
" "Please check `-i` input data" ) - - -def check_input_file_mapping_files_exists(input_file_mapping): - check_files_exist(chain.from_iterable(input_file_mapping.values())) - - -def check_files_exist(input_files_list): - not_files = [ - file for file in input_files_list if not Path(file).is_file() - ] - if not_files: - not_files = ",\n".join(not_files) - raise Exception( - f"Inputs are not files or does not exist!\n {not_files}" - ) - - -def check_files_extensions(app_input_info, input_file_mapping): - unsupported_files = [] - for input_, files in input_file_mapping.items(): - info = app_input_info[input_] - - proper_extentions = IMAGE_EXTENSIONS if info.is_image else BINARY_EXTENSIONS - unsupported = "\n".join( - [file for file in files if Path(file).suffix.upper().strip(".") not in proper_extentions] - ) - if unsupported: - unsupported_files.append(unsupported) + input_pathes = [path for path in re.split(r"[^,]\w+:", path) if path] + input_path_mapping = defaultdict(list) + # input mapping is used + if parsed_names: + input_path_mapping = {input_: files.strip(",").split(",") for input_, files in zip(parsed_names, input_pathes)} + else: + input_files = list() + _input_pathes = input_pathes[0].strip(",").split(",") + for _input_path in _input_pathes: + input_path = Path(_input_path) + if input_path.exists(): + if input_path.is_dir(): + input_files += list(str(file_path) for file_path in input_path.iterdir()) + elif input_path.is_file: + input_files.append(str(input_path)) + else: + raise Exception(f"Path '{str(input_path)}' doesn't exist \n {str(input_path)}") + num_files, num_inputs = len(input_files), len(app_input_info) + if num_inputs > 1: + logger.warning(f"Model has {num_inputs} inputs. It's recommended to use name mapping to specify parameters for each input.") + if num_files > num_inputs and num_files % num_inputs != 0: + input_files = input_files[:num_files - num_files % num_inputs] + logger.warning(f"Number of provided input files '{num_files}' is not a multiple of the number of " + f"model inputs. 
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py
index 553c1fe1f616a1..71454a44a1ca4f 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/statistics_report.py
@@ -1,6 +1,7 @@
 # Copyright (C) 2018-2021 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+from datetime import timedelta
 import os
 import sys
 from enum import Enum
@@ -58,49 +59,50 @@ def dump_parameters(f, parameters):
         logger.info(f"Statistics report is stored to {f.name}")
 
-    def dump_performance_counters_request(self, f, perf_counts):
-        total = 0
-        total_cpu = 0
+    def dump_performance_counters_request(self, f, prof_info):
+        total = timedelta()
+        total_cpu = timedelta()
         f.write(self.csv_separator.join(['layerName', 'execStatus', 'layerType', 'execType', 'realTime (ms)', 'cpuTime (ms)\n']))
-        for k, v in sorted(perf_counts.items(), key=lambda x: x[1]['execution_index']):
-            f.write(self.csv_separator.join([k, v['status'], v['layer_type'], v['exec_type'], str(v['real_time']/1000.0), str(v['cpu_time']/1000.0)]))
+        for pi in prof_info:
+            f.write(self.csv_separator.join([pi.node_name, str(pi.status), pi.node_type, pi.exec_type, str(pi.real_time/1000.0), str(pi.cpu_time/1000.0)]))
             f.write('\n')
-            total += v['real_time']
-            total_cpu += v['cpu_time']
+            total += pi.real_time
+            total_cpu += pi.cpu_time
         f.write(self.csv_separator.join(['Total','','','',str(total/1000.0),str(total_cpu/1000.0)]))
         f.write('\n\n')
 
-    def dump_performance_counters(self, perf_counts):
+    def dump_performance_counters(self, prof_info_list):
        if self.config.report_type == '' or self.config.report_type == noCntReport:
            logger.info("Statistics collecting for performance counters was not requested. No reports are dumped.")
            return
-        if not perf_counts:
+        if not prof_info_list:
            logger.info('Performance counters are empty. 
No reports are dumped.') return filename = os.path.join(self.config.report_folder, f'benchmark_{self.config.report_type}_report.csv') with open(filename, 'w') as f: if self.config.report_type == detailedCntReport: - for pc in perf_counts: - self.dump_performance_counters_request(f, pc) + for prof_info in prof_info_list: + self.dump_performance_counters_request(f, prof_info) elif self.config.report_type == averageCntReport: - def get_average_performance_counters(perf_counts): - performance_counters_avg = {} + def get_average_performance_counters(prof_info_list): + performance_counters_avg = [] ## iterate over each processed infer request and handle its PM data - for i in range(0, len(perf_counts)): - ## iterate over each layer from sorted vector and add required PM data to the per-layer maps - for k in perf_counts[0].keys(): - if k not in performance_counters_avg.keys(): - performance_counters_avg[k] = perf_counts[i][k] + for prof_info in prof_info_list: + for pi in prof_info: + item = next((x for x in performance_counters_avg if x.node_name == pi.node_name), None) + if item: + item.real_time += pi.real_time + item.cpu_time += pi.cpu_time else: - performance_counters_avg[k]['real_time'] += perf_counts[i][k]['real_time'] - performance_counters_avg[k]['cpu_time'] += perf_counts[i][k]['cpu_time'] - for _, v in performance_counters_avg.items(): - v['real_time'] /= len(perf_counts) - v['cpu_time'] /= len(perf_counts) + performance_counters_avg.append(pi) + + for pi in performance_counters_avg: + pi.real_time /= len(prof_info_list) + pi.cpu_time /= len(prof_info_list) return performance_counters_avg - self.dump_performance_counters_request(f, get_average_performance_counters(perf_counts)) + self.dump_performance_counters_request(f, get_average_performance_counters(prof_info_list)) else: raise Exception('PM data can only be collected for average or detailed report types') diff --git a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py index 849a76f1397029..f0d6876b968282 100644 --- a/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py +++ b/tools/benchmark_tool/openvino/tools/benchmark/utils/utils.py @@ -1,7 +1,12 @@ # Copyright (C) 2018-2021 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -from openvino.inference_engine import IENetwork,IECore +from collections import defaultdict +import datetime +from openvino.runtime import Core, Function, PartialShape, Dimension, Layout +from openvino.runtime.impl import Type +from openvino.preprocess import PrePostProcessor +from openvino.offline_transformations_pybind import serialize from .constants import DEVICE_DURATION_IN_SECS, UNKNOWN_DEVICE_TYPE, \ CPU_DEVICE_NAME, GPU_DEVICE_NAME @@ -31,7 +36,7 @@ def next_step(additional_info='', step_id=0): 6: "Configuring input of the model", 7: "Loading the model to the device", 8: "Querying optimal runtime parameters", - 9: "Creating infer requests and filling input blobs with images", + 9: "Creating infer requests and preparing input data", 10: "Measuring performance", 11: "Dumping statistics report", } @@ -48,52 +53,71 @@ def next_step(additional_info='', step_id=0): step_info_template = step_info_template.format(next_step.step_id, len(step_names), step_name) print(step_info_template) -def process_precision(ie_network: IENetwork, app_inputs_info, input_precision: str, output_precision: str, input_output_precision: str): + +def get_element_type(precision): + format_map = { + 'FP32' : Type.f32, + 'I32' : Type.i32, + 'I64' : 
Type.i64, + 'FP16' : Type.f16, + 'I16' : Type.i16, + 'U16' : Type.u16, + 'I8' : Type.i8, + 'U8' : Type.u8, + 'BOOL' : Type.boolean, + } + if precision in format_map.keys(): + return format_map[precision] + raise Exception("Can't find openvino element type for precision: " + precision) + + +def pre_post_processing(function: Function, app_inputs_info, input_precision: str, output_precision: str, input_output_precision: str): + pre_post_processor = PrePostProcessor(function) if input_precision: - _configure_network_inputs(ie_network, app_inputs_info, input_precision) + element_type = get_element_type(input_precision) + for i in range(len(function.inputs)): + pre_post_processor.input(i).tensor().set_element_type(element_type) + app_inputs_info[i].element_type = element_type if output_precision: - _configure_network_outputs(ie_network, output_precision) + element_type = get_element_type(output_precision) + for i in range(len(function.outputs)): + pre_post_processor.output(i).tensor().set_element_type(element_type) + user_precision_map = {} if input_output_precision: - _configure_network_inputs_and_outputs(ie_network, input_output_precision) - input_info = ie_network.input_info - for key in app_inputs_info.keys(): - ## if precision for input set by user, then set it to app_inputs - ## if it an image, set U8 - if input_precision or (input_output_precision and key in input_output_precision.keys()): - app_inputs_info[key].precision = input_info[key].precision - elif app_inputs_info[key].is_image: - app_inputs_info[key].precision = 'U8' - input_info[key].precision = 'U8' - -def _configure_network_inputs(ie_network: IENetwork, app_inputs_info, input_precision: str): - input_info = ie_network.input_info - - for key in input_info.keys(): - app_inputs_info[key].precision = input_precision - input_info[key].precision = input_precision - -def _configure_network_outputs(ie_network: IENetwork, output_precision: str): - output_info = ie_network.outputs - - for key in output_info.keys(): - output_info[key].precision = output_precision - -def _configure_network_inputs_and_outputs(ie_network: IENetwork, input_output_precision: str): - if not input_output_precision: - raise Exception("Input/output precision is empty") - - user_precision_map = _parse_arg_map(input_output_precision) - - input_info = ie_network.input_info - output_info = ie_network.outputs - - for key, value in user_precision_map.items(): - if key in input_info: - input_info[key].precision = value - elif key in output_info: - output_info[key].precision = value - else: - raise Exception(f"Element '{key}' does not exist in network") + user_precision_map = _parse_arg_map(input_output_precision) + input_names = get_input_output_names(function.get_parameters()) + output_names = get_input_output_names(function.get_results()) + for node_name, precision in user_precision_map.items(): + user_precision_map[node_name] = get_element_type(precision) + for name, element_type in user_precision_map.items(): + if name in input_names: + port = input_names.index(name) + app_inputs_info[port].element_type = element_type + pre_post_processor.input(port).tensor().set_element_type(element_type) + elif name in output_names: + port = output_names.index(name) + pre_post_processor.output(port).tensor().set_element_type(element_type) + else: + raise Exception(f"Node '{name}' does not exist in network") + + # update app_inputs_info + if not input_precision: + inputs = function.inputs + for i in range(len(inputs)): + if app_inputs_info[i].name in user_precision_map.keys(): + 
app_inputs_info[i].element_type = user_precision_map[app_inputs_info[i].name]
+            elif app_inputs_info[i].is_image:
+                app_inputs_info[i].element_type = Type.u8
+                pre_post_processor.input(i).tensor().set_element_type(Type.u8)
+            else:
+                app_inputs_info[i].element_type = inputs[i].get_element_type()
+
+    # set layout for model input
+    for port, info in enumerate(app_inputs_info):
+        pre_post_processor.input(port).network().set_layout(info.layout)
+
+    function = pre_post_processor.build()
+
 def _parse_arg_map(arg_map: str):
     arg_map = arg_map.replace(" ", "")
@@ -106,28 +130,54 @@ def _parse_arg_map(arg_map: str):
     return parsed_map
 
-def print_inputs_and_outputs_info(ie_network: IENetwork):
-    input_info = ie_network.input_info
-    for key in input_info.keys():
-        tensor_desc = input_info[key].tensor_desc
-        logger.info(f"Network input '{key}' precision {tensor_desc.precision}, "
-                    f"dimensions ({tensor_desc.layout}): "
-                    f"{' '.join(str(x) for x in tensor_desc.dims)}")
-    output_info = ie_network.outputs
-    for key in output_info.keys():
-        info = output_info[key]
-        logger.info(f"Network output '{key}' precision {info.precision}, "
-                    f"dimensions ({info.layout}): "
-                    f"{' '.join(str(x) for x in info.shape)}")
-
-def get_number_iterations(number_iterations: int, nireq: int, api_type: str):
+
+def get_precision(element_type: Type):
+    format_map = {
+        'f32' : 'FP32',
+        'i32' : 'I32',
+        'i64' : 'I64',
+        'f16' : 'FP16',
+        'i16' : 'I16',
+        'u16' : 'U16',
+        'i8' : 'I8',
+        'u8' : 'U8',
+        'boolean' : 'BOOL',
+    }
+    if element_type.get_type_name() in format_map.keys():
+        return format_map[element_type.get_type_name()]
+    raise Exception("Can't find precision for openvino element type: " + str(element_type))
+
+
+def print_inputs_and_outputs_info(function: Function):
+    parameters = function.get_parameters()
+    input_names = get_input_output_names(parameters)
+    for i in range(len(parameters)):
+        logger.info(f"Network input '{input_names[i]}' precision {get_precision(parameters[i].get_element_type())}, "
+                    f"dimensions ({str(parameters[i].get_layout())}): "
+                    f"{' '.join(str(x) for x in parameters[i].get_partial_shape())}")
+    results = function.get_results()
+    output_names = get_input_output_names(results)
+    for i in range(len(results)):
+        logger.info(f"Network output '{output_names[i]}' precision {get_precision(results[i].get_element_type())}, "
+                    f"dimensions ({str(results[i].get_layout())}): "
+                    f"{' '.join(str(x) for x in results[i].get_output_partial_shape(0))}")
+
+
+def get_number_iterations(number_iterations: int, nireq: int, num_shapes: int, api_type: str):
     niter = number_iterations
     if api_type == 'async' and niter:
-        niter = int((niter + nireq - 1) / nireq) * nireq
-        if number_iterations != niter:
-            logger.warning('Number of iterations was aligned by request number '
-                           f'from {number_iterations} to {niter} using number of requests {nireq}')
+        if num_shapes > nireq:
+            niter = int(((niter + num_shapes - 1) / num_shapes) * num_shapes)
+            if number_iterations != niter:
+                logger.warning('Number of iterations was aligned by number of input shapes '
+                               f'from {number_iterations} to {niter} using number of possible input shapes {num_shapes}')
+        else:
+            niter = int((niter + nireq - 1) / nireq) * nireq
+            if number_iterations != niter:
+                logger.warning('Number of iterations was aligned by request number '
+                               f'from {number_iterations} to {niter} using number of requests {nireq}')
     return niter
@@ -142,6 +192,32 @@ def get_duration_seconds(time, number_iterations, device):
     return 0
 
 
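The LatencyGroup class added below buckets per-iteration latencies by the combination of input shapes each request used, so dynamic models get one latency summary per shape group. A rough sketch of the intended aggregation (the loop and the `iteration_latencies` list are illustrative, not the benchmark's actual driver code):

    groups = get_latency_groups(app_input_info)  # one group per shape combination
    for i, latency_ms in enumerate(iteration_latencies):
        groups[i % len(groups)].times.append(latency_ms)
    for group in groups:
        group.avg = sum(group.times) / len(group.times)
        group.min, group.max = min(group.times), max(group.times)
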
+class LatencyGroup:
+    def __init__(self, input_names, input_shapes):
+        self.input_names = input_names
+        self.input_shapes = input_shapes
+        self.times = list()
+        self.avg = 0.
+        self.min = 0.
+        self.max = 0.
+
+    def __str__(self):
+        return ''.join(f"{name}: {str(shape)} " for name, shape in zip(self.input_names, self.input_shapes))
+
+
+def get_latency_groups(app_input_info):
+    num_groups = max(len(info.shapes) for info in app_input_info)
+    latency_groups = []
+    for i in range(num_groups):
+        names = list()
+        shapes = list()
+        for info in app_input_info:
+            names.append(info.name)
+            shapes.append(info.shapes[i % len(info.shapes)])
+        latency_groups.append(LatencyGroup(names, shapes))
+    return latency_groups
+
+
 def get_duration_in_milliseconds(duration):
     return duration * 1000
@@ -159,6 +235,14 @@ def get_duration_in_secs(target_device):
     return duration
 
 
+def check_for_static(app_input_info):
+    is_static = True
+    for info in app_input_info:
+        if info.is_dynamic:
+            return False
+    return is_static
+
+
 def parse_devices(device_string):
     if device_string in ['MULTI', 'HETERO']:
         return list()
@@ -206,6 +290,8 @@ def process_help_inference_string(benchmark_app, device_number_streams):
     if device_ss:
         output_string += ' using ' + device_ss
 
+    output_string += f', inference only: {benchmark_app.inference_only}'
+
     limits = ''
 
     if benchmark_app.niter and not benchmark_app.duration_seconds:
@@ -219,35 +305,33 @@ def process_help_inference_string(benchmark_app, device_number_streams):
     return output_string
 
-def dump_exec_graph(exe_network, exec_graph_path):
-    try:
-        exec_graph_info = exe_network.get_exec_graph_info()
-        exec_graph_info.serialize(exec_graph_path)
-        logger.info(f'Executable graph is stored to {exec_graph_path}')
-        del exec_graph_info
-    except Exception as e:
-        logger.exception(e)
+def dump_exec_graph(exe_network, model_path, weight_path=None):
+    if not weight_path:
+        weight_path = model_path[:model_path.find(".xml")] + ".bin"
+    serialize(exe_network.get_runtime_function(), model_path, weight_path)
+
 
 def print_perf_counters(perf_counts_list):
+    max_layer_name = 30
     for ni in range(len(perf_counts_list)):
         perf_counts = perf_counts_list[ni]
-        total_time = 0
-        total_time_cpu = 0
+        total_time = datetime.timedelta()
+        total_time_cpu = datetime.timedelta()
         logger.info(f"Performance counts for {ni}-th infer request")
-        for layer, stats in sorted(perf_counts.items(), key=lambda x: x[1]['execution_index']):
-            max_layer_name = 30
-            print(f"{layer[:max_layer_name - 4] + '...' if (len(layer) >= max_layer_name) else layer:<30}"
-                  f"{stats['status']:<15}"
-                  f"{'layerType: ' + str(stats['layer_type']):<30}"
-                  f"{'realTime: ' + str(stats['real_time']):<20}"
-                  f"{'cpu: ' + str(stats['cpu_time']):<20}"
-                  f"{'execType: ' + str(stats['exec_type']):<20}")
-            total_time += stats['real_time']
-            total_time_cpu += stats['cpu_time']
+        for pi in perf_counts:
+            print(f"{pi.node_name[:max_layer_name - 4] + '...' 
if (len(pi.node_name) >= max_layer_name) else pi.node_name:<30}" + f"{str(pi.status):<15}" + f"{'layerType: ' + pi.node_type:<30}" + f"{'realTime: ' + str(pi.real_time):<20}" + f"{'cpu: ' + str(pi.cpu_time):<20}" + f"{'execType: ' + pi.exec_type:<20}") + total_time += pi.real_time + total_time_cpu += pi.cpu_time print(f'Total time: {total_time} microseconds') print(f'Total CPU time: {total_time_cpu} microseconds\n') + def get_command_line_arguments(argv): parameters = [] arg_name = '' @@ -260,17 +344,49 @@ def get_command_line_arguments(argv): arg_value = '' else: if arg[0] == '-': - if arg_name is not '': + if arg_name != '': parameters.append((arg_name, arg_value)) arg_value = '' arg_name = arg else: arg_value = arg - if arg_name is not '': + if arg_name != '': parameters.append((arg_name, arg_value)) return parameters -def parse_input_parameters(parameter_string, input_info): + +def get_input_output_names(nodes): + return [node.friendly_name for node in nodes] + + +def get_data_shapes_map(data_shape_string, input_names): + # Parse parameter string like "input0[shape1][shape2],input1[shape1]" or "[shape1][shape2]" (applied to all inputs) + return_value = {} + if data_shape_string: + data_shape_string += ',' + matches = re.findall(r'(.*?\[.*?\]),', data_shape_string) + if matches: + for match in matches: + input_name = match[:match.find('[')] + shapes = re.findall(r'\[(.*?)\]', match[len(input_name):]) + if input_name: + return_value[input_name] = list(parse_partial_shape(shape_str) for shape_str in shapes) + else: + data_shapes = list(parse_partial_shape(shape_str) for shape_str in shapes) + num_inputs, num_shapes = len(input_names), len(data_shapes) + if num_shapes != 1 and num_shapes % num_inputs != 0: + raise Exception(f"Number of provided data_shapes is not a multiple of the number of model inputs!") + return_value = defaultdict(list) + for i in range(max(num_shapes, num_inputs)): + return_value[input_names[i % num_inputs]].append(data_shapes[i % num_shapes]) + return return_value + else: + raise Exception(f"Can't parse input parameter: {data_shape_string}") + return return_value + + + +def parse_input_parameters(parameter_string, input_names): # Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all inputs) return_value = {} if parameter_string: @@ -281,12 +397,13 @@ def parse_input_parameters(parameter_string, input_info): if input_name != '': return_value[input_name] = value else: - return_value = { k:value for k in input_info.keys() } + return_value = { k:value for k in input_names } break else: raise Exception(f"Can't parse input parameter: {parameter_string}") return return_value + def parse_scale_or_mean(parameter_string, input_info): # Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all inputs) return_value = {} @@ -299,122 +416,239 @@ def parse_scale_or_mean(parameter_string, input_info): if input_name != '': return_value[input_name] = f_value else: - print("input_info: ", input_info) - for name, description in input_info.items(): - if description.is_image: - return_value[name] = f_value + for input in input_info: + if input.is_image: + return_value[input.name] = f_value else: raise Exception(f"Can't parse input parameter: {parameter_string}") return return_value -class InputInfo: + +class AppInputInfo: def __init__(self): - self.precision = None - self.layout = "" - self.shape = [] + self.element_type = None + self.layout = Layout() + self.partial_shape = None + self.data_shapes = [] self.scale = [] 
self.mean = [] + self.name = None @property def is_image(self): - if self.layout not in [ "NCHW", "NHWC", "CHW", "HWC" ]: + if str(self.layout) not in [ "[N,C,H,W]", "[N,H,W,C]", "[C,H,W]", "[H,W,C]" ]: return False return self.channels == 3 @property def is_image_info(self): - if self.layout != "NC": + if str(self.layout) != "[N,C]": return False - return self.channels >= 2 + return self.channels.relaxes(Dimension(2)) def getDimentionByLayout(self, character): - if character not in self.layout: - raise Exception(f"Error: Can't get {character} from layout {self.layout}") - return self.shape[self.layout.index(character)] + if self.layout.has_name(character): + return self.partial_shape[self.layout.get_index_by_name(character)] + else: + return Dimension(0) + + def getDimentionsByLayout(self, character): + if self.layout.has_name(character): + d_index = self.layout.get_index_by_name(character) + dims = [] + for shape in self.data_shapes: + dims.append(shape[d_index]) + return dims + else: + return [0] * len(self.data_shapes) + + @property + def shapes(self): + if self.is_static: + return [self.partial_shape.to_shape()] + else: + return self.data_shapes @property def width(self): - return self.getDimentionByLayout("W") + return len(self.getDimentionByLayout("W")) + + @property + def widthes(self): + return self.getDimentionsByLayout("W") @property def height(self): - return self.getDimentionByLayout("H") + return len(self.getDimentionByLayout("H")) + + @property + def heights(self): + return self.getDimentionsByLayout("H") @property def channels(self): return self.getDimentionByLayout("C") @property - def batch(self): - return self.getDimentionByLayout("N") + def is_static(self): + return self.partial_shape.is_static @property - def depth(self): - return self.getDimentionByLayout("D") + def is_dynamic(self): + return self.partial_shape.is_dynamic + + +def parse_partial_shape(shape_str): + dims = [] + for dim in shape_str.split(','): + if '.. 
' in dim:
+            dim_range = list(int(d) for d in dim.split('..'))
+            assert len(dim_range) == 2
+            dims.append(Dimension(*dim_range))
+        elif dim == '?':
+            dims.append(Dimension())
+        else:
+            dims.append(Dimension(int(dim)))
+    return PartialShape(dims)
+
+
+def parse_batch_size(batch_size_str):
+    if batch_size_str:
+        error_message = f"Can't parse batch size '{batch_size_str}'"
+        dims = batch_size_str.split("..")
+        if len(dims) > 2:
+            raise Exception(error_message)
+        elif len(dims) == 2:
+            dim_range = []
+            for d in dims:
+                if d.isnumeric():
+                    dim_range.append(int(d))
+                else:
+                    raise Exception(error_message)
+            return Dimension(*dim_range)
+        else:
+            if dims[0].lstrip("-").isnumeric():
+                return Dimension(int(dims[0]))
+            elif dims[0] == "?":
+                return Dimension()
+            else:
+                raise Exception(error_message)
+    else:
+        return Dimension(0)
+
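For reference, the dimension syntax accepted by the two parsers above (values are examples):

    parse_partial_shape("1..8,3,224,224")  # '1..8' -> Dimension(1, 8) range
    parse_partial_shape("?,3,224,224")     # '?'    -> fully dynamic Dimension()
    parse_batch_size("4")                  # -> Dimension(4)
    parse_batch_size("1..4")               # -> Dimension(1, 4)
    parse_batch_size("?")                  # -> dynamic Dimension()
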
" + "The first dimension will be interpreted as batch size.") + batch_index = 0 + info.layout = Layout("N...") + if batch_index != -1 and info.partial_shape[batch_index] != batch_size: + info.partial_shape[batch_index] = batch_size + reshape = True + elif batch_index == -1: + raise Exception(f"Batch dimension is not specified for this model!") + + # Data shape + if info.name in data_shape_map.keys() and info.is_dynamic: + for p_shape in data_shape_map[info.name]: + if p_shape.is_dynamic: + raise Exception(f"Data shape always should be static, {str(p_shape)} is dynamic.") + elif info.partial_shape.compatible(p_shape): + info.data_shapes.append(p_shape.to_shape()) + else: + raise Exception(f"Data shape '{str(p_shape)}' provided for input '{info.name}' " + f"is not compatible with partial shape '{str(info.partial_shape)}' for this input.") + elif info.name in data_shape_map.keys(): + logger.warning(f"Input '{info.name}' has static shape. Provided data shapes for this input will be ignored.") + + input_info.append(info) # Update scale, mean - scale_map = parse_scale_or_mean(scale_string, info_map) - mean_map = parse_scale_or_mean(mean_string, info_map) + scale_map = parse_scale_or_mean(scale_string, input_info) + mean_map = parse_scale_or_mean(mean_string, input_info) - for name, descriptor in info_map.items(): - if descriptor.is_image: - descriptor.scale = np.ones(3) - descriptor.mean = np.zeros(3) + for input in input_info: + if input.name in scale_map: + input.scale = scale_map[input.name] + if input.name in mean_map: + input.mean = mean_map[input.name] - if name in scale_map: - descriptor.scale = scale_map[name] - if name in mean_map: - descriptor.mean = mean_map[name] + return input_info, reshape - return info_map, reshape def get_batch_size(inputs_info): - batch_size = 0 - for _, info in inputs_info.items(): - batch_index = info.layout.index('N') if 'N' in info.layout else -1 + null_dimension = Dimension(0) + batch_size = null_dimension + for info in inputs_info: + batch_index = info.layout.get_index_by_name('N') if info.layout.has_name('N') else -1 if batch_index != -1: - if batch_size == 0: - batch_size = info.shape[batch_index] - elif batch_size != info.shape[batch_index]: + if batch_size == null_dimension: + batch_size = info.partial_shape[batch_index] + elif batch_size != info.partial_shape[batch_index]: raise Exception("Can't deterimine batch size: batch is different for different inputs!") - if batch_size == 0: - batch_size = 1 + if batch_size == null_dimension: + batch_size = Dimension(1) return batch_size + def show_available_devices(): - ie = IECore() - print("\nAvailable target devices: ", (" ".join(ie.available_devices))) + print("\nAvailable target devices: ", (" ".join(Core().available_devices))) + def dump_config(filename, config): with open(filename, 'w') as f: json.dump(config, f, indent=4) + def load_config(filename, config): with open(filename) as f: config.update(json.load(f))