diff --git a/.gitmodules b/.gitmodules index a9cad1dee5f494..5feb7458da1801 100644 --- a/.gitmodules +++ b/.gitmodules @@ -78,6 +78,9 @@ [submodule "src/plugins/intel_npu/thirdparty/level-zero-ext"] path = src/plugins/intel_npu/thirdparty/level-zero-ext url = https://github.com/intel/level-zero-npu-extensions.git +[submodule "src/plugins/intel_npu/thirdparty/yaml-cpp"] + path = src/plugins/intel_npu/thirdparty/yaml-cpp + url = https://github.com/jbeder/yaml-cpp.git [submodule "thirdparty/telemetry"] path = thirdparty/telemetry url = https://github.com/openvinotoolkit/telemetry.git diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 73cdd57e508bdb..69ad9f460e357a 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -12,6 +12,7 @@ set(shellcheck_skip_list "${OpenVINO_SOURCE_DIR}/thirdparty" "${OpenVINO_SOURCE_DIR}/src/plugins/intel_cpu/thirdparty" "${OpenVINO_SOURCE_DIR}/src/plugins/intel_gpu/thirdparty" + "${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/thirdparty" "${OpenVINO_SOURCE_DIR}/src/bindings/python/thirdparty/pybind11" "${TEMP}") diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 07efefd4452403..8a9dce04f071b9 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -20,3 +20,5 @@ if(NOT BUILD_SHARED_LIBS AND NOT ENABLE_MLIR_COMPILER AND NOT ENABLE_DRIVER_COMP endif() ov_dependent_option(ENABLE_IMD_BACKEND "Enable InferenceManagerDemo based NPU AL backend" OFF "NOT WIN32;NOT CMAKE_CROSSCOMPILING" OFF) + +ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) diff --git a/src/plugins/intel_npu/thirdparty/CMakeLists.txt b/src/plugins/intel_npu/thirdparty/CMakeLists.txt index 4d0c66beeb7520..b064b5c7b9acd5 100644 --- a/src/plugins/intel_npu/thirdparty/CMakeLists.txt +++ b/src/plugins/intel_npu/thirdparty/CMakeLists.txt @@ -12,3 +12,15 @@ if(ENABLE_ZEROAPI_BACKEND) 
add_library(LevelZero::NPUExt ALIAS level-zero-ext) install(TARGETS level-zero-ext EXPORT "${PROJECT_NAME}Targets") endif() + +# +# yaml-cpp +# + +if(ENABLE_INTEL_NPU_PROTOPIPE) + add_subdirectory(yaml-cpp EXCLUDE_FROM_ALL) + # NB: Suppress warnings in yaml-cpp + if(SUGGEST_OVERRIDE_SUPPORTED) + target_compile_options(yaml-cpp PRIVATE -Wno-suggest-override) + endif() +endif() diff --git a/src/plugins/intel_npu/thirdparty/yaml-cpp b/src/plugins/intel_npu/thirdparty/yaml-cpp new file mode 160000 index 00000000000000..da82fd982c260e --- /dev/null +++ b/src/plugins/intel_npu/thirdparty/yaml-cpp @@ -0,0 +1 @@ +Subproject commit da82fd982c260e7f335ce5acbceff24b270544d1 diff --git a/src/plugins/intel_npu/tools/CMakeLists.txt b/src/plugins/intel_npu/tools/CMakeLists.txt index c0e620981952e1..ac1a51f74519c8 100644 --- a/src/plugins/intel_npu/tools/CMakeLists.txt +++ b/src/plugins/intel_npu/tools/CMakeLists.txt @@ -6,3 +6,7 @@ add_subdirectory(common) add_subdirectory(compile_tool) add_subdirectory(single-image-test) + +if (ENABLE_INTEL_NPU_PROTOPIPE) + add_subdirectory(protopipe) +endif() diff --git a/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt b/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt new file mode 100644 index 00000000000000..9ba76d89ca8445 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt @@ -0,0 +1,72 @@ +# +# Copyright (C) 2023-2024 Intel Corporation. +# SPDX-License-Identifier: Apache 2.0 +# + +set(TARGET_NAME protopipe) + +if (NOT DEFINED PROJECT_NAME) + cmake_minimum_required(VERSION 3.13 FATAL_ERROR) + project(protopipe_standalone) + include("cmake/standalone.cmake") + return() +endif() + +# +# Dependencies +# + +find_package(OpenCV QUIET COMPONENTS gapi) +if(OpenCV_VERSION VERSION_LESS 4.9) + message(STATUS "NPU ${TARGET_NAME} tool is disabled due to missing dependencies: gapi from OpenCV >= 4.9.") + return() +endif() + +if (WIN32) + # WA: add_tool_target expects to have all dependencies as cmake targets. 
+ add_library(winmm INTERFACE) + target_link_libraries(winmm INTERFACE "winmm.lib") +endif() + +# +# Define the target +# + +set(PROTOPIPE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src) + +ov_add_target(ADD_CPPLINT + TYPE EXECUTABLE + NAME ${TARGET_NAME} + ROOT ${CMAKE_CURRENT_SOURCE_DIR} + ADDITIONAL_SOURCE_DIRS ${PROTOPIPE_SOURCE_DIR} + INCLUDES ${PROTOPIPE_SOURCE_DIR} + LINK_LIBRARIES + PRIVATE + Threads::Threads + gflags + yaml-cpp + openvino::runtime + opencv_gapi + winmm) + + + +set_target_properties(${TARGET_NAME} PROPERTIES + FOLDER ${CMAKE_CURRENT_SOURCE_DIR} + CXX_STANDARD 17) + +# +# Install +# + +install(TARGETS ${TARGET_NAME} + RUNTIME DESTINATION "tools/${TARGET_NAME}" + COMPONENT ${NPU_INTERNAL_COMPONENT} + ${OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL}) + +if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md") + install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md" + DESTINATION "tools/${TARGET_NAME}" + COMPONENT ${NPU_INTERNAL_COMPONENT} + ${OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL}) +endif() diff --git a/src/plugins/intel_npu/tools/protopipe/README.md b/src/plugins/intel_npu/tools/protopipe/README.md new file mode 100644 index 00000000000000..afe6e8cffbc8c3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/README.md @@ -0,0 +1,608 @@ +# Protopipe +Protopipe is the C++ tool for simulating performance and validating accuracy of the various AI scenarios. + +Protopipe is built atop of [OpenCV G-API](https://github.com/opencv/opencv/wiki/Graph-API) and supports running inference through the [OpenVINO](https://github.com/openvinotoolkit/openvino) and [ONNXRuntime](https://github.com/microsoft/onnxruntime) frameworks. 
+ +## Table of Contents +* [Quick start](#quick-start) +* [How to configure](#how-to-configure) + * [Global parameters](#global-parameters) + * [Model parameters](#model-parameters) + * [Graph structure](#graph-structure) + * [Dependency Graph](#dependency-graph) + * [Network sequence](#network-sequence) + * [Scenario parameters](#scenario-parameters) + * [Config example](#config-example) +* [How to run](#how-to-run) +* [Use cases](#use-cases) + * [Measure Performance](#measure-performance) + * [Generate Reference](#generate-reference) + * [Validate Accuracy](#validate-accuracy) +* [How to build](#how-to-build) + +## Quick start +Consider the following [Config example](#config-example) to start using Protopipe. + +Learn more about available config parameters (see: [How to configure](#how-to-configure)) and explore different execution modes (see: [Use-cases](#use-cases)) for more advanced usage. + +## How to configure +Protopipe uses **YAML** format file to describe the AI scenario structure and its parameters. + +### Global parameters +The **YAML** config starts with specifying several global parameters: +- `model_dir` - **Optional**. Path to the models location. (**Default**: ".") +- `blob_dir` - **Optional**. Path to the blobs location. (**Default**: ".") +- `device_name` - **Optional**. OpenVINO device name: _CPU_, _GPU_, etc. (**Default**: _NPU_) +- `compiler_type` - **Optional**. NPU compiler type: _DRIVER_, _MLIR_. (**Default**: _DRIVER_) +- `log_level` - **Optional**. Log level: _NONE_, _INFO_, _DEBUG_. (**Default**: _NONE_) +- `disable_high_resolution_waitable_timer` - **Optional**. Disables high resolution timer used to perform delays on Windows. (**Default**: false) + +Example: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO +``` +### Model parameters +#### Common parameters +- `name` or `path` - **Required**. Path to the model file. +- `framework` - **Optional**.
Framework to use for inference: *onnxrt*, *openvino*. (**Default**: *openvino*) +- `input_data`, `output_data`, `metric`, `random` - **Optional**. Follow [Use-cases](#use-cases) to learn the details. +#### OpenVINO parameters +- `priority` - **Optional**. Model priority: _HIGH_, _MEDIUM_, _LOW_. (Default: _MEDIUM_) +- `config` - **Optional**. OpenVINO Plugin specific parameters. +- `device` - **Optional**. OpenVINO device name. +- `ip` - **Optional**. Input layer precision: _FP16_, _FP32_, _U8_, _I32_. +- `op` - **Optional**. Output layer precision: _FP16_, _FP32_, _U8_, _I32_. +- `il` - **Optional**. Input layer layout. +- `ol` - **Optional**. Output layer layout. +- `iml` - **Optional**. Input model layout. +- `oml` - **Optional**. Output model layout. + +Examples: +``` +- { name: model.xml, ip: FP16, iml: NHWC, il: NCHW } +- { name: model.xml, ip: { data: FP16 }, priority: HIGH } +- { name: model.xml, device: NPU, config: { PERFORMANCE_HINT: THROUGHPUT } } +``` +#### ONNXRT parameters +- `ep` - **Optional**. Specifies the parameters for particular execution provider. +- `session_options` - **Optional**. Set various session options for the ONNX Runtime. + +##### Supported Execution Providers +- [OpenVINO Execution Provider](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html) + - `name: OV` - **Required**. Enables OpenVINO Execution Provider. + - `device_type` - **Optional**.The device type: _NPU_U8_, _CPU_FP32_, etc. + - `params` - **Optional**. Accepts a map of options and their corresponding values that can be passed to OV EP. + +**Note**: If none of the supported execution providers are specified, the default `MLAS` will be used. 
+ +Examples: +``` +- { name: model.onnx, framework: onnxrt } # Default (MLAS) EP will be used +- { name: model.onnx, framework: onnxrt, session_options: { session.disable_cpu_ep_fallback: 1 } } # Default (MLAS) EP with the sessions options will be used +- { name: model.onnx, framework: onnxrt, ep: { name: OV, device_type: NPU_U8, params: { enable_qdq_optimizer: False, model_priority: LOW } } } # OpenVINO EP will be used +``` + +### Graph structure +There are two ways to describe the execution graph structure in Protopipe: +1. Using [Dependency Graph](#dependency-graph) (preferable) +2. Using [Network Sequence](#network-sequence) (old) + +#### Dependency Graph +The dependency graph in Protopipe is specified by: +- `op_desc` - The list of operations, every operation has the following parameters: + - `tag` - **Required**. The unique name of operation. + - `type` - **Optional**. The operation type: _Infer_, _CPU_, _Compound_ (**Default**: _Infer_) + - `repeat_count` - **Optional**. Runs operation over specified number of iterations. +- `connections` - The list of connections between operations. + +Supported operation types +1. `Infer` - Performs model inference. Follow [Model parameters](#model-parameters) for the details. +2. `CPU` - Simulates CPU load by performing the busy wait during `time_in_us` amount of time in microseconds +3. 
`Compound` - Defines a subgraph that consists of `Infer` and `CPU` node types + +``` +op_desc: + - { tag: A, path: Model-A.xml, ip: FP16, op: FP16 } + - { tag: B, path: Model-B.onnx, framework: onnxrt, ep: { name: OV, device_type: CPU_FP32 } } + - { tag: C, type: CPU, time_in_us: 5000 } + - { tag: D, path: Model-D.onnx, framework: onnxrt } + - { tag: E, path: Model-E.xml, il: NCHW, device: NPU, config: { PERFORMANCE_HINT: LATENCY } } + - { tag: F, path: Model-F.xml } +connections: + - [A, C, E, F] + - [A, B, D, F] + - [B, F] +``` +```mermaid + graph LR; + A-->B + A-->C + B-->D + B-->F + C-->E + E-->F + D-->F +``` + +The source **is not** reflected in graph structure, assume that all operations that don't have input connections are implicitly linked with the source, e.g. for the graph above: +```mermaid + graph LR; + Source-->A + A-->B + A-->C + B-->D + B-->F + C-->E + E-->F + D-->F +``` +**Note:** The situation when all nodes don't have input connections is also possible, consider: +``` +op_desc: + - { tag: A, path: Model-A.xml } + - { tag: B, path: Model-B.xml } + - { tag: C, path: Model-C.xml } +``` + +```mermaid + graph LR; + Source-->A + Source-->B + Source-->C +``` +In this case the section `connections` **can be omitted**. + +**Note:** Graph must remain `DAG`, so any loops in graph are prohibited including the self-loops as well as double edges. These are examples of incorrect graphs: +``` +#1: Invalid - The list must contain at least two operations to connect +- [A] +#2: Invalid - Self-loop is prohibited +- [A, A] +#3: Invalid - Loop is prohibited +- [A, B, C, A] +#4: Invalid - Double edge [B->C] is prohibited +- [A, B, C] +- [B, C] +``` +**Example of repeat_count usage** +``` +- op_desc: + - { tag: A, path: Model_A.xml, ... } + - { tag: B, path: Model_B.xml, repeat_count: 20 } + - { tag: C, path: Model_C.xml, ...
} + connections: + - [A, B, C] +``` +This defines the following pipeline: +```mermaid +graph LR; + A-->B + B-->C + B--->|20 iterations|B + +``` +**Example of "Compound" type operation**. +``` +op_desc: + - { tag: A, path: Model-A.xml } + - tag: B + type: Compound + repeat_count: 10 + op_desc: + - { tag: D, path: Model-D.xml } + - { tag: E, path: Model-E.xml } + - { tag: F, path: Model-F.xml } + connections: + - [D, E] + - [D, F] + - { tag: C, path: Model-C.xml } +connections: + - [A, B, C] +``` +This defines the following pipeline: +```mermaid +graph LR; + A[Model-A.xml] + C[Model-C.xml] + + subgraph B[Repeats 10 iterations] + direction LR + D[Model-D.xml] + E[Model-E.xml] + F[Model-F.xml] + + D --> E + D --> F + + end + + A --> B + B --> C +``` + +#### Network Sequence +There is also a way to describe the graph by using chain-like structure: +- `network` - **Required**. List or list of lists of model parameters. Follow [Model Parameters](#model-parameters) for the details. +- `delay_in_us` - **Optional**. Delay between models in microseconds. + +``` +input_stream_list: +- network: + - { name: A.xml, ip: FP16, il: NCHW, device: CPU } + - [{ name: B.xml, ip: FP16, op: FP16 }, { name: C.xml, ip: FP16, op: FP16 }] + - { name: D.xml, ip: FP16, op: FP16, config: { PERFORMANCE_HINT: LATENCY } } + delay_in_us: 5000 +``` + +```mermaid + graph LR; + A-->Delay1; + Delay1-->B; + Delay1-->C; + B-->Delay2; + C-->Delay2; + Delay2-->D +``` + +### Scenario parameters +The list of scenarios is specified by using `multi_inference` parameter, every scenario has the following parameters: +- `name` - **Optional**. The name of execution scenario. +- `input_stream_list` - **Required**. The list of the streams that will be run in parallel. + +Every stream has the following execution parameters: +- `name` - **Optional**. The name of the stream. +- `iteration_count` - **Optional**. Number of iterations to execute. +- `exec_time_in_secs` - **Optional**. Execute until timeout specified.
+- `frames_interval_in_ms` - **Optional**. Execution frequency of the stream (**Default**: 0 - Unbounded) +- `target_fps` - **Optional**. Execution frequency of the stream. `target_fps = 1000 / frames_interval_in_ms`. `target_fps` and `frames_interval_in_ms` are mutually exclusive and cannot be provided together. +- `target_latency_in_ms` - **Optional**. When iteration isn't finished within specified interval, the next frame will be dropped from execution. (**Default**: Disabled) +- `op_desc`/`connections` or `network` - **Required**. Execution graph structure. Follow [Graph structure](#graph-structure) for the details. + +### Config example +Consider the following scenario that consists of two parallel streams specified in `config.yaml`: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO + +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, il: NCHW, device: CPU } + - [{ name: B.xml, ip: FP16, op: FP16 }, { name: C.xml, ip: FP16, op: FP16 }] + - { name: D.xml, ip: FP16, op: FP16, config: { PERFORMANCE_HINT: LATENCY } } + target_fps: 30 + exec_time_in_secs: 15 + - op_desc: + - { tag: E, path: E.onnx, framework: onnxrt, ep: { name: OV, device_type: NPU_U8 } } + - { tag: F, type: CPU, time_in_us: 5000 } + - { tag: G, path: G.xml, ip: FP16, op: FP16, priority: HIGH } + connections: + - [E, F, G] + target_fps: 100 + exec_time_in_secs: 15 +``` +- The first `stream` is defined by using [Network sequence](#network-sequence) syntax and will execute the following graph with `30` FPS cadence: + ```mermaid + graph LR; + A-->B; + A-->C; + B-->D; + C-->D; + ``` +- The second `stream` is defined by using [Dependency graph](#dependency-graph) syntax and will execute the following graph with `100` FPS cadence.
+ ```mermaid + graph LR; + E-->F; + F-->G; + ``` + +Run: +``` +./protopipe -cfg config.yaml --drop_frames +``` +Both streams will be executed simultaneously in different threads during `15` seconds. + +Output format: +``` +stream 0: throughput: FPS, latency: min: ms, avg: ms, max: ms, frames dropped: / +stream 1: throughput: FPS, latency: min: ms, avg: ms, max: ms, frames dropped: / +``` + +## How to run +Protopipe has the following `CLI` options to configure the execution behaviour: + +`--cfg ` - Path to configuration file. +`--drop_frames`- **Optional**. Drop frames if they come earlier than stream is completed. E.g if `stream` works with `target_fps: 10` (~`100ms` latency) but stream iteration takes `150ms` - the next iteration will be triggered only in `50ms` if option is enabled. +`--pipeline` - **Optional**. Enables pipelined execution for all scenarios/streams. +`--niter ` - **Optional**. Number of iterations. If specified overwrites termination criterion specified in configuration file for all scenarios/streams. +`-t ` - **Optional**. Time in seconds. If specified overwrites termination criterion specified in configuration file for all scenarios/streams. +`--mode ` - **Optional**. Execution mode: *performance*, *reference*, *validation* (**Default**: *performance*) +`--exec_filter ` - **Optional**. Run only the scenarios that match provided string pattern. +`--inference_only` - **Optional**. Run only inference execution for every model excluding i/o data transfer (**Default**: true) + +### Filtering +Sometime it's needed to run particular set of scenarios specified in config file rather than all of them. 
+For example consider the following config file with three scenarios specified in `scenarios.yaml`: +``` +model_dir: + local: /models/ +device_name: CPU +multi_inference: +- input_stream_list: + - network: + - { name: A.xml } +- input_stream_list: + - network: + - { name: B.xml } +- input_stream_list: + - network: + - { name: C.xml } +``` +By default all scenarios are assigned unique names according to the following `multi_inference_` pattern. +E.g scenario with model `A.xml` has default name `multi_inference_0`. +Use `-exec_filter ` CLI option to control what scenarios from config should be executed: +``` +./protopipe -cfg scenarios.yaml -niter 100 -exec_filter=".*[0-1]" +``` +Only `multi_inference_0` and `multi_inference_1` scenarios will be executed. + +It's also possible to overwrite the default names in config file: +``` +model_dir: + local: /models/ +device_name: CPU +multi_inference: +- name: Model-A-Scenario + input_stream_list: + - network: + - { name: A.xml } +- name: Model-B-Scenario + input_stream_list: + - network: + - { name: B.xml } +- name: Model-C-Scenario + input_stream_list: + - network: + - { name: C.xml } +``` +and use them for filtering: +``` +./protopipe --cfg scenarios.yaml --niter 100 --exec_filter ".*-[AB].*" +``` +Only `Model-A-Scenario` and `Model-B-Scenario` scenarios will be executed. + +**Note**: Protopipe uses [std::regex](https://en.cppreference.com/w/cpp/regex) rules for pattern matching. + +## Use cases +Once scenario configuration is defined (see: [How to configure](#how-to-configure)) it can be used for various uses cases. 
+### Measure performance +`Protopipe` can report the performance statistics, consider the following run example: +``` +./protopipe --cfg config.yaml --drop_frames -t 30 +``` +Example of output: +``` +stream 0: throughput: 7.62659 FPS, latency: min: 93.804 ms, avg: 111.31 ms, max: 145.178 ms, frames dropped: 290/390 +``` +It might be also interesting to play with the following `CLI` options: +- `--drop_frames` - Enables frame drop: if iteration doesn't fit into 1000 / `target_fps` latency interval, the next iteration will be skipped. Frame drop is disabled by default. +- `--inference_only=false` - Enables i/o data transfer for inference. By default only inference time is captured in performance statistics. +- `--pipeline` - Enables ***pipelined*** execution. + +### Generate reference +As the prerequisite for accuracy validation it's useful to have a mechanism that provides an opportunity to generate the reference output data to compare with. In Protopipe it can be done by using the `reference` mode. +Use additional parameters to configure `reference` mode: +- `input_data` - **Required**. Path that contains input data for the model, if entity under the path is empty, input data will be generated randomly and dumped into the path specified. +- `output_data` - **Required**. Path where to dump reference output data. +- `random` - **Optional**. Initializer to generate input data randomly.
(Default: ` { dist: uniform, low: 0.0, high: 255 }`) + +Examples: +``` +random: { dist: uniform, low: -1.0, high: 1.0 } # specified globally for all models +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, input_data: A-inputs/, output_data: B-inputs/ } + # overwrites global initializer for the model B.xml + - { name: B.xml, ip: FP16, input_data: B-inputs/, output_data: B-outputs/, random: { dist: uniform, low: 0, high: 255.0 } } +``` + +Run `Protopipe` in `reference` mode: +``` +./protopipe -cfg config.yaml -mode reference -niter 10 +``` +Output: +``` +stream 0: Reference data has been generated for 10 iteration(s) +``` + +### Validate accuracy +Protopipe has the dedicated `validation` mode to perform accuracy validation. Existing configuration file can be simply extended to perform accuracy validation: + +- `save_validation_outputs` - **Optional**. Accepts the path where to dump actual execution outputs. (Default: disabled) +- `metric` - **Optional**. Accuracy metric to compare actual vs reference outputs. (Default: `{ name: norm, tolerance: 0.0 }`) +- `input_data` - **Required**. Path that contains input data for the model. +- `output_data` - **Required**. Path that contains **reference** data to compare with. + +**Note**: If folder is provided either for **input_data** or **output_data**, it must be in the following format: +``` +input_data/ + / + input_0.bin + input_1.bin + ... + input_N.bin + +output_data/ + / + output_0.bin + output_1.bin + ... + output_N.bin +``` +**Note**: input and output data can be generated automatically by using `Protopipe` in **reference** mode.
(see: [Generate reference](#generate-reference)) + +Examples: +``` +- { name: model.xml, ip: FP16, input_data: input_data/, output_data: output_data/ } +- { name: model.xml, ip: FP16, input_data: input.bin, output_data: output.bin } +- { name: model.xml, ip: FP16, input_data: { data: input.bin }, output_data: { result: output.bin } } +``` + +### Supported metrics +1. L2 Norm: $$\text{Norm}(\mathbf{A}, \mathbf{B}) = \sqrt{\sum_{i,j} (A_{i,j} - B_{i,j})^2}$$ +Parameters: + - `name: norm` - **Required**. Enables L2 Norm metric. + - `tolerance` - **Required**. If value of metric is greater than **tolerance** it will be treated as **FAIL**. +2. Cosine similarity: $$\text{Cosine}(\mathbf{A}, \mathbf{B}) = \frac{\mathbf{A} \cdot \mathbf{B}}{\| \mathbf{A} \|_2 \| \mathbf{B} \|_2}$$ +Parameters: + - `name: cosine` - **Required**. Enables cosine similarity metric. + - `threshold` - **Required**. If value of metric is lower than **threshold** it will be treated as **FAIL**. +3. NRMSE: $$\text{NRMSE}(\mathbf{A}, \mathbf{B}) = \frac{1}{D}\sqrt{\frac{1}{N}\sum_{i=1}^N(A_i - B_i)^2}$$ +Where, +$$D = \text{max}(0.001, \text{max}(A_{max}-A_{min}\text{, } B_{max}-B_{min}))$$ +Parameters: + - `name: nrmse` - **Required**. Enables nrmse metric. + - `tolerance` - **Required**. If value of metric is greater than **tolerance** it will be treated as **FAIL**.
+ +### Example +Consider the following `config.yaml`: +``` +model_dir: + local: C:\workspace\models +device_name: NPU +compiler_type: MLIR +log_level: INFO + +save_validation_outputs: actual-outputs/ +metric: { name: norm, tolerance: 0.01 } + +multi_inference: +- input_stream_list: + - network: + - { name: A.xml, ip: FP16, input_data: A-inputs/, output_data: A-outputs/ } + # overwrites the global metric for the model B.xml + - { name: B.xml, ip: FP16, input_data: B-inputs/, output_data: B-outputs/, metric: { name: norm, tolerance: 0.0 } +``` + +Use `reference` mode to generate the input random data for every model and calculate reference outputs +**Note**: If reference device is different, it can be changed in config file (`device_name`) accordingly +``` +./protopipe --cfg config.yaml --mode reference -niter 10 +``` +Use `validation` mode to perform accuracy validation: +``` +./protopipe --cfg config.yaml --mode validation -t 15 +``` +Example of successful validation: +``` +stream 0: Validation has passed for iteration(s) +``` +In case of accuracy issues the output will be the following: +``` +stream 0: Accuraccy check failed on iteration(s) (first 10): +Iteration : + Model: A, Layer: , Metric: Norm{tolerance: 0.01}, Reason: > 0.01; +``` + +## How to build +### Prerequisites +1. Clone `npu-plugin` repository +2. Build OpenCV G-API with OpenVINO/ONNXRT support +#### Build OpenCV G-API with OpenVINO/ONNXRT support +1. Clone OpenCV repo: + ``` + git clone https://github.com/opencv/opencv + cd opencv && git checkout 78195bc3df + ``` +2. Build OpenCV G-API: + ``` + mkdir -p build && cd build + cmake ../ -DBUILD_LIST=gapi \ + -DCMAKE_BUILD_TYPE=Release \ + -DWITH_OPENVINO=ON \ + -DOpenVINO_DIR= \ + -DWITH_ONNX=ON \ + -DORT_INSTALL_DIR= + cmake --build . --config Release --target opencv_gapi --parallel + ``` +### In-plugin build + +1. Clone and build [OpenVINO](https://github.com/openvinotoolkit/openvino) from sources +2. 
Build OpenCV G-API with OpenVINO / ONNXRT support +3. Clone `npu-plugin` repository + ``` + git clone https://github.com/openvinotoolkit/npu_plugin + git submodule update --init --recursive + ``` +4. Build `Protopipe` as part of the `npu-plugin` build: + ``` + mkdir build && cd build + cmake ../ -DOpenCV_DIR=<path-to-opencv-build> -DOpenVINODeveloperPackage_DIR=<path-to-openvino-build> + cmake --build . --config Release --target protopipe --parallel + ``` + +### Standalone build +1. Build `yaml-cpp` + ``` + mkdir -p yaml-cpp_build && cd yaml-cpp_build + cmake ../<npu-plugin>/thirdparty/yaml-cpp -DCMAKE_INSTALL_PREFIX=install + cmake --build . --config Release --target install --parallel + ``` +2. Build `gflags` + ``` + git clone https://github.com/gflags/gflags + cd gflags + mkdir -p gflags_build && cd gflags_build + cmake ../ -DCMAKE_INSTALL_PREFIX=install + cmake --build . --config Release --target install --parallel + ``` +3. Build `Protopipe` + ``` + mkdir -p protopipe_build && cd protopipe_build + cmake <npu-plugin>/tools/protopipe/ \ + -DOpenCV_DIR=<path-to-opencv-build> \ + -Dgflags_DIR=<path-to-gflags-build> \ + -DOpenVINO_DIR=<path-to-openvino-install> \ + + cmake --build . --config Release --target protopipe --parallel + ``` +### Verify the installation +**Note**: Make sure `opencv_*` libraries are visible in the environment: +- Windows: + ``` + set PATH=<path-to-opencv>\build\bin\Release\;%PATH% + ``` +- Linux: + ``` + export LD_LIBRARY_PATH=<path-to-opencv>/build/lib/:$LD_LIBRARY_PATH + ``` +**Note**: If `OpenCV` has been built with `ONNXRT` support, all `ONNXRT` related libraries must be located in the same folder as `protopipe` executable. + +Run `Protopipe` with -h flag to verify installation: +``` +> protopipe.exe -h +``` +Successful build will show the information about `Protopipe` CLI options: +``` +protopipe [OPTIONS] + + Common options: + -h Optional. Print the usage message. + -cfg Path to the configuration file. + -pipeline Optional. Enable pipelined execution. + -drop_frames Optional. Drop frames if they come earlier than pipeline is completed. + -mode Optional.
Simulation mode: performance (default), reference, validation. + -niter Optional. Number of iterations. If specified overwrites termination criterion for all scenarios in configuration file. + -t Optional. Time in seconds. If specified overwrites termination criterion for all scenarios in configuration file. + -inference_only Optional. Run only inference execution for every model excluding i/o data transfer. Applicable only for "performance" mode. (default: true). + -exec_filter Optional. Run the scenarios that match provided string pattern. +``` diff --git a/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake b/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake new file mode 100644 index 00000000000000..090756f86c44c0 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake @@ -0,0 +1,63 @@ +# +# Copyright (C) 2024 Intel Corporation. +# SPDX-License-Identifier: Apache 2.0 +# + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +if("${CMAKE_BUILD_TYPE}" STREQUAL "") + set(CMAKE_BUILD_TYPE "Release") +endif() + +find_package(OpenVINO REQUIRED COMPONENTS Runtime) +find_package(Threads REQUIRED) +find_package(OpenCV 4.9.0 REQUIRED COMPONENTS gapi) + +find_package(yaml-cpp QUIET) +find_package(gflags QUIET) + +if (NOT yaml-cpp_FOUND) + set(YAML_CPP_SOURCES_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/yaml-cpp") + message(STATUS "yaml-cpp package was not found. Trying to find source package in ${YAML_CPP_SOURCES_PATH}.") + if(EXISTS ${YAML_CPP_SOURCES_PATH}) + message(STATUS "yaml-cpp source package found. yaml-cpp will be built from sources.") + add_subdirectory(${YAML_CPP_SOURCES_PATH} yaml-cpp EXCLUDE_FROM_ALL) + else() + message(FATAL_ERROR "yaml-cpp package and sources were not found. CMake will exit." ) + endif() +endif() + +if (NOT gflags_FOUND) + set(GFLAGS_SOURCES_PATH "${PACKAGE_PREFIX_DIR}/samples/cpp/thirdparty/gflags") + message(STATUS "gflags package was not found. 
Trying to find source package in ${GFLAGS_SOURCES_PATH}.") + if(EXISTS ${GFLAGS_SOURCES_PATH}) + message(STATUS "gflags source package found. gflags will be built from sources.") + add_subdirectory(${GFLAGS_SOURCES_PATH} gflags EXCLUDE_FROM_ALL) + else() + message(FATAL_ERROR "gflags was not found. CMake will exit." ) + endif() +endif() + +set(DEPENDENCIES + Threads::Threads + gflags + yaml-cpp + openvino::runtime + opencv_gapi +) + +if (WIN32) + list(APPEND DEPENDENCIES "winmm.lib") +endif() + +file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp") +list(APPEND SOURCES main.cpp) + +add_executable(${TARGET_NAME} ${SOURCES}) +target_link_libraries(${TARGET_NAME} PRIVATE ${DEPENDENCIES}) +target_include_directories(${TARGET_NAME} PUBLIC "${PROJECT_SOURCE_DIR}/src/") + +install(TARGETS ${TARGET_NAME} + DESTINATION "tools/${TARGET_NAME}" + COMPONENT npu_tools) diff --git a/src/plugins/intel_npu/tools/protopipe/main.cpp b/src/plugins/intel_npu/tools/protopipe/main.cpp new file mode 100644 index 00000000000000..8596ba864335ca --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/main.cpp @@ -0,0 +1,266 @@ +// +// Copyright (C) 2023-2024 Intel Corporation. +// SPDX-License-Identifier: Apache 2.0 +// + +#include +#include +#include + +#include + +#include "parser/parser.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/performance_mode.hpp" +#include "simulation/reference_mode.hpp" +#include "simulation/validation_mode.hpp" + +#include "utils/error.hpp" +#include "utils/logger.hpp" + +static constexpr char help_message[] = "Optional. Print the usage message."; +static constexpr char cfg_message[] = "Path to the configuration file."; +static constexpr char device_message[] = + "Optional. Device name. If specified overwrites device specified in config file."; +static constexpr char pipeline_message[] = "Optional. Enable pipelined execution."; +static constexpr char drop_message[] = "Optional. 
Drop frames if they come earlier than pipeline is completed.";
+static constexpr char mode_message[] = "Optional. Simulation mode: performance (default), reference, validation.";
+static constexpr char niter_message[] = "Optional. Number of iterations. If specified overwrites termination criterion"
+                                        " for all scenarios in configuration file.";
+static constexpr char exec_time_message[] = "Optional. Time in seconds. If specified overwrites termination criterion"
+                                            " for all scenarios in configuration file.";
+static constexpr char inference_only_message[] =
+        "Optional. Run only inference execution for every model excluding i/o data transfer."
+        " Applicable only for \"performance\" mode. (default: true).";
+
+static constexpr char exec_filter_msg[] = "Optional. Run the scenarios that match provided string pattern.";
+
+// Command-line flags (gflags): DEFINE_<type>(name, default, help) creates the FLAGS_<name> variable.
+DEFINE_bool(h, false, help_message);
+DEFINE_string(cfg, "", cfg_message);
+DEFINE_string(d, "", device_message);
+DEFINE_bool(pipeline, false, pipeline_message);
+DEFINE_bool(drop_frames, false, drop_message);
+DEFINE_string(mode, "performance", mode_message);
+DEFINE_uint64(niter, 0, niter_message);
+DEFINE_uint64(t, 0, exec_time_message);
+DEFINE_bool(inference_only, true, inference_only_message);
+DEFINE_string(exec_filter, ".*", exec_filter_msg);
+
+// Prints the list of supported command-line options to stdout.
+static void showUsage() {
+    std::cout << "protopipe [OPTIONS]" << std::endl;
+    std::cout << std::endl;
+    std::cout << " Common options: " << std::endl;
+    std::cout << " -h " << help_message << std::endl;
+    std::cout << " -cfg " << cfg_message << std::endl;
+    std::cout << " -pipeline " << pipeline_message << std::endl;
+    std::cout << " -drop_frames " << drop_message << std::endl;
+    std::cout << " -d " << device_message << std::endl;
+    std::cout << " -mode " << mode_message << std::endl;
+    std::cout << " -niter " << niter_message << std::endl;
+    std::cout << " -t " << exec_time_message << std::endl;
+    std::cout << " -inference_only " << inference_only_message << std::endl;
+    std::cout << " -exec_filter " << exec_filter_msg << std::endl;
+    std::cout << std::endl;
+}
+
+// Parses the command line into the FLAGS_* variables and echoes the effective parameters.
+// Returns false when -h was given (after printing the usage), true otherwise.
+// Throws std::invalid_argument when no config file path (-cfg) was provided.
+bool parseCommandLine(int* argc, char*** argv) {
+    // NB: The last argument (remove_flags = true) makes gflags strip the parsed flags from argv.
+    gflags::ParseCommandLineNonHelpFlags(argc, argv, true);
+
+    if (FLAGS_h) {
+        showUsage();
+        return false;
+    }
+
+    if (FLAGS_cfg.empty()) {
+        throw std::invalid_argument("Path to config file is required");
+    }
+
+    std::cout << "Parameters:" << std::endl;
+    std::cout << " Config file: " << FLAGS_cfg << std::endl;
+    std::cout << " Pipelining is enabled: " << std::boolalpha << FLAGS_pipeline << std::endl;
+    std::cout << " Simulation mode: " << FLAGS_mode << std::endl;
+    std::cout << " Inference only: " << std::boolalpha << FLAGS_inference_only << std::endl;
+    std::cout << " Device: " << FLAGS_d << std::endl;
+    return true;
+}
+
+// Compiles the simulation either in pipelined or in synchronous flavour, forwarding the drop_frames setting.
+static ICompiled::Ptr compileSimulation(Simulation::Ptr simulation, const bool pipelined, const bool drop_frames) {
+    LOG_INFO() << "Compile simulation" << std::endl;
+    if (pipelined) {
+        return simulation->compilePipelined(drop_frames);
+    }
+    return simulation->compileSync(drop_frames);
+}
+
+// Collects callables and executes all of them concurrently, one std::async task per callable.
+class ThreadRunner {
+public:
+    using F = std::function<void()>;
+    // Registers a callable to be launched by run().
+    void add(F&& func) {
+        m_funcs.push_back(std::move(func));
+    }
+    // Launches every registered callable and blocks until all of them have finished.
+    void run();
+
+private:
+    std::vector<F> m_funcs;
+};
+
+void ThreadRunner::run() {
+    std::vector<std::future<void>> futures;
+    futures.reserve(m_funcs.size());
+    // NB: The callables are moved out of m_funcs, so the stored entries are left in a moved-from state.
+    for (auto&& func : m_funcs) {
+        futures.push_back(std::async(std::launch::async, std::move(func)));
+    }
+    // NB: get() waits for completion and rethrows an exception that escaped the callable (if any).
+    for (auto& future : futures) {
+        future.get();
+    }
+}
+
+// Unit of work for a single stream: owns the compiled simulation together with its name and
+// termination criterion, and keeps the Result produced by the run.
+class Task {
+public:
+    Task(ICompiled::Ptr&& compiled, std::string&& name, ITermCriterion::Ptr&& criterion);
+
+    // Runs the compiled simulation and stores the outcome, see result().
+    void operator()();
+    const Result& result() const;
+    const std::string& name() const;
+
+private:
+    ICompiled::Ptr m_compiled;
+    std::string m_name;
+    ITermCriterion::Ptr m_criterion;
+
+    Result m_result;
+};
+
+Task::Task(ICompiled::Ptr&& compiled, std::string&& name, ITermCriterion::Ptr&& criterion)
+        : m_compiled(std::move(compiled)), m_name(std::move(name)), m_criterion(std::move(criterion)) {
+}
+
+void Task::operator()() {
+    // NB: std::exception is converted into an Error result instead of being propagated to the caller.
+    try {
+        m_result = m_compiled->run(m_criterion);
+    } catch (const std::exception& e) {
+        m_result = Error{e.what()};
+    }
+}
+
+const Result& Task::result() const {
+    return m_result;
+}
+
+const std::string& Task::name() const {
+    return m_name;
+}
+
+// Creates the simulation that corresponds to the requested mode ("performance", "reference" or "validation").
+// The stream description is consumed: its graph and per-model maps are moved into the simulation.
+// Throws std::logic_error for an unknown mode.
+static Simulation::Ptr createSimulation(const std::string& mode, StreamDesc&& stream, const bool inference_only,
+                                        const Config& config) {
+    Simulation::Ptr simulation;
+    // NB: Common parameters for all simulations
+    Simulation::Config cfg{stream.name, stream.frames_interval_in_us, config.disable_high_resolution_timer,
+                           std::move(stream.graph), std::move(stream.infer_params_map)};
+    if (mode == "performance") {
+        PerformanceSimulation::Options opts{config.initializer, std::move(stream.initializers_map),
+                                            std::move(stream.input_data_map), inference_only,
+                                            std::move(stream.target_latency)};
+        simulation = std::make_shared<PerformanceSimulation>(std::move(cfg), std::move(opts));
+    } else if (mode == "reference") {
+        CalcRefSimulation::Options opts{config.initializer, std::move(stream.initializers_map),
+                                        std::move(stream.input_data_map), std::move(stream.output_data_map)};
+        simulation = std::make_shared<CalcRefSimulation>(std::move(cfg), std::move(opts));
+    } else if (mode == "validation") {
+        ValSimulation::Options opts{config.metric, std::move(stream.metrics_map), std::move(stream.input_data_map),
+                                    std::move(stream.output_data_map), std::move(stream.per_iter_outputs_path)};
+        simulation = std::make_shared<ValSimulation>(std::move(cfg), std::move(opts));
+    } else {
+        throw std::logic_error("Unsupported simulation mode: " + mode);
+    }
+    ASSERT(simulation);
+    return simulation;
+}
+
+int main(int argc, char* argv[]) {
+    // NB: Intentionally wrapped into try-catch to display exceptions occur on windows.
+ try { + if (!parseCommandLine(&argc, &argv)) { + return 0; + } + ReplaceBy replace_by{FLAGS_d}; + + auto parser = std::make_shared(FLAGS_cfg); + + LOG_INFO() << "Parse scenarios from " << FLAGS_cfg << " config file" << std::endl; + auto config = parser->parseScenarios(replace_by); + LOG_INFO() << "Found " << config.scenarios.size() << " scenario(s)" << std::endl; + + // NB: Overwrite termination criteria for all scenarios if specified via CLI + ITermCriterion::Ptr global_criterion; + if (FLAGS_niter != 0u) { + LOG_INFO() << "Termination criterion of " << FLAGS_niter << " iteration(s) will be used for all scenarios" + << std::endl; + global_criterion = std::make_shared(FLAGS_niter); + } + if (FLAGS_t != 0u) { + if (global_criterion) { + // TODO: In fact, it make sense to have them both enabled. + THROW_ERROR("-niter and -t options can't be specified together!"); + } + LOG_INFO() << "Termination criterion of " << FLAGS_t << " second(s) will be used for all scenarios" + << std::endl; + // NB: TimeOut accepts microseconds + global_criterion = std::make_shared(FLAGS_t * 1'000'000); + } + + std::regex filter_regex{FLAGS_exec_filter}; + bool any_scenario_failed = false; + for (auto&& scenario : config.scenarios) { + // NB: Skip the scenarios that don't match provided filter pattern + if (!std::regex_match(scenario.name, filter_regex)) { + LOG_INFO() << "Skip the scenario " << scenario.name << " as it doesn't match the -exec_filter=\"" + << FLAGS_exec_filter << "\" pattern" << std::endl; + continue; + } + LOG_INFO() << "Start processing " << scenario.name << std::endl; + + ThreadRunner runner; + std::vector tasks; + tasks.reserve(scenario.streams.size()); + for (auto&& stream : scenario.streams) { + auto criterion = stream.criterion; + auto stream_name = stream.name; + if (global_criterion) { + if (criterion) { + LOG_INFO() << "Stream: " << stream_name + << " termination criterion is overwritten by CLI parameter" << std::endl; + } + criterion = global_criterion->clone(); 
+ } + auto simulation = createSimulation(FLAGS_mode, std::move(stream), FLAGS_inference_only, config); + auto compiled = compileSimulation(simulation, FLAGS_pipeline, FLAGS_drop_frames); + tasks.emplace_back(std::move(compiled), std::move(stream_name), std::move(criterion)); + runner.add(std::ref(tasks.back())); + } + + LOG_INFO() << "Run " << tasks.size() << " stream(s) asynchronously" << std::endl; + runner.run(); + LOG_INFO() << "Execution has finished" << std::endl; + + for (const auto& task : tasks) { + if (!task.result()) { + // NB: Scenario failed if any of the streams failed + any_scenario_failed = true; + } + std::cout << "stream " << task.name() << ": " << task.result().str() << std::endl; + } + std::cout << "\n"; + } + if (any_scenario_failed) { + return EXIT_FAILURE; + } + } catch (const std::exception& e) { + std::cout << e.what() << std::endl; + throw; + } catch (...) { + std::cout << "Unknown error" << std::endl; + throw; + } + return 0; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/graph.cpp b/src/plugins/intel_npu/tools/protopipe/src/graph.cpp new file mode 100644 index 00000000000000..d13d2954a21b12 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/graph.cpp @@ -0,0 +1,140 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include + +#include "graph.hpp" + +Nodes Node::srcNodes() const { + Nodes src_nodes; + src_nodes.reserve(m_src_edges.size()); + std::transform(m_src_edges.begin(), m_src_edges.end(), std::back_inserter(src_nodes), [](EdgeHandle edge) { + return edge->srcNode(); + }); + return src_nodes; +} + +Nodes Node::dstNodes() const { + Nodes dst_nodes; + dst_nodes.reserve(m_dst_edges.size()); + std::transform(m_dst_edges.begin(), m_dst_edges.end(), std::back_inserter(dst_nodes), [](EdgeHandle edge) { + return edge->dstNode(); + }); + return dst_nodes; +} + +Edges Node::srcEdges() const { + return {m_src_edges.begin(), m_src_edges.end()}; +} + +Edges 
Node::dstEdges() const { + return {m_dst_edges.begin(), m_dst_edges.end()}; +} + +NodeHandle Graph::create() { + auto node = std::make_shared(); + NodeHandle nh(node); + m_nodes.emplace(node.get(), MetaPtr{node, Meta{}}); + return nh; +} + +void Graph::remove(NodeHandle nh) { + auto src_edges = nh->srcEdges(); + for (size_t i = 0; i < src_edges.size(); ++i) { + remove(src_edges[i]); + } + auto dst_edges = nh->dstEdges(); + for (size_t i = 0; i < dst_edges.size(); ++i) { + remove(dst_edges[i]); + } + m_nodes.erase(nh.get()); +} + +void Graph::remove(EdgeHandle eh) { + auto src = eh->srcNode(); + auto dst = eh->dstNode(); + src->m_dst_edges.erase(eh); + dst->m_src_edges.erase(eh); + m_edges.erase(eh.get()); +}; + +EdgeHandle Graph::link(NodeHandle src, NodeHandle dst) { + auto edge = std::make_shared(src, dst); + EdgeHandle eh{edge}; + m_edges.emplace(edge.get(), MetaPtr{edge, Meta{}}); + src->m_dst_edges.insert(eh); + dst->m_src_edges.insert(eh); + return eh; +} + +Meta& Graph::meta(NodeHandle handle) { + const auto it = m_nodes.find(handle.get()); + ASSERT(it != m_nodes.end()); + return it->second.meta; +} + +const Meta& Graph::meta(NodeHandle handle) const { + const auto it = m_nodes.find(handle.get()); + ASSERT(it != m_nodes.end()); + return it->second.meta; +} + +Meta& Graph::meta(EdgeHandle handle) { + const auto it = m_edges.find(handle.get()); + ASSERT(it != m_edges.end()); + return it->second.meta; +} + +const Meta& Graph::meta(EdgeHandle handle) const { + const auto it = m_edges.find(handle.get()); + ASSERT(it != m_edges.end()); + return it->second.meta; +} + +std::vector Graph::nodes() const { + std::vector ret; + std::transform(m_nodes.begin(), m_nodes.end(), std::back_inserter(ret), [](const auto& p) { + return NodeHandle{p.second.ptr}; + }); + return ret; +} + +static void dfs(NodeHandle& nh, std::unordered_set& visited, std::stack& stack) { + visited.insert(nh); + auto dst_nodes = nh->dstNodes(); + for (auto dst_nh : dst_nodes) { + auto it = 
visited.find(dst_nh); + if (it == visited.end()) { + dfs(dst_nh, visited, stack); + } + } + stack.push(nh); +}; + +std::vector Graph::sorted() const { + std::unordered_set visited; + std::stack stack; + const auto nodes = this->nodes(); + for (auto nh : nodes) { + auto it = visited.find(nh); + if (it == visited.end()) { + dfs(nh, visited, stack); + } + } + std::vector sorted; + while (!stack.empty()) { + sorted.push_back(stack.top()); + stack.pop(); + } + return sorted; +} + +Meta& Meta::operator+=(const Meta& other) { + for (const auto& p : other.store) { + ASSERT(store.emplace(p.first, p.second).second); + } + return *this; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/graph.hpp b/src/plugins/intel_npu/tools/protopipe/src/graph.hpp new file mode 100644 index 00000000000000..66aeccbe156d09 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/graph.hpp @@ -0,0 +1,168 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "utils/error.hpp" + +template +class WeakHandle { +public: + explicit WeakHandle(std::shared_ptr obj): m_obj(obj) { + } + T* get() const { + return m_obj.lock().get(); + } + T* operator->() const { + return get(); + } + bool operator==(const WeakHandle& other) const { + return get() == other.get(); + } + +private: + std::weak_ptr m_obj; +}; + +namespace std { +template +struct hash> { + uint64_t operator()(const WeakHandle& handle) const { + return std::hash()(handle.get()); + } +}; +} // namespace std + +class Graph; +class Node; +class Edge; + +using NodeHandle = WeakHandle; +using EdgeHandle = WeakHandle; +using Nodes = std::vector; +using Edges = std::vector; +using NodeSet = std::unordered_set; +using EdgeSet = std::unordered_set; + +class Node { + friend class Graph; + using Ptr = std::shared_ptr; + +public: + Nodes srcNodes() const; + Nodes dstNodes() const; + Edges srcEdges() 
const; + Edges dstEdges() const; + +private: + EdgeSet m_src_edges; + EdgeSet m_dst_edges; +}; + +class Edge { + friend class Graph; + using Ptr = std::shared_ptr; + +public: + Edge(NodeHandle src, NodeHandle dst): m_src(src), m_dst(dst) { + } + NodeHandle srcNode() const { + return m_src; + } + NodeHandle dstNode() const { + return m_dst; + } + +private: + NodeHandle m_src; + NodeHandle m_dst; +}; + +class Meta { +public: + template + void set(T&& meta); + template + const T& get() const; + template + T& get(); + template + bool has() const; + Meta& operator+=(const Meta& other); + +private: + using MetaStore = std::unordered_map; + MetaStore store; +}; + +template +void Meta::set(T&& meta) { + // NB: Check if there is no such meta yet. + ASSERT(store.emplace(std::type_index(typeid(T)), std::forward(meta)).second); +} + +template +bool Meta::has() const { + auto it = store.find(std::type_index(typeid(T))); + return it != store.end(); +} + +template +const T& Meta::get() const { + const auto it = store.find(std::type_index(typeid(T))); + ASSERT(it != store.end()); + return *std::any_cast(&it->second); +} + +template +T& Meta::get() { + auto it = store.find(std::type_index(typeid(T))); + ASSERT(it != store.end()); + return *std::any_cast(&it->second); +} + +class Graph { +public: + NodeHandle create(); + void remove(NodeHandle nh); + void remove(EdgeHandle eh); + EdgeHandle link(NodeHandle src, NodeHandle dst); + + Meta& meta() { + return m_graph_meta; + } + const Meta& meta() const { + return m_graph_meta; + } + + Meta& meta(NodeHandle handle); + const Meta& meta(NodeHandle handle) const; + Meta& meta(EdgeHandle handle); + const Meta& meta(EdgeHandle handle) const; + + std::vector nodes() const; + std::vector sorted() const; + +private: + template + struct MetaPtr { + std::shared_ptr ptr; + Meta meta; + }; + template + using MetaMap = std::unordered_map>; + + Meta m_graph_meta; + MetaMap m_nodes; + MetaMap m_edges; +}; diff --git 
a/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp new file mode 100644 index 00000000000000..34099d36a69fdb --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp @@ -0,0 +1,872 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "parser/config.hpp" + +#include "utils/error.hpp" +#include "utils/logger.hpp" + +#include +#include +#include +#include + +#include // depth + +namespace fs = std::filesystem; + +struct GlobalOptions { + std::string blob_dir = "."; + std::string model_dir = "."; + std::string device_name = "NPU"; + std::string log_level = "NONE"; + std::string compiler_type = "DRIVER"; + std::optional save_validation_outputs; +}; + +struct Network { + std::string tag; + InferenceParams params; + LayerVariantAttr input_data; + LayerVariantAttr output_data; + LayerVariantAttr initializers; + LayerVariantAttr accuracy_metrics; +}; + +struct InferOp { + InferenceParams params; + LayerVariantAttr input_data; + LayerVariantAttr output_data; + LayerVariantAttr initializers; + LayerVariantAttr accuracy_metrics; +}; + +struct CPUOp { + uint64_t time_in_us; +}; + +struct CompoundOp { + uint64_t repeat_count; + InferenceParamsMap params; + ScenarioGraph subgraph; +}; + +struct OpDesc { + std::string tag; + using OpType = std::variant; + OpType op; +}; + +// NB: Handles duplicating tags. 
+class TagsManager { +public: + std::string add(const std::string& tag); + +private: + std::unordered_multiset m_tags; +}; + +std::string TagsManager::add(const std::string& tag) { + std::string t = tag; + m_tags.insert(t); + const auto c = m_tags.count(t); + if (c > 1) { + t += "-" + std::to_string(c); + } + return t; +} + +static LogLevel toLogLevel(const std::string& lvl) { + if (lvl == "NONE") + return LogLevel::None; + if (lvl == "INFO") + return LogLevel::Info; + if (lvl == "DEBUG") + return LogLevel::Debug; + THROW_ERROR("Unsupported log level: " << lvl); +} + +static int toDepth(const std::string& prec) { + if (prec == "FP32") + return CV_32F; + if (prec == "FP16") + return CV_16F; + if (prec == "U8") + return CV_8U; + if (prec == "I32") + return CV_32S; + throw std::logic_error("Unsupported precision type: " + prec); +} + +static AttrMap toDepth(const AttrMap& attrmap) { + AttrMap depthmap; + for (const auto& [name, str_depth] : attrmap) { + depthmap.emplace(name, toDepth(str_depth)); + } + return depthmap; +} + +static LayerVariantAttr toDepth(const LayerVariantAttr& attr) { + LayerVariantAttr depthattr; + if (std::holds_alternative(attr)) { + depthattr = toDepth(std::get(attr)); + } else { + depthattr = toDepth(std::get>(attr)); + } + return depthattr; +} + +static std::string toPriority(const std::string& priority) { + if (priority == "LOW") { + return "LOW"; + } + if (priority == "NORMAL") { + return "MEDIUM"; + } + if (priority == "HIGH") { + return "HIGH"; + } + throw std::logic_error("Unsupported model priority: " + priority); +} + +static ScenarioGraph buildGraph(const std::vector& op_descs, + const std::vector>& connections); + +namespace YAML { + +template +struct convert> { + static bool decode(const Node& node, std::vector& vec) { + if (!node.IsSequence()) { + return false; + } + + for (auto& child : node) { + vec.push_back(child.as()); + } + return true; + } +}; + +template +struct convert> { + static bool decode(const Node& node, std::map& 
map) { + if (!node.IsMap()) { + return false; + } + for (const auto& itr : node) { + map.emplace(itr.first.as(), itr.second.as()); + } + return true; + } +}; + +template +struct convert> { + static bool decode(const Node& node, LayerVariantAttr& layer_attr) { + if (node.IsMap()) { + layer_attr = node.as>(); + } else { + layer_attr = node.as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, UniformGenerator::Ptr& generator) { + if (!node["low"]) { + THROW_ERROR("Uniform distribution must have \"low\" attribute"); + } + if (!node["high"]) { + THROW_ERROR("Uniform distribution must have \"high\" attribute"); + } + generator = std::make_shared(node["low"].as(), node["high"].as()); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, IRandomGenerator::Ptr& generator) { + if (!node["dist"]) { + THROW_ERROR("\"random\" must have \"dist\" attribute!"); + } + const auto dist = node["dist"].as(); + if (dist == "uniform") { + generator = node.as(); + } else { + THROW_ERROR("Unsupported random distribution: \"" << dist << "\""); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Norm::Ptr& metric) { + // NB: If bigger than tolerance - fail. + if (!node["tolerance"]) { + THROW_ERROR("Metric \"norm\" must have \"tolerance\" attribute!"); + } + const auto tolerance = node["tolerance"].as(); + metric = std::make_shared(tolerance); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Cosine::Ptr& metric) { + // NB: If lower than threshold - fail. + if (!node["threshold"]) { + THROW_ERROR("Metric \"cosine\" must have \"threshold\" attribute!"); + } + const auto threshold = node["threshold"].as(); + metric = std::make_shared(threshold); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, NRMSE::Ptr& metric) { + // NB: If bigger than tolerance - fail. 
+ if (!node["tolerance"]) { + THROW_ERROR("Metric \"nrmse\" must have \"tolerance\" attribute!"); + } + const auto tolerance = node["tolerance"].as(); + metric = std::make_shared(tolerance); + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, IAccuracyMetric::Ptr& metric) { + const auto type = node["name"].as(); + if (type == "norm") { + metric = node.as(); + } else if (type == "cosine") { + metric = node.as(); + } else if (type == "nrmse") { + metric = node.as(); + } else { + THROW_ERROR("Unsupported metric type: " << type); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, GlobalOptions& opts) { + if (node["model_dir"]) { + if (!node["model_dir"]["local"]) { + THROW_ERROR("\"model_dir\" must contain \"local\" key!"); + } + opts.model_dir = node["model_dir"]["local"].as(); + } + + if (node["blob_dir"]) { + if (!node["blob_dir"]["local"]) { + THROW_ERROR("\"blob_dir\" must contain \"local\" key!"); + } + opts.blob_dir = node["blob_dir"]["local"].as(); + } + + if (node["device_name"]) { + opts.device_name = node["device_name"].as(); + } + + if (node["log_level"]) { + opts.log_level = node["log_level"].as(); + } + + if (node["compiler_type"]) { + opts.compiler_type = node["compiler_type"].as(); + } + + if (node["save_validation_outputs"]) { + const auto path = node["save_validation_outputs"].as(); + opts.save_validation_outputs = std::make_optional(std::filesystem::path{path}); + } + + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, OpenVINOParams& params) { + // FIXME: Worth to separate these two + const auto name = node["name"] ? 
node["name"].as() : node["path"].as(); + fs::path path{name}; + if (path.extension() == ".xml") { + auto bin_path = path; + bin_path.replace_extension(".bin"); + params.path = OpenVINOParams::ModelPath{path.string(), bin_path.string()}; + } else if (path.extension() == ".blob") { + params.path = OpenVINOParams::BlobPath{path.string()}; + } else { + // NB: *.onnx, *.pdpd, and any other format supported in future + params.path = OpenVINOParams::ModelPath{path.string(), "" /*weights*/}; + } + // NB: If "device" isn't presented in config for network, + // the device specified globally will be substitued later on + if (node["device"]) { + params.device = node["device"].as(); + } + + if (node["ip"]) { + params.input_precision = toDepth(node["ip"].as>()); + } + + if (node["op"]) { + params.output_precision = toDepth(node["op"].as>()); + } + + if (node["il"]) { + params.input_layout = node["il"].as>(); + } + + if (node["ol"]) { + params.output_layout = node["ol"].as>(); + } + + if (node["iml"]) { + params.input_model_layout = node["iml"].as>(); + } + + if (node["oml"]) { + params.output_model_layout = node["oml"].as>(); + } + + if (node["config"]) { + params.config = node["config"].as>(); + } + + // NB: Note, it should be handled after "config" is set above + if (node["priority"]) { + params.config.emplace("MODEL_PRIORITY", toPriority(node["priority"].as())); + } + + if (node["nireq"]) { + params.nireq = node["nireq"].as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams::OpenVINO& ov_ep) { + if (node["params"]) { + ov_ep.params_map = node["params"].as>(); + } + if (node["device_type"]) { + std::string device_type = node["device_type"].as(); + // Check if device_type already exists in params_map (collision check) + if (ov_ep.params_map.count("device_type") > 0) { + THROW_ERROR("Configuration error: 'device_type' has already been specified in the params."); + } else { + ov_ep.params_map["device_type"] = 
device_type; + } + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams::EP& ep) { + const auto ep_name = node["name"].as(); + if (ep_name == "OV") { + ep = node.as(); + } else { + THROW_ERROR("Unsupported \"ep name\" value: " << ep_name); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, ONNXRTParams& params) { + // FIXME: Worth to separate these two + params.model_path = node["name"] ? node["name"].as() : node["path"].as(); + if (node["session_options"]) { + params.session_options = node["session_options"].as>(); + } + if (node["ep"]) { + params.ep = node["ep"].as(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, Network& network) { + // NB: Take path stem as network tag + // Note that at this point, it's fine if names aren't unique + const auto name = node["name"].as(); + network.tag = std::filesystem::path{name}.stem().string(); + // NB: OpenVINO is default to keep back compatibility for config syntax + const auto framework = node["framework"] ? node["framework"].as() : "openvino"; + if (framework == "openvino") { + // NB: Parse OpenVINO model parameters such as path, device, precision, etc + network.params = node.as(); + } else if (framework == "onnxrt") { + network.params = node.as(); + } else { + THROW_ERROR("Unsupported \"framework:\" value: " << framework); + } + + if (node["random"]) { + network.initializers = node["random"].as>(); + } + if (node["metric"]) { + network.accuracy_metrics = node["metric"].as>(); + } + if (node["input_data"]) { + network.input_data = node["input_data"].as>(); + } + + if (node["output_data"]) { + network.output_data = node["output_data"].as>(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, CPUOp& op) { + // TODO: Assert there are no more options provided + op.time_in_us = node["time_in_us"] ? 
node["time_in_us"].as() : 0u; + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, InferOp& op) { + const auto framework = node["framework"] ? node["framework"].as() : "openvino"; + if (framework == "openvino") { + // NB: Parse OpenVINO model parameters such as path, device, precision, etc + op.params = node.as(); + } else if (framework == "onnxrt") { + op.params = node.as(); + } else { + THROW_ERROR("Unsupported \"framework:\" value: " << framework); + } + + if (node["random"]) { + op.initializers = node["random"].as>(); + } + if (node["metric"]) { + op.accuracy_metrics = node["metric"].as>(); + } + if (node["input_data"]) { + op.input_data = node["input_data"].as>(); + } + + if (node["output_data"]) { + op.output_data = node["output_data"].as>(); + } + return true; + } +}; + +template <> +struct convert { + static bool decode(const Node& node, OpDesc& opdesc) { + opdesc.tag = node["tag"].as(); + auto type = node["type"] ? node["type"].as() : "Infer"; + auto repeat_count = node["repeat_count"] ? 
node["repeat_count"].as() : 1u; + ASSERT(repeat_count > 0) + if (repeat_count > 1u) { + // NB: repeat_count > 1u assume that "Compound" operation will be used + type = "Compound"; + } + if (type == "Infer") { + opdesc.op = node.as(); + } else if (type == "CPU") { + opdesc.op = node.as(); + } else if (type == "Compound") { + std::vector> connections; + if (node["connections"]) { + connections = node["connections"].as>>(); + } + auto op_descs = node["op_desc"].as>(); + InferenceParamsMap inference_params; + for (const auto& op_desc : op_descs) { + if (std::holds_alternative(op_desc.op)) { + inference_params.emplace(op_desc.tag, std::get(op_desc.op).params); + } + } + opdesc.op = CompoundOp{repeat_count, std::move(inference_params), buildGraph(op_descs, connections)}; + } else { + THROW_ERROR("Unsupported operation type: \"" << type << "\"!"); + } + return true; + } +}; + +} // namespace YAML + +static std::vector> parseNetworks(const YAML::Node& node) { + ASSERT(node.IsSequence()); + TagsManager tgs_mngr; + std::vector> networks_list; + for (const auto& subnode : node) { + if (subnode.IsSequence()) { + networks_list.push_back(subnode.as>()); + } else { + networks_list.push_back({subnode.as()}); + } + // NB: Ensure all network tags are unique! 
+ for (auto& network : networks_list.back()) { + network.tag = tgs_mngr.add(network.tag); + } + } + return networks_list; +} + +static ScenarioGraph buildGraph(const std::vector>& networks_list, const uint32_t delay_in_us) { + ScenarioGraph graph; + auto src = graph.makeSource(); + std::vector producers = {src}; + for (uint32_t list_idx = 0; list_idx < networks_list.size(); ++list_idx) { + auto& networks = networks_list[list_idx]; + // NB: Delay if specified, will not be added to the beginning + // and end of the stream, ONLY between models + if (list_idx != 0u && delay_in_us != 0u) { + auto delay = graph.makeDelay(delay_in_us); + for (auto p : producers) { + graph.link(p, delay); + } + producers = {delay.out()}; + } + std::vector curr_outs; + curr_outs.reserve(networks.size()); + for (uint32_t net_idx = 0; net_idx < networks.size(); ++net_idx) { + auto infer = graph.makeInfer(networks[net_idx].tag); + for (auto p : producers) { + graph.link(p, infer); + } + curr_outs.push_back(infer.out()); + } + producers = std::move(curr_outs); + } + return graph; +} + +static InferenceParams adjustParams(OpenVINOParams&& params, const GlobalOptions& opts, const ReplaceBy& replace_by) { + // NB: Adjust the model path according to base directories provided for blobs & models + auto& path = params.path; + if (std::holds_alternative(path)) { + auto& model_path = std::get(path); + fs::path model_file_path{model_path.model}; + fs::path bin_file_path{model_path.bin}; + if (model_file_path.is_relative()) { + model_path.model = (opts.model_dir / model_file_path).string(); + } + if (!model_path.bin.empty() && bin_file_path.is_relative()) { + model_path.bin = (opts.model_dir / bin_file_path).string(); + } + } else { + ASSERT(std::holds_alternative(path)); + auto& blob_path = std::get(path); + fs::path blob_file_path{blob_path.blob}; + if (blob_file_path.is_relative()) { + blob_path.blob = (opts.blob_dir / blob_file_path).string(); + } + } + // NB: Adjust device property based on 
opts.device_name or replace_by + + if (!replace_by.device.empty()) { + // NB: ReplaceBy has priority - overwrite + params.device = replace_by.device; + } else if (params.device.empty()) { + // NB: Otherwise, if empty - take the value from global device name + params.device = opts.device_name; + } + + // NB: Compiler type is only relevant for NPU device + if (params.device == "NPU") { + // NB: Don't overwrite compiler type if it already has been + // specified explicitly for particular model + if (const auto it = params.config.find("NPU_COMPILER_TYPE"); it == params.config.end()) { + params.config.emplace("NPU_COMPILER_TYPE", opts.compiler_type); + } + } + return std::move(params); +} + +static InferenceParams adjustParams(ONNXRTParams&& params, const GlobalOptions& opts) { + fs::path model_file_path{params.model_path}; + if (model_file_path.is_relative()) { + params.model_path = (opts.model_dir / model_file_path).string(); + } + return std::move(params); +} + +static InferenceParams adjustParams(InferenceParams&& params, const GlobalOptions& opts, const ReplaceBy& replace_by) { + if (std::holds_alternative(params)) { + return adjustParams(std::get(std::move(params)), opts, replace_by); + } + ASSERT(std::holds_alternative(params)); + return adjustParams(std::get(std::move(params)), opts); +} + +static StreamDesc parseStream(const YAML::Node& node, const GlobalOptions& opts, const std::string& default_name, + const ReplaceBy& replace_by) { + StreamDesc stream; + + // FIXME: Create a function for the duplicate code below + stream.name = node["name"] ? node["name"].as() : default_name; + stream.frames_interval_in_us = 0u; + if (node["frames_interval_in_ms"]) { + stream.frames_interval_in_us = node["frames_interval_in_ms"].as() * 1000u; + if (node["target_fps"]) { + THROW_ERROR("Both \"target_fps\" and \"frames_interval_in_ms\" are defined for the stream: \"" + << stream.name << "\"! 
Please specify only one of them as they are mutually exclusive."); + } + } else if (node["target_fps"]) { + uint32_t target_fps = node["target_fps"].as(); + stream.frames_interval_in_us = (target_fps != 0) ? (1000u * 1000u / target_fps) : 0; + } + + if (node["target_latency_in_ms"]) { + stream.target_latency = std::make_optional(node["target_latency_in_ms"].as()); + if (stream.target_latency < 0) { + THROW_ERROR("\"target_latency_in_ms\" is negative for the stream: \"" << stream.name << "\"!"); + } + } + if (node["exec_time_in_secs"]) { + const auto exec_time_in_secs = node["exec_time_in_secs"].as(); + stream.criterion = std::make_shared(exec_time_in_secs * 1'000'000); + } + if (node["iteration_count"]) { + const auto iteration_count = node["iteration_count"].as(); + stream.criterion = std::make_shared(iteration_count); + } + + auto networks_list = parseNetworks(node["network"]); + const auto delay_in_us = node["delay_in_us"] ? node["delay_in_us"].as() : 0u; + stream.graph = buildGraph(networks_list, delay_in_us); + // NB: Collect network parameters + for (auto& networks : networks_list) { + for (auto& network : networks) { + stream.metrics_map.emplace(network.tag, std::move(network.accuracy_metrics)); + stream.initializers_map.emplace(network.tag, std::move(network.initializers)); + stream.input_data_map.emplace(network.tag, std::move(network.input_data)); + stream.output_data_map.emplace(network.tag, std::move(network.output_data)); + stream.infer_params_map.emplace(network.tag, adjustParams(std::move(network.params), opts, replace_by)); + } + } + return stream; +} + +using DependencyMap = std::unordered_map>; + +static ScenarioGraph buildGraph(const std::vector& op_descs, + const std::vector>& connections) { + // NB: Build the graph based on list of operations and connections between them + // + // The algorithm is straightforward: + // 1) For every operation create corresponding graph node + // 2) Go though connections and create the dependency map + // 3) Go 
through every operation and connect with its dependencies + // 3.1) If operation has no dependencies, connect it directly with the source + + // NB: For the fast access to operation node by name + std::unordered_map op_node_map; + // NB: To store the list of dependencies for every operation + std::unordered_map> dependency_map; + + // (1) For every operation create corresponding graph node + ScenarioGraph graph; + for (const auto& desc : op_descs) { + // NB: Initialize dependency list for every operation + dependency_map[desc.tag]; + // FIXME: Implement visitor + if (std::holds_alternative(desc.op)) { + op_node_map.emplace(desc.tag, graph.makeInfer(desc.tag)); + } else if (std::holds_alternative(desc.op)) { + const auto& compound = std::get(desc.op); + op_node_map.emplace( + desc.tag, graph.makeCompound(compound.repeat_count, compound.subgraph, compound.params, desc.tag)); + } else { + ASSERT(std::holds_alternative(desc.op)); + const auto& cpu = std::get(desc.op); + op_node_map.emplace(desc.tag, graph.makeDelay(cpu.time_in_us)); + } + } + + // (2) Go though connections and create the dependency map + for (const auto& tags : connections) { + if (tags.size() < 2) { + THROW_ERROR("Connections list must be at least size of 2!"); + } + for (uint32_t i = 1; i < tags.size(); ++i) { + // [A, B, C] - means B depends on A, and C depends on B + auto deps_it = dependency_map.find(tags[i]); + if (deps_it == dependency_map.end()) { + THROW_ERROR("Operation \"" << tags[i] << "\" hasn't been registered in op_desc list!"); + } + if (tags[i - 1] == tags[i]) { + THROW_ERROR("Operation \"" << tags[i] << "\" cannot be connected with itself!"); + } + auto& dep_set = deps_it->second; + // NB: Check if such connection already exists + auto is_inserted = deps_it->second.emplace(tags[i - 1]).second; + if (!is_inserted) { + THROW_ERROR("Connection between \"" << tags[i - 1] << "\" and \"" << tags[i] + << "\" operations already exists!"); + } + } + } + + // (3) Go through every operation and 
connect with its dependencies + auto src = graph.makeSource(); + for (const auto& [tag, deps] : dependency_map) { + auto op = op_node_map.at(tag); + // (3.1) If operation has no dependencies, connect it directly to the source + if (deps.empty()) { + graph.link(src, op); + } else { + for (auto dep_tag : deps) { + auto dep = op_node_map.at(dep_tag); + graph.link(dep.out(), op); + } + } + } + return graph; +} + +static StreamDesc parseAdvancedStream(const YAML::Node& node, const GlobalOptions& opts, + const std::string& default_name, const ReplaceBy& replace_by) { + StreamDesc stream; + + // FIXME: Create a function for the duplicate code below + stream.name = node["name"] ? node["name"].as() : default_name; + stream.frames_interval_in_us = 0u; + if (node["frames_interval_in_ms"]) { + stream.frames_interval_in_us = node["frames_interval_in_ms"].as() * 1000u; + if (node["target_fps"]) { + THROW_ERROR("Both \"target_fps\" and \"frames_interval_in_ms\" are defined for the stream: \"" + << stream.name << "\"! Please specify only one of them as they are mutually exclusive."); + } + } else if (node["target_fps"]) { + uint32_t target_fps = node["target_fps"].as(); + stream.frames_interval_in_us = (target_fps != 0) ? 
(1000u * 1000u / target_fps) : 0; + } + + if (node["target_latency_in_ms"]) { + stream.target_latency = std::make_optional(node["target_latency_in_ms"].as()); + if (stream.target_latency < 0) { + THROW_ERROR("\"target_latency_in_ms\" is negative for the stream: \"" << stream.name << "\"!"); + } + } + if (node["exec_time_in_secs"]) { + const auto exec_time_in_secs = node["exec_time_in_secs"].as(); + stream.criterion = std::make_shared(exec_time_in_secs * 1'000'000); + } + if (node["iteration_count"]) { + const auto iteration_count = node["iteration_count"].as(); + stream.criterion = std::make_shared(iteration_count); + } + + auto op_descs = node["op_desc"].as>(); + std::vector> connections; + if (node["connections"]) { + connections = node["connections"].as>>(); + } + + for (auto& desc : op_descs) { + if (std::holds_alternative(desc.op)) { + auto&& infer = std::get(desc.op); + stream.metrics_map.emplace(desc.tag, std::move(infer.accuracy_metrics)); + stream.initializers_map.emplace(desc.tag, std::move(infer.initializers)); + stream.input_data_map.emplace(desc.tag, std::move(infer.input_data)); + stream.output_data_map.emplace(desc.tag, std::move(infer.output_data)); + stream.infer_params_map.emplace(desc.tag, adjustParams(std::move(infer.params), opts, replace_by)); + } + if (std::holds_alternative(desc.op)) { + auto& compound = std::get(desc.op); + InferenceParamsMap& params_map = compound.params; + for (auto& pair : params_map) { + pair.second = adjustParams(std::move(pair.second), opts, replace_by); + } + } + } + + stream.graph = buildGraph(op_descs, connections); + return stream; +} + +static std::vector parseStreams(const YAML::Node& node, const GlobalOptions& opts, + const ReplaceBy& replace_by) { + std::vector streams; + uint32_t stream_idx = 0; + for (const auto& subnode : node) { + const auto default_name = std::to_string(stream_idx); + auto stream = subnode["op_desc"] ? 
parseAdvancedStream(subnode, opts, default_name, replace_by) + : parseStream(subnode, opts, default_name, replace_by); + streams.push_back(std::move(stream)); + ++stream_idx; + } + return streams; +} + +static std::vector parseScenarios(const YAML::Node& node, const GlobalOptions& opts, + const ReplaceBy& replace_by) { + std::vector scenarios; + for (const auto& subnode : node) { + ScenarioDesc scenario; + scenario.name = subnode["name"] ? subnode["name"].as() + : "multi_inference_" + std::to_string(scenarios.size()); + scenario.streams = parseStreams(subnode["input_stream_list"], opts, replace_by); + + if (opts.save_validation_outputs) { + for (auto& stream : scenario.streams) { + const auto& root_path = opts.save_validation_outputs.value(); + std::string stream_dir = "stream_" + stream.name; + std::filesystem::path stream_outputs_path = root_path / scenario.name / stream_dir; + stream.per_iter_outputs_path = std::make_optional(std::move(stream_outputs_path)); + } + } + scenarios.push_back(std::move(scenario)); + } + return scenarios; +} + +Config parseConfig(const YAML::Node& node, const ReplaceBy& replace_by) { + const auto global_opts = node.as(); + + // FIXME: Perhaps should be done somewhere else... 
+ Logger::global_lvl = toLogLevel(global_opts.log_level); + + Config config; + config.scenarios = parseScenarios(node["multi_inference"], global_opts, replace_by); + + ASSERT(!config.scenarios.empty()); + if (node["metric"]) { + config.metric = node["metric"].as(); + } + if (node["random"]) { + config.initializer = node["random"].as(); + } + + config.disable_high_resolution_timer = false; + if (node["disable_high_resolution_waitable_timer"]) { + config.disable_high_resolution_timer = node["disable_high_resolution_waitable_timer"].as(); + } + return config; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp new file mode 100644 index 00000000000000..1dec64ece423b6 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.hpp @@ -0,0 +1,12 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "parser/parser.hpp" + +#include + +Config parseConfig(const YAML::Node& root, const ReplaceBy& replace_by); diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp new file mode 100644 index 00000000000000..b4f48b7415615c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.cpp @@ -0,0 +1,20 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "parser/parser.hpp" +#include "parser/config.hpp" + +#include "utils/error.hpp" + +#include + +ScenarioParser::ScenarioParser(const std::string& filepath): m_filepath(filepath) { +} + +Config ScenarioParser::parseScenarios(const ReplaceBy& replace_by) { + const auto root = YAML::LoadFile(m_filepath); + // TODO: Extend to any other config syntax + return parseConfig(root, replace_by); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp 
b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp new file mode 100644 index 00000000000000..ec228ee8070fd3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/parser/parser.hpp @@ -0,0 +1,61 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "scenario/criterion.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" + +struct StreamDesc { + // NB: Commons parameters for all modes + std::string name; + uint64_t frames_interval_in_us; + ScenarioGraph graph; + InferenceParamsMap infer_params_map; + ITermCriterion::Ptr criterion; + // Mode specific params + ModelsAttrMap metrics_map; + ModelsAttrMap initializers_map; + ModelsAttrMap input_data_map; + ModelsAttrMap output_data_map; + std::optional target_latency; + std::optional per_iter_outputs_path; +}; + +struct ScenarioDesc { + std::string name; + std::vector streams; + bool disable_high_resolution_timer; +}; + +struct Config { + IRandomGenerator::Ptr initializer; + IAccuracyMetric::Ptr metric; + bool disable_high_resolution_timer; + std::vector scenarios; +}; + +struct ReplaceBy { + std::string device; +}; + +struct IScenarioParser { + virtual Config parseScenarios(const ReplaceBy& replace_by) = 0; + virtual ~IScenarioParser() = default; +}; + +class ScenarioParser : public IScenarioParser { +public: + ScenarioParser(const std::string& filepath); + Config parseScenarios(const ReplaceBy& replace_by) override; + +private: + std::string m_filepath; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/result.cpp b/src/plugins/intel_npu/tools/protopipe/src/result.cpp new file mode 100644 index 00000000000000..23c6c315eaf123 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/result.cpp @@ -0,0 +1,22 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "result.hpp" +#include "utils/error.hpp" + 
+Result::Result(const Error& error): m_status(error){}; +Result::Result(const Success& success): m_status(success){}; + +Result::operator bool() const { + return std::holds_alternative(m_status); +} + +std::string Result::str() const { + if (std::holds_alternative(m_status)) { + return std::get(m_status).msg; + } + ASSERT(std::holds_alternative(m_status)); + return std::get(m_status).reason; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/result.hpp b/src/plugins/intel_npu/tools/protopipe/src/result.hpp new file mode 100644 index 00000000000000..08cbd7b06fc940 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/result.hpp @@ -0,0 +1,30 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +struct Success { + std::string msg; +}; +struct Error { + std::string reason; +}; + +class Result { +public: + Result() = default; // monostate (empty) + Result(const Error& error); + Result(const Success& success); + + operator bool() const; + std::string str() const; + +private: + using Status = std::variant; + Status m_status; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp new file mode 100644 index 00000000000000..9f779b8dab8cfd --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.cpp @@ -0,0 +1,121 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/accuracy_metrics.hpp" + +#include "utils/error.hpp" + +Norm::Norm(const double tolerance): m_tolerance(tolerance){}; + +Result Norm::compare(const cv::Mat& lhs, const cv::Mat& rhs) { + cv::Mat lhsf32, rhsf32; + lhs.convertTo(lhsf32, CV_32F); + rhs.convertTo(rhsf32, CV_32F); + + ASSERT(lhsf32.total() == rhsf32.total()); + auto value = cv::norm(lhsf32, rhsf32); + + if (value > m_tolerance) { + std::stringstream ss; + 
ss << value << " > " << m_tolerance; + return Error{ss.str()}; + } + return Success{}; +} + +std::string Norm::str() { + std::stringstream ss; + ss << "Norm{tolerance: " << m_tolerance << "}"; + return ss.str(); +} + +Cosine::Cosine(const double threshold): m_threshold(threshold){}; + +Result Cosine::compare(const cv::Mat& lhs, const cv::Mat& rhs) { + cv::Mat lhsf32, rhsf32; + lhs.convertTo(lhsf32, CV_32F); + rhs.convertTo(rhsf32, CV_32F); + + ASSERT(lhsf32.total() == rhsf32.total()); + const auto* lhsptr = lhsf32.ptr(); + const auto* rhsptr = rhsf32.ptr(); + + double lhsdot = 0.0, rhsdot = 0.0, numr = 0.0; + for (size_t i = 0; i < lhsf32.total(); ++i) { + numr += lhsptr[i] * rhsptr[i]; + lhsdot += lhsptr[i] * lhsptr[i]; + rhsdot += rhsptr[i] * rhsptr[i]; + } + + const double eps = 1e-9; + if (lhsdot < eps || rhsdot < eps) { + return Error{"Division by zero!"}; + } + + const double similarity = numr / (std::sqrt(lhsdot) * std::sqrt(rhsdot)); + if (similarity > (1.0 + eps) || similarity < -(1.0 + eps)) { + std::stringstream ss; + ss << "Invalid result " << similarity << " (valid range [-1 : +1])"; + return Error{ss.str()}; + } + + if (m_threshold - eps > similarity) { + std::stringstream ss; + ss << similarity << " < " << m_threshold; + return Error{ss.str()}; + } + return Success{}; +} + +std::string Cosine::str() { + std::stringstream ss; + ss << "Cosine{threshold: " << m_threshold << "}"; + return ss.str(); +} + +NRMSE::NRMSE(const double tolerance): m_tolerance(tolerance){}; + +Result NRMSE::compare(const cv::Mat& lhs, const cv::Mat& rhs) { + cv::Mat lhsf32, rhsf32; + lhs.convertTo(lhsf32, CV_32F); + rhs.convertTo(rhsf32, CV_32F); + + const auto size = lhsf32.total(); + if (size == 0) { + std::stringstream ss; + ss << "Empty output and reference tensors, nrmse loss set to 0" << std::endl; + return Success{}; + } + + const auto* lhsptr = lhsf32.ptr(); + const auto* rhsptr = rhsf32.ptr(); + + double error = 0.0; + float lhsmax = 0.0, rhsmax = 0.0, lhsmin = 0.0, 
rhsmin = 0.0; + + for (size_t i = 0; i < size; ++i) { + const auto diff = lhsptr[i] - rhsptr[i]; + error += diff * diff; + lhsmax = std::max(lhsptr[i], lhsmax); + rhsmax = std::max(rhsptr[i], rhsmax); + lhsmin = std::min(lhsptr[i], lhsmin); + rhsmin = std::min(rhsptr[i], rhsmin); + } + + double nrmse = sqrt(error / size) / std::max(0.001f, std::max(lhsmax - lhsmin, rhsmax - rhsmin)); + + if (m_tolerance < nrmse) { + std::stringstream ss; + ss << nrmse << " > " << m_tolerance; + return Error{ss.str()}; + } + return Success{}; +} + +std::string NRMSE::str() { + std::stringstream ss; + ss << "nrmse{tolerance: " << m_tolerance << "}"; + return ss.str(); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp new file mode 100644 index 00000000000000..010039360ecb9b --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/accuracy_metrics.hpp @@ -0,0 +1,52 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "result.hpp" + +struct IAccuracyMetric { + using Ptr = std::shared_ptr; + virtual Result compare(const cv::Mat& lhs, const cv::Mat& rhs) = 0; + virtual std::string str() = 0; + virtual ~IAccuracyMetric() = default; +}; + +class Norm : public IAccuracyMetric { +public: + using Ptr = std::shared_ptr; + explicit Norm(const double tolerance); + Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override; + std::string str() override; + +private: + double m_tolerance; +}; + +class Cosine : public IAccuracyMetric { +public: + using Ptr = std::shared_ptr; + explicit Cosine(const double threshold); + Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override; + std::string str() override; + +private: + double m_threshold; +}; + +class NRMSE : public IAccuracyMetric { +public: + using Ptr = std::shared_ptr; + explicit NRMSE(const double 
tolerance); + Result compare(const cv::Mat& lhs, const cv::Mat& rhs) override; + std::string str() override; + +private: + double m_tolerance; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp new file mode 100644 index 00000000000000..b348fe92e811cb --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.cpp @@ -0,0 +1,72 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "criterion.hpp" + +#include + +#include "utils/utils.hpp" + +Iterations::Iterations(uint64_t num_iters): m_num_iters(num_iters), m_counter(0) { +} + +bool Iterations::check() const { + return m_counter != m_num_iters; +} + +void Iterations::update() { + ++m_counter; +} + +void Iterations::init() { + m_counter = 0; +} + +ITermCriterion::Ptr Iterations::clone() const { + return std::make_shared(*this); +} + +TimeOut::TimeOut(uint64_t time_in_us): m_time_in_us(time_in_us), m_start_ts(-1) { +} + +bool TimeOut::check() const { + return utils::timestamp() - m_start_ts < m_time_in_us; +} + +void TimeOut::update(){/* do nothing */}; + +void TimeOut::init() { + m_start_ts = utils::timestamp(); +} + +ITermCriterion::Ptr TimeOut::clone() const { + return std::make_shared(*this); +} + +CombinedCriterion::CombinedCriterion(ITermCriterion::Ptr lhs, ITermCriterion::Ptr rhs): m_lhs(lhs), m_rhs(rhs) { +} + +CombinedCriterion::CombinedCriterion(const CombinedCriterion& other) { + m_lhs = other.m_lhs->clone(); + m_rhs = other.m_rhs->clone(); +} + +bool CombinedCriterion::check() const { + return m_lhs->check() && m_rhs->check(); +} + +void CombinedCriterion::update() { + m_lhs->update(); + m_rhs->update(); +}; + +void CombinedCriterion::init() { + m_lhs->init(); + m_rhs->init(); +} + +ITermCriterion::Ptr CombinedCriterion::clone() const { + return std::make_shared(*this); +} diff --git 
a/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp new file mode 100644 index 00000000000000..28b440a7b3b0a3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/criterion.hpp @@ -0,0 +1,58 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +struct ITermCriterion { + using Ptr = std::shared_ptr; + virtual void init() = 0; + virtual void update() = 0; + virtual bool check() const = 0; + virtual ITermCriterion::Ptr clone() const = 0; +}; + +class Iterations : public ITermCriterion { +public: + Iterations(uint64_t num_iters); + + void init() override; + void update() override; + bool check() const override; + ITermCriterion::Ptr clone() const override; + +private: + uint64_t m_num_iters; + uint64_t m_counter; +}; + +class TimeOut : public ITermCriterion { +public: + TimeOut(uint64_t time_in_us); + + void init() override; + void update() override; + bool check() const override; + ITermCriterion::Ptr clone() const override; + +private: + uint64_t m_time_in_us; + uint64_t m_start_ts; +}; + +class CombinedCriterion : public ITermCriterion { +public: + CombinedCriterion(ITermCriterion::Ptr lhs, ITermCriterion::Ptr rhs); + CombinedCriterion(const CombinedCriterion& other); + + void init() override; + void update() override; + bool check() const override; + ITermCriterion::Ptr clone() const override; + +private: + ITermCriterion::Ptr m_lhs, m_rhs; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp new file mode 100644 index 00000000000000..c1648f3755cbfd --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.cpp @@ -0,0 +1,17 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/inference.hpp" + +#include +#include + 
+std::vector extractLayerNames(const std::vector& layers) { + std::vector names; + std::transform(layers.begin(), layers.end(), std::back_inserter(names), [](const auto& layer) { + return layer.name; + }); + return names; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp new file mode 100644 index 00000000000000..c4fd85aa26721a --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/inference.hpp @@ -0,0 +1,111 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +template +using AttrMap = std::map; +// NB: This type is supposed to be used to hold in/out layers +// attributes such as precision, layout, shape etc. +// +// User can provide attributes either: +// 1. std::monostate - No value specified explicitly. +// 2. Attr - value specified explicitly that should be broadcasted to all layers. +// 3. AttrMap[str->T] - map specifies value for particular layer. 
+template +using LayerVariantAttr = std::variant, Attr>; + +// NB: Map of model tag -> LayerVariantAttr +template +using ModelsAttrMap = std::unordered_map>; + +struct LayerInfo { + std::string name; + std::vector dims; + int prec; +}; +using LayersInfo = std::vector; + +std::vector extractLayerNames(const std::vector& layers); + +template +std::optional lookUp(const std::map& map, const K& key) { + const auto it = map.find(key); + if (it == map.end()) { + return {}; + } + return std::make_optional(std::move(it->second)); +} + +template +static AttrMap unpackLayerAttr(const LayerVariantAttr& attr, const std::vector& layer_names, + const std::string& attrname) { + AttrMap attrmap; + if (std::holds_alternative(attr)) { + auto value = std::get(attr); + for (const auto& name : layer_names) { + attrmap.emplace(name, value); + } + } else if (std::holds_alternative>(attr)) { + attrmap = std::get>(attr); + std::unordered_set layers_set{layer_names.begin(), layer_names.end()}; + for (const auto& [name, attr] : attrmap) { + const auto it = layers_set.find(name); + if (it == layers_set.end()) { + throw std::logic_error("Failed to find layer \"" + name + "\" to specify " + attrname); + } + } + } + return attrmap; +} + +struct OpenVINOParams { + struct ModelPath { + std::string model; + std::string bin; + }; + struct BlobPath { + std::string blob; + }; + using Path = std::variant; + + // NB: Mandatory parameters + Path path; + std::string device; + // NB: Optional parameters + LayerVariantAttr input_precision; + LayerVariantAttr output_precision; + LayerVariantAttr input_layout; + LayerVariantAttr output_layout; + LayerVariantAttr input_model_layout; + LayerVariantAttr output_model_layout; + std::map config; + size_t nireq = 1u; +}; + +struct ONNXRTParams { + std::string model_path; + std::map session_options; + // TODO: Extend for other available ONNXRT EP (e.g DML, CoreML, TensorRT, etc) + struct OpenVINO { + std::map params_map; + }; + // NB: std::monostate stands for the 
default MLAS Execution provider + using EP = std::variant; + EP ep; +}; + +using InferenceParams = std::variant; +using InferenceParamsMap = std::unordered_map; diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp new file mode 100644 index 00000000000000..96984966fbc6fc --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.cpp @@ -0,0 +1,40 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/scenario_graph.hpp" + +DataNode::DataNode(Graph* graph, NodeHandle nh): m_nh(nh) { + graph->meta(nh).set(Data{}); +}; + +OpNode::OpNode(NodeHandle nh, DataNode out_data): m_nh(nh), m_out_data(out_data) { +} + +DataNode OpNode::out() { + return m_out_data; +} + +DataNode ScenarioGraph::makeSource() { + NodeHandle nh = m_graph.create(); + m_graph.meta(nh).set(Source{}); + return DataNode(&m_graph, nh); +} + +void ScenarioGraph::link(DataNode data, OpNode op) { + m_graph.link(data.m_nh, op.m_nh); +} + +OpNode ScenarioGraph::makeInfer(const std::string& tag) { + return makeOp(Infer{tag}); +} + +OpNode ScenarioGraph::makeDelay(uint64_t time_in_us) { + return makeOp(Delay{time_in_us}); +} + +OpNode ScenarioGraph::makeCompound(uint64_t repeat_count, ScenarioGraph subgraph, InferenceParamsMap infer_params, + const std::string& tag) { + return makeOp(Compound{repeat_count, subgraph, infer_params, tag}); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp new file mode 100644 index 00000000000000..a9b6523a6be52d --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/scenario/scenario_graph.hpp @@ -0,0 +1,102 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "graph.hpp" +#include 
"scenario/accuracy_metrics.hpp" +#include "scenario/inference.hpp" +#include "utils/data_providers.hpp" + +struct Source {}; +struct Data {}; + +class DataNode { +public: + DataNode(Graph* graph, NodeHandle nh); + +private: + friend class ScenarioGraph; + NodeHandle m_nh; +}; + +class OpNode; +template <> +struct std::hash; + +class OpNode { +public: + OpNode(NodeHandle nh, DataNode out_data); + DataNode out(); + +private: + friend class ScenarioGraph; + friend struct std::hash; + NodeHandle m_nh; + DataNode m_out_data; +}; + +namespace std { +template <> +struct hash { + uint64_t operator()(const OpNode& op_node) const { + return std::hash()(op_node.m_nh); + } +}; +} // namespace std + +class ScenarioGraph { +public: + DataNode makeSource(); + OpNode makeInfer(const std::string& tag); + OpNode makeDelay(uint64_t time_in_us); + OpNode makeCompound(uint64_t repeat_count, ScenarioGraph subgraph, InferenceParamsMap infer_params, + const std::string& tag); + + void link(DataNode data, OpNode op); + + template + void pass(F&& f) { + f(m_graph); + } + +private: + template + OpNode makeOp(Kind&& kind); + +private: + Graph m_graph; +}; + +struct Infer { + std::string tag; +}; + +struct Delay { + uint64_t time_in_us; +}; + +struct Compound { + uint64_t repeat_count; + ScenarioGraph subgraph; + InferenceParamsMap infer_params; + std::string tag; +}; + +struct Op { + using Kind = std::variant; + Kind kind; +}; + +template +OpNode ScenarioGraph::makeOp(Kind&& kind) { + auto op_nh = m_graph.create(); + auto out_nh = m_graph.create(); + m_graph.meta(op_nh).set(Op{std::forward(kind)}); + m_graph.link(op_nh, out_nh); + return OpNode(op_nh, DataNode(&m_graph, out_nh)); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp new file mode 100644 index 00000000000000..ad0abc7fe89f9b --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.cpp @@ -0,0 +1,42 
@@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/computation.hpp" + +Computation::Computation(cv::GComputation&& comp, cv::GCompileArgs&& args, std::vector&& metas, GraphDesc&& desc) + : m_comp(std::move(comp)), + m_compile_args(std::move(args)), + m_out_meta(std::move(metas)), + m_desc(std::move(desc)) { +} + +uint32_t Computation::getMaxParallelBranches() const { + return m_desc.max_parallel_branches; +} + +const std::vector& Computation::getOutMeta() const { + return m_out_meta; +} + +cv::GCompiled Computation::compile(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args) { + auto compile_args = m_compile_args; + compile_args += std::move(args); + return m_comp.compile(std::move(in_meta), std::move(compile_args)); +} + +cv::GStreamingCompiled Computation::compileStreaming(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args) { + auto compile_args = m_compile_args; + compile_args += std::move(args); + return m_comp.compileStreaming(std::move(in_meta), std::move(compile_args)); +} + +cv::GMetaArgs descr_of(const std::vector& sources) { + cv::GMetaArgs meta; + meta.reserve(sources.size()); + for (auto src : sources) { + meta.push_back(src->descr_of()); + } + return meta; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp new file mode 100644 index 00000000000000..f9eba3b8c95a5f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation.hpp @@ -0,0 +1,36 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "graph.hpp" +#include "simulation/dummy_source.hpp" + +#include +#include + +class Computation { +public: + // NB: Holds information about Graph structure + struct GraphDesc { + const uint32_t max_parallel_branches; + }; + + Computation(cv::GComputation&& comp, cv::GCompileArgs&& args, std::vector&& 
metas, GraphDesc&& desc); + + uint32_t getMaxParallelBranches() const; + const std::vector& getOutMeta() const; + + cv::GCompiled compile(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args = {}); + cv::GStreamingCompiled compileStreaming(cv::GMetaArgs&& in_meta, cv::GCompileArgs&& args = {}); + +private: + cv::GComputation m_comp; + cv::GCompileArgs m_compile_args; + std::vector m_out_meta; + GraphDesc m_desc; +}; + +cv::GMetaArgs descr_of(const std::vector& sources); diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp new file mode 100644 index 00000000000000..d43a84ef5fe3a8 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.cpp @@ -0,0 +1,462 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/computation_builder.hpp" +#include "simulation/layers_reader.hpp" +#include "simulation/operations.hpp" +#include "simulation/performance_mode.hpp" +#include "simulation/simulation.hpp" + +#include "utils/error.hpp" + +#include + +struct OpBuilder { + void build(NodeHandle nh, const Infer& infer); + void build(NodeHandle nh, const Delay& delay); + void build(NodeHandle nh, const Compound& compound); + + Graph& graph; + IBuildStrategy::Ptr strategy; + const InferenceParamsMap& params_map; +}; + +void OpBuilder::build(NodeHandle nh, const Compound& compound) { + // Retrieving destination nodes of the current node nh + auto out_nhs = nh->dstNodes(); + + // NB: The Dummy node ensures proper handling of multiple inputs + auto dummy_nh = graph.create(); + auto provider = std::make_shared(utils::createRandom({1}, CV_8U)); + DummyCall dummy_call{{provider}, 0}; + graph.meta(dummy_nh).set(GOperation{std::move(dummy_call)}); + auto in_nhs = nh->srcNodes(); + + // removing input edges to go through dummy node and not to compound node + auto src_edges = nh->srcEdges(); 
+ for (size_t i = 0; i < src_edges.size(); ++i) { + graph.remove(src_edges[i]); + } + + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], dummy_nh)).set(InputIdx{i}); // Link each former input of nh to dummy_nh + } + + auto dummy_out_nh = graph.create(); // Create the dummy's output data node + graph.meta(graph.link(dummy_nh, dummy_out_nh)) + .set(OutputIdx{0u}); // Link the dummy node to its output data node + graph.meta(dummy_out_nh).set(GData{}); + graph.meta(graph.link(dummy_out_nh, nh)).set(InputIdx{0u}); + + ASSERT(nh->dstEdges().size() == 1u); + auto dst_edge = nh->dstEdges().front(); + graph.meta(dst_edge).set(OutputIdx{0u}); + + graph.meta(graph.link(nh, out_nhs.front())).set(OutputIdx{0u}); + + ModelsAttrMap input_data_map; + ModelsAttrMap initializers_map; + + for (const auto& [tag, params] : compound.infer_params) { + input_data_map[tag]; + initializers_map[tag]; + } + + PerformanceSimulation::Options opts{ + nullptr, // global_initializer + initializers_map, + input_data_map, + true, // inference_only + {} // target latency + }; + + Simulation::Config cfg{compound.tag, + 0u, // frames_interval_in_ms + false, // disable_high_resolution_timer + compound.subgraph, compound.infer_params}; + + auto compiled = std::make_shared(std::move(cfg), std::move(opts)) + ->compileSync(false /*drop_frames*/); + auto term_criterion = std::make_shared(compound.repeat_count); + auto f = [compiled, term_criterion]() { + compiled->run(term_criterion); + }; + + CompoundCall compound_call{f}; + graph.meta(nh).set(GOperation{std::move(compound_call)}); +} + +void OpBuilder::build(NodeHandle nh, const Delay& delay) { + auto in_nhs = nh->srcNodes(); + auto out_nhs = nh->dstNodes(); + // FIXME: Once nh is removed, delay info is no longer alive!!!
+ const auto time_in_us = delay.time_in_us; + graph.remove(nh); + + auto delay_nh = graph.create(); + auto provider = std::make_shared(utils::createRandom({1}, CV_8U)); + graph.meta(delay_nh).set(GOperation{DummyCall{{provider}, time_in_us}}); + + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], delay_nh)).set(InputIdx{i}); + } + graph.meta(graph.link(delay_nh, out_nhs.front())).set(OutputIdx{0u}); +} + +void OpBuilder::build(NodeHandle nh, const Infer& infer) { + const auto& params = params_map.at(infer.tag); + auto [in_layers, out_layers] = LayersReader::readLayers(params); + InferDesc desc{infer.tag, std::move(in_layers), std::move(out_layers)}; + + auto out_nhs = nh->dstNodes(); + ASSERT(out_nhs.size() == 1); + + auto [providers, in_meta, out_meta, disable_copy] = strategy->build(desc); + ASSERT(providers.size() == desc.input_layers.size()); + ASSERT(in_meta.size() == desc.input_layers.size()); + ASSERT(out_meta.size() == desc.output_layers.size()); + + // NB: Check whether a Delay was fused into this Infer + uint64_t delay_in_us = 0u; + if (graph.meta(nh).has()) { + delay_in_us = graph.meta(nh).get().time_in_us; + } + + auto dummy_nh = graph.create(); + DummyCall dummy_call{providers, delay_in_us, disable_copy}; + graph.meta(dummy_nh).set(GOperation{std::move(dummy_call)}); + auto in_nhs = nh->srcNodes(); + for (uint32_t i = 0; i < in_nhs.size(); ++i) { + graph.meta(graph.link(in_nhs[i], dummy_nh)).set(InputIdx{i}); + } + + graph.remove(nh); + + auto infer_nh = graph.create(); + for (uint32_t layer_idx = 0; layer_idx < desc.input_layers.size(); ++layer_idx) { + // NB: Create dummy out node and link with dummy.
+ auto dummy_out_nh = graph.create(); + graph.meta(dummy_out_nh) += std::move(in_meta[layer_idx]); + graph.meta(graph.link(dummy_nh, dummy_out_nh)).set(OutputIdx{layer_idx}); + graph.meta(dummy_out_nh).set(GData{}); + // NB: Finally link dummy out with infer + graph.meta(graph.link(dummy_out_nh, infer_nh)).set(InputIdx{layer_idx}); + } + + auto out_nh = out_nhs.front(); + graph.meta(graph.link(infer_nh, out_nh)).set(OutputIdx{0u}); + graph.meta(out_nh) += out_meta.front(); + for (uint32_t layer_idx = 1; layer_idx < desc.output_layers.size(); ++layer_idx) { + auto infer_out_nh = graph.create(); + graph.meta(infer_out_nh) = std::move(out_meta[layer_idx]); + graph.meta(infer_out_nh).set(GData{}); + graph.meta(graph.link(infer_nh, infer_out_nh)).set(OutputIdx{layer_idx}); + } + + InferCall infer_call{desc.tag, extractLayerNames(desc.input_layers), extractLayerNames(desc.output_layers)}; + graph.meta(infer_nh).set(GOperation{std::move(infer_call)}); +}; + +static bool fuseDelay(Graph& graph, NodeHandle nh, const Delay& delay) { + // NB: Current fusing is trivial and applied only for the following case: + // 1) Delay has only a single Infer reader + // 2) Infer doesn't have any other writers except Delay + // e.g: [Delay] -> (out) -> [Infer] + + // NB: Access readers of delay output data node. + auto delay_out_nh = nh->dstNodes().front(); + auto out_edges = delay_out_nh->dstEdges(); + // NB: Don't fuse Delay if it either has multiple readers + // or has no readers at all (1) + if (out_edges.size() != 1u) { + return false; + } + + auto out_edge = out_edges.front(); + auto op_nh = out_edge->dstNode(); + auto op = graph.meta(op_nh).get().kind; + // NB: Don't fuse Delay if the reader is either not an Infer (1) + // or has other writers besides Delay (2). + if (!std::holds_alternative(op) || op_nh->srcEdges().size() != 1u) { + // TODO: Can be also fused to another "delay".
+ return false; + } + + // NB: Fuse the Delay into Infer: + // 1) Assign Delay meta directly to Infer + // 2) Remove Delay node + // 3) Redirect Delay writers to Infer + graph.meta(op_nh).set(delay); + for (auto in_nh : nh->srcNodes()) { + graph.link(in_nh, op_nh); + } + graph.remove(nh); + graph.remove(delay_out_nh); + + return true; +} + +struct Protocol { + cv::GProtoArgs graph_inputs; + cv::GProtoArgs graph_outputs; +}; + +enum class NodeState { EXPLORING, VISITED }; + +static void visit(NodeHandle nh, std::unordered_map& state) { + auto curr_node_it = state.emplace(nh, NodeState::EXPLORING).first; + for (const auto& dst_nh : nh->dstNodes()) { + const auto dst_it = state.find(dst_nh); + if (dst_it == state.end()) { + visit(dst_nh, state); + } else if (dst_it->second == NodeState::EXPLORING) { + THROW_ERROR("Scenario graph has a cycle!"); + } + } + curr_node_it->second = NodeState::VISITED; +}; + +namespace passes { + +// NB: Throw an exception if there is a cycle in graph +void throwIfCycle(Graph& graph) { + std::unordered_map state; + for (const auto& nh : graph.nodes()) { + if (state.find(nh) == state.end()) { + visit(nh, state); + } + } +} + +// NB: Determines what would be the computation graph +// inputs and outputs and marks intermediate data nodes +void init(Graph& graph) { + ASSERT(!graph.nodes().empty()); + uint32_t num_sources = 0; + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + ++num_sources; + graph.meta(nh).set(GraphInput{}); + } else { + // NB: Check that graph is connected + ASSERT(!nh->srcNodes().empty()); + } + if (nh->dstNodes().empty()) { + ASSERT(graph.meta(nh).has()); + graph.meta(nh).set(GraphOutput{}); + } + if (!graph.meta(nh).has()) { + ASSERT(graph.meta(nh).has()); + graph.meta(nh).set(GData{}); + } + } + ASSERT(num_sources != 0); +}; + +// NB: Fuses delay to the inference nodes as the delay can be performed +// as part of the model dummy preprocessing +void fuseDelays(Graph& graph) { + // NB: Iterate over graph nodes 
until all delays are fused. + while (true) { + bool is_fused = false; + for (auto nh : graph.nodes()) { + if (!graph.meta(nh).has()) { + continue; + } + auto op = graph.meta(nh).get().kind; + if (std::holds_alternative(op)) { + auto delay = std::get(op); + if (fuseDelay(graph, nh, delay)) { + is_fused = true; + break; + } + } + } + // NB: If delay was fused, some of the nodes were removed + // Iterate one more time... + if (!is_fused) { + break; + } + } +}; + +// NB: Finds the maximum parallelism depth to tell concurrent executor +// how many threads should be used for execution +void findMaxParallelBranches(Graph& graph, uint32_t& max_parallel_branches) { + // NB: Basically the maximum parallelism in computational graph + // is the maximum width of its level in BFS traversal, taking into + // account that dependencies for the node are resolved + std::unordered_set curr_lvl; + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + for (auto op_nh : nh->dstNodes()) { + curr_lvl.emplace(op_nh); + } + } + } + + std::unordered_set visited; + + auto get_all_deps = [&](auto nh) { + std::unordered_set deps; + for (auto in_nhs : nh->srcNodes()) { + for (auto op_nhs : in_nhs->srcNodes()) { + deps.emplace(op_nhs); + } + } + return deps; + }; + + auto all_deps_resolved = [&](auto nh) { + auto deps = get_all_deps(nh); + return std::all_of(deps.begin(), deps.end(), [&](auto dep) { + return visited.find(dep) != visited.end(); + }); + }; + + max_parallel_branches = static_cast(curr_lvl.size()); + while (!curr_lvl.empty()) { + std::unordered_set next_lvl; + for (auto nh : curr_lvl) { + visited.emplace(nh); + ASSERT(nh->dstNodes().size() == 1u); + auto data_nh = nh->dstNodes().front(); + for (auto op_nh : data_nh->dstNodes()) { + if (all_deps_resolved(op_nh)) { + next_lvl.emplace(op_nh); + } + } + } + if (next_lvl.size() > max_parallel_branches) { + max_parallel_branches = static_cast(next_lvl.size()); + } + curr_lvl = std::move(next_lvl); + } +} + +// NB: Build "G" 
operations according to scenario graph nodes +void buildOperations(Graph& graph, IBuildStrategy::Ptr strategy, const InferenceParamsMap& params_map) { + OpBuilder builder{graph, strategy, params_map}; + for (auto nh : graph.nodes()) { + // NB: Skip data nodes + if (!graph.meta(nh).has()) { + continue; + } + std::visit( + [nh, &builder](const auto& op) { + builder.build(nh, op); + }, + graph.meta(nh).get().kind); + } + + for (auto nh : graph.nodes()) { + // NB: Make sure all data nodes that need to be + // dumped or validated are graph outputs. + if (!graph.meta(nh).has() && (graph.meta(nh).has() || graph.meta(nh).has())) { + graph.meta(nh).set(GraphOutput{}); + } + } +}; + +void buildComputation(Graph& graph, Protocol& proto) { + cv::GProtoArgs graph_inputs; + cv::GProtoArgs graph_outputs; + + std::unordered_map all_data; + auto sorted = graph.sorted(); + + // NB: Initialize "G" inputs + for (auto nh : sorted) { + if (graph.meta(nh).has()) { + auto it = all_data.emplace(nh, cv::GProtoArg{cv::GMat()}).first; + graph_inputs.push_back(it->second); + } + } + // NB: Apply "G" operations in topological order + for (auto nh : sorted) { + if (graph.meta(nh).has()) { + const auto& operation = graph.meta(nh).get(); + // NB: Map input args to the correct input index. + std::unordered_map idx_to_arg; + auto in_ehs = nh->srcEdges(); + for (auto in_eh : in_ehs) { + ASSERT(graph.meta(in_eh).has()); + const uint32_t in_idx = graph.meta(in_eh).get().idx; + auto arg = all_data.at(in_eh->srcNode()); + idx_to_arg.emplace(in_idx, arg); + } + cv::GProtoArgs in_args; + for (uint32_t idx = 0; idx < idx_to_arg.size(); ++idx) { + in_args.push_back(idx_to_arg.at(idx)); + } + // NB: Link G-API operation with its io data. + auto out_args = operation.on(in_args); + // TODO: Validate the number and types of inputs/outputs... + // NB: Map output args to the correct index.
+ auto out_ehs = nh->dstEdges(); + for (auto out_eh : out_ehs) { + ASSERT(graph.meta(out_eh).has()); + const uint32_t out_idx = graph.meta(out_eh).get().idx; + auto out_nh = out_eh->dstNode(); + all_data.emplace(out_nh, out_args[out_idx]); + } + } + } + + // NB: Collect "G" outputs + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + graph_outputs.push_back(all_data.at(nh)); + } + } + + ASSERT(!graph_inputs.empty()) + ASSERT(!graph_outputs.empty()) + // NB: Finally save computation i/o to build GComputation later on + proto = Protocol{std::move(graph_inputs), std::move(graph_outputs)}; +} + +static void collectOutputMeta(Graph& graph, std::vector& out_meta) { + for (auto nh : graph.nodes()) { + if (graph.meta(nh).has()) { + out_meta.push_back(graph.meta(nh)); + } + } +} + +} // namespace passes + +ComputationBuilder::ComputationBuilder(IBuildStrategy::Ptr strategy): m_strategy(strategy) { +} + +Computation ComputationBuilder::build(ScenarioGraph& graph, const InferenceParamsMap& infer_params, + const ComputationBuilder::Options& opts) { + uint32_t max_parallel_branches = 1u; + auto compile_args = cv::compile_args(cv::gapi::kernels()); + std::vector outputs_meta; + Protocol proto; + + using namespace std::placeholders; + graph.pass(passes::throwIfCycle); + graph.pass(passes::init); + graph.pass(passes::fuseDelays); + graph.pass(std::bind(passes::findMaxParallelBranches, _1, std::ref(max_parallel_branches))); + graph.pass(std::bind(passes::buildOperations, _1, m_strategy, std::cref(infer_params))); + graph.pass(std::bind(passes::buildComputation, _1, std::ref(proto))); + graph.pass(std::bind(passes::collectOutputMeta, _1, std::ref(outputs_meta))); + + if (opts.add_perf_meta) { + // FIXME: Must work with any G-Type! 
+ ASSERT(cv::util::holds_alternative(proto.graph_outputs.front())); + cv::GMat g = cv::util::get(proto.graph_outputs.front()); + proto.graph_outputs.emplace_back(cv::gapi::streaming::timestamp(g).strip()); + proto.graph_outputs.emplace_back(cv::gapi::streaming::seq_id(g).strip()); + } + + cv::GComputation comp(cv::GProtoInputArgs{std::move(proto.graph_inputs)}, + cv::GProtoOutputArgs{std::move(proto.graph_outputs)}); + + return Computation{std::move(comp), std::move(compile_args), std::move(outputs_meta), {max_parallel_branches}}; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp new file mode 100644 index 00000000000000..6a51b068065284 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/computation_builder.hpp @@ -0,0 +1,74 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "result.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/computation.hpp" +#include "utils/data_providers.hpp" + +#include +#include +#include + +struct InputIdx { + uint32_t idx; +}; + +struct OutputIdx { + uint32_t idx; +}; + +struct GraphInput {}; +struct GraphOutput {}; +struct GData {}; +struct GOperation { + using F = std::function; + F on; +}; + +struct Dump { + std::filesystem::path path; +}; + +struct Validate { + using F = std::function; + F validator; + std::vector reference; +}; + +struct InferDesc { + std::string tag; + LayersInfo input_layers; + LayersInfo output_layers; +}; + +struct IBuildStrategy { + using Ptr = std::shared_ptr; + struct InferBuildInfo { + std::vector providers; + std::vector inputs_meta; + std::vector outputs_meta; + const bool disable_copy; + }; + // NB: Extend for any further node types needed + virtual InferBuildInfo build(const InferDesc& infer) = 0; +}; + +class ComputationBuilder { +public: + 
explicit ComputationBuilder(IBuildStrategy::Ptr strategy); + + struct Options { + bool add_perf_meta; + }; + + Computation build(ScenarioGraph& graph, const InferenceParamsMap& infer_params, const Options& opts); + +private: + IBuildStrategy::Ptr m_strategy; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp new file mode 100644 index 00000000000000..3b10767b34135f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.cpp @@ -0,0 +1,89 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "dummy_source.hpp" + +#include + +#include "utils/utils.hpp" + +DummySource::DummySource(const uint64_t frames_interval_in_us, const bool drop_frames, + const bool disable_high_resolution_timer) + // NB: 0 is a special value that means no fps limit for the source. + : m_latency_in_us(frames_interval_in_us), + m_drop_frames(drop_frames), + m_timer(SleepTimer::create(disable_high_resolution_timer)), + // NB: Used for simulation, just return 1 byte. + m_mat(utils::createRandom({1}, CV_8U)) { +} + +bool DummySource::pull(cv::gapi::wip::Data& data) { + using namespace std::chrono; + using namespace cv::gapi::streaming; + using ts_t = microseconds; + + // NB: Wait m_latency_in_us before returning the first frame. + if (m_next_tick_ts == -1) { + m_next_tick_ts = utils::timestamp() + m_latency_in_us; + } + + int64_t curr_ts = utils::timestamp(); + if (curr_ts < m_next_tick_ts) { + /* + * curr_ts + * | + * ------|----*-----|-------> + * ^ + * m_next_tick_ts + * + * + * NB: New frame will be produced at the m_next_tick_ts point.
+ */ + m_timer->wait(ts_t{m_next_tick_ts - curr_ts}); + } else if (m_latency_in_us != 0) { + /* + * curr_ts + * +1 +2 | + * |----------|----------|----------|----*-----|-------> + * ^ ^ + * m_next_tick_ts -------------> + * + */ + + // NB: Count how many frames have been produced since last pull (m_next_tick_ts). + int64_t num_frames = static_cast((curr_ts - m_next_tick_ts) / m_latency_in_us); + // NB: Shift m_next_tick_ts to the nearest tick before curr_ts. + m_next_tick_ts += num_frames * m_latency_in_us; + // NB: if drop_frames is enabled, update current seq_id and wait for the next tick, otherwise + // return last written frame (+2 at the picture above) immediately. + if (m_drop_frames) { + // NB: Shift tick to the next frame. + m_next_tick_ts += m_latency_in_us; + // NB: Wait for the next frame. + m_timer->wait(ts_t{m_next_tick_ts - curr_ts}); + // NB: Drop already produced frames + update seq_id for the current. + m_curr_seq_id += num_frames + 1; + } + } + // NB: Just increase reference counter not to release mat memory + // after assigning it to the data. 
+ cv::Mat mat = m_mat; + + data.meta[meta_tag::timestamp] = utils::timestamp(); + data.meta[meta_tag::seq_id] = m_curr_seq_id++; + data = mat; + m_next_tick_ts += m_latency_in_us; + + return true; +} + +cv::GMetaArg DummySource::descr_of() const { + return cv::GMetaArg{cv::descr_of(m_mat)}; +} + +void DummySource::reset() { + m_next_tick_ts = -1; + m_curr_seq_id = 0; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp new file mode 100644 index 00000000000000..304e4e7ef2f512 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/dummy_source.hpp @@ -0,0 +1,37 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include +#include // cv::gapi::wip::IStreamSource + +#include "utils/timer.hpp" +#include "utils/utils.hpp" + +class DummySource final : public cv::gapi::wip::IStreamSource { +public: + using Ptr = std::shared_ptr; + + explicit DummySource(const uint64_t frames_interval_in_us, const bool drop_frames, + const bool disable_high_resolution_timer); + + bool pull(cv::gapi::wip::Data& data) override; + cv::GMetaArg descr_of() const override; + void reset(); + +private: + uint64_t m_latency_in_us; + bool m_drop_frames; + IWaitable::Ptr m_timer; + + cv::Mat m_mat; + int64_t m_next_tick_ts = -1; + int64_t m_curr_seq_id = 0; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp new file mode 100644 index 00000000000000..4a0fa451dace91 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.cpp @@ -0,0 +1,66 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "executor.hpp" +#include "utils/error.hpp" + +#include + 
+PipelinedExecutor::PipelinedExecutor(cv::GStreamingCompiled&& compiled): m_compiled(std::move(compiled)) { +} + +PipelinedExecutor::Output PipelinedExecutor::runLoop(cv::GRunArgs&& inputs, Callback callback, + ITermCriterion::Ptr criterion) { + if (!criterion) { + THROW_ERROR("Termination criterion hasn't been specified!"); + } + + using namespace std::chrono; + using clock_t = high_resolution_clock; + + m_compiled.setSource(std::move(inputs)); + criterion->init(); + + const auto start_tick = clock_t::now(); + m_compiled.start(); + while (criterion->check()) { + if (!callback(m_compiled)) { + break; + } + criterion->update(); + } + const auto end_tick = clock_t::now(); + // NB: Some frames might still be in the queue; just wait until they are processed. + // They shouldn't be taken into account since execution is over. + m_compiled.stop(); + return Output{static_cast(duration_cast(end_tick - start_tick).count())}; +} + +SyncExecutor::SyncExecutor(cv::GCompiled&& compiled): m_compiled(std::move(compiled)) { +} + +SyncExecutor::Output SyncExecutor::runLoop(Callback callback, ITermCriterion::Ptr criterion) { + if (!criterion) { + THROW_ERROR("Termination criterion hasn't been specified!"); + } + + using namespace std::chrono; + using clock_t = high_resolution_clock; + + const auto start_tick = clock_t::now(); + criterion->init(); + while (criterion->check()) { + if (!callback(m_compiled)) { + break; + } + criterion->update(); + } + const auto end_tick = clock_t::now(); + return Output{static_cast(duration_cast(end_tick - start_tick).count())}; +} + +void SyncExecutor::reset() { + m_compiled.prepareForNewStream(); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp new file mode 100644 index 00000000000000..17d32937b8ba54 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/executor.hpp @@ -0,0 +1,42 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +//
SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include // cv::GCompiled +#include // cv::GStreamingCompiled + +#include "scenario/criterion.hpp" + +class PipelinedExecutor { +public: + explicit PipelinedExecutor(cv::GStreamingCompiled&& compiled); + + struct Output { + uint64_t elapsed_us; + }; + using Callback = std::function; + + Output runLoop(cv::GRunArgs&& inputs, Callback callback, ITermCriterion::Ptr criterion); + +private: + cv::GStreamingCompiled m_compiled; +}; + +class SyncExecutor { +public: + explicit SyncExecutor(cv::GCompiled&& compiled); + + struct Output { + uint64_t elapsed_us; + }; + using Callback = std::function; + + Output runLoop(Callback callback, ITermCriterion::Ptr criterion); + void reset(); + +private: + cv::GCompiled m_compiled; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp new file mode 100644 index 00000000000000..f3b621c68e8f99 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.cpp @@ -0,0 +1,155 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "layers_data.hpp" + +#include +#include + +#include "utils/error.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +std::string normalizeLayerName(const std::string& layer_name) { + std::string normalized = layer_name; + std::unordered_set prohibited = {'\\', '/', ':', '*', '?', '"', '<', '>'}; + std::replace_if( + normalized.begin(), normalized.end(), + [&prohibited](char ch) { + return prohibited.find(ch) != prohibited.end(); + }, + '_'); + return normalized; +}; + +std::vector uploadLayerData(const std::filesystem::path& path, const std::string& tag, + const LayerInfo& layer) { + if (!std::filesystem::exists(path) || !std::filesystem::is_directory(path)) { + THROW_ERROR("Failed to find data folder: " << path << " for model: " << tag << ", layer: " << layer.name); 
+ } + std::string iter_file_pattern = "iter_(\\d+)\\.bin"; + std::regex regex(iter_file_pattern); + std::unordered_map iter_files_map; + for (const auto& entry : std::filesystem::directory_iterator{path}) { + std::smatch match; + const auto& filename = entry.path().filename().string(); + if (std::regex_match(filename, match, regex)) { + const auto iter_idx = std::stoi(match[1].str()); + iter_files_map.emplace(iter_idx, entry); + } + } + std::vector out_mats; + for (int i = 0; i < iter_files_map.size(); ++i) { + if (auto it = iter_files_map.find(i); it != iter_files_map.end()) { + cv::Mat mat; + utils::createNDMat(mat, layer.dims, layer.prec); + utils::readFromBinFile(it->second.string(), mat); + out_mats.push_back(std::move(mat)); + } else { + THROW_ERROR("Failed to find data for iteration: " << i << ", model: " << tag << ", layer: " << layer.name); + } + } + return out_mats; +} + +using LayersDataMap = std::unordered_map>; +LayersDataMap uploadFromDirectory(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers) { + LayersDataMap layers_data; + for (const auto& layer : layers) { + auto normalized = normalizeLayerName(layer.name); + auto data = uploadLayerData(path / normalized, tag, layer); + if (data.empty()) { + THROW_ERROR("No iterations data found for model: " << tag << ", layer: " << layer.name); + } + LOG_INFO() << " - Found " << data.size() << " iteration(s) for layer: " << layer.name << std::endl; + layers_data.emplace(layer.name, std::move(data)); + } + return layers_data; +} + +LayersDataMap uploadData(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers, + LayersType type) { + ASSERT(!layers.empty()); + const std::string kLayersTypeStr = type == LayersType::INPUT ? 
"input" : "output"; + if (!std::filesystem::exists(path)) { + THROW_ERROR("" << path << " must exist to upload layers data!") + } + LayersDataMap layers_data; + if (std::filesystem::is_directory(path)) { + layers_data = uploadFromDirectory(path, tag, layers); + } else { + if (layers.size() > 1u) { + THROW_ERROR("Model: " << tag << " must have exactly one " << kLayersTypeStr + << " layer in order to upload data from: " << path); + } + const auto& layer = layers.front(); + cv::Mat mat; + utils::createNDMat(mat, layer.dims, layer.prec); + utils::readFromBinFile(path.string(), mat); + LOG_INFO() << " - Found single iteration data for model: " << tag << ", layer: " << layer.name << std::endl; + layers_data = {{layer.name, std::vector{mat}}}; + } + // NB: layers_data can't be empty as long as layers vector is non-empty. + const auto kNumPerLayerIterations = layers_data.begin()->second.size(); + // NB: All i/o layers for model must have the equal amount of data. + for (const auto& [layer_name, data_vec] : layers_data) { + if (data_vec.size() != kNumPerLayerIterations) { + THROW_ERROR("Model: " << tag << " has different amount of data for " << kLayersTypeStr + << " layer: " << layer_name << "(" << data_vec.size() << ") and layer: " + << layers_data.begin()->first << "(" << kNumPerLayerIterations << ")"); + } + } + return layers_data; +} + +bool isDirectory(const std::filesystem::path& path) { + if (std::filesystem::exists(path)) { + return std::filesystem::is_directory(path); + } + return path.extension().empty(); +} + +std::vector createConstantProviders(LayersDataMap&& layers_data, + const std::vector& layer_names) { + std::vector providers; + for (const auto& layer_name : layer_names) { + auto layer_data = layers_data.at(layer_name); + providers.push_back(std::make_shared(std::move(layer_data))); + } + return providers; +} + +std::vector createRandomProviders(const LayersInfo& layers, + const std::map& generators) { + std::vector providers; + for (const auto& layer : 
layers) { + auto generator = generators.at(layer.name); + auto provider = std::make_shared(generator, layer.dims, layer.prec); + LOG_INFO() << " - Random generator: " << generator->str() << " will be used for layer: " << layer.name + << std::endl; + providers.push_back(std::move(provider)); + } + return providers; +} + +std::vector createDirectoryLayout(const std::filesystem::path& path, + const std::vector& layer_names) { + std::vector dirs_path; + std::filesystem::create_directories(path); + for (const auto& layer_name : layer_names) { + // NB: Use normalized layer name to create dir + // to store reference data for particular layer. + std::filesystem::path curr_dir = path / normalizeLayerName(layer_name); + dirs_path.push_back(curr_dir); + std::filesystem::create_directory(curr_dir); + { + // NB: Save the original layer name; + std::ofstream file{curr_dir / "layer_name.txt"}; + ASSERT(file.is_open()); + file << layer_name; + } + } + return dirs_path; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp new file mode 100644 index 00000000000000..6d2b9bc6716212 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_data.hpp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "scenario/inference.hpp" +#include "utils/data_providers.hpp" + +std::string normalizeLayerName(const std::string& layer_name); +std::vector uploadLayerData(const std::filesystem::path& path, const std::string& tag, const LayerInfo& layer); + +enum class LayersType { INPUT = 0, OUTPUT }; +using LayersDataMap = std::unordered_map>; +LayersDataMap uploadFromDirectory(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers); + +LayersDataMap uploadData(const std::filesystem::path& path, const std::string& tag, const LayersInfo& layers, + 
LayersType type); + +bool isDirectory(const std::filesystem::path& path); + +std::vector createConstantProviders(LayersDataMap&& layers_data, + const std::vector& layer_names); + +std::vector createRandomProviders(const LayersInfo& layers, + const std::map& generators); + +std::vector createDirectoryLayout(const std::filesystem::path& path, + const std::vector& layer_names); +template +std::map unpackWithDefault(const LayerVariantAttr& attr, const std::vector& layer_names, + const T& def_value) { + std::map result; + if (std::holds_alternative(attr)) { + for (const auto& layer_name : layer_names) { + result.emplace(layer_name, def_value); + } + } else if (std::holds_alternative(attr)) { + auto val = std::get(attr); + for (const auto& layer_name : layer_names) { + result.emplace(layer_name, val); + } + } else { + auto map = std::get>(attr); + for (const auto& layer_name : layer_names) { + if (auto it = map.find(layer_name); it != map.end()) { + result.emplace(layer_name, it->second); + } else { + result.emplace(layer_name, def_value); + } + } + } + return result; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp new file mode 100644 index 00000000000000..72c1e9539773e3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.cpp @@ -0,0 +1,46 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/layers_reader.hpp" +#include "scenario/inference.hpp" +#include "utils/error.hpp" +#include "utils/logger.hpp" + +OpenVINOLayersReader& getOVReader() { + static OpenVINOLayersReader reader; + return reader; +} + +static std::string getModelFileName(const InferenceParams& params) { + if (std::holds_alternative(params)) { + const auto& ov_params = std::get(params); + if (std::holds_alternative(ov_params.path)) { + return std::get(ov_params.path).model; + } else { + 
ASSERT(std::holds_alternative(ov_params.path)); + return std::get(ov_params.path).blob; + } + } else if (std::holds_alternative(params)) { + return std::get(params).model_path; + } else { + THROW_ERROR("Unsupported model parameters type!"); + } + // NB: Unreachable + ASSERT(false); +} + +InOutLayers LayersReader::readLayers(const InferenceParams& params) { + LOG_INFO() << "Reading model " << getModelFileName(params) << std::endl; + if (std::holds_alternative(params)) { + const auto& ov = std::get(params); + return getOVReader().readLayers(ov); + } + ASSERT(std::holds_alternative(params)); + const auto& ort = std::get(params); + // NB: Using OpenVINO to read the i/o layers information for *.onnx model + OpenVINOParams ov; + ov.path = OpenVINOParams::ModelPath{ort.model_path, ""}; + return getOVReader().readLayers(ov, true /* use_results_names */); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp new file mode 100644 index 00000000000000..1d701272255fb0 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/layers_reader.hpp @@ -0,0 +1,27 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "scenario/inference.hpp" + +#include + +struct InOutLayers { + LayersInfo in_layers; + LayersInfo out_layers; +}; + +class OpenVINOLayersReader { +public: + OpenVINOLayersReader(); + InOutLayers readLayers(const OpenVINOParams& params, const bool use_results_names = false); + +private: + class Impl; + std::shared_ptr m_impl; +}; + +namespace LayersReader { +InOutLayers readLayers(const InferenceParams& params); +} // namespace LayersReader diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp new file mode 100644 index 00000000000000..1b353dbf6e7288 --- /dev/null +++ 
b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.cpp @@ -0,0 +1,131 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/operations.hpp" +#include "utils/error.hpp" + +cv::GProtoArgs InferCall::operator()(const cv::GProtoArgs& inputs) { + cv::GInferInputs infer_inputs; + for (int i = 0; i < inputs.size(); ++i) { + auto gmat = cv::util::get(inputs[i]); + infer_inputs[input_names[i]] = gmat; + } + auto infer_outputs = cv::gapi::infer(tag, infer_inputs); + cv::GProtoArgs outputs; + for (int i = 0; i < output_names.size(); ++i) { + outputs.emplace_back(infer_outputs.at(output_names[i])); + } + return outputs; +} + +std::vector GDummyM::on(const std::vector& ins, const uint64_t delay_in_us, + const std::vector& providers, const bool disable_copy) { + std::vector shapes; + std::vector op_kinds; + std::vector host_ctors; + std::vector gargs; + std::vector out_kinds; + + gargs.emplace_back(providers); + gargs.emplace_back(delay_in_us); + gargs.emplace_back(disable_copy); + + for (int i = 0; i < ins.size(); ++i) { + auto shape = cv::detail::GTypeTraits::shape; + shapes.push_back(shape); + auto op_kind = cv::detail::GTypeTraits::op_kind; + op_kinds.push_back(op_kind); + host_ctors.push_back(cv::detail::GObtainCtor::get()); + gargs.emplace_back(ins[i]); + } + + const size_t num_outputs = providers.size(); + for (int i = 0; i < num_outputs; ++i) { + auto op_kind = cv::detail::GTypeTraits::op_kind; + out_kinds.push_back(op_kind); + } + + using namespace std::placeholders; + cv::GKernel k{GDummyM::id(), + "", + std::bind(&GDummyM::getOutMeta, _1, _2), + std::move(shapes), + std::move(op_kinds), + std::move(host_ctors), + std::move(out_kinds)}; + + cv::GCall call(std::move(k)); + call.setArgs(std::move(gargs)); + + std::vector outs; + outs.reserve(num_outputs); + for (int i = 0; i < num_outputs; ++i) { + outs.push_back(call.yield(i)); + } + + return outs; +} + +cv::GMetaArgs 
GDummyM::getOutMeta(const cv::GMetaArgs&, const cv::GArgs& args) { + const auto& providers = args.front().get>(); + cv::GMetaArgs out_metas; + out_metas.reserve(providers.size()); + for (auto provider : providers) { + out_metas.emplace_back(provider->desc()); + } + return out_metas; +} + +cv::gapi::GBackend GCPUDummyM::backend() { + return cv::gapi::cpu::backend(); +} + +cv::GCPUKernel GCPUDummyM::kernel() { + return cv::GCPUKernel(&GCPUDummyM::call, &GCPUDummyM::setup); +} + +void GCPUDummyM::setup(const cv::GMetaArgs& metas, cv::GArgs gargs, cv::GArg& state, const cv::GCompileArgs& args) { + state = cv::GArg(std::make_shared()); + auto providers = gargs.front().get>(); + for (auto& provider : providers) { + provider->reset(); + } +} + +void GCPUDummyM::call(cv::GCPUContext& ctx) { + using namespace std::chrono; + const bool disable_copy = ctx.inArg(2u); + uint64_t elapsed = disable_copy ? 0u : utils::measure([&]() { + auto& providers = ctx.inArg>(0u); + for (size_t i = 0; i < providers.size(); ++i) { + providers[i]->pull(ctx.outMatR(static_cast(i))); + } + }); + const auto delay_in_us = ctx.inArg(1u); + utils::busyWait(microseconds{std::max(delay_in_us - elapsed, uint64_t{0})}); +} + +cv::GProtoArgs DummyCall::operator()(const cv::GProtoArgs& inputs) { + std::vector gmats; + gmats.reserve(inputs.size()); + for (auto& in : inputs) { + gmats.emplace_back(cv::util::get(in)); + } + auto outputs = GDummyM::on(gmats, delay_in_us, providers, disable_copy); + cv::GProtoArgs proto_outputs; + for (auto& out : outputs) { + proto_outputs.emplace_back(cv::GProtoArg{out}); + } + return proto_outputs; +} + +cv::GProtoArgs CompoundCall::operator()(const cv::GProtoArgs& inputs) { + ASSERT(inputs.size() == 1) + cv::GMat in = cv::util::get(inputs[0]); + + cv::GProtoArgs proto_outputs; + proto_outputs.emplace_back(GCompound::on(in, function)); + return proto_outputs; +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp 
b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp new file mode 100644 index 00000000000000..cce38c9d83d07f --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/operations.hpp @@ -0,0 +1,77 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include // GAPI_OCV_KERNEL +#include // G_API_OP +#include + +#include "utils/data_providers.hpp" +#include "utils/utils.hpp" + +// clang-format off +struct InferCall { + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); + + std::string tag; + std::vector input_names; + std::vector output_names; +}; + +struct DummyState { }; +struct GDummyM { + static const char *id() { return "custom.dummym"; } + static std::vector on(const std::vector &ins, + const uint64_t delay_in_us, + const std::vector &providers, + const bool disable_copy); + static cv::GMetaArgs getOutMeta(const cv::GMetaArgs&, const cv::GArgs &args); +}; + +struct GCPUDummyM: public cv::detail::KernelTag { + using API = GDummyM; + using State = DummyState; + + static cv::gapi::GBackend backend(); + static cv::GCPUKernel kernel(); + static void setup(const cv::GMetaArgs &metas, + cv::GArgs gargs, + cv::GArg &state, + const cv::GCompileArgs &args); + static void call(cv::GCPUContext &ctx); +}; + +struct DummyCall { + std::vector providers; + uint64_t delay_in_us; + // NB: Don't pull data from providers if enabled + bool disable_copy = false; + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); +}; + +using F = std::function; + +G_TYPED_KERNEL(GCompound, , "custom.compound") +{ + static cv::GMatDesc outMeta(cv::GMatDesc in, F){ + return in; + } +}; + +GAPI_OCV_KERNEL(GCPUCompound, GCompound) +{ + static void run(const cv::Mat& in, + F function, + cv::Mat& out) + { + function(); + } +}; + +struct CompoundCall { + cv::GProtoArgs operator()(const cv::GProtoArgs& inputs); + F function; +}; diff --git 
a/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp new file mode 100644 index 00000000000000..57527cef0cc4aa --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/ov_layers_reader.cpp @@ -0,0 +1,215 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/layers_reader.hpp" + +#include // CV_* +#include + +#include "utils/error.hpp" + +#include + +class OpenVINOLayersReader::Impl { +public: + InOutLayers readLayers(const OpenVINOParams& params, const bool use_results_names); + +private: + InOutLayers readFromBlob(const std::string& blob, const std::string& device, + const std::map& config); + + InOutLayers readFromModel(const std::string& xml, const std::string& bin, const OpenVINOParams& params, + const bool use_results_names); + +private: + ov::Core m_core; +}; + +OpenVINOLayersReader::OpenVINOLayersReader(): m_impl(new OpenVINOLayersReader::Impl{}) { +} + +static ov::element::Type toElementType(int cvdepth) { + switch (cvdepth) { + case CV_8U: + return ov::element::u8; + case CV_32S: + return ov::element::i32; + case CV_32F: + return ov::element::f32; + case CV_16F: + return ov::element::f16; + } + throw std::logic_error("Failed to convert opencv depth to ov::element::Type"); +} + +static std::vector toDims(const std::vector& sz_vec) { + std::vector result; + result.reserve(sz_vec.size()); + for (auto sz : sz_vec) { + // FIXME: Probably requires some check... 
+ result.push_back(static_cast(sz)); + } + return result; +} + +static int toPrecision(ov::element::Type prec) { + switch (prec) { + case ov::element::u8: + return CV_8U; + case ov::element::i32: + return CV_32S; + case ov::element::f32: + return CV_32F; + case ov::element::f16: + return CV_16F; + case ov::element::i64: + return CV_32S; + } + throw std::logic_error("Unsupported OV precision"); +} + +template +std::vector ovToLayersInfo(const InfoVec& vec) { + std::vector layers; + layers.reserve(vec.size()); + std::transform(vec.begin(), vec.end(), std::back_inserter(layers), [](const auto& node) { + return LayerInfo{node.get_any_name(), toDims(node.get_shape()), toPrecision(node.get_element_type())}; + }); + return layers; +}; + +static void cfgInputPreproc(ov::preprocess::PrePostProcessor& ppp, const std::shared_ptr& model, + const AttrMap& input_precision, const AttrMap& input_layout, + const AttrMap& input_model_layout) { + for (const auto& input : model->inputs()) { + const auto& name = input.get_any_name(); + auto& ii = ppp.input(name); + + const auto ip = lookUp(input_precision, name); + if (ip.has_value()) { + ii.tensor().set_element_type(toElementType(*ip)); + } + + const auto il = lookUp(input_layout, name); + if (il.has_value()) { + ii.tensor().set_layout(ov::Layout(*il)); + } + + const auto iml = lookUp(input_model_layout, name); + if (iml.has_value()) { + ii.model().set_layout(ov::Layout(*iml)); + } + } +} + +static void cfgOutputPostproc(ov::preprocess::PrePostProcessor& ppp, const std::shared_ptr& model, + const AttrMap& output_precision, const AttrMap& output_layout, + const AttrMap output_model_layout) { + for (const auto& output : model->outputs()) { + const auto& name = output.get_any_name(); + auto& oi = ppp.output(name); + + const auto op = lookUp(output_precision, name); + if (op.has_value()) { + oi.tensor().set_element_type(toElementType(*op)); + } + + const auto ol = lookUp(output_layout, name); + if (ol.has_value()) { + 
oi.tensor().set_layout(ov::Layout(*ol)); + } + + const auto oml = lookUp(output_model_layout, name); + if (oml.has_value()) { + oi.model().set_layout(ov::Layout(*oml)); + } + } +} + +static std::vector extractLayerNames(const std::vector>& nodes) { + std::vector names; + std::transform(nodes.begin(), nodes.end(), std::back_inserter(names), [](const auto& node) { + return node.get_any_name(); + }); + return names; +} + +InOutLayers OpenVINOLayersReader::Impl::readFromModel(const std::string& model_path, const std::string& bin_path, + const OpenVINOParams& params, const bool use_results_names) { + auto model = m_core.read_model(model_path, bin_path); + { + ov::preprocess::PrePostProcessor ppp(model); + + const auto& input_names = extractLayerNames(model->inputs()); + const auto ip_map = unpackLayerAttr(params.input_precision, input_names, "input precision"); + const auto il_map = unpackLayerAttr(params.input_layout, input_names, "input layout"); + const auto iml_map = unpackLayerAttr(params.input_model_layout, input_names, "input model layout"); + cfgInputPreproc(ppp, model, ip_map, il_map, iml_map); + + const auto& output_names = extractLayerNames(model->outputs()); + const auto op_map = unpackLayerAttr(params.output_precision, output_names, "output precision"); + const auto ol_map = unpackLayerAttr(params.output_layout, output_names, "output layout"); + const auto oml_map = unpackLayerAttr(params.output_model_layout, output_names, "output model layout"); + cfgOutputPostproc(ppp, model, op_map, ol_map, oml_map); + + model = ppp.build(); + } + + auto input_layers = ovToLayersInfo(model->inputs()); + auto output_layers = ovToLayersInfo(model->outputs()); + + // FIXME: UGLY WA in order to use layer names obtained by OV reader in ONNXRT. + // Ideally there should be corresponding ONNXRT reader instead!!! 
+ // Result nodes friendly names preserve the names from original model, + // so the could be used in different framework (not only OpenVINO) + if (use_results_names) { + const auto& results = model->get_results(); + for (int i = 0; i < results.size(); ++i) { + auto result_name = results[i]->get_friendly_name(); + // This suffix is hardcoded at the OpenVINO side + const std::string suffix = "/sink_port_0"; + const auto kSuffixStartPos = result_name.length() - suffix.length(); + // Check that suffix is still presented at the OpenVINO side + ASSERT(result_name.substr(kSuffixStartPos) == suffix); + // Drop the suffix as it's not needed and update the name + result_name = result_name.substr(0, kSuffixStartPos); + output_layers[i].name = result_name; + } + } + + return {std::move(input_layers), std::move(output_layers)}; +} + +InOutLayers OpenVINOLayersReader::Impl::readFromBlob(const std::string& blob, const std::string& device, + const std::map& config) { + std::ifstream file(blob, std::ios_base::in | std::ios_base::binary); + if (!file.is_open()) { + THROW_ERROR("Failed to import model from: " << blob); + } + + auto compiled_model = m_core.import_model(file, device, {config.begin(), config.end()}); + + auto input_layers = ovToLayersInfo(compiled_model.inputs()); + auto output_layers = ovToLayersInfo(compiled_model.outputs()); + + return {std::move(input_layers), std::move(output_layers)}; +} + +InOutLayers OpenVINOLayersReader::Impl::readLayers(const OpenVINOParams& params, const bool use_results_names) { + if (std::holds_alternative(params.path)) { + const auto& path = std::get(params.path); + return readFromModel(path.model, path.bin, params, use_results_names); + } + ASSERT(std::holds_alternative(params.path)); + // NB: use_results_names is WA for reading layer names for the further usage in ONNXRT + // since ONNXRT is always ModelPath case (*.onnx format), no need to handle this for *.blob's + ASSERT(!use_results_names); + const auto& path = 
std::get(params.path); + return readFromBlob(path.blob, params.device, params.config); +} + +InOutLayers OpenVINOLayersReader::readLayers(const OpenVINOParams& params, const bool use_results_names) { + return m_impl->readLayers(params, use_results_names); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp new file mode 100644 index 00000000000000..4e47b34e3d2d35 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.cpp @@ -0,0 +1,337 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "performance_mode.hpp" + +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs +#include // ov::benchmark_mode{} + +#include + +class PerformanceMetrics { +public: + PerformanceMetrics(const uint64_t elapsed, const std::vector latency, const std::vector seq_ids); + friend std::ostream& operator<<(std::ostream& os, const PerformanceMetrics& metrics); + +private: + // TODO: avg, min, max statistics can be encapsulated. 
+ double avg_latency_ms; + double min_latency_ms; + double max_latency_ms; + int64_t total_frames; + double fps; + int64_t dropped; +}; + +PerformanceMetrics::PerformanceMetrics(const uint64_t elapsed_us, const std::vector latency_us, + const std::vector seq_ids) { + avg_latency_ms = utils::avg(latency_us) / 1000.0; + min_latency_ms = utils::min(latency_us) / 1000.0; + max_latency_ms = utils::max(latency_us) / 1000.0; + double elapsed_ms = static_cast(elapsed_us / 1000.0); + fps = latency_us.size() / elapsed_ms * 1000; + + dropped = 0; + int64_t prev_seq_id = seq_ids[0]; + for (size_t i = 1; i < seq_ids.size(); ++i) { + dropped += seq_ids[i] - prev_seq_id - 1; + prev_seq_id = seq_ids[i]; + } + total_frames = seq_ids.back() + 1; +} + +std::ostream& operator<<(std::ostream& os, const PerformanceMetrics& metrics) { + os << "throughput: " << metrics.fps << " FPS, latency: min: " << metrics.min_latency_ms + << " ms, avg: " << metrics.avg_latency_ms << " ms, max: " << metrics.max_latency_ms + << " ms, frames dropped: " << metrics.dropped << "/" << metrics.total_frames; + return os; +} + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const PerformanceSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + const InferDesc& infer; + const PerformanceSimulation::Options& opts; + std::vector providers; +}; + +void InputDataVisitor::operator()(std::monostate) { + LOG_INFO() << "Input data path for model: " << infer.tag << " hasn't been provided. Will be generated randomly" + << std::endl; + auto initializers = opts.initializers_map.at(infer.tag); + auto default_initialzer = + opts.global_initializer ? 
opts.global_initializer : std::make_shared(0.0, 255.0); + auto per_layer_initializers = + unpackWithDefault(initializers, extractLayerNames(infer.input_layers), default_initialzer); + providers = createRandomProviders(infer.input_layers, per_layer_initializers); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + const std::filesystem::path path{path_str}; + if (std::filesystem::exists(path)) { + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + providers = createConstantProviders(std::move(layers_data), extractLayerNames(infer.input_layers)); + } else { + auto initializers = opts.initializers_map.at(infer.tag); + auto default_initialzer = + opts.global_initializer ? opts.global_initializer : std::make_shared(0.0, 255.0); + auto per_layer_initializers = + unpackWithDefault(initializers, extractLayerNames(infer.input_layers), default_initialzer); + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag + << " provided but doesn't exist - will be generated randomly" << std::endl; + providers = createRandomProviders(infer.input_layers, per_layer_initializers); + } +} + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Performance mode supports input data in form of either directory or single file!"); +}; + +} // anonymous namespace + +PerformanceStrategy::PerformanceStrategy(const PerformanceSimulation::Options& _opts): opts(_opts){}; + +IBuildStrategy::InferBuildInfo PerformanceStrategy::build(const InferDesc& infer) { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + // NB: No special I/O meta for this mode + std::vector inputs_meta(infer.input_layers.size(), Meta{}); + std::vector outputs_meta(infer.output_layers.size(), Meta{}); + 
return {std::move(in_data_visitor.providers), std::move(inputs_meta), std::move(outputs_meta), opts.inference_only}; +} + +namespace { + +class SyncSimulation : public SyncCompiled { +public: + struct Options { + uint32_t after_iter_delay_in_us = 0u; + }; + + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, const size_t num_outputs, + const Options& options); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + void reset(); + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_mats; + int64_t m_ts, m_seq_id; + + std::vector m_per_iter_latency; + std::vector m_per_iter_seq_ids; + + Options m_opts; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + const size_t num_outputs); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + cv::optional m_ts, m_seq_id; + std::vector> m_opt_mats; + + std::vector m_per_iter_latency; + std::vector m_per_iter_seq_ids; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + const size_t num_outputs, const SyncSimulation::Options& options) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_mats(num_outputs), + m_ts(-1), + m_seq_id(-1), + m_opts(options) { + LOG_DEBUG() << "Run warm-up iteration" << std::endl; + this->run(std::make_shared(1u)); + LOG_DEBUG() << "Warm-up has finished successfully." 
<< std::endl; +} + +void SyncSimulation::reset() { + for (auto src : m_sources) { + src->reset(); + } + m_exec.reset(); +}; + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + auto out = m_exec.runLoop(cb, criterion); + PerformanceMetrics metrics(out.elapsed_us, m_per_iter_latency, m_per_iter_seq_ids); + m_per_iter_latency.clear(); + m_per_iter_seq_ids.clear(); + std::stringstream ss; + ss << metrics; + this->reset(); + return Success{ss.str()}; +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + using ts_t = std::chrono::microseconds; + auto pipeline_outputs = cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. + for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + pipeline_outputs += cv::gout(m_ts); + pipeline_outputs += cv::gout(m_seq_id); + + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + const auto curr_ts = utils::timestamp(); + m_per_iter_latency.push_back(curr_ts - m_ts); + m_per_iter_seq_ids.push_back(m_seq_id); + + // NB: Do extra busy wait to simulate the user's post processing after stream. 
+ if (m_opts.after_iter_delay_in_us != 0) { + utils::busyWait(std::chrono::microseconds{m_opts.after_iter_delay_in_us}); + } + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + const size_t num_outputs) + : m_exec(std::move(compiled)), m_sources(std::move(sources)), m_opt_mats(num_outputs) { + LOG_DEBUG() << "Run warm-up iteration" << std::endl; + this->run(std::make_shared(1u)); + LOG_DEBUG() << "Warm-up has finished successfully." << std::endl; +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + auto out = m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + PerformanceMetrics metrics(out.elapsed_us, m_per_iter_latency, m_per_iter_seq_ids); + m_per_iter_latency.clear(); + m_per_iter_seq_ids.clear(); + + std::stringstream ss; + ss << metrics; + + // NB: Reset sources since they may have their state changed. 
+ for (auto src : m_sources) { + src->reset(); + } + return Success{ss.str()}; +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + using ts_t = std::chrono::microseconds; + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + pipeline_outputs.emplace_back(cv::gout(m_ts)[0]); + pipeline_outputs.emplace_back(cv::gout(m_seq_id)[0]); + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + const auto curr_ts = utils::timestamp(); + ASSERT(m_ts.has_value()); + ASSERT(m_seq_id.has_value()); + m_per_iter_latency.push_back(curr_ts - *m_ts); + m_per_iter_seq_ids.push_back(*m_seq_id); + return has_data; +} + +} // anonymous namespace + +PerformanceSimulation::PerformanceSimulation(Simulation::Config&& cfg, PerformanceSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {true /* add performance meta */})) { +} + +std::shared_ptr PerformanceSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + if (m_opts.inference_only) { + // TODO: Extend also for ONNXRT backend + compile_args += cv::compile_args(cv::gapi::wip::ov::benchmark_mode{}); + } + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + return std::make_shared(std::move(compiled), std::move(sources), m_comp.getOutMeta().size()); +} + +std::shared_ptr PerformanceSimulation::compileSync(const bool drop_frames) { + auto compile_args = cv::compile_args(getNetworksPackage()); + if (m_opts.inference_only) { + // TODO: Extend also for ONNXRT backend + compile_args += cv::compile_args(cv::gapi::wip::ov::benchmark_mode{}); + } + + const uint32_t max_parallel_branches = m_comp.getMaxParallelBranches(); + if (max_parallel_branches > 1u) { + LOG_INFO() << "Found at most " << max_parallel_branches + 
<< " parallel branches in graph," + " so threaded executor will be used" + << std::endl; + ; + compile_args += cv::compile_args(cv::use_threaded_executor{max_parallel_branches}); + } + + auto sources = createSources(drop_frames); + SyncSimulation::Options options{0u}; + if (m_opts.target_latency.has_value()) { + if (!drop_frames) { + THROW_ERROR("Target latency for the stream is only supported when frames drop is enabled!"); + } + // NB: There is no way to specify more than one source currently so assert if it happened. + ASSERT(sources.size() == 1u); + const double target_latency_in_ms = m_opts.target_latency.value(); + const uint64_t source_latency_in_ms = m_cfg.frames_interval_in_us / 1000u; + if (target_latency_in_ms > source_latency_in_ms) { + THROW_ERROR("Target latency must be less or equal than source latency!"); + } + options.after_iter_delay_in_us = static_cast(source_latency_in_ms - target_latency_in_ms) * 1000u; + } + + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + return std::make_shared(std::move(compiled), std::move(sources), m_comp.getOutMeta().size(), + options); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp new file mode 100644 index 00000000000000..16eff684c4e2de --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/performance_mode.hpp @@ -0,0 +1,41 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "simulation/computation.hpp" +#include "simulation/computation_builder.hpp" +#include "simulation/simulation.hpp" + +struct PerformanceStrategy; +class PerformanceSimulation : public Simulation { +public: + struct Options { + IRandomGenerator::Ptr global_initializer; + ModelsAttrMap initializers_map; + ModelsAttrMap input_data_map; + const bool inference_only; + std::optional target_latency; + }; + 
explicit PerformanceSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compiler_args) override; + std::shared_ptr compileSync(const bool drop_frames) override; + +private: + Options m_opts; + std::shared_ptr m_strategy; + Computation m_comp; +}; + +struct PerformanceStrategy : public IBuildStrategy { + explicit PerformanceStrategy(const PerformanceSimulation::Options& opts); + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override; + + const PerformanceSimulation::Options& opts; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp new file mode 100644 index 00000000000000..6eb55ee11fcc30 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.cpp @@ -0,0 +1,361 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "reference_mode.hpp" + +#include + +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const CalcRefSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()), metas(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const CalcRefSimulation::Options& opts; + // NB: Relevant when input reference data already exists and need to + // generate exactly the same amount of output data. + // Note that this value must be the same for all models within stream. 
+ cv::util::optional model_required_iterations; + std::vector providers; + std::vector metas; +}; + +void InputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + // NB: Single path provided - either single file or directory. + const auto input_names = extractLayerNames(infer.input_layers); + const auto& initializers = opts.initializers_map.at(infer.tag); + + std::filesystem::path path{path_str}; + if (std::filesystem::exists(path)) { + // NB: Provided path exists - upload input data from there. + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + // NB: The Number of iterations for every layer is ALWAYS the same. + model_required_iterations = cv::util::make_optional(layers_data.begin()->second.size()); + providers = createConstantProviders(std::move(layers_data), input_names); + } else { + // NB: Provided path doesn't exist - generate data and dump. + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag + << " doesn't exist - input data will be generated and dumped" << std::endl; + std::vector dump_path_vec; + if (isDirectory(path)) { + // NB: When the directory is provided, the number of input iterations to be generated aren't + // bounded so the "random" providers will generate input data on every iteration that will + // be dumped on the disk afterwards. 
+ dump_path_vec = createDirectoryLayout(path, input_names); + } else { + // NB: When the single file is provided, the execution must be limited to perform + // only 1 iteration. + model_required_iterations = cv::util::optional(1ul); + if (infer.input_layers.size() > 1) { + THROW_ERROR("Model: " << infer.tag + << " must have exactly one input layer in order to dump input data to file: " + << path); + } + // NB: In case directories in that path don't exist. + std::filesystem::create_directories(path.parent_path()); + dump_path_vec = {path}; + } + auto default_initialzer = + opts.global_initializer ? opts.global_initializer : std::make_shared(0.0, 255.0); + auto layer_initializers = unpackWithDefault(initializers, input_names, default_initialzer); + providers = createRandomProviders(infer.input_layers, std::move(layer_initializers)); + for (uint32_t i = 0; i < infer.input_layers.size(); ++i) { + metas[i].set(Dump{dump_path_vec[i]}); + } + } +} + +struct OutputDataVisitor { + OutputDataVisitor(const InferDesc& _infer, const CalcRefSimulation::Options& _opts) + : infer(_infer), opts(_opts), metas(infer.output_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const CalcRefSimulation::Options& opts; + std::vector metas; +}; + +void OutputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Reference mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const std::string& path_str) { + std::filesystem::path path{path_str}; + // NB: It doesn't matter if path exist or not - regenerate and dump outputs anyway. 
+ std::vector dump_path_vec; + if (isDirectory(path)) { + dump_path_vec = createDirectoryLayout(path, extractLayerNames(infer.output_layers)); + } else { + if (infer.output_layers.size() > 1) { + THROW_ERROR("Model: " << infer.tag + << " must have exactly one output layer in order to dump output data to file: " + << path); + } + dump_path_vec = {path}; + } + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + const auto& layer = infer.output_layers[i]; + metas[i].set(Dump{dump_path_vec[i]}); + } +} + +} // anonymous namespace + +class ReferenceStrategy : public IBuildStrategy { +public: + explicit ReferenceStrategy(const CalcRefSimulation::Options& opts); + + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override; + + // NB: If specified will force execution to perform exactly require_num_iterations + // regardless what user specified. + // Use case is when N input iterations are provided, + // generate exactly the same amount of output iterations. + // Another use case is when there is only single file provided + // so only one input / output iteration must be generated. + cv::optional required_num_iterations; + const CalcRefSimulation::Options& opts; +}; + +ReferenceStrategy::ReferenceStrategy(const CalcRefSimulation::Options& _opts): opts(_opts) { +} + +IBuildStrategy::InferBuildInfo ReferenceStrategy::build(const InferDesc& infer) { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + // NB: Check if there is required number iterations for current model + // and fail if it's different comparing to other models in stream. 
+ if (in_data_visitor.model_required_iterations) { + const uint64_t required_iters_value = in_data_visitor.model_required_iterations.value(); + LOG_INFO() << "Model: " << infer.tag << " will perform at most " << required_iters_value << " iteration(s)" + << std::endl; + if (!required_num_iterations) { + required_num_iterations = in_data_visitor.model_required_iterations; + } else { + if (required_iters_value != required_num_iterations.value()) { + THROW_ERROR("All models in stream are required to have the same number of iterations!"); + } + } + } + + const auto& output_data = opts.output_data_map.at(infer.tag); + OutputDataVisitor out_data_visitor{infer, opts}; + std::visit(out_data_visitor, output_data); + + return {std::move(in_data_visitor.providers), std::move(in_data_visitor.metas), std::move(out_data_visitor.metas)}; +} + +static void updateCriterion(ITermCriterion::Ptr* criterion, cv::util::optional required_num_iterations) { + if (required_num_iterations.has_value()) { + if (*criterion) { + // NB: Limit user's termination criterion to perform at most required_num_iterations + *criterion = std::make_shared( + *criterion, std::make_shared(required_num_iterations.value())); + } else { + *criterion = std::make_shared(required_num_iterations.value()); + } + } +} + +static void dumpIterOutput(const cv::Mat& mat, const Dump& dump, const size_t iter) { + auto dump_path = dump.path; + if (isDirectory(dump.path)) { + std::stringstream ss; + ss << "iter_" << iter << ".bin"; + dump_path = dump_path / ss.str(); + } + utils::writeToBinFile(dump_path.string(), mat); +}; + +namespace { + +class SyncSimulation : public SyncCompiled { +public: + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, std::vector&& out_meta, + cv::util::optional required_num_iterations); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector
m_out_mats; + size_t m_iter_idx; + cv::optional m_required_num_iterations; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, cv::util::optional required_num_iterations); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector> m_opt_mats; + size_t m_iter_idx; + cv::optional m_required_num_iterations; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, cv::util::optional required_num_iterations) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_out_mats(m_out_meta.size()), + m_iter_idx(0u), + m_required_num_iterations(required_num_iterations) { +} + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + for (auto src : m_sources) { + src->reset(); + } + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + updateCriterion(&criterion, m_required_num_iterations); + m_exec.runLoop(cb, criterion); + std::stringstream ss; + ss << "Reference data has been generated for " << m_iter_idx << " iteration(s)"; + return Success{ss.str()}; +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + auto pipeline_outputs = cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. 
+ for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + for (size_t i = 0; i < m_out_mats.size(); ++i) { + if (m_out_meta[i].has()) { + const auto& dump = m_out_meta[i].get(); + dumpIterOutput(m_out_mats[i], dump, m_iter_idx); + } + } + ++m_iter_idx; + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta, + cv::util::optional required_num_iterations) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_opt_mats(m_out_meta.size()), + m_iter_idx(0u), + m_required_num_iterations(required_num_iterations) { +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + updateCriterion(&criterion, m_required_num_iterations); + m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + std::stringstream ss; + ss << "Reference data has been generated for " << m_iter_idx << " iteration(s)"; + return Success{ss.str()}; +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + for (size_t i = 0; i < m_out_meta.size(); ++i) { + if (m_out_meta[i].has()) { + const auto& dump = m_out_meta[i].get(); + 
ASSERT(m_opt_mats[i].has_value()); + dumpIterOutput(m_opt_mats[i].value(), dump, m_iter_idx); + } + } + ++m_iter_idx; + return has_data; +} + +} // anonymous namespace + +CalcRefSimulation::CalcRefSimulation(Simulation::Config&& cfg, CalcRefSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {false /* add performance meta */})) { +} + +std::shared_ptr CalcRefSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta), + m_strategy->required_num_iterations); +} + +std::shared_ptr CalcRefSimulation::compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta), + m_strategy->required_num_iterations); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp new file mode 100644 index 00000000000000..22d2fd92cce2c6 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/reference_mode.hpp @@ -0,0 +1,35 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "simulation/computation.hpp" +#include "simulation/simulation.hpp" + +class ReferenceStrategy; +class CalcRefSimulation : public Simulation { +public: + struct Options { + // FIXME: In fact, there should be only input data initializers + // and the path where to dump outputs + IRandomGenerator::Ptr global_initializer; + 
ModelsAttrMap initializers_map; + ModelsAttrMap input_data_map; + ModelsAttrMap output_data_map; + }; + + explicit CalcRefSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) override; + std::shared_ptr compileSync(DummySources&& sources, cv::GCompileArgs&& compiler_args) override; + +private: + Options m_opts; + std::shared_ptr m_strategy; + Computation m_comp; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp new file mode 100644 index 00000000000000..52f57c2881a3b6 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.cpp @@ -0,0 +1,131 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/simulation.hpp" + +#include "scenario/inference.hpp" +#include "utils/error.hpp" + +#include // onnx::Params +#include // ov::Params + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const OpenVINOParams& params) { + using P = cv::gapi::ov::Params; + std::unique_ptr

network; + if (std::holds_alternative(params.path)) { + const auto& model_path = std::get(params.path); + network = std::make_unique

(tag, model_path.model, model_path.bin, params.device); + } else { + GAPI_Assert(std::holds_alternative(params.path)); + const auto& blob_path = std::get(params.path); + network = std::make_unique

(tag, blob_path.blob, params.device); + } + + network->cfgPluginConfig(params.config); + network->cfgNumRequests(params.nireq); + + // NB: Pre/Post processing can be configured only for Model case. + if (std::holds_alternative(params.path)) { + if (std::holds_alternative(params.output_precision)) { + network->cfgOutputTensorPrecision(std::get(params.output_precision)); + } else if (std::holds_alternative>(params.output_precision)) { + network->cfgOutputTensorPrecision(std::get>(params.output_precision)); + } + + if (std::holds_alternative(params.input_layout)) { + network->cfgInputTensorLayout(std::get(params.input_layout)); + } else if (std::holds_alternative>(params.input_layout)) { + network->cfgInputTensorLayout(std::get>(params.input_layout)); + } + + if (std::holds_alternative(params.output_layout)) { + network->cfgOutputTensorLayout(std::get(params.output_layout)); + } else if (std::holds_alternative>(params.output_layout)) { + network->cfgOutputTensorLayout(std::get>(params.output_layout)); + } + + if (std::holds_alternative(params.input_model_layout)) { + network->cfgInputModelLayout(std::get(params.input_model_layout)); + } else if (std::holds_alternative>(params.input_model_layout)) { + network->cfgInputModelLayout(std::get>(params.input_model_layout)); + } + + if (std::holds_alternative(params.output_model_layout)) { + network->cfgOutputModelLayout(std::get(params.output_model_layout)); + } else if (std::holds_alternative>(params.output_model_layout)) { + network->cfgOutputModelLayout(std::get>(params.output_model_layout)); + } + } + return cv::gapi::networks(*network); +} + +static void cfgExecutionProvider(cv::gapi::onnx::Params& network, + const ONNXRTParams::OpenVINO& ovep) { + network.cfgAddExecutionProvider(cv::gapi::onnx::ep::OpenVINO{ovep.params_map}); +} + +static void cfgExecutionProvider(cv::gapi::onnx::Params& network, const ONNXRTParams::EP& ep) { + // NB: Nothing to configure for default MLAS EP + if (std::holds_alternative(ep)) { + 
return; + } + // TODO: Extend for any other available execution provider + ASSERT(std::holds_alternative(ep)); + cfgExecutionProvider(network, std::get(ep)); +} + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const ONNXRTParams& params) { + cv::gapi::onnx::Params network{tag, params.model_path}; + network.cfgSessionOptions(params.session_options); + cfgExecutionProvider(network, params.ep); + return cv::gapi::networks(network); +} + +static cv::gapi::GNetPackage getNetPackage(const std::string& tag, const InferenceParams& params) { + if (std::holds_alternative(params)) { + return getNetPackage(tag, std::get(params)); + } + ASSERT(std::holds_alternative(params)); + return getNetPackage(tag, std::get(params)); +} + +cv::gapi::GNetPackage Simulation::getNetworksPackage() const { + cv::gapi::GNetPackage networks; + for (const auto& [tag, params] : m_cfg.params) { + networks += getNetPackage(tag, params); + } + return networks; +} + +Simulation::Simulation(Config&& cfg): m_cfg(std::move(cfg)){}; + +std::vector Simulation::createSources(const bool drop_frames) { + auto src = std::make_shared(m_cfg.frames_interval_in_us, drop_frames, + m_cfg.disable_high_resolution_timer); + return {src}; +}; + +std::shared_ptr Simulation::compilePipelined(const bool drop_frames) { + if (drop_frames) { + THROW_ERROR("Pipelined simulation doesn't support frames drop!"); + } + // NB: Hardcoded for pipelining mode as the best option + auto compile_args = cv::compile_args(getNetworksPackage()); + compile_args += cv::compile_args(cv::gapi::streaming::queue_capacity{1u}); + return compilePipelined(createSources(drop_frames), std::move(compile_args)); +} + +std::shared_ptr Simulation::compileSync(const bool drop_frames) { + auto compile_args = cv::compile_args(getNetworksPackage()); + return compileSync(createSources(drop_frames), std::move(compile_args)); +} + +std::shared_ptr Simulation::compilePipelined(DummySources&&, cv::GCompileArgs&&) { + THROW_ERROR("Not 
implemented!"); +}; + +std::shared_ptr Simulation::compileSync(DummySources&&, cv::GCompileArgs&&) { + THROW_ERROR("Not implemented!"); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp new file mode 100644 index 00000000000000..b60eaf6b5a3148 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/simulation.hpp @@ -0,0 +1,57 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "result.hpp" +#include "scenario/criterion.hpp" +#include "scenario/inference.hpp" +#include "scenario/scenario_graph.hpp" +#include "simulation/dummy_source.hpp" + +#include // cv::gapi::GNetPackage + +struct ICompiled { + using Ptr = std::shared_ptr; + virtual Result run(ITermCriterion::Ptr) = 0; +}; + +struct PipelinedCompiled : public ICompiled {}; +struct SyncCompiled : public ICompiled {}; + +using DummySources = std::vector; + +class Simulation { +public: + using Ptr = std::shared_ptr; + + struct Config { + std::string stream_name; + uint64_t frames_interval_in_us; + bool disable_high_resolution_timer; + ScenarioGraph graph; + InferenceParamsMap params; + }; + + explicit Simulation(Config&& cfg); + + virtual std::shared_ptr compilePipelined(const bool drop_frames); + virtual std::shared_ptr compileSync(const bool drop_frames); + + virtual ~Simulation() = default; + +protected: + virtual std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args); + virtual std::shared_ptr compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args); + + std::vector createSources(const bool drop_frames); + cv::gapi::GNetPackage getNetworksPackage() const; + +protected: + Config m_cfg; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp new file mode 
100644 index 00000000000000..c6544522287048 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.cpp @@ -0,0 +1,363 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "simulation/validation_mode.hpp" + +#include "scenario/accuracy_metrics.hpp" +#include "simulation/computation_builder.hpp" +#include "simulation/executor.hpp" +#include "simulation/layers_data.hpp" +#include "simulation/validation_mode.hpp" +#include "utils/logger.hpp" +#include "utils/utils.hpp" + +#include // cv::GCompileArgs + +class LayerValidator { +public: + LayerValidator(const std::string& tag, const std::string& layer_name, IAccuracyMetric::Ptr metric); + Result operator()(const cv::Mat& lhs, const cv::Mat& rhs); + +private: + std::string m_tag; + std::string m_layer_name; + IAccuracyMetric::Ptr m_metric; +}; + +LayerValidator::LayerValidator(const std::string& tag, const std::string& layer_name, IAccuracyMetric::Ptr metric) + : m_tag(tag), m_layer_name(layer_name), m_metric(metric) { +} + +Result LayerValidator::operator()(const cv::Mat& lhs, const cv::Mat& rhs) { + auto result = m_metric->compare(lhs, rhs); + if (!result) { + std::stringstream ss; + ss << "Model: " << m_tag << ", Layer: " << m_layer_name << ", Metric: " << m_metric->str() + << ", Reason: " << result.str() << ";"; + return Error{ss.str()}; + } + return Success{"Passed"}; +} + +namespace { + +struct InputDataVisitor { + InputDataVisitor(const InferDesc& _infer, const ValSimulation::Options& _opts) + : infer(_infer), opts(_opts), providers(infer.input_layers.size()), metas(infer.input_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const ValSimulation::Options& opts; + std::vector providers; + std::vector metas; +}; + +void InputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Validation mode requires input data 
path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Validation mode requires input data path to be provided" + " in form of either directory or single file!"); +}; + +void InputDataVisitor::operator()(const std::string& path_str) { + std::filesystem::path path{path_str}; + LOG_INFO() << "Input data path: " << path << " for model: " << infer.tag << " exists - data will be uploaded" + << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.input_layers, LayersType::INPUT); + providers = createConstantProviders(std::move(layers_data), extractLayerNames(infer.input_layers)); +}; + +struct OutputDataVisitor { + OutputDataVisitor(const InferDesc& _infer, const ValSimulation::Options& _opts) + : infer(_infer), opts(_opts), metas(infer.output_layers.size()) { + } + + void operator()(std::monostate); + void operator()(const std::string&); + void operator()(const LayerVariantAttr&); + + InferDesc infer; + const ValSimulation::Options& opts; + std::vector metas; +}; + +void OutputDataVisitor::operator()(std::monostate) { + THROW_ERROR("Validation mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const LayerVariantAttr&) { + THROW_ERROR("Validation mode requires output data path to be provided" + " in form of either directory or single file!"); +} + +void OutputDataVisitor::operator()(const std::string& path_str) { + auto default_metric = opts.global_metric ? 
opts.global_metric : std::make_shared(0.0); + auto per_layer_metrics = + unpackWithDefault(opts.metrics_map.at(infer.tag), extractLayerNames(infer.output_layers), default_metric); + std::filesystem::path path{path_str}; + LOG_INFO() << "Reference output data path: " << path << " for model: " << infer.tag + << " exists - data will be uploaded" << std::endl; + auto layers_data = uploadData(path, infer.tag, infer.output_layers, LayersType::OUTPUT); + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + const auto& layer = infer.output_layers[i]; + LayerValidator validator{infer.tag, layer.name, per_layer_metrics.at(layer.name)}; + metas[i].set(Validate{std::move(validator), layers_data.at(layer.name)}); + } +} + +} // anonymous namespace + +class ValidationStrategy : public IBuildStrategy { +public: + explicit ValidationStrategy(const ValSimulation::Options& _opts): opts(_opts) { + } + + IBuildStrategy::InferBuildInfo build(const InferDesc& infer) override { + const auto& input_data = opts.input_data_map.at(infer.tag); + InputDataVisitor in_data_visitor{infer, opts}; + std::visit(in_data_visitor, input_data); + + const auto& output_data = opts.output_data_map.at(infer.tag); + OutputDataVisitor out_data_visitor{infer, opts}; + std::visit(out_data_visitor, output_data); + + if (opts.per_iter_outputs_path.has_value()) { + auto model_dir = opts.per_iter_outputs_path.value() / infer.tag; + // NB: Remove the data from the previous run if such exist + LOG_INFO() << "Actual output data for model: " << infer.tag + << " will be dumped and replaced at path: " << model_dir << std::endl; + std::filesystem::remove_all(model_dir); + auto dump_path_vec = createDirectoryLayout(model_dir, extractLayerNames(infer.output_layers)); + for (uint32_t i = 0; i < infer.output_layers.size(); ++i) { + out_data_visitor.metas[i].set(Dump{dump_path_vec[i]}); + } + } + + // NB: No special input meta for this mode. 
+ std::vector input_meta(infer.input_layers.size(), Meta{}); + return {std::move(in_data_visitor.providers), std::move(input_meta), std::move(out_data_visitor.metas)}; + } + + const ValSimulation::Options& opts; +}; + +struct FailedIter { + size_t iter_idx; + std::vector reasons; +}; + +static Result reportValidationResult(const std::vector& failed_iters, const size_t total_iters) { + std::stringstream ss; + if (!failed_iters.empty()) { + const auto kItersToShow = 10u; + const auto kLimit = failed_iters.size() < kItersToShow ? failed_iters.size() : kItersToShow; + ss << "Accuraccy check failed on " << failed_iters.size() << " iteration(s)" + << " (first " << kLimit << "):"; + ss << "\n"; + for (uint32_t i = 0; i < kLimit; ++i) { + ss << "Iteration " << failed_iters[i].iter_idx << ":\n"; + for (const auto& reason : failed_iters[i].reasons) { + ss << " " << reason << "\n"; + } + } + return Error{ss.str()}; + } + ss << "Validation has passed for " << total_iters << " iteration(s)"; + return Success{ss.str()}; +} + +static std::vector validateOutputs(const std::vector& out_mats, const std::vector& out_meta, + const size_t iter_idx) { + std::vector failed_list; + for (size_t i = 0; i < out_mats.size(); ++i) { + if (out_meta[i].has()) { + const auto& val = out_meta[i].get(); + const auto& refvec = val.reference; + ASSERT(!refvec.empty()); + const auto& refmat = refvec[iter_idx % refvec.size()]; + auto result = val.validator(refmat, out_mats[i]); + if (!result) { + failed_list.push_back(std::move(result.str())); + } + } + } + return failed_list; +} + +static void dumpOutputs(const std::vector& out_mats, const std::vector& out_meta, + const size_t iter_idx) { + for (size_t i = 0; i < out_mats.size(); ++i) { + if (out_meta[i].has()) { + std::stringstream ss; + ss << "iter_" << iter_idx << ".bin"; + auto dump_path = out_meta[i].get().path / ss.str(); + utils::writeToBinFile(dump_path.string(), out_mats[i]); + } + } +} + +namespace { + +class SyncSimulation : public 
SyncCompiled { +public: + SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, std::vector&& out_meta); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GCompiled& pipeline); + + SyncExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector m_out_mats; + size_t m_iter_idx; + std::vector m_failed_iters; +}; + +class PipelinedSimulation : public PipelinedCompiled { +public: + PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta); + + Result run(ITermCriterion::Ptr criterion) override; + +private: + bool process(cv::GStreamingCompiled& pipeline); + + PipelinedExecutor m_exec; + std::vector m_sources; + std::vector m_out_meta; + std::vector> m_opt_mats; + size_t m_iter_idx; + std::vector m_failed_iters; +}; + +//////////////////////////////// SyncSimulation /////////////////////////////// +SyncSimulation::SyncSimulation(cv::GCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_out_mats(m_out_meta.size()), + m_iter_idx(0u) { +} + +Result SyncSimulation::run(ITermCriterion::Ptr criterion) { + for (auto src : m_sources) { + src->reset(); + } + using namespace std::placeholders; + auto cb = std::bind(&SyncSimulation::process, this, _1); + m_exec.runLoop(cb, criterion); + return reportValidationResult(m_failed_iters, m_iter_idx); +}; + +bool SyncSimulation::process(cv::GCompiled& pipeline) { + auto pipeline_outputs = cv::gout(); + // NB: Reference is mandatory there since copying empty + // Mat may lead to weird side effects. 
+ for (auto& out_mat : m_out_mats) { + pipeline_outputs += cv::gout(out_mat); + } + cv::GRunArgs pipeline_inputs; + pipeline_inputs.reserve(m_sources.size()); + for (auto src : m_sources) { + cv::gapi::wip::Data data; + src->pull(data); + pipeline_inputs.push_back(std::move(data)); + } + pipeline(std::move(pipeline_inputs), std::move(pipeline_outputs)); + + dumpOutputs(m_out_mats, m_out_meta, m_iter_idx); + auto failed_list = validateOutputs(m_out_mats, m_out_meta, m_iter_idx); + if (!failed_list.empty()) { + m_failed_iters.push_back(FailedIter{m_iter_idx, std::move(failed_list)}); + } + ++m_iter_idx; + return true; +} + +//////////////////////////////// PipelinedSimulation /////////////////////////////// +PipelinedSimulation::PipelinedSimulation(cv::GStreamingCompiled&& compiled, std::vector&& sources, + std::vector&& out_meta) + : m_exec(std::move(compiled)), + m_sources(std::move(sources)), + m_out_meta(std::move(out_meta)), + m_opt_mats(m_out_meta.size()), + m_iter_idx(0u) { +} + +Result PipelinedSimulation::run(ITermCriterion::Ptr criterion) { + auto pipeline_inputs = cv::gin(); + for (auto source : m_sources) { + pipeline_inputs += cv::gin(static_cast(source)); + } + using namespace std::placeholders; + auto cb = std::bind(&PipelinedSimulation::process, this, _1); + m_exec.runLoop(std::move(pipeline_inputs), cb, criterion); + return reportValidationResult(m_failed_iters, m_iter_idx); +}; + +bool PipelinedSimulation::process(cv::GStreamingCompiled& pipeline) { + cv::GOptRunArgsP pipeline_outputs; + for (auto& opt_mat : m_opt_mats) { + pipeline_outputs.emplace_back(cv::gout(opt_mat)[0]); + } + const bool has_data = pipeline.pull(std::move(pipeline_outputs)); + std::vector out_mats; + out_mats.reserve(m_opt_mats.size()); + for (auto opt_mat : m_opt_mats) { + ASSERT(opt_mat.has_value()); + out_mats.push_back(opt_mat.value()); + } + + dumpOutputs(out_mats, m_out_meta, m_iter_idx); + auto failed_list = validateOutputs(out_mats, m_out_meta, m_iter_idx); + if 
(!failed_list.empty()) { + m_failed_iters.push_back(FailedIter{m_iter_idx, std::move(failed_list)}); + } + ++m_iter_idx; + return has_data; +} + +} // anonymous namespace + +ValSimulation::ValSimulation(Simulation::Config&& cfg, ValSimulation::Options&& opts) + : Simulation(std::move(cfg)), + m_opts(std::move(opts)), + m_strategy(std::make_shared(m_opts)), + m_comp(ComputationBuilder{m_strategy}.build(m_cfg.graph, m_cfg.params, {false /* add performance meta */})) { +} + +std::shared_ptr ValSimulation::compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) { + auto compiled = m_comp.compileStreaming(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta)); +} + +std::shared_ptr ValSimulation::compileSync(DummySources&& sources, cv::GCompileArgs&& compile_args) { + const uint32_t max_parallel_branches = m_comp.getMaxParallelBranches(); + if (max_parallel_branches > 1u) { + LOG_INFO() << "Found at most " << max_parallel_branches + << " parallel branches in graph," + " so threaded executor will be used" + << std::endl; + ; + compile_args += cv::compile_args(cv::use_threaded_executor{max_parallel_branches}); + } + auto compiled = m_comp.compile(descr_of(sources), std::move(compile_args)); + auto out_meta = m_comp.getOutMeta(); + return std::make_shared(std::move(compiled), std::move(sources), std::move(out_meta)); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp new file mode 100644 index 00000000000000..180c802803a68c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/simulation/validation_mode.hpp @@ -0,0 +1,34 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include "simulation/computation.hpp" +#include 
"simulation/simulation.hpp" + +class ValidationStrategy; +class ValSimulation : public Simulation { +public: + struct Options { + IAccuracyMetric::Ptr global_metric; + ModelsAttrMap metrics_map; + ModelsAttrMap input_data_map; + ModelsAttrMap output_data_map; + std::optional per_iter_outputs_path; + }; + explicit ValSimulation(Simulation::Config&& cfg, Options&& opts); + + std::shared_ptr compilePipelined(DummySources&& sources, + cv::GCompileArgs&& compile_args) override; + std::shared_ptr compileSync(DummySources&& sources, cv::GCompileArgs&& compiler_args) override; + +private: + Options m_opts; + std::shared_ptr m_strategy; + Computation m_comp; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp new file mode 100644 index 00000000000000..f3eaf7756e1793 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.cpp @@ -0,0 +1,64 @@ +// +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + + +#include "data_providers.hpp" + +#include + +#include "utils.hpp" +#include "utils/error.hpp" + +UniformGenerator::UniformGenerator(double low, double high): m_low(low), m_high(high) { + ASSERT(low <= high); +} + +void UniformGenerator::generate(cv::Mat& mat) { + cv::randu(mat, m_low, m_high); +} + +std::string UniformGenerator::str() const { + std::stringstream ss; + ss << "{dist: uniform, range: [" << m_low << ", " << m_high << "]}"; + return ss.str(); +} + +RandomProvider::RandomProvider(IRandomGenerator::Ptr impl, const std::vector& dims, const int depth) + : m_impl(impl), m_dims(dims), m_depth(depth) { +} + +void RandomProvider::pull(cv::Mat& mat) { + utils::createNDMat(mat, m_dims, m_depth); + m_impl->generate(mat); +} + +cv::GMatDesc RandomProvider::desc() { + if (m_dims.size() == 2u) { + return cv::GMatDesc{m_depth, 1, cv::Size(m_dims[1], m_dims[0])}; + } + return cv::GMatDesc{m_depth, m_dims}; +} + 
+CircleBuffer::CircleBuffer(const std::vector& buffer): m_buffer(buffer), m_pos(0u) { + ASSERT(!m_buffer.empty()); +} + +CircleBuffer::CircleBuffer(std::vector&& buffer): m_buffer(std::move(buffer)), m_pos(0u) { + ASSERT(!m_buffer.empty()); +} + +CircleBuffer::CircleBuffer(cv::Mat mat): CircleBuffer(std::vector{mat}) { +} + +void CircleBuffer::pull(cv::Mat& mat) { + m_buffer[m_pos++].copyTo(mat); + if (m_pos == m_buffer.size()) { + m_pos = 0; + } +} + +cv::GMatDesc CircleBuffer::desc() { + return cv::descr_of(m_buffer[0]); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp new file mode 100644 index 00000000000000..2bd45b7f19cc25 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/data_providers.hpp @@ -0,0 +1,70 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +#include + +struct IDataProvider { + using Ptr = std::shared_ptr; + virtual void pull(cv::Mat& mat) = 0; + virtual cv::GMatDesc desc() = 0; + virtual void reset() = 0; + virtual ~IDataProvider() = default; +}; + +class IRandomGenerator { +public: + using Ptr = std::shared_ptr; + virtual void generate(cv::Mat& mat) = 0; + virtual ~IRandomGenerator() = default; + virtual std::string str() const = 0; +}; + +class UniformGenerator : public IRandomGenerator { +public: + using Ptr = std::shared_ptr; + UniformGenerator(double low, double high); + void generate(cv::Mat& mat) override; + virtual std::string str() const override; + +private: + double m_low, m_high; +}; + +class RandomProvider : public IDataProvider { +public: + RandomProvider(IRandomGenerator::Ptr impl, const std::vector& dims, const int depth); + + void pull(cv::Mat& mat) override; + cv::GMatDesc desc() override; + void reset() override { /* do nothing */ + } + +private: + IRandomGenerator::Ptr m_impl; + std::vector m_dims; + int m_depth; +}; + 
+class CircleBuffer : public IDataProvider { +public: + CircleBuffer(const std::vector& buffer); + CircleBuffer(std::vector&& buffer); + CircleBuffer(cv::Mat mat); + + void pull(cv::Mat& mat) override; + cv::GMatDesc desc() override; + void reset() override { + m_pos = 0; + } + +private: + std::vector m_buffer; + uint64_t m_pos; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/error.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/error.hpp new file mode 100644 index 00000000000000..23cb2a8f46436c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/error.hpp @@ -0,0 +1,39 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace details { + +[[noreturn]] inline void assert_abort(const char* str, const int line, const char* file, const char* func) { + std::stringstream ss; + ss << file << ":" << line << ": Assertion " << str << " in function " << func << " failed\n"; + std::cerr << ss.str() << std::flush; + abort(); +} + +[[noreturn]] inline void throw_error(const char* str) { + std::stringstream ss; + ss << "An exception thrown! 
" << str << std::flush; + throw std::logic_error(ss.str()); +} + +} // namespace details + +#define ASSERT(expr) \ + { \ + if (!(expr)) \ + ::details::assert_abort(#expr, __LINE__, __FILE__, __func__); \ + } + +#define THROW_ERROR(msg) \ + { \ + std::ostringstream os; \ + os << msg; \ + ::details::throw_error(os.str().c_str()); \ + } diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp new file mode 100644 index 00000000000000..ccba64e701975c --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.cpp @@ -0,0 +1,32 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils/logger.hpp" + +#include + +LogLevel Logger::global_lvl = LogLevel::None; + +Logger::Logger(LogLevel lvl): m_lvl(lvl) { +} + +std::stringstream& Logger::stream() { + return m_ss; +} + +Logger::~Logger() { + if (m_lvl <= Logger::global_lvl) { + switch (m_lvl) { + case LogLevel::Info: + std::cout << "[ INFO ] " << m_ss.str(); + break; + case LogLevel::Debug: + std::cout << "[ DEBUG ] " << m_ss.str(); + break; + default: + /* do nothing */; + } + } +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp new file mode 100644 index 00000000000000..e8b1f5df7f8fa3 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/logger.hpp @@ -0,0 +1,29 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +enum class LogLevel { + None = 0, + Info = 1, + Debug = 2, +}; + +class Logger { +public: + static LogLevel global_lvl; + explicit Logger(LogLevel lvl); + std::stringstream& stream(); + ~Logger(); + +private: + LogLevel m_lvl; + std::stringstream m_ss; +}; + +#define LOG_INFO() Logger{LogLevel::Info}.stream() +#define LOG_DEBUG() Logger{LogLevel::Debug}.stream() diff --git 
a/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp new file mode 100644 index 00000000000000..a1fc0f4c2643c4 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.cpp @@ -0,0 +1,73 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "timer.hpp" +#include "utils.hpp" + +#include + +#if defined(_WIN32) +#include + +class WinTimer : public SleepTimer { +public: + WinTimer(bool disable_high_resolution_timer); + void wait(std::chrono::microseconds time) override; + ~WinTimer(); + +private: + HANDLE m_handle = nullptr; +}; + +WinTimer::WinTimer(bool disable_high_resolution_timer) { + // FIXME: It should be called once. + timeBeginPeriod(1); + m_handle = CreateWaitableTimerEx( + NULL, NULL, disable_high_resolution_timer ? 0 : CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS); +} + +void WinTimer::wait(std::chrono::microseconds time) { + LARGE_INTEGER li; + using ns_t = std::chrono::nanoseconds; + using ns_100_t = std::chrono::duration, ns_t::period>>; + + li.QuadPart = -std::chrono::duration_cast(time).count(); + if (!SetWaitableTimer(m_handle, &li, 0, NULL, NULL, false)) { + CloseHandle(m_handle); + throw std::logic_error("WinTimer failed to setup"); + } + + if (WaitForSingleObject(m_handle, INFINITE) != WAIT_OBJECT_0) { + CloseHandle(m_handle); + throw std::logic_error("WinTimer failed to sleep"); + } +} + +WinTimer::~WinTimer() { + CancelWaitableTimer(m_handle); + CloseHandle(m_handle); +} + +#endif // defined(_WIN32) + +class ChronoTimer : public SleepTimer { + void wait(std::chrono::microseconds time) override; +}; + +void ChronoTimer::wait(std::chrono::microseconds time) { + std::this_thread::sleep_for(time); +} + +SleepTimer::Ptr SleepTimer::create(bool disable_high_resolution_timer) { +#if defined(_WIN32) + return std::make_shared(disable_high_resolution_timer); +#else + return std::make_shared(); +#endif +} + 
+void BusyTimer::wait(std::chrono::microseconds time) { + utils::busyWait(time); +} diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp new file mode 100644 index 00000000000000..423966ad2300a9 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/timer.hpp @@ -0,0 +1,25 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +struct IWaitable { + using Ptr = std::shared_ptr; + virtual void wait(std::chrono::microseconds time) = 0; + virtual ~IWaitable() = default; +}; + +struct SleepTimer : public IWaitable { + using Ptr = std::shared_ptr; + static Ptr create(bool disable_high_resolution_timer = false); +}; + +struct BusyTimer : public IWaitable { + void wait(std::chrono::microseconds time) override; +}; diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/utils.cpp b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.cpp new file mode 100644 index 00000000000000..94081dd295229e --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.cpp @@ -0,0 +1,84 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "utils.hpp" + +#include + +#include + +namespace utils { + +void createNDMat(cv::Mat& mat, const std::vector& dims, int depth) { + GAPI_Assert(!dims.empty()); + mat.create(dims, depth); + if (dims.size() == 1) { + // FIXME: Well-known 1D mat WA + mat.dims = 1; + } +} + +void generateRandom(cv::Mat& out) { + switch (out.depth()) { + case CV_8U: + cv::randu(out, 0, 255); + break; + case CV_32S: + cv::randu(out, 0, 255); + break; + case CV_32F: + cv::randu(out, 0.f, 255.f); + break; + case CV_16F: { + std::vector dims; + for (int i = 0; i < out.size.dims(); ++i) { + dims.push_back(out.size[i]); + } + cv::Mat fp32_mat; + createNDMat(fp32_mat, dims, CV_32F); + cv::randu(fp32_mat, 0.f, 255.f); + 
fp32_mat.convertTo(out, out.type()); + break; + } + default: + throw std::logic_error("Unsupported preprocessing depth"); + } +} + +cv::Mat createRandom(const std::vector& dims, int depth) { + cv::Mat mat; + createNDMat(mat, dims, depth); + generateRandom(mat); + return mat; +} + +void readFromBinFile(const std::string& filepath, cv::Mat& mat) { + std::ifstream ifs(filepath, std::ios::binary | std::ios::ate); + + if (!ifs.is_open()) { + throw std::logic_error("Failed to open: " + filepath); + } + + const auto file_byte_size = ifs.tellg(); + ifs.seekg(0, std::ios::beg); + + const auto mat_byte_size = mat.total() * mat.elemSize(); + if (file_byte_size != mat_byte_size) { + throw std::logic_error("Failed to read cv::Mat from binary file: " + filepath + ". Mat size: " + + std::to_string(mat_byte_size) + ", File size: " + std::to_string(file_byte_size)); + } + + ifs.read(mat.ptr(), mat_byte_size); +} + +void writeToBinFile(const std::string& filepath, const cv::Mat& mat) { + std::ofstream fout(filepath, std::ios::out | std::ios::binary); + if (!fout.is_open()) { + throw std::logic_error("Failed to open/create: " + filepath); + } + fout.write(mat.ptr(), mat.total() * mat.elemSize()); +} + +} // namespace utils diff --git a/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp new file mode 100644 index 00000000000000..a2ee4bdcf742d5 --- /dev/null +++ b/src/plugins/intel_npu/tools/protopipe/src/utils/utils.hpp @@ -0,0 +1,65 @@ +// +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace utils { + +void createNDMat(cv::Mat& mat, const std::vector& dims, int depth); +void generateRandom(cv::Mat& out); +cv::Mat createRandom(const std::vector& dims, int depth); + +template +typename duration_t::rep measure(std::function f) { + using namespace std::chrono; + auto start = 
high_resolution_clock::now(); + f(); + return duration_cast(high_resolution_clock::now() - start).count(); +} + +template +typename duration_t::rep timestamp() { + using namespace std::chrono; + auto now = high_resolution_clock::now(); + return duration_cast(now.time_since_epoch()).count(); +} + +inline void busyWait(std::chrono::microseconds delay) { + auto start_ts = timestamp(); + auto end_ts = start_ts; + auto time_to_wait = delay.count(); + + while (end_ts - start_ts < time_to_wait) { + end_ts = timestamp(); + } +} + +template +double avg(const std::vector& vec) { + return std::accumulate(vec.begin(), vec.end(), 0.0) / vec.size(); +} + +template +T max(const std::vector& vec) { + return *std::max_element(vec.begin(), vec.end()); +} + +template +T min(const std::vector& vec) { + return *std::min_element(vec.begin(), vec.end()); +} + +void readFromBinFile(const std::string& filepath, cv::Mat& mat); +void writeToBinFile(const std::string& filepath, const cv::Mat& mat); + +} // namespace utils diff --git a/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt b/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt index 09ed0db315785c..e6c24566777d4b 100644 --- a/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt +++ b/src/plugins/intel_npu/tools/single-image-test/CMakeLists.txt @@ -26,7 +26,7 @@ foreach(LIB opencv_core opencv_imgproc opencv_imgcodecs) endforeach() if(NOT MISSING_DEPENDENCIES STREQUAL "") - message(WARNING "${TARGET_NAME} tool is disabled due to missing dependencies: ${MISSING_DEPENDENCIES}") + message(STATUS "NPU ${TARGET_NAME} tool is disabled due to missing dependencies: ${MISSING_DEPENDENCIES}") return() endif()