diff --git a/.gitmodules b/.gitmodules
index a9cad1dee5f494..5feb7458da1801 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -78,6 +78,9 @@
[submodule "src/plugins/intel_npu/thirdparty/level-zero-ext"]
path = src/plugins/intel_npu/thirdparty/level-zero-ext
url = https://github.com/intel/level-zero-npu-extensions.git
+[submodule "src/plugins/intel_npu/thirdparty/yaml-cpp"]
+ path = src/plugins/intel_npu/thirdparty/yaml-cpp
+ url = https://github.com/jbeder/yaml-cpp.git
[submodule "thirdparty/telemetry"]
path = thirdparty/telemetry
url = https://github.com/openvinotoolkit/telemetry.git
diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt
index 73cdd57e508bdb..69ad9f460e357a 100644
--- a/scripts/CMakeLists.txt
+++ b/scripts/CMakeLists.txt
@@ -12,6 +12,7 @@ set(shellcheck_skip_list
"${OpenVINO_SOURCE_DIR}/thirdparty"
"${OpenVINO_SOURCE_DIR}/src/plugins/intel_cpu/thirdparty"
"${OpenVINO_SOURCE_DIR}/src/plugins/intel_gpu/thirdparty"
+ "${OpenVINO_SOURCE_DIR}/src/plugins/intel_npu/thirdparty"
"${OpenVINO_SOURCE_DIR}/src/bindings/python/thirdparty/pybind11"
"${TEMP}")
diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake
index 07efefd4452403..8a9dce04f071b9 100644
--- a/src/plugins/intel_npu/cmake/features.cmake
+++ b/src/plugins/intel_npu/cmake/features.cmake
@@ -20,3 +20,5 @@ if(NOT BUILD_SHARED_LIBS AND NOT ENABLE_MLIR_COMPILER AND NOT ENABLE_DRIVER_COMP
endif()
ov_dependent_option(ENABLE_IMD_BACKEND "Enable InferenceManagerDemo based NPU AL backend" OFF "NOT WIN32;NOT CMAKE_CROSSCOMPILING" OFF)
+
+ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF)
diff --git a/src/plugins/intel_npu/thirdparty/CMakeLists.txt b/src/plugins/intel_npu/thirdparty/CMakeLists.txt
index 4d0c66beeb7520..b064b5c7b9acd5 100644
--- a/src/plugins/intel_npu/thirdparty/CMakeLists.txt
+++ b/src/plugins/intel_npu/thirdparty/CMakeLists.txt
@@ -12,3 +12,15 @@ if(ENABLE_ZEROAPI_BACKEND)
add_library(LevelZero::NPUExt ALIAS level-zero-ext)
install(TARGETS level-zero-ext EXPORT "${PROJECT_NAME}Targets")
endif()
+
+#
+# yaml-cpp
+#
+
+if(ENABLE_INTEL_NPU_PROTOPIPE)
+ add_subdirectory(yaml-cpp EXCLUDE_FROM_ALL)
+ # NB: Suppress warnings in yaml-cpp
+ if(SUGGEST_OVERRIDE_SUPPORTED)
+ target_compile_options(yaml-cpp PRIVATE -Wno-suggest-override)
+ endif()
+endif()
diff --git a/src/plugins/intel_npu/thirdparty/yaml-cpp b/src/plugins/intel_npu/thirdparty/yaml-cpp
new file mode 160000
index 00000000000000..da82fd982c260e
--- /dev/null
+++ b/src/plugins/intel_npu/thirdparty/yaml-cpp
@@ -0,0 +1 @@
+Subproject commit da82fd982c260e7f335ce5acbceff24b270544d1
diff --git a/src/plugins/intel_npu/tools/CMakeLists.txt b/src/plugins/intel_npu/tools/CMakeLists.txt
index c0e620981952e1..ac1a51f74519c8 100644
--- a/src/plugins/intel_npu/tools/CMakeLists.txt
+++ b/src/plugins/intel_npu/tools/CMakeLists.txt
@@ -6,3 +6,7 @@
add_subdirectory(common)
add_subdirectory(compile_tool)
add_subdirectory(single-image-test)
+
+if (ENABLE_INTEL_NPU_PROTOPIPE)
+ add_subdirectory(protopipe)
+endif()
diff --git a/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt b/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt
new file mode 100644
index 00000000000000..9ba76d89ca8445
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/CMakeLists.txt
@@ -0,0 +1,72 @@
+#
+# Copyright (C) 2023-2024 Intel Corporation.
+# SPDX-License-Identifier: Apache 2.0
+#
+
+set(TARGET_NAME protopipe)
+
+if (NOT DEFINED PROJECT_NAME)
+ cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
+ project(protopipe_standalone)
+ include("cmake/standalone.cmake")
+ return()
+endif()
+
+#
+# Dependencies
+#
+
+find_package(OpenCV QUIET COMPONENTS gapi)
+if(OpenCV_VERSION VERSION_LESS 4.9)
+ message(STATUS "NPU ${TARGET_NAME} tool is disabled due to missing dependencies: gapi from OpenCV >= 4.9.")
+ return()
+endif()
+
+if (WIN32)
+ # WA: add_tool_target expects to have all dependencies as cmake targets.
+ add_library(winmm INTERFACE)
+ target_link_libraries(winmm INTERFACE "winmm.lib")
+endif()
+
+#
+# Define the target
+#
+
+set(PROTOPIPE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/src)
+
+ov_add_target(ADD_CPPLINT
+ TYPE EXECUTABLE
+ NAME ${TARGET_NAME}
+ ROOT ${CMAKE_CURRENT_SOURCE_DIR}
+ ADDITIONAL_SOURCE_DIRS ${PROTOPIPE_SOURCE_DIR}
+ INCLUDES ${PROTOPIPE_SOURCE_DIR}
+ LINK_LIBRARIES
+ PRIVATE
+ Threads::Threads
+ gflags
+ yaml-cpp
+ openvino::runtime
+ opencv_gapi
+ winmm)
+
+
+
+set_target_properties(${TARGET_NAME} PROPERTIES
+ FOLDER ${CMAKE_CURRENT_SOURCE_DIR}
+ CXX_STANDARD 17)
+
+#
+# Install
+#
+
+install(TARGETS ${TARGET_NAME}
+ RUNTIME DESTINATION "tools/${TARGET_NAME}"
+ COMPONENT ${NPU_INTERNAL_COMPONENT}
+ ${OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL})
+
+if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
+ install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
+ DESTINATION "tools/${TARGET_NAME}"
+ COMPONENT ${NPU_INTERNAL_COMPONENT}
+ ${OV_CPACK_COMP_NPU_INTERNAL_EXCLUDE_ALL})
+endif()
diff --git a/src/plugins/intel_npu/tools/protopipe/README.md b/src/plugins/intel_npu/tools/protopipe/README.md
new file mode 100644
index 00000000000000..afe6e8cffbc8c3
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/README.md
@@ -0,0 +1,608 @@
+# Protopipe
+Protopipe is a C++ tool for simulating the performance and validating the accuracy of various AI scenarios.
+
+Protopipe is built on top of [OpenCV G-API](https://github.com/opencv/opencv/wiki/Graph-API) and supports running inference through the [OpenVINO](https://github.com/openvinotoolkit/openvino) and [ONNXRuntime](https://github.com/microsoft/onnxruntime) frameworks.
+
+## Table of Contents
+* [Quick start](#quick-start)
+* [How to configure](#how-to-configure)
+ * [Global parameters](#global-parameters)
+ * [Model parameters](#model-parameters)
+ * [Graph structure](#graph-structure)
+ * [Dependency Graph](#dependency-graph)
+ * [Network sequence](#network-sequence)
+ * [Scenario parameters](#scenario-parameters)
+ * [Config example](#config-example)
+* [How to run](#how-to-run)
+* [Use cases](#use-cases)
+ * [Measure Performance](#measure-performance)
+ * [Generate Reference](#generate-reference)
+ * [Validate Accuracy](#validate-accuracy)
+* [How to build](#how-to-build)
+
+## Quick start
+Consider the [Config example](#config-example) to start using Protopipe.
+
+Learn more about available config parameters (see: [How to configure](#how-to-configure)) and explore different execution modes (see: [Use-cases](#use-cases)) for more advanced usage.
+
+## How to configure
+Protopipe uses a **YAML** configuration file to describe the AI scenario structure and its parameters.
+
+### Global parameters
+The **YAML** config starts by specifying several global parameters:
+- `model_dir` - **Optional**. Path to the models location. (**Default**: ".")
+- `blob_dir` - **Optional**. Path to the blobs location. (**Default**: ".")
+- `device_name` - **Optional**. OpenVINO device name: _CPU_, _GPU_, etc. (**Default**: _NPU_)
+- `compiler_type` - **Optional**. NPU compiler type: _DRIVER_, _MLIR_. (**Default**: _DRIVER_)
+- `log_level` - **Optional**. Log level: _NONE_, _INFO_, _DEBUG_. (**Default**: _NONE_)
+- `disable_high_resolution_waitable_timer` - **Optional**. Disables high resolution timer used to perform delays on Windows. (**Default**: false)
+
+Example:
+```
+model_dir:
+ local: C:\workspace\models
+device_name: NPU
+compiler_type: MLIR
+log_level: INFO
+```
+### Model parameters
+#### Common parameters
+- `name` or `path` - **Required**. Path to the model file.
+- `framework` - **Optional**. Framework to use for inference: *onnxrt*, *openvino*. (**Default**: *openvino*)
+- `input_data`, `output_data`, `metric`, `random` - **Optional**. See [Use cases](#use-cases) for details; an example follows below.
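+
+For illustration only, a minimal sketch combining these common parameters (the model file names are placeholders):
+```
+- { name: model.xml }                                # OpenVINO is the default framework
+- { path: C:\models\model.onnx, framework: onnxrt }  # run this model through ONNX Runtime
+```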
+#### OpenVINO parameters
+- `priority` - **Optional**. Model priority: _HIGH_, _MEDIUM_, _LOW_. (Default: _MEDIUM_)
+- `config` - **Optional**. OpenVINO Plugin specific parameters.
+- `device` - **Optional**. OpenVINO device name.
+- `ip` - **Optional**. Input layer precision: _FP16_, _FP32_, _U8_, _I32_.
+- `op` - **Optional**. Output layer precision: _FP16_, _FP32_, _U8_, _I32_.
+- `il` - **Optional**. Input layer layout.
+- `ol` - **Optional**. Output layer layout.
+- `iml` - **Optional**. Input model layout.
+- `oml` - **Optional**. Output model layout.
+
+Examples:
+```
+- { name: model.xml, ip: FP16, iml: NHWC, il: NCHW }
+- { name: model.xml, ip: { data: FP16 }, priority: HIGH }
+- { name: model.xml, device: NPU, config: { PERFORMANCE_HINT: THROUGHPUT } }
+```
+#### ONNXRT parameters
+- `ep` - **Optional**. Specifies the parameters for a particular execution provider.
+- `session_options` - **Optional**. Sets various session options for ONNX Runtime.
+
+##### Supported Execution Providers
+- [OpenVINO Execution Provider](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html)
+ - `name: OV` - **Required**. Enables OpenVINO Execution Provider.
+ - `device_type` - **Optional**. The device type: _NPU_U8_, _CPU_FP32_, etc.
+ - `params` - **Optional**. Accepts a map of options and their corresponding values that can be passed to OV EP.
+
+**Note**: If none of the supported execution providers are specified, the default `MLAS` will be used.
+
+Examples:
+```
+- { name: model.onnx, framework: onnxrt } # Default (MLAS) EP will be used
+- { name: model.onnx, framework: onnxrt, session_options: { session.disable_cpu_ep_fallback: 1 } } # Default (MLAS) EP with the session options will be used
+- { name: model.onnx, framework: onnxrt, ep: { name: OV, device_type: NPU_U8, params: { enable_qdq_optimizer: False, model_priority: LOW } } } # OpenVINO EP will be used
+```
+
+### Graph structure
+There are two ways to describe the execution graph structure in Protopipe:
+1. Using a [Dependency Graph](#dependency-graph) (preferred)
+2. Using a [Network Sequence](#network-sequence) (legacy)
+
+#### Dependency Graph
+The dependency graph in Protopipe is specified by:
+- `op_desc` - The list of operations; every operation has the following parameters:
+ - `tag` - **Required**. The unique name of the operation.
+ - `type` - **Optional**. The operation type: _Infer_, _CPU_, _Compound_. (**Default**: _Infer_)
+ - `repeat_count` - **Optional**. Repeats the operation for the specified number of iterations.
+- `connections` - The list of connections between operations.
+
+Supported operation types:
+1. `Infer` - Performs model inference. See [Model parameters](#model-parameters) for details.
+2. `CPU` - Simulates CPU load by busy-waiting for `time_in_us` microseconds.
+3. `Compound` - Defines a subgraph that consists of `Infer` and `CPU` nodes.
+
+```
+op_desc:
+ - { tag: A, path: Model-A.xml, ip: FP16, op: FP16 }
+ - { tag: B, path: Model-B.onnx, framework: onnxrt, ep: { name: OV, device_type: CPU_FP32 } }
+ - { tag: C, type: CPU, time_in_us: 5000 }
+ - { tag: D, path: Model-D.onnx, framework: onnxrt }
+ - { tag: E, path: Model-E.xml, il: NCHW, device: NPU, config: { PERFORMANCE_HINT: LATENCY } }
+ - { tag: F, path: Model-F.xml }
+connections:
+ - [A, C, E, F]
+ - [A, B, D, F]
+ - [B, F]
+```
+```mermaid
+ graph LR;
+ A-->B
+ A-->C
+ B-->D
+ B-->F
+ C-->E
+ E-->F
+ D-->F
+```
+
+The source **is not** reflected in the graph structure; assume that all operations without input connections are implicitly linked to the source, e.g. for the graph above:
+```mermaid
+ graph LR;
+ Source-->A
+ A-->B
+ A-->C
+ B-->D
+ B-->F
+ C-->E
+ E-->F
+ D-->F
+```
+**Note:** The situation where none of the operations has input connections is also possible; consider:
+```
+op_desc:
+ - { tag: A, path: Model-A.xml }
+ - { tag: B, path: Model-B.xml }
+ - { tag: C, path: Model-C.xml }
+```
+
+```mermaid
+ graph LR;
+ Source-->A
+ Source-->B
+ Source-->C
+```
+In this case the section `connections` **can be omitted**.
+
+**Note:** The graph must remain a `DAG`, so any loops are prohibited, including self-loops and double edges. These are examples of invalid graphs:
+```
+#1: Invalid - The list must contain at least two operations to connect
+- [A]
+#2: Invalid - Self-loop is prohibited
+- [A, A]
+#3: Invalid - Loop is prohibited
+- [A, B, C, A]
+#4: Invalid - Double edge [B->C] is prohibited
+- [A, B, C]
+- [B, C]
+```
+**Example of repeat_count usage**
+```
+- op_desc:
+  - { tag: A, path: Model_A.xml, ... }
+  - { tag: B, path: Model_B.xml, repeat_count: 20 }
+  - { tag: C, path: Model_C.xml, ... }
+  connections:
+  - [A, B, C]
+```
+This defines the following pipeline:
+```mermaid
+graph LR;
+ A-->B
+ B-->C
+ B--->|20 iterations|B
+
+```
+**Example of "Compound" type operation**.
+```
+op_desc:
+ - { tag: A, path: Model-A.xml }
+ - tag: B
+   type: Compound
+   repeat_count: 10
+   op_desc:
+    - { tag: D, path: Model-D.xml }
+    - { tag: E, path: Model-E.xml }
+    - { tag: F, path: Model-F.xml }
+   connections:
+    - [D, E]
+    - [D, F]
+ - { tag: C, path: Model-C.xml }
+connections:
+ - [A, B, C]
+```
+This defines the following pipeline:
+```mermaid
+graph LR;
+ A[Model-A.xml]
+ C[Model-C.xml]
+
+ subgraph B[Repeats 10 iterations]
+ direction LR
+ D[Model-D.xml]
+ E[Model-E.xml]
+ F[Model-F.xml]
+
+ D --> E
+ D --> F
+
+ end
+
+ A --> B
+ B --> C
+```
+
+#### Network Sequence
+There is also a way to describe the graph using a chain-like structure:
+`network` - **Required**. A list, or a list of lists, of model parameters. See [Model parameters](#model-parameters) for details.
+`delay_in_us` - **Optional**. Delay between models in microseconds.
+
+```
+input_stream_list:
+- network:
+  - { name: A.xml, ip: FP16, il: NCHW, device: CPU }
+  - [{ name: B.xml, ip: FP16, op: FP16 }, { name: C.xml, ip: FP16, op: FP16 }]
+  - { name: D.xml, ip: FP16, op: FP16, config: { PERFORMANCE_HINT: LATENCY } }
+  delay_in_us: 5000
+```
+
+```mermaid
+ graph LR;
+ A-->Delay1;
+ Delay1-->B;
+ Delay1-->C;
+ B-->Delay2;
+ C-->Delay2;
+ Delay2-->D
+```
+
+### Scenario parameters
+The list of scenarios is specified using the `multi_inference` parameter; every scenario has the following parameters:
+- `name` - **Optional**. The name of the execution scenario.
+- `input_stream_list` - **Required**. The list of the streams that will be run in parallel.
+
+Every stream has the following execution parameters:
+- `name` - **Optional**. The name of the stream.
+- `iteration_count` - **Optional**. Number of iterations to execute.
+- `exec_time_in_secs` - **Optional**. Execute until the specified timeout expires.
+- `frames_interval_in_ms` - **Optional**. Execution frequency of the stream. (**Default**: 0 - unbounded)
+- `target_fps` - **Optional**. Execution frequency of the stream: `target_fps = 1000 / frames_interval_in_ms`. `target_fps` and `frames_interval_in_ms` are mutually exclusive and cannot be provided together.
+- `target_latency_in_ms` - **Optional**. When an iteration isn't finished within the specified interval, the next frame is dropped from execution. (**Default**: disabled)
+- `op_desc`/`connections` or `network` - **Required**. Execution graph structure. See [Graph structure](#graph-structure) for details; a minimal sketch of a full scenario follows below.
+
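+As an illustrative sketch only (model names, stream names, and values below are placeholders), a scenario with two named streams could look like:
+```
+multi_inference:
+- name: my-scenario
+  input_stream_list:
+  - name: stream-0
+    iteration_count: 100
+    frames_interval_in_ms: 33
+    network:
+    - { name: A.xml }
+  - name: stream-1
+    exec_time_in_secs: 15
+    target_fps: 30
+    network:
+    - { name: B.xml }
+```
+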
+### Config example
+Consider the following scenario, consisting of two parallel streams, specified in `config.yaml`:
+```
+model_dir:
+  local: C:\workspace\models
+device_name: NPU
+compiler_type: MLIR
+log_level: INFO
+
+multi_inference:
+- input_stream_list:
+  - network:
+    - { name: A.xml, ip: FP16, il: NCHW, device: CPU }
+    - [{ name: B.xml, ip: FP16, op: FP16 }, { name: C.xml, ip: FP16, op: FP16 }]
+    - { name: D.xml, ip: FP16, op: FP16, config: { PERFORMANCE_HINT: LATENCY } }
+    target_fps: 30
+    exec_time_in_secs: 15
+  - op_desc:
+    - { tag: E, path: E.onnx, framework: onnxrt, ep: { name: OV, device_type: NPU_U8 } }
+    - { tag: F, type: CPU, time_in_us: 5000 }
+    - { tag: G, path: G.xml, ip: FP16, op: FP16, priority: HIGH }
+    connections:
+    - [E, F, G]
+    target_fps: 100
+    exec_time_in_secs: 15
+```
+- The first `stream` is defined using the [Network sequence](#network-sequence) syntax and will execute the following graph at a `30` FPS cadence:
+ ```mermaid
+ graph LR;
+ A-->B;
+ A-->C;
+ B-->D;
+ C-->D;
+ ```
+- The second `stream` is defined using the [Dependency graph](#dependency-graph) syntax and will execute the following graph at a `100` FPS cadence:
+ ```mermaid
+ graph LR;
+ E-->F;
+ F-->G;
+ ```
+
+Run:
+```
+./protopipe -cfg config.yaml --drop_frames
+```
+Both streams will be executed simultaneously in different threads for `15` seconds.
+
+Output format:
+```
+stream 0: throughput: <number> FPS, latency: min: <number> ms, avg: <number> ms, max: <number> ms, frames dropped: <dropped>/<total>
+stream 1: throughput: <number> FPS, latency: min: <number> ms, avg: <number> ms, max: <number> ms, frames dropped: <dropped>/<total>
+```
+
+## How to run
+Protopipe has the following `CLI` options to configure the execution behaviour:
+
+`--cfg <path>` - Path to the configuration file.
+`--drop_frames` - **Optional**. Drop frames if they come earlier than the stream iteration is completed. E.g. if a `stream` works with `target_fps: 10` (~`100ms` latency) but a stream iteration takes `150ms`, the next iteration will be triggered only in `50ms` when this option is enabled.
+`--pipeline` - **Optional**. Enables pipelined execution for all scenarios/streams.
+`--niter <value>` - **Optional**. Number of iterations. If specified, overwrites the termination criterion specified in the configuration file for all scenarios/streams.
+`-t <value>` - **Optional**. Time in seconds. If specified, overwrites the termination criterion specified in the configuration file for all scenarios/streams.
+`--mode <value>` - **Optional**. Execution mode: *performance*, *reference*, *validation*. (**Default**: *performance*)
+`--exec_filter <value>` - **Optional**. Run only the scenarios that match the provided string pattern.
+`--inference_only` - **Optional**. Run only inference execution for every model, excluding i/o data transfer. (**Default**: true)
+
+### Filtering
+Sometimes it's necessary to run only a particular set of scenarios from the config file rather than all of them.
+For example, consider the following `scenarios.yaml` config file with three scenarios:
+```
+model_dir:
+  local: /models/
+device_name: CPU
+multi_inference:
+- input_stream_list:
+  - network:
+    - { name: A.xml }
+- input_stream_list:
+  - network:
+    - { name: B.xml }
+- input_stream_list:
+  - network:
+    - { name: C.xml }
+```
+By default, all scenarios are assigned unique names following the `multi_inference_<index>` pattern.
+E.g. the scenario with model `A.xml` has the default name `multi_inference_0`.
+Use the `-exec_filter <pattern>` CLI option to control which scenarios from the config should be executed:
+```
+./protopipe -cfg scenarios.yaml -niter 100 -exec_filter=".*[0-1]"
+```
+Only `multi_inference_0` and `multi_inference_1` scenarios will be executed.
+
+It's also possible to overwrite the default names in the config file:
+```
+model_dir:
+  local: /models/
+device_name: CPU
+multi_inference:
+- name: Model-A-Scenario
+  input_stream_list:
+  - network:
+    - { name: A.xml }
+- name: Model-B-Scenario
+  input_stream_list:
+  - network:
+    - { name: B.xml }
+- name: Model-C-Scenario
+  input_stream_list:
+  - network:
+    - { name: C.xml }
+```
+and use them for filtering:
+```
+./protopipe --cfg scenarios.yaml --niter 100 --exec_filter ".*-[AB].*"
+```
+Only `Model-A-Scenario` and `Model-B-Scenario` scenarios will be executed.
+
+**Note**: Protopipe uses [std::regex](https://en.cppreference.com/w/cpp/regex) rules for pattern matching.
+
+## Use cases
+Once the scenario configuration is defined (see: [How to configure](#how-to-configure)), it can be used for various use cases.
+### Measure performance
+`Protopipe` can report performance statistics; consider the following run example:
+```
+./protopipe --cfg config.yaml --drop_frames -t 30
+```
+Example of output:
+```
+stream 0: throughput: 7.62659 FPS, latency: min: 93.804 ms, avg: 111.31 ms, max: 145.178 ms, frames dropped: 290/390
+```
+It might also be interesting to play with the following `CLI` options:
+- `--drop_frames=false` - Disables frame drop. When frame drop is enabled and an iteration doesn't fit into the 1000 / `target_fps` latency interval, the next iteration is skipped.
+- `--inference_only=false` - Enables i/o data transfer for inference. By default only inference time is captured in performance statistics.
+- `--pipeline` - Enables ***pipelined*** execution.
+
+### Generate reference
+As a prerequisite for accuracy validation, it's useful to have a mechanism for generating the reference output data to compare against. In Protopipe this can be done using the `reference` mode.
+Use additional parameters to configure `reference` mode:
+- `input_data` - **Required**. Path that contains input data for the model; if the location under the path is empty, input data is generated randomly and dumped into the specified path.
+- `output_data` - **Required**. Path where the reference output data is dumped.
+- `random` - **Optional**. Initializer used to generate input data randomly. (**Default**: `{ dist: uniform, low: 0.0, high: 255 }`)
+
+Examples:
+```
+random: { dist: uniform, low: -1.0, high: 1.0 } # specified globally for all models
+multi_inference:
+- input_stream_list:
+  - network:
+    - { name: A.xml, ip: FP16, input_data: A-inputs/, output_data: B-inputs/ }
+    # overwrites the global initializer for the model B.xml
+    - { name: B.xml, ip: FP16, input_data: B-inputs/, output_data: B-outputs/, random: { dist: uniform, low: 0, high: 255.0 } }
+```
+
+Run `Protopipe` in `reference` mode:
+```
+./protopipe -cfg config.yaml -mode reference -niter 10
+```
+Output:
+```
+stream 0: Reference data has been generated for 10 iteration(s)
+```
+
+### Validate accuracy
+Protopipe has a dedicated `validation` mode to perform accuracy validation. An existing configuration file can simply be extended to perform it:
+
+- `save_validation_outputs` - **Optional**. Accepts the path where the actual execution outputs are dumped. (**Default**: disabled)
+- `metric` - **Optional**. Accuracy metric to compare actual vs reference outputs. (**Default**: `{ name: norm, tolerance: 0.0 }`)
+- `input_data` - **Required**. Path that contains input data for the model.
+- `output_data` - **Required**. Path that contains the **reference** data to compare with.
+
+**Note**: If a folder is provided for either **input_data** or **output_data**, it must have the following layout:
+```
+input_data/
+  <input_name>/
+    input_0.bin
+    input_1.bin
+    ...
+    input_N.bin
+
+output_data/
+  <output_name>/
+    output_0.bin
+    output_1.bin
+    ...
+    output_N.bin
+```
+**Note**: input and output data can be generated automatically by using `Protopipe` in **reference** mode. (see: [Generate reference](#generate-reference))
+
+Examples:
+```
+- { name: model.xml, ip: FP16, input_data: input_data/, output_data: output_data/ }
+- { name: model.xml, ip: FP16, input_data: input.bin, output_data: output.bin }
+- { name: model.xml, ip: FP16, input_data: { data: input.bin }, output_data: { result: output.bin} }
+```
+
+### Supported metrics
+1. L2 Norm: $$\text{Norm}(\mathbf{A}, \mathbf{B}) = \sqrt{\sum_{i,j} (A_{i,j} - B_{i,j})^2}$$
+Parameters:
+ - `name: norm` - **Required**. Enables the L2 Norm metric.
+ - `tolerance` - **Required**. If the metric value is greater than **tolerance**, the comparison is treated as **FAIL**.
+2. Cosine similarity: $$\text{Cosine}(\mathbf{A}, \mathbf{B}) = \frac{\mathbf{A} \cdot \mathbf{B}}{\| \mathbf{A} \|_2 \| \mathbf{B} \|_2}$$
+Parameters:
+ - `name: cosine` - **Required**. Enables the cosine similarity metric.
+ - `threshold` - **Required**. If the metric value is lower than **threshold**, the comparison is treated as **FAIL**.
+3. NRMSE: $$\text{NRMSE}(\mathbf{A}, \mathbf{B}) = \frac{1}{D}\sqrt{\frac{1}{N}\sum_{i=1}^N(A_i - B_i)^2}$$
+Where
+$$D = \text{max}(0.001, \text{max}(A_{max}-A_{min}\text{, } B_{max}-B_{min}))$$
+Parameters:
+ - `name: nrmse` - **Required**. Enables the NRMSE metric.
+ - `tolerance` - **Required**. If the metric value is greater than **tolerance**, the comparison is treated as **FAIL**.
+
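+For reference, hypothetical per-model metric overrides using the metrics above might look like the following sketch (paths and threshold/tolerance values are placeholders):
+```
+- { name: model.xml, input_data: inputs/, output_data: outputs/, metric: { name: cosine, threshold: 0.99 } }
+- { name: model.xml, input_data: inputs/, output_data: outputs/, metric: { name: nrmse, tolerance: 0.01 } }
+```
+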
+### Example
+Consider the following `config.yaml`:
+```
+model_dir:
+  local: C:\workspace\models
+device_name: NPU
+compiler_type: MLIR
+log_level: INFO
+
+save_validation_outputs: actual-outputs/
+metric: { name: norm, tolerance: 0.01 }
+
+multi_inference:
+- input_stream_list:
+  - network:
+    - { name: A.xml, ip: FP16, input_data: A-inputs/, output_data: A-outputs/ }
+    # overwrites the global metric for the model B.xml
+    - { name: B.xml, ip: FP16, input_data: B-inputs/, output_data: B-outputs/, metric: { name: norm, tolerance: 0.0 } }
+```
+
+Use the `reference` mode to generate random input data for every model and calculate the reference outputs.
+**Note**: If the reference device is different, change `device_name` in the config file accordingly.
+```
+./protopipe --cfg config.yaml --mode reference -niter 10
+```
+Use `validation` mode to perform accuracy validation:
+```
+./protopipe --cfg config.yaml --mode validation -t 15
+```
+Example of successful validation:
+```
+stream 0: Validation has passed for <number> iteration(s)
+```
+In case of accuracy issues the output will be the following:
+```
+stream 0: Accuracy check failed on <number> iteration(s) (first 10):
+Iteration <number>:
+ Model: A, Layer: <layer name>, Metric: Norm{tolerance: 0.01}, Reason: <value> > 0.01;
+```
+
+## How to build
+### Prerequisites
+1. Clone `npu-plugin` repository
+2. Build OpenCV G-API with OpenVINO/ONNXRT support
+#### Build OpenCV G-API with OpenVINO/ONNXRT support
+1. Clone OpenCV repo:
+ ```
+ git clone https://github.com/opencv/opencv
+ cd opencv && git checkout 78195bc3df
+ ```
+2. Build OpenCV G-API:
+ ```
+ mkdir -p build && cd build
+ cmake ../ -DBUILD_LIST=gapi \
+           -DCMAKE_BUILD_TYPE=Release \
+           -DWITH_OPENVINO=ON \
+           -DOpenVINO_DIR=<path-to-OpenVINO-cmake-dir> \
+           -DWITH_ONNX=ON \
+           -DORT_INSTALL_DIR=<path-to-ONNXRuntime-install-dir>
+ cmake --build . --config Release --target opencv_gapi --parallel
+ ```
+### In-plugin build
+
+1. Clone and build [OpenVINO](https://github.com/openvinotoolkit/openvino) from sources
+2. Build OpenCV G-API with OpenVINO / ONNXRT support
+3. Clone `npu-plugin` repository
+ ```
+ git clone https://github.com/openvinotoolkit/npu_plugin
+ cd npu_plugin
+ git submodule update --init --recursive
+ ```
+4. Build `Protopipe` as part of the `npu-plugin` build:
+ ```
+ mkdir build && cd build
+ cmake ../ -DOpenCV_DIR=<path-to-OpenCV-cmake-dir> -DOpenVINODeveloperPackage_DIR=<path-to-OpenVINODeveloperPackage-cmake-dir>
+ cmake --build . --config Release --target protopipe --parallel
+ ```
+
+### Standalone build
+1. Build `yaml-cpp`
+ ```
+ mkdir -p yaml-cpp_build && cd yaml-cpp_build
+ cmake <path-to-npu-plugin>/thirdparty/yaml-cpp -DCMAKE_INSTALL_PREFIX=install
+ cmake --build . --config Release --target install --parallel
+ ```
+2. Build `gflags`
+ ```
+ git clone https://github.com/gflags/gflags
+ cd gflags
+ mkdir -p gflags_build && cd gflags_build
+ cmake ../ -DCMAKE_INSTALL_PREFIX=install
+ cmake --build . --config Release --target install --parallel
+ ```
+3. Build `Protopipe`
+ ```
+ mkdir -p protopipe_build && cd protopipe_build
+ cmake <path-to-npu-plugin>/tools/protopipe/ \
+       -DOpenCV_DIR=<path-to-OpenCV-cmake-dir> \
+       -Dgflags_DIR=<path-to-gflags-cmake-dir> \
+       -DOpenVINO_DIR=<path-to-OpenVINO-cmake-dir>
+
+ cmake --build . --config Release --target protopipe --parallel
+ ```
+### Verify the installation
+**Note**: Make sure `opencv_*` libraries are visible in the environment:
+- Windows:
+ ```
+ set PATH=<path-to-opencv>\build\bin\Release\;%PATH%
+ ```
+- Linux:
+ ```
+ export LD_LIBRARY_PATH=<path-to-opencv>/build/lib/:$LD_LIBRARY_PATH
+ ```
+**Note**: If `OpenCV` has been built with `ONNXRT` support, all `ONNXRT`-related libraries must be located in the same folder as the `protopipe` executable.
+
+Run `Protopipe` with the `-h` flag to verify the installation:
+```
+> protopipe.exe -h
+```
+A successful build will show information about the `Protopipe` CLI options:
+```
+protopipe [OPTIONS]
+
+ Common options:
+ -h Optional. Print the usage message.
+ -cfg Path to the configuration file.
+ -pipeline Optional. Enable pipelined execution.
+ -drop_frames Optional. Drop frames if they come earlier than pipeline is completed.
+ -mode Optional. Simulation mode: performance (default), reference, validation.
+ -niter Optional. Number of iterations. If specified overwrites termination criterion for all scenarios in configuration file.
+ -t Optional. Time in seconds. If specified overwrites termination criterion for all scenarios in configuration file.
+ -inference_only Optional. Run only inference execution for every model excluding i/o data transfer. Applicable only for "performance" mode. (default: true).
+ -exec_filter Optional. Run the scenarios that match provided string pattern.
+```
diff --git a/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake b/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake
new file mode 100644
index 00000000000000..090756f86c44c0
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/cmake/standalone.cmake
@@ -0,0 +1,63 @@
+#
+# Copyright (C) 2024 Intel Corporation.
+# SPDX-License-Identifier: Apache 2.0
+#
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+if("${CMAKE_BUILD_TYPE}" STREQUAL "")
+ set(CMAKE_BUILD_TYPE "Release")
+endif()
+
+find_package(OpenVINO REQUIRED COMPONENTS Runtime)
+find_package(Threads REQUIRED)
+find_package(OpenCV 4.9.0 REQUIRED COMPONENTS gapi)
+
+find_package(yaml-cpp QUIET)
+find_package(gflags QUIET)
+
+if (NOT yaml-cpp_FOUND)
+ set(YAML_CPP_SOURCES_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/yaml-cpp")
+ message(STATUS "yaml-cpp package was not found. Trying to find source package in ${YAML_CPP_SOURCES_PATH}.")
+ if(EXISTS ${YAML_CPP_SOURCES_PATH})
+ message(STATUS "yaml-cpp source package found. yaml-cpp will be built from sources.")
+ add_subdirectory(${YAML_CPP_SOURCES_PATH} yaml-cpp EXCLUDE_FROM_ALL)
+ else()
+ message(FATAL_ERROR "yaml-cpp package and sources were not found. CMake will exit." )
+ endif()
+endif()
+
+if (NOT gflags_FOUND)
+ set(GFLAGS_SOURCES_PATH "${PACKAGE_PREFIX_DIR}/samples/cpp/thirdparty/gflags")
+ message(STATUS "gflags package was not found. Trying to find source package in ${GFLAGS_SOURCES_PATH}.")
+ if(EXISTS ${GFLAGS_SOURCES_PATH})
+ message(STATUS "gflags source package found. gflags will be built from sources.")
+ add_subdirectory(${GFLAGS_SOURCES_PATH} gflags EXCLUDE_FROM_ALL)
+ else()
+ message(FATAL_ERROR "gflags was not found. CMake will exit." )
+ endif()
+endif()
+
+set(DEPENDENCIES
+ Threads::Threads
+ gflags
+ yaml-cpp
+ openvino::runtime
+ opencv_gapi
+)
+
+if (WIN32)
+ list(APPEND DEPENDENCIES "winmm.lib")
+endif()
+
+file(GLOB_RECURSE SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp")
+list(APPEND SOURCES main.cpp)
+
+add_executable(${TARGET_NAME} ${SOURCES})
+target_link_libraries(${TARGET_NAME} PRIVATE ${DEPENDENCIES})
+target_include_directories(${TARGET_NAME} PUBLIC "${PROJECT_SOURCE_DIR}/src/")
+
+install(TARGETS ${TARGET_NAME}
+ DESTINATION "tools/${TARGET_NAME}"
+ COMPONENT npu_tools)
diff --git a/src/plugins/intel_npu/tools/protopipe/main.cpp b/src/plugins/intel_npu/tools/protopipe/main.cpp
new file mode 100644
index 00000000000000..8596ba864335ca
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/main.cpp
@@ -0,0 +1,266 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation.
+// SPDX-License-Identifier: Apache 2.0
+//
+
+#include <functional>
+#include <future>
+#include <iostream>
+#include <regex>
+#include <string>
+#include <vector>
+
+#include <gflags/gflags.h>
+
+#include "parser/parser.hpp"
+#include "scenario/scenario_graph.hpp"
+#include "simulation/performance_mode.hpp"
+#include "simulation/reference_mode.hpp"
+#include "simulation/validation_mode.hpp"
+
+#include "utils/error.hpp"
+#include "utils/logger.hpp"
+
+static constexpr char help_message[] = "Optional. Print the usage message.";
+static constexpr char cfg_message[] = "Path to the configuration file.";
+static constexpr char device_message[] =
+ "Optional. Device name. If specified overwrites device specified in config file.";
+static constexpr char pipeline_message[] = "Optional. Enable pipelined execution.";
+static constexpr char drop_message[] = "Optional. Drop frames if they come earlier than pipeline is completed.";
+static constexpr char mode_message[] = "Optional. Simulation mode: performance (default), reference, validation.";
+static constexpr char niter_message[] = "Optional. Number of iterations. If specified overwrites termination criterion"
+ " for all scenarios in configuration file.";
+static constexpr char exec_time_message[] = "Optional. Time in seconds. If specified overwrites termination criterion"
+ " for all scenarios in configuration file.";
+static constexpr char inference_only_message[] =
+ "Optional. Run only inference execution for every model excluding i/o data transfer."
+ " Applicable only for \"performance\" mode. (default: true).";
+
+static constexpr char exec_filter_msg[] = "Optional. Run the scenarios that match provided string pattern.";
+
+DEFINE_bool(h, false, help_message);
+DEFINE_string(cfg, "", cfg_message);
+DEFINE_string(d, "", device_message);
+DEFINE_bool(pipeline, false, pipeline_message);
+DEFINE_bool(drop_frames, false, drop_message);
+DEFINE_string(mode, "performance", mode_message);
+DEFINE_uint64(niter, 0, niter_message);
+DEFINE_uint64(t, 0, exec_time_message);
+DEFINE_bool(inference_only, true, inference_only_message);
+DEFINE_string(exec_filter, ".*", exec_filter_msg);
+
+static void showUsage() {
+ std::cout << "protopipe [OPTIONS]" << std::endl;
+ std::cout << std::endl;
+ std::cout << " Common options: " << std::endl;
+ std::cout << " -h " << help_message << std::endl;
+ std::cout << " -cfg " << cfg_message << std::endl;
+ std::cout << " -pipeline " << pipeline_message << std::endl;
+ std::cout << " -drop_frames " << drop_message << std::endl;
+ std::cout << " -d " << device_message << std::endl;
+ std::cout << " -mode " << mode_message << std::endl;
+ std::cout << " -niter " << niter_message << std::endl;
+ std::cout << " -t " << exec_time_message << std::endl;
+ std::cout << " -inference_only " << inference_only_message << std::endl;
+ std::cout << " -exec_filter " << exec_filter_msg << std::endl;
+ std::cout << std::endl;
+}
+
+bool parseCommandLine(int* argc, char*** argv) {
+ gflags::ParseCommandLineNonHelpFlags(argc, argv, true);
+
+ if (FLAGS_h) {
+ showUsage();
+ return false;
+ }
+
+ if (FLAGS_cfg.empty()) {
+ throw std::invalid_argument("Path to config file is required");
+ }
+
+ std::cout << "Parameters:" << std::endl;
+ std::cout << " Config file: " << FLAGS_cfg << std::endl;
+ std::cout << " Pipelining is enabled: " << std::boolalpha << FLAGS_pipeline << std::endl;
+ std::cout << " Simulation mode: " << FLAGS_mode << std::endl;
+ std::cout << " Inference only: " << std::boolalpha << FLAGS_inference_only << std::endl;
+ std::cout << " Device: " << FLAGS_d << std::endl;
+ return true;
+}
+
+static ICompiled::Ptr compileSimulation(Simulation::Ptr simulation, const bool pipelined, const bool drop_frames) {
+ LOG_INFO() << "Compile simulation" << std::endl;
+ if (pipelined) {
+ return simulation->compilePipelined(drop_frames);
+ }
+ return simulation->compileSync(drop_frames);
+};
+
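+// NB: Runs all registered callables concurrently (one std::async task each)
+// and blocks until every one of them has finished.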
+class ThreadRunner {
+public:
+ using F = std::function<void()>;
+ void add(F&& func) {
+ m_funcs.push_back(std::move(func));
+ }
+ void run();
+
+private:
+ std::vector<F> m_funcs;
+};
+
+void ThreadRunner::run() {
+ std::vector<std::future<void>> futures;
+ futures.reserve(m_funcs.size());
+ for (auto&& func : m_funcs) {
+ futures.push_back(std::async(std::launch::async, std::move(func)));
+ }
+ for (auto& future : futures) {
+ future.get();
+ };
+};
+
+class Task {
+public:
+ Task(ICompiled::Ptr&& compiled, std::string&& name, ITermCriterion::Ptr&& criterion);
+
+ void operator()();
+ const Result& result() const;
+ const std::string& name() const;
+
+private:
+ ICompiled::Ptr m_compiled;
+ std::string m_name;
+ ITermCriterion::Ptr m_criterion;
+
+ Result m_result;
+};
+
+Task::Task(ICompiled::Ptr&& compiled, std::string&& name, ITermCriterion::Ptr&& criterion)
+ : m_compiled(std::move(compiled)), m_name(std::move(name)), m_criterion(std::move(criterion)) {
+}
+
+void Task::operator()() {
+ try {
+ m_result = m_compiled->run(m_criterion);
+ } catch (const std::exception& e) {
+ m_result = Error{e.what()};
+ }
+}
+
+const Result& Task::result() const {
+ return m_result;
+}
+
+const std::string& Task::name() const {
+ return m_name;
+}
+
+static Simulation::Ptr createSimulation(const std::string& mode, StreamDesc&& stream, const bool inference_only,
+ const Config& config) {
+ Simulation::Ptr simulation;
+ // NB: Common parameters for all simulations
+ Simulation::Config cfg{stream.name, stream.frames_interval_in_us, config.disable_high_resolution_timer,
+ std::move(stream.graph), std::move(stream.infer_params_map)};
+ if (mode == "performance") {
+ PerformanceSimulation::Options opts{config.initializer, std::move(stream.initializers_map),
+ std::move(stream.input_data_map), inference_only,
+ std::move(stream.target_latency)};
+ simulation = std::make_shared<PerformanceSimulation>(std::move(cfg), std::move(opts));
+ } else if (mode == "reference") {
+ CalcRefSimulation::Options opts{config.initializer, std::move(stream.initializers_map),
+ std::move(stream.input_data_map), std::move(stream.output_data_map)};
+ simulation = std::make_shared<CalcRefSimulation>(std::move(cfg), std::move(opts));
+ } else if (mode == "validation") {
+ ValSimulation::Options opts{config.metric, std::move(stream.metrics_map), std::move(stream.input_data_map),
+ std::move(stream.output_data_map), std::move(stream.per_iter_outputs_path)};
+ simulation = std::make_shared<ValSimulation>(std::move(cfg), std::move(opts));
+ } else {
+ throw std::logic_error("Unsupported simulation mode: " + mode);
+ }
+ ASSERT(simulation);
+ return simulation;
+}
+
+int main(int argc, char* argv[]) {
+ // NB: Intentionally wrapped into try-catch to display exceptions that occur on Windows.
+ try {
+ if (!parseCommandLine(&argc, &argv)) {
+ return 0;
+ }
+ ReplaceBy replace_by{FLAGS_d};
+
+ auto parser = std::make_shared<ScenarioParser>(FLAGS_cfg);
+
+ LOG_INFO() << "Parse scenarios from " << FLAGS_cfg << " config file" << std::endl;
+ auto config = parser->parseScenarios(replace_by);
+ LOG_INFO() << "Found " << config.scenarios.size() << " scenario(s)" << std::endl;
+
+ // NB: Overwrite termination criteria for all scenarios if specified via CLI
+ ITermCriterion::Ptr global_criterion;
+ if (FLAGS_niter != 0u) {
+ LOG_INFO() << "Termination criterion of " << FLAGS_niter << " iteration(s) will be used for all scenarios"
+ << std::endl;
+ global_criterion = std::make_shared<Iterations>(FLAGS_niter);
+ }
+ if (FLAGS_t != 0u) {
+ if (global_criterion) {
+ // TODO: In fact, it makes sense to have them both enabled.
+ THROW_ERROR("-niter and -t options can't be specified together!");
+ }
+ LOG_INFO() << "Termination criterion of " << FLAGS_t << " second(s) will be used for all scenarios"
+ << std::endl;
+ // NB: TimeOut accepts microseconds
+ global_criterion = std::make_shared<TimeOut>(FLAGS_t * 1'000'000);
+ }
+
+ std::regex filter_regex{FLAGS_exec_filter};
+ bool any_scenario_failed = false;
+ for (auto&& scenario : config.scenarios) {
+ // NB: Skip the scenarios that don't match provided filter pattern
+ if (!std::regex_match(scenario.name, filter_regex)) {
+ LOG_INFO() << "Skip the scenario " << scenario.name << " as it doesn't match the -exec_filter=\""
+ << FLAGS_exec_filter << "\" pattern" << std::endl;
+ continue;
+ }
+ LOG_INFO() << "Start processing " << scenario.name << std::endl;
+
+ ThreadRunner runner;
+ std::vector<Task> tasks;
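+ // NB: Reserve capacity up front so the std::ref(tasks.back()) handed to the
+ // runner below stays valid (no reallocation while tasks are added).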
+ tasks.reserve(scenario.streams.size());
+ for (auto&& stream : scenario.streams) {
+ auto criterion = stream.criterion;
+ auto stream_name = stream.name;
+ if (global_criterion) {
+ if (criterion) {
+ LOG_INFO() << "Stream: " << stream_name
+ << " termination criterion is overwritten by CLI parameter" << std::endl;
+ }
+ criterion = global_criterion->clone();
+ }
+ auto simulation = createSimulation(FLAGS_mode, std::move(stream), FLAGS_inference_only, config);
+ auto compiled = compileSimulation(simulation, FLAGS_pipeline, FLAGS_drop_frames);
+ tasks.emplace_back(std::move(compiled), std::move(stream_name), std::move(criterion));
+ runner.add(std::ref(tasks.back()));
+ }
+
+ LOG_INFO() << "Run " << tasks.size() << " stream(s) asynchronously" << std::endl;
+ runner.run();
+ LOG_INFO() << "Execution has finished" << std::endl;
+
+ for (const auto& task : tasks) {
+ if (!task.result()) {
+ // NB: Scenario failed if any of the streams failed
+ any_scenario_failed = true;
+ }
+ std::cout << "stream " << task.name() << ": " << task.result().str() << std::endl;
+ }
+ std::cout << "\n";
+ }
+ if (any_scenario_failed) {
+ return EXIT_FAILURE;
+ }
+ } catch (const std::exception& e) {
+ std::cout << e.what() << std::endl;
+ throw;
+ } catch (...) {
+ std::cout << "Unknown error" << std::endl;
+ throw;
+ }
+ return 0;
+}
diff --git a/src/plugins/intel_npu/tools/protopipe/src/graph.cpp b/src/plugins/intel_npu/tools/protopipe/src/graph.cpp
new file mode 100644
index 00000000000000..d13d2954a21b12
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/graph.cpp
@@ -0,0 +1,140 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <algorithm>
+#include <stack>
+
+#include "graph.hpp"
+
+Nodes Node::srcNodes() const {
+ Nodes src_nodes;
+ src_nodes.reserve(m_src_edges.size());
+ std::transform(m_src_edges.begin(), m_src_edges.end(), std::back_inserter(src_nodes), [](EdgeHandle edge) {
+ return edge->srcNode();
+ });
+ return src_nodes;
+}
+
+Nodes Node::dstNodes() const {
+ Nodes dst_nodes;
+ dst_nodes.reserve(m_dst_edges.size());
+ std::transform(m_dst_edges.begin(), m_dst_edges.end(), std::back_inserter(dst_nodes), [](EdgeHandle edge) {
+ return edge->dstNode();
+ });
+ return dst_nodes;
+}
+
+Edges Node::srcEdges() const {
+ return {m_src_edges.begin(), m_src_edges.end()};
+}
+
+Edges Node::dstEdges() const {
+ return {m_dst_edges.begin(), m_dst_edges.end()};
+}
+
+NodeHandle Graph::create() {
+ auto node = std::make_shared<Node>();
+ NodeHandle nh(node);
+ m_nodes.emplace(node.get(), MetaPtr{node, Meta{}});
+ return nh;
+}
+
+void Graph::remove(NodeHandle nh) {
+ auto src_edges = nh->srcEdges();
+ for (size_t i = 0; i < src_edges.size(); ++i) {
+ remove(src_edges[i]);
+ }
+ auto dst_edges = nh->dstEdges();
+ for (size_t i = 0; i < dst_edges.size(); ++i) {
+ remove(dst_edges[i]);
+ }
+ m_nodes.erase(nh.get());
+}
+
+void Graph::remove(EdgeHandle eh) {
+ auto src = eh->srcNode();
+ auto dst = eh->dstNode();
+ src->m_dst_edges.erase(eh);
+ dst->m_src_edges.erase(eh);
+ m_edges.erase(eh.get());
+};
+
+EdgeHandle Graph::link(NodeHandle src, NodeHandle dst) {
+ auto edge = std::make_shared<Edge>(src, dst);
+ EdgeHandle eh{edge};
+ m_edges.emplace(edge.get(), MetaPtr{edge, Meta{}});
+ src->m_dst_edges.insert(eh);
+ dst->m_src_edges.insert(eh);
+ return eh;
+}
+
+Meta& Graph::meta(NodeHandle handle) {
+ const auto it = m_nodes.find(handle.get());
+ ASSERT(it != m_nodes.end());
+ return it->second.meta;
+}
+
+const Meta& Graph::meta(NodeHandle handle) const {
+ const auto it = m_nodes.find(handle.get());
+ ASSERT(it != m_nodes.end());
+ return it->second.meta;
+}
+
+Meta& Graph::meta(EdgeHandle handle) {
+ const auto it = m_edges.find(handle.get());
+ ASSERT(it != m_edges.end());
+ return it->second.meta;
+}
+
+const Meta& Graph::meta(EdgeHandle handle) const {
+ const auto it = m_edges.find(handle.get());
+ ASSERT(it != m_edges.end());
+ return it->second.meta;
+}
+
+std::vector<NodeHandle> Graph::nodes() const {
+ std::vector<NodeHandle> ret;
+ std::transform(m_nodes.begin(), m_nodes.end(), std::back_inserter(ret), [](const auto& p) {
+ return NodeHandle{p.second.ptr};
+ });
+ return ret;
+}
+
+static void dfs(NodeHandle& nh, std::unordered_set<NodeHandle>& visited, std::stack<NodeHandle>& stack) {
+ visited.insert(nh);
+ auto dst_nodes = nh->dstNodes();
+ for (auto dst_nh : dst_nodes) {
+ auto it = visited.find(dst_nh);
+ if (it == visited.end()) {
+ dfs(dst_nh, visited, stack);
+ }
+ }
+ stack.push(nh);
+};
+
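+// NB: Topological sort via DFS post-order: every node is pushed onto the stack
+// after all of its descendants, so popping the stack yields sources first.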
+std::vector<NodeHandle> Graph::sorted() const {
+ std::unordered_set<NodeHandle> visited;
+ std::stack<NodeHandle> stack;
+ const auto nodes = this->nodes();
+ for (auto nh : nodes) {
+ auto it = visited.find(nh);
+ if (it == visited.end()) {
+ dfs(nh, visited, stack);
+ }
+ }
+ std::vector<NodeHandle> sorted;
+ while (!stack.empty()) {
+ sorted.push_back(stack.top());
+ stack.pop();
+ }
+ return sorted;
+}
+
+Meta& Meta::operator+=(const Meta& other) {
+ for (const auto& p : other.store) {
+ ASSERT(store.emplace(p.first, p.second).second);
+ }
+ return *this;
+}
diff --git a/src/plugins/intel_npu/tools/protopipe/src/graph.hpp b/src/plugins/intel_npu/tools/protopipe/src/graph.hpp
new file mode 100644
index 00000000000000..66aeccbe156d09
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/graph.hpp
@@ -0,0 +1,168 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <any>
+#include <cstdint>
+#include <memory>
+#include <typeindex>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "utils/error.hpp"
+
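+// NB: Non-owning handle to a graph element: compared and hashed by the
+// underlying raw pointer so it can be stored in unordered containers.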
+template <typename T>
+class WeakHandle {
+public:
+ explicit WeakHandle(std::shared_ptr<T> obj): m_obj(obj) {
+ }
+ T* get() const {
+ return m_obj.lock().get();
+ }
+ T* operator->() const {
+ return get();
+ }
+ bool operator==(const WeakHandle& other) const {
+ return get() == other.get();
+ }
+
+private:
+ std::weak_ptr<T> m_obj;
+};
+
+namespace std {
+template <typename T>
+struct hash<WeakHandle<T>> {
+ uint64_t operator()(const WeakHandle<T>& handle) const {
+ return std::hash<T*>()(handle.get());
+ }
+};
+} // namespace std
+
+class Graph;
+class Node;
+class Edge;
+
+using NodeHandle = WeakHandle<Node>;
+using EdgeHandle = WeakHandle<Edge>;
+using Nodes = std::vector<NodeHandle>;
+using Edges = std::vector<EdgeHandle>;
+using NodeSet = std::unordered_set<NodeHandle>;
+using EdgeSet = std::unordered_set<EdgeHandle>;
+
+class Node {
+ friend class Graph;
+ using Ptr = std::shared_ptr<Node>;
+
+public:
+ Nodes srcNodes() const;
+ Nodes dstNodes() const;
+ Edges srcEdges() const;
+ Edges dstEdges() const;
+
+private:
+ EdgeSet m_src_edges;
+ EdgeSet m_dst_edges;
+};
+
+class Edge {
+ friend class Graph;
+ using Ptr = std::shared_ptr<Edge>;
+
+public:
+ Edge(NodeHandle src, NodeHandle dst): m_src(src), m_dst(dst) {
+ }
+ NodeHandle srcNode() const {
+ return m_src;
+ }
+ NodeHandle dstNode() const {
+ return m_dst;
+ }
+
+private:
+ NodeHandle m_src;
+ NodeHandle m_dst;
+};
+
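+// NB: Heterogeneous property map: stores at most one value per C++ type,
+// keyed by std::type_index.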
+class Meta {
+public:
+ template <typename T>
+ void set(T&& meta);
+ template <typename T>
+ const T& get() const;
+ template <typename T>
+ T& get();
+ template <typename T>
+ bool has() const;
+ Meta& operator+=(const Meta& other);
+
+private:
+ using MetaStore = std::unordered_map<std::type_index, std::any>;
+ MetaStore store;
+};
+
+template <typename T>
+void Meta::set(T&& meta) {
+ // NB: Check if there is no such meta yet.
+ ASSERT(store.emplace(std::type_index(typeid(T)), std::forward<T>(meta)).second);
+}
+
+template <typename T>
+bool Meta::has() const {
+ auto it = store.find(std::type_index(typeid(T)));
+ return it != store.end();
+}
+
+template <typename T>
+const T& Meta::get() const {
+ const auto it = store.find(std::type_index(typeid(T)));
+ ASSERT(it != store.end());
+ return *std::any_cast<T>(&it->second);
+}
+
+template <typename T>
+T& Meta::get() {
+ auto it = store.find(std::type_index(typeid(T)));
+ ASSERT(it != store.end());
+ return *std::any_cast<T>(&it->second);
+}
+
+class Graph {
+public:
+ NodeHandle create();
+ void remove(NodeHandle nh);
+ void remove(EdgeHandle eh);
+ EdgeHandle link(NodeHandle src, NodeHandle dst);
+
+ Meta& meta() {
+ return m_graph_meta;
+ }
+ const Meta& meta() const {
+ return m_graph_meta;
+ }
+
+ Meta& meta(NodeHandle handle);
+ const Meta& meta(NodeHandle handle) const;
+ Meta& meta(EdgeHandle handle);
+ const Meta& meta(EdgeHandle handle) const;
+
+ std::vector nodes() const;
+ std::vector sorted() const;
+
+private:
+ template <typename T>
+ struct MetaPtr {
+ std::shared_ptr<T> ptr;
+ Meta meta;
+ };
+ template <typename T>
+ using MetaMap = std::unordered_map<T*, MetaPtr<T>>;
+
+ Meta m_graph_meta;
+ MetaMap<Node> m_nodes;
+ MetaMap<Edge> m_edges;
+};
diff --git a/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp
new file mode 100644
index 00000000000000..34099d36a69fdb
--- /dev/null
+++ b/src/plugins/intel_npu/tools/protopipe/src/parser/config.cpp
@@ -0,0 +1,872 @@
+//
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "parser/config.hpp"
+
+#include "utils/error.hpp"
+#include "utils/logger.hpp"
+
+#include <filesystem>
+#include <yaml-cpp/yaml.h>